@kitlangton/motel 0.2.1 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,9 @@ import type { AiCallDetail, AiCallSummary, FacetItem, LogItem, SpanItem, StatsIt
7
7
  import { AI_ATTR_MAP, AI_FTS_KEYS, AI_TEXT_SEARCH_KEYS, truncatePreview } from "../domain.js"
8
8
  import { attributeMap, nanosToMilliseconds, parseAnyValue, spanKindLabel, spanStatusLabel, stringifyValue, type OtlpLogExportRequest, type OtlpTraceExportRequest } from "../otlp.js"
9
9
 
10
/**
 * Reports whether `error` looks like a SQLite lock-contention failure.
 *
 * Only `Error` instances qualify; the message is matched case-insensitively
 * against "database is locked", "database table is locked" and "SQLITE_BUSY".
 * Callers use the result to decide between skipping a step with a warning
 * and rethrowing.
 */
const isSqliteLockError = (error: unknown) => {
  if (!(error instanceof Error)) return false
  return /(database is locked|database table is locked|SQLITE_BUSY)/i.test(error.message)
}
12
+
10
13
  interface SpanRow {
11
14
  readonly trace_id: string
12
15
  readonly span_id: string
@@ -502,108 +505,110 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
502
505
  PRAGMA mmap_size = 268435456;
503
506
  `)
504
507
  } else {
505
- db.exec(`
506
- PRAGMA journal_mode = WAL;
507
- PRAGMA synchronous = NORMAL;
508
- PRAGMA temp_store = MEMORY;
509
- -- Longer busy timeout: the ingest worker holds the write lock
510
- -- for up to a few seconds during big OTLP batches, and the main
511
- -- daemon's retention passes can do the same. 15s gives either
512
- -- side enough slack to serialise instead of erroring.
513
- PRAGMA busy_timeout = 15000;
514
- -- WAL checkpoint automatically when it grows past ~16MB. Without
515
- -- this the WAL happily runs into the hundreds of MB and queries
516
- -- start paying the cost of walking the WAL on every read.
517
- PRAGMA wal_autocheckpoint = 4000;
518
- -- Bump cache above the 2MB default. 64MB fits most hot index pages
519
- -- (trace_summaries, spans, span_attributes indexes) in RAM even on
520
- -- multi-GB databases, cutting cold-read latency meaningfully on
521
- -- picker / search queries that sweep the index.
522
- PRAGMA cache_size = -65536;
523
- -- Let SQLite memory-map the first 256MB of the file. This is a
524
- -- cheap way to avoid read() syscalls on hot pages and lets the OS
525
- -- page cache serve index lookups directly. Safe on macOS and Linux;
526
- -- SQLite silently caps at actual file size for smaller DBs.
527
- PRAGMA mmap_size = 268435456;
528
-
529
- CREATE TABLE IF NOT EXISTS spans (
530
- trace_id TEXT NOT NULL,
531
- span_id TEXT NOT NULL,
532
- parent_span_id TEXT,
533
- service_name TEXT NOT NULL,
534
- scope_name TEXT,
535
- operation_name TEXT NOT NULL,
536
- kind TEXT,
537
- start_time_ms INTEGER NOT NULL,
538
- end_time_ms INTEGER NOT NULL,
539
- duration_ms REAL NOT NULL,
540
- status TEXT NOT NULL,
541
- attributes_json TEXT NOT NULL,
542
- resource_json TEXT NOT NULL,
543
- events_json TEXT NOT NULL,
544
- PRIMARY KEY (trace_id, span_id)
545
- );
546
-
547
- CREATE INDEX IF NOT EXISTS idx_spans_service_time ON spans(service_name, start_time_ms DESC);
548
- CREATE INDEX IF NOT EXISTS idx_spans_trace_time ON spans(trace_id, start_time_ms ASC);
549
- CREATE INDEX IF NOT EXISTS idx_spans_span_id ON spans(span_id);
550
- CREATE INDEX IF NOT EXISTS idx_spans_status_time ON spans(status, start_time_ms DESC);
551
-
552
- CREATE TABLE IF NOT EXISTS logs (
553
- id INTEGER PRIMARY KEY AUTOINCREMENT,
554
- trace_id TEXT,
555
- span_id TEXT,
556
- service_name TEXT NOT NULL,
557
- scope_name TEXT,
558
- severity_text TEXT NOT NULL,
559
- timestamp_ms INTEGER NOT NULL,
560
- body TEXT NOT NULL,
561
- attributes_json TEXT NOT NULL,
562
- resource_json TEXT NOT NULL
563
- );
564
-
565
- CREATE INDEX IF NOT EXISTS idx_logs_service_time ON logs(service_name, timestamp_ms DESC);
566
- CREATE INDEX IF NOT EXISTS idx_logs_trace_time ON logs(trace_id, timestamp_ms DESC);
567
- CREATE INDEX IF NOT EXISTS idx_logs_span_time ON logs(span_id, timestamp_ms DESC);
568
- CREATE INDEX IF NOT EXISTS idx_logs_severity_time ON logs(severity_text, timestamp_ms DESC);
569
-
570
- CREATE TABLE IF NOT EXISTS trace_summaries (
571
- trace_id TEXT PRIMARY KEY,
572
- service_name TEXT NOT NULL,
573
- root_operation_name TEXT NOT NULL,
574
- started_at_ms INTEGER NOT NULL,
575
- ended_at_ms INTEGER NOT NULL,
576
- active_span_count INTEGER NOT NULL DEFAULT 0,
577
- duration_ms REAL NOT NULL,
578
- span_count INTEGER NOT NULL,
579
- error_count INTEGER NOT NULL
580
- );
581
-
582
- CREATE INDEX IF NOT EXISTS idx_trace_summaries_started_at ON trace_summaries(started_at_ms DESC, trace_id DESC);
583
- CREATE INDEX IF NOT EXISTS idx_trace_summaries_service_started_at ON trace_summaries(service_name, started_at_ms DESC, trace_id DESC);
584
- CREATE INDEX IF NOT EXISTS idx_trace_summaries_duration ON trace_summaries(duration_ms DESC);
585
-
586
- CREATE TABLE IF NOT EXISTS span_attributes (
587
- trace_id TEXT NOT NULL,
588
- span_id TEXT NOT NULL,
589
- key TEXT NOT NULL,
590
- value TEXT NOT NULL,
591
- PRIMARY KEY (trace_id, span_id, key)
592
- );
593
-
594
- CREATE INDEX IF NOT EXISTS idx_span_attributes_key_value ON span_attributes(key, value, trace_id, span_id);
595
- CREATE INDEX IF NOT EXISTS idx_span_attributes_trace_span ON span_attributes(trace_id, span_id);
596
-
597
- CREATE TABLE IF NOT EXISTS log_attributes (
598
- log_id INTEGER NOT NULL,
599
- key TEXT NOT NULL,
600
- value TEXT NOT NULL,
601
- PRIMARY KEY (log_id, key)
602
- );
603
-
604
- CREATE INDEX IF NOT EXISTS idx_log_attributes_key_value ON log_attributes(key, value, log_id);
605
- CREATE INDEX IF NOT EXISTS idx_log_attributes_log_id ON log_attributes(log_id);
606
- `)
508
+ db.exec(`
509
+ -- Bump cache above the 2MB default. 64MB fits most hot index pages
510
+ -- (trace_summaries, spans, span_attributes indexes) in RAM even on
511
+ -- multi-GB databases, cutting cold-read latency meaningfully on
512
+ -- picker / search queries that sweep the index.
513
+ PRAGMA cache_size = -65536;
514
+ -- Let SQLite memory-map the first 256MB of the file. This is a
515
+ -- cheap way to avoid read() syscalls on hot pages and lets the OS
516
+ -- page cache serve index lookups directly. Safe on macOS and Linux;
517
+ -- SQLite silently caps at actual file size for smaller DBs.
518
+ PRAGMA mmap_size = 268435456;
519
+ `)
520
+ try {
521
+ db.exec(`
522
+ PRAGMA journal_mode = WAL;
523
+ PRAGMA synchronous = NORMAL;
524
+ PRAGMA temp_store = MEMORY;
525
+ -- WAL checkpoint automatically when it grows past ~16MB. Without
526
+ -- this the WAL happily runs into the hundreds of MB and queries
527
+ -- start paying the cost of walking the WAL on every read.
528
+ PRAGMA wal_autocheckpoint = 4000;
529
+
530
+ CREATE TABLE IF NOT EXISTS spans (
531
+ trace_id TEXT NOT NULL,
532
+ span_id TEXT NOT NULL,
533
+ parent_span_id TEXT,
534
+ service_name TEXT NOT NULL,
535
+ scope_name TEXT,
536
+ operation_name TEXT NOT NULL,
537
+ kind TEXT,
538
+ start_time_ms INTEGER NOT NULL,
539
+ end_time_ms INTEGER NOT NULL,
540
+ duration_ms REAL NOT NULL,
541
+ status TEXT NOT NULL,
542
+ attributes_json TEXT NOT NULL,
543
+ resource_json TEXT NOT NULL,
544
+ events_json TEXT NOT NULL,
545
+ PRIMARY KEY (trace_id, span_id)
546
+ );
547
+
548
+ CREATE INDEX IF NOT EXISTS idx_spans_service_time ON spans(service_name, start_time_ms DESC);
549
+ CREATE INDEX IF NOT EXISTS idx_spans_trace_time ON spans(trace_id, start_time_ms ASC);
550
+ CREATE INDEX IF NOT EXISTS idx_spans_span_id ON spans(span_id);
551
+ CREATE INDEX IF NOT EXISTS idx_spans_status_time ON spans(status, start_time_ms DESC);
552
+
553
+ CREATE TABLE IF NOT EXISTS logs (
554
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
555
+ trace_id TEXT,
556
+ span_id TEXT,
557
+ service_name TEXT NOT NULL,
558
+ scope_name TEXT,
559
+ severity_text TEXT NOT NULL,
560
+ timestamp_ms INTEGER NOT NULL,
561
+ body TEXT NOT NULL,
562
+ attributes_json TEXT NOT NULL,
563
+ resource_json TEXT NOT NULL
564
+ );
565
+
566
+ CREATE INDEX IF NOT EXISTS idx_logs_service_time ON logs(service_name, timestamp_ms DESC);
567
+ CREATE INDEX IF NOT EXISTS idx_logs_trace_time ON logs(trace_id, timestamp_ms DESC);
568
+ CREATE INDEX IF NOT EXISTS idx_logs_span_time ON logs(span_id, timestamp_ms DESC);
569
+ CREATE INDEX IF NOT EXISTS idx_logs_severity_time ON logs(severity_text, timestamp_ms DESC);
570
+
571
+ CREATE TABLE IF NOT EXISTS trace_summaries (
572
+ trace_id TEXT PRIMARY KEY,
573
+ service_name TEXT NOT NULL,
574
+ root_operation_name TEXT NOT NULL,
575
+ started_at_ms INTEGER NOT NULL,
576
+ ended_at_ms INTEGER NOT NULL,
577
+ active_span_count INTEGER NOT NULL DEFAULT 0,
578
+ duration_ms REAL NOT NULL,
579
+ span_count INTEGER NOT NULL,
580
+ error_count INTEGER NOT NULL
581
+ );
582
+
583
+ CREATE INDEX IF NOT EXISTS idx_trace_summaries_started_at ON trace_summaries(started_at_ms DESC, trace_id DESC);
584
+ CREATE INDEX IF NOT EXISTS idx_trace_summaries_service_started_at ON trace_summaries(service_name, started_at_ms DESC, trace_id DESC);
585
+ CREATE INDEX IF NOT EXISTS idx_trace_summaries_duration ON trace_summaries(duration_ms DESC);
586
+
587
+ CREATE TABLE IF NOT EXISTS span_attributes (
588
+ trace_id TEXT NOT NULL,
589
+ span_id TEXT NOT NULL,
590
+ key TEXT NOT NULL,
591
+ value TEXT NOT NULL,
592
+ PRIMARY KEY (trace_id, span_id, key)
593
+ );
594
+
595
+ CREATE INDEX IF NOT EXISTS idx_span_attributes_key_value ON span_attributes(key, value, trace_id, span_id);
596
+ CREATE INDEX IF NOT EXISTS idx_span_attributes_trace_span ON span_attributes(trace_id, span_id);
597
+
598
+ CREATE TABLE IF NOT EXISTS log_attributes (
599
+ log_id INTEGER NOT NULL,
600
+ key TEXT NOT NULL,
601
+ value TEXT NOT NULL,
602
+ PRIMARY KEY (log_id, key)
603
+ );
604
+
605
+ CREATE INDEX IF NOT EXISTS idx_log_attributes_key_value ON log_attributes(key, value, log_id);
606
+ CREATE INDEX IF NOT EXISTS idx_log_attributes_log_id ON log_attributes(log_id);
607
+ `)
608
+ } catch (err) {
609
+ if (!isSqliteLockError(err)) throw err
610
+ console.warn(`motel: writer bootstrap skipped during startup: ${(err as Error).message}`)
611
+ }
607
612
  }
608
613
 
609
614
  // Tables detected at runtime. For writer connections these flags are
@@ -726,6 +731,12 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
726
731
  // ANALYZE / optimize failures are never fatal — queries still work,
727
732
  // they just run with default row estimates.
728
733
  }
734
+ // Longer busy timeout: the ingest worker holds the write lock for up
735
+ // to a few seconds during big OTLP batches, and the daemon's retention
736
+ // passes can do the same. Apply this AFTER startup maintenance so
737
+ // lock-conflicted bootstrap steps fail fast instead of stalling health
738
+ // for the full 15s timeout.
739
+ try { db.exec(`PRAGMA busy_timeout = 15000;`) } catch { /* ignore */ }
729
740
  } // end: if (!opts.readonly) writer init
730
741
 
731
742
  const insertSpan = db.query(`
@@ -774,21 +785,94 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
774
785
  GROUP BY trace_id
775
786
  `)
776
787
 
777
- // One-time full rebuild of the trace_summaries table at open so
778
- // any drift from interrupted ingests gets reconciled. Writer-only
779
- // because the DELETE + INSERT would fail on a readonly connection
780
- // (and would fight the daemon's writer for the lock anyway).
781
- if (!opts.readonly) {
782
- db.query(`DELETE FROM trace_summaries`).run()
783
- rebuildTraceSummaries.run()
784
- }
788
// Effect that wipes trace_summaries and repopulates it via
// `rebuildTraceSummaries`, reconciling any drift between the summary table
// and the data it is rebuilt from.
//
// The DELETE and the rebuild run inside ONE transaction. As two separate
// autocommit statements, a lock failure (or crash) after the DELETE had
// committed would leave the table empty, and the lock error would then be
// swallowed below as a mere warning. Inside a transaction a failure rolls
// the DELETE back, so "skipped" really means "left untouched".
//
// Lock-contention errors are downgraded to a warning; anything else is
// rethrown.
const reconcileTraceSummaries = Effect.sync(() => {
  try {
    db.transaction(() => {
      db.query(`DELETE FROM trace_summaries`).run()
      rebuildTraceSummaries.run()
    })()
  } catch (err) {
    if (!isSqliteLockError(err)) throw err
    console.warn(`motel: trace summary rebuild skipped during startup: ${(err as Error).message}`)
  }
})
785
797
 
786
798
  const deleteSpanAttributes = db.query(`DELETE FROM span_attributes WHERE trace_id = ? AND span_id = ?`)
787
799
  const insertSpanAttribute = db.query(`INSERT INTO span_attributes (trace_id, span_id, key, value) VALUES (?, ?, ?, ?)`)
800
// Prepared multi-row INSERT statements for span_attributes, keyed by the
// number of rows each statement inserts.
const spanAttributeInsertManyByCount = new Map<number, ReturnType<Database["query"]>>()
/**
 * Inserts every (key, value) pair of `attributes` into span_attributes for
 * the given span, using multi-row INSERT statements.
 *
 * Rows are written in chunks of at most MAX_ROWS_PER_STATEMENT. Each row
 * binds four parameters, so an unbounded single statement could exceed
 * SQLite's per-statement host-parameter limit (999 by default on builds
 * older than 3.32.0) for spans with very many attributes. Chunking also
 * bounds the number of distinct prepared statements kept in the cache.
 *
 * @param traceId    trace id stored on every inserted row
 * @param spanId     span id stored on every inserted row
 * @param attributes attribute map; an empty map is a no-op
 */
const insertSpanAttributesMany = (traceId: string, spanId: string, attributes: Readonly<Record<string, string>>) => {
  // 200 rows * 4 parameters = 800 bound values, under the 999 legacy limit.
  const MAX_ROWS_PER_STATEMENT = 200
  const entries = Object.entries(attributes)
  for (let offset = 0; offset < entries.length; offset += MAX_ROWS_PER_STATEMENT) {
    const chunk = entries.slice(offset, offset + MAX_ROWS_PER_STATEMENT)
    if (chunk.length === 1) {
      // A single row reuses the always-prepared one-row statement.
      const [key, value] = chunk[0]!
      insertSpanAttribute.run(traceId, spanId, key, value)
      continue
    }
    let query = spanAttributeInsertManyByCount.get(chunk.length)
    if (!query) {
      query = db.query(`INSERT INTO span_attributes (trace_id, span_id, key, value) VALUES ${chunk.map(() => "(?, ?, ?, ?)").join(", ")}`)
      spanAttributeInsertManyByCount.set(chunk.length, query)
    }
    query.run(...chunk.flatMap(([key, value]) => [traceId, spanId, key, value]))
  }
}
788
816
  const deleteSpanOperationSearch = db.query(`DELETE FROM span_operation_fts WHERE trace_id = ? AND span_id = ?`)
789
817
  const insertSpanOperationSearch = db.query(`INSERT INTO span_operation_fts (trace_id, span_id, operation_name) VALUES (?, ?, ?)`)
818
// Prepared multi-row statements for span_operation_fts, keyed by the number
// of spans a statement covers (one cache for DELETE, one for INSERT).
const deleteSpanOperationSearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
const insertSpanOperationSearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
/**
 * Replaces the span_operation_fts rows for a batch of spans.
 *
 * Each tuple is [traceId, spanId, operationName]. Existing rows matching any
 * (traceId, spanId) in the batch are deleted first, then one row per tuple
 * is inserted. An empty batch is a no-op, and a single tuple goes through
 * the one-row statements instead of a cached multi-row statement.
 */
const updateSpanOperationSearchMany = (operations: ReadonlyArray<readonly [string, string, string]>) => {
  const rowCount = operations.length
  if (rowCount === 0) return

  if (rowCount === 1) {
    const [onlyTraceId, onlySpanId, onlyOperationName] = operations[0]!
    deleteSpanOperationSearch.run(onlyTraceId, onlySpanId)
    insertSpanOperationSearch.run(onlyTraceId, onlySpanId, onlyOperationName)
    return
  }

  // Fetch the statement cached for this row count, preparing it on first use.
  const statementFor = (cache: Map<number, ReturnType<Database["query"]>>, sql: string) => {
    const existing = cache.get(rowCount)
    if (existing) return existing
    const prepared = db.query(sql)
    cache.set(rowCount, prepared)
    return prepared
  }

  // Flatten the tuples into positional parameter lists for both statements.
  const deleteParams: string[] = []
  const insertParams: string[] = []
  for (const [traceId, spanId, operationName] of operations) {
    deleteParams.push(traceId, spanId)
    insertParams.push(traceId, spanId, operationName)
  }

  const matchClauses = Array.from({ length: rowCount }, () => "(trace_id = ? AND span_id = ?)").join(" OR ")
  const valueTuples = Array.from({ length: rowCount }, () => "(?, ?, ?)").join(", ")

  // Order matters: the delete is prepared and run before the insert is prepared.
  statementFor(deleteSpanOperationSearchManyByCount, `DELETE FROM span_operation_fts WHERE ${matchClauses}`).run(...deleteParams)
  statementFor(insertSpanOperationSearchManyByCount, `INSERT INTO span_operation_fts (trace_id, span_id, operation_name) VALUES ${valueTuples}`).run(...insertParams)
}
790
843
  const insertLogAttribute = db.query(`INSERT INTO log_attributes (log_id, key, value) VALUES (?, ?, ?)`)
844
// Prepared multi-row INSERT statements for log_attributes, keyed by the
// number of rows each statement inserts.
const logAttributeInsertManyByCount = new Map<number, ReturnType<Database["query"]>>()
/**
 * Inserts every (key, value) pair of `attributes` into log_attributes for
 * the given log row, using multi-row INSERT statements.
 *
 * Rows are written in chunks of at most MAX_ROWS_PER_STATEMENT so that a
 * log record with very many attributes cannot exceed SQLite's per-statement
 * host-parameter limit (999 by default on builds older than 3.32.0), and so
 * the prepared-statement cache stays bounded.
 *
 * @param logId      id of the owning logs row, stored on every inserted row
 * @param attributes attribute map; an empty map is a no-op
 */
const insertLogAttributesMany = (logId: number, attributes: Readonly<Record<string, string>>) => {
  // 200 rows * 3 parameters = 600 bound values, under the 999 legacy limit.
  const MAX_ROWS_PER_STATEMENT = 200
  const entries = Object.entries(attributes)
  for (let offset = 0; offset < entries.length; offset += MAX_ROWS_PER_STATEMENT) {
    const chunk = entries.slice(offset, offset + MAX_ROWS_PER_STATEMENT)
    if (chunk.length === 1) {
      // A single row reuses the always-prepared one-row statement.
      const [key, value] = chunk[0]!
      insertLogAttribute.run(logId, key, value)
      continue
    }
    let query = logAttributeInsertManyByCount.get(chunk.length)
    if (!query) {
      query = db.query(`INSERT INTO log_attributes (log_id, key, value) VALUES ${chunk.map(() => "(?, ?, ?)").join(", ")}`)
      logAttributeInsertManyByCount.set(chunk.length, query)
    }
    query.run(...chunk.flatMap(([key, value]) => [logId, key, value]))
  }
}
791
860
  const insertLogBodySearch = db.query(`INSERT INTO log_body_fts (log_id, body) VALUES (?, ?)`)
861
// Prepared multi-row INSERT statements for log_body_fts, keyed by the number
// of rows each statement inserts.
const insertLogBodySearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
/**
 * Inserts a batch of [logId, body] pairs into log_body_fts.
 *
 * An empty batch is a no-op; a single pair goes through the one-row
 * statement; larger batches use a multi-row INSERT that is prepared once per
 * distinct batch size and then reused.
 */
const insertLogBodySearchMany = (entries: ReadonlyArray<readonly [string, string]>) => {
  const rowCount = entries.length
  if (rowCount === 0) return

  if (rowCount === 1) {
    const [onlyLogId, onlyBody] = entries[0]!
    insertLogBodySearch.run(onlyLogId, onlyBody)
    return
  }

  let statement = insertLogBodySearchManyByCount.get(rowCount)
  if (statement === undefined) {
    const valueTuples = Array.from({ length: rowCount }, () => "(?, ?)").join(", ")
    statement = db.query(`INSERT INTO log_body_fts (log_id, body) VALUES ${valueTuples}`)
    insertLogBodySearchManyByCount.set(rowCount, statement)
  }

  // Flatten the pairs into one positional parameter list.
  const params: string[] = []
  for (const [logId, body] of entries) {
    params.push(logId, body)
  }
  statement.run(...params)
}
792
876
 
793
877
  const maxDbSizeBytes = config.otel.maxDbSizeMb * 1024 * 1024
794
878
 
@@ -887,6 +971,12 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
887
971
  // daemon). The ingest worker and TUI skip it to avoid two writers
888
972
  // competing for the write lock with overlapping DELETE passes.
889
973
  if (opts.runRetention) {
974
+ // Reconcile any summary drift from interrupted ingests, but do it
975
+ // after the server becomes healthy. Running this synchronously at
976
+ // open can sit behind another writer's lock for ~15s and make the
977
+ // daemon look hung even though the port is already bound.
978
+ yield* Effect.forkScoped(reconcileTraceSummaries)
979
+
890
980
  // Enable incremental vacuum so retention can reclaim freed
891
981
  // pages over time instead of needing a stop-the-world VACUUM.
892
982
  // Idempotent: repeat calls after the first are no-ops.
@@ -943,6 +1033,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
943
1033
  let insertedSpans = 0
944
1034
  const transaction = db.transaction((request: OtlpTraceExportRequest) => {
945
1035
  const touchedTraceIds = new Set<string>()
1036
+ const touchedOperations: Array<readonly [string, string, string]> = []
946
1037
  for (const resourceSpans of request.resourceSpans ?? []) {
947
1038
  const resourceAttributes = attributeMap(resourceSpans.resource?.attributes)
948
1039
  const serviceName = resourceAttributes["service.name"] || resourceAttributes["service_name"] || "unknown"
@@ -978,20 +1069,21 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
978
1069
  JSON.stringify(events),
979
1070
  )
980
1071
  deleteSpanAttributes.run(span.traceId, span.spanId)
981
- for (const [key, value] of Object.entries(mergedAttributes)) {
982
- insertSpanAttribute.run(span.traceId, span.spanId, key, value)
983
- }
984
- try {
985
- deleteSpanOperationSearch.run(span.traceId, span.spanId)
986
- insertSpanOperationSearch.run(span.traceId, span.spanId, span.name ?? "unknown")
987
- } catch {
988
- // FTS is optional.
989
- }
1072
+ insertSpanAttributesMany(span.traceId, span.spanId, mergedAttributes)
1073
+ touchedOperations.push([span.traceId, span.spanId, span.name ?? "unknown"])
990
1074
  touchedTraceIds.add(span.traceId)
991
1075
  insertedSpans += 1
992
1076
  }
993
1077
  }
994
1078
  }
1079
+ try {
1080
+ const BATCH_SIZE = 500
1081
+ for (let offset = 0; offset < touchedOperations.length; offset += BATCH_SIZE) {
1082
+ updateSpanOperationSearchMany(touchedOperations.slice(offset, offset + BATCH_SIZE))
1083
+ }
1084
+ } catch {
1085
+ // FTS is optional.
1086
+ }
995
1087
  for (const traceId of touchedTraceIds) {
996
1088
  upsertTraceSummary.run(traceId)
997
1089
  }
@@ -1006,6 +1098,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1006
1098
  return yield* Effect.sync(() => {
1007
1099
  let insertedLogs = 0
1008
1100
  const transaction = db.transaction((request: OtlpLogExportRequest) => {
1101
+ const touchedLogBodies: Array<readonly [string, string]> = []
1009
1102
  for (const resourceLogs of request.resourceLogs ?? []) {
1010
1103
  const resourceAttributes = attributeMap(resourceLogs.resource?.attributes)
1011
1104
  const serviceName = resourceAttributes["service.name"] || resourceAttributes["service_name"] || "unknown"
@@ -1030,18 +1123,20 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1030
1123
  JSON.stringify(resourceAttributes),
1031
1124
  )
1032
1125
  const logId = Number((result as { lastInsertRowid: number | bigint }).lastInsertRowid)
1033
- for (const [key, value] of Object.entries(mergedAttributes)) {
1034
- insertLogAttribute.run(logId, key, value)
1035
- }
1036
- try {
1037
- insertLogBodySearch.run(String(logId), body)
1038
- } catch {
1039
- // FTS is optional.
1040
- }
1126
+ insertLogAttributesMany(logId, mergedAttributes)
1127
+ touchedLogBodies.push([String(logId), body])
1041
1128
  insertedLogs += 1
1042
1129
  }
1043
1130
  }
1044
1131
  }
1132
+ try {
1133
+ const BATCH_SIZE = 500
1134
+ for (let offset = 0; offset < touchedLogBodies.length; offset += BATCH_SIZE) {
1135
+ insertLogBodySearchMany(touchedLogBodies.slice(offset, offset + BATCH_SIZE))
1136
+ }
1137
+ } catch {
1138
+ // FTS is optional.
1139
+ }
1045
1140
  })
1046
1141
 
1047
1142
  transaction(payload)
@@ -1264,6 +1359,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1264
1359
  const candidateLimit = hasContainsFilters ? Math.max(limit * 20, 500) : Math.max(limit * 10, 200)
1265
1360
 
1266
1361
  return yield* Effect.sync(() => {
1362
+ let fromSql = "FROM spans AS s"
1363
+ const joinParams: Array<string | number> = []
1267
1364
  const clauses: string[] = ["s.start_time_ms >= ?"]
1268
1365
  const params: Array<string | number> = [cutoff]
1269
1366
 
@@ -1278,8 +1375,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1278
1375
  if (input.operation) {
1279
1376
  const ftsQuery = toFtsMatchQuery(input.operation)
1280
1377
  if (hasFts && ftsQuery) {
1281
- clauses.push("EXISTS (SELECT 1 FROM span_operation_fts WHERE span_operation_fts.trace_id = s.trace_id AND span_operation_fts.span_id = s.span_id AND span_operation_fts MATCH ?)")
1282
- params.push(ftsQuery)
1378
+ fromSql += ` INNER JOIN (SELECT trace_id, span_id FROM span_operation_fts WHERE span_operation_fts MATCH ?) AS span_operation_match ON span_operation_match.trace_id = s.trace_id AND span_operation_match.span_id = s.span_id`
1379
+ joinParams.push(ftsQuery)
1283
1380
  } else {
1284
1381
  clauses.push("s.operation_name LIKE ? COLLATE NOCASE")
1285
1382
  params.push(`%${input.operation}%`)
@@ -1303,42 +1400,90 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1303
1400
  }
1304
1401
 
1305
1402
  const rows = db.query(`
1306
- SELECT trace_id, span_id
1307
- FROM spans AS s
1403
+ SELECT *
1404
+ ${fromSql}
1308
1405
  WHERE ${clauses.join(" AND ")}
1309
1406
  ORDER BY s.start_time_ms DESC
1310
1407
  LIMIT ?
1311
- `).all(...params, candidateLimit) as Array<{ trace_id: string; span_id: string }>
1408
+ `).all(...joinParams, ...params, candidateLimit) as SpanRow[]
1312
1409
 
1313
1410
  const traceIds = [...new Set(rows.map((row) => row.trace_id))]
1314
1411
  if (traceIds.length === 0) return [] as readonly SpanItem[]
1315
1412
 
1413
+ const keyOf = (traceId: string, spanId: string) => `${traceId}:${spanId}`
1414
+ const spanContextById = new Map<string, { readonly parentSpanId: string | null; readonly operationName: string }>()
1415
+ for (const row of rows) {
1416
+ spanContextById.set(keyOf(row.trace_id, row.span_id), {
1417
+ parentSpanId: row.parent_span_id,
1418
+ operationName: row.operation_name,
1419
+ })
1420
+ }
1421
+
1316
1422
  const placeholders = traceIds.map(() => "?").join(", ")
1317
- const spanRows = db.query(`
1318
- SELECT * FROM spans
1319
- WHERE trace_id IN (${placeholders})
1423
+ const rootRows = db.query(`
1424
+ SELECT trace_id, operation_name
1425
+ FROM spans
1426
+ WHERE trace_id IN (${placeholders}) AND parent_span_id IS NULL
1320
1427
  ORDER BY start_time_ms ASC
1321
- `).all(...traceIds) as SpanRow[]
1322
-
1323
- const grouped = new Map<string, SpanRow[]>()
1324
- for (const row of spanRows) {
1325
- const group = grouped.get(row.trace_id) ?? []
1326
- group.push(row)
1327
- grouped.set(row.trace_id, group)
1428
+ `).all(...traceIds) as Array<{ trace_id: string; operation_name: string }>
1429
+ const rootOperationByTraceId = new Map<string, string>()
1430
+ for (const row of rootRows) {
1431
+ if (!rootOperationByTraceId.has(row.trace_id)) {
1432
+ rootOperationByTraceId.set(row.trace_id, row.operation_name)
1433
+ }
1328
1434
  }
1329
1435
 
1330
- const itemById = new Map<string, SpanItem>()
1331
- for (const traceId of traceIds) {
1332
- const traceSpanRows = grouped.get(traceId)
1333
- if (!traceSpanRows) continue
1334
- for (const item of buildSpanItems(traceId, traceSpanRows)) {
1335
- itemById.set(`${item.traceId}:${item.span.spanId}`, item)
1436
+ const spanContextLookup = db.query(`
1437
+ SELECT parent_span_id, operation_name
1438
+ FROM spans
1439
+ WHERE trace_id = ? AND span_id = ?
1440
+ `)
1441
+
1442
// Resolves the parent span id and operation name for a span. The in-memory
// `spanContextById` cache is consulted first; on a miss the span is looked
// up through `spanContextLookup` and, if found, cached. Returns null when
// the span does not exist (misses are not cached).
const getSpanContext = (traceId: string, spanId: string) => {
  const cacheKey = keyOf(traceId, spanId)
  const known = spanContextById.get(cacheKey)
  if (known !== undefined) return known

  const fetched = spanContextLookup.get(traceId, spanId) as { parent_span_id: string | null; operation_name: string } | null
  if (fetched) {
    const context = {
      parentSpanId: fetched.parent_span_id,
      operationName: fetched.operation_name,
    }
    spanContextById.set(cacheKey, context)
    return context
  }
  return null
}
1455
+
1456
// Memoised nesting depth per "traceId:spanId" key.
const depthById = new Map<string, number>()
// Computes how many ancestors a span has by following parentSpanId links
// through `getSpanContext`. A span with no known context or no parent has
// depth 0. `visiting` tracks the spans on the current walk so that a
// parent cycle terminates: re-entering a span already on the walk yields 0
// for that inner call (and that 0 is not memoised).
const getDepth = (traceId: string, spanId: string, visiting = new Set<string>()): number => {
  const cacheKey = keyOf(traceId, spanId)
  const memoised = depthById.get(cacheKey)
  if (memoised !== undefined) return memoised

  // Cycle guard: this span is already being resolved further up the stack.
  if (visiting.has(cacheKey)) return 0
  visiting.add(cacheKey)

  const parentSpanId = getSpanContext(traceId, spanId)?.parentSpanId
  let depth = 0
  if (parentSpanId) {
    depth = getDepth(traceId, parentSpanId, visiting) + 1
  }
  depthById.set(cacheKey, depth)
  return depth
}
1338
1468
 
1339
1469
  return rows
1340
- .map((row) => itemById.get(`${row.trace_id}:${row.span_id}`))
1341
- .filter((item): item is SpanItem => item !== undefined)
1470
+ .map((row) => {
1471
+ const parentContext = row.parent_span_id ? getSpanContext(row.trace_id, row.parent_span_id) : null
1472
+ const parsedSpan = parseSpanRow(row)
1473
+ const span = {
1474
+ ...parsedSpan,
1475
+ depth: getDepth(row.trace_id, row.span_id),
1476
+ warnings: row.parent_span_id && !parentContext
1477
+ ? [`missing span ${row.parent_span_id} (1 child)`]
1478
+ : parsedSpan.warnings,
1479
+ }
1480
+ return {
1481
+ traceId: row.trace_id,
1482
+ rootOperationName: rootOperationByTraceId.get(row.trace_id) ?? span.operationName,
1483
+ parentOperationName: parentContext?.operationName ?? null,
1484
+ span,
1485
+ } satisfies SpanItem
1486
+ })
1342
1487
  .filter((item) => {
1343
1488
  if (input.parentOperation) {
1344
1489
  const needle = input.parentOperation.toLowerCase()
@@ -1648,7 +1793,6 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1648
1793
  })
1649
1794
 
1650
1795
  const listFacets = Effect.fn("motel/TelemetryStore.listFacets")(function* (input: FacetSearch) {
1651
-
1652
1796
  const cutoff = (yield* Clock.currentTimeMillis) - (input.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
1653
1797
  const limit = input.limit ?? 20
1654
1798
 
@@ -1738,21 +1882,26 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1738
1882
  // FACET_VALUE_MAX_LEN. For opencode this hides `ai.prompt`,
1739
1883
  // `ai.prompt.messages`, and `ai.prompt.tools` — which are 1-6MB text
1740
1884
  // blobs that you'd never want to filter by exact match anyway. The
1741
- // WHERE clause lets SQLite skip reading those pages from disk, taking
1742
- // the picker open time from ~1.2s to ~370ms on a 2GB database.
1885
+ // WHERE clause lets SQLite skip reading those pages from disk. We also
1886
+ // dedupe to one (trace, key, value) row before grouping so repeated
1887
+ // span-level duplicates don't blow up the temp B-trees used for the
1888
+ // picker ranking query.
1743
1889
  const params: Array<string | number> = [FACET_VALUE_MAX_LEN, cutoff]
1744
1890
  if (input.serviceName) params.push(input.serviceName)
1745
1891
  params.push(limit)
1746
1892
  const rows = db.query(`
1747
- SELECT sa.key AS value,
1748
- COUNT(DISTINCT sa.trace_id) AS count,
1749
- COUNT(DISTINCT sa.value) AS distinct_values
1750
- FROM span_attributes sa
1751
- JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
1752
- WHERE LENGTH(sa.value) < ?
1753
- AND s.start_time_ms >= ?
1754
- ${input.serviceName ? "AND s.service_name = ?" : ""}
1755
- GROUP BY sa.key
1893
+ SELECT scoped.key AS value,
1894
+ COUNT(DISTINCT scoped.trace_id) AS count,
1895
+ COUNT(DISTINCT scoped.value) AS distinct_values
1896
+ FROM (
1897
+ SELECT DISTINCT sa.trace_id, sa.key, sa.value
1898
+ FROM span_attributes sa
1899
+ JOIN trace_summaries ts ON ts.trace_id = sa.trace_id
1900
+ WHERE LENGTH(sa.value) < ?
1901
+ AND ts.started_at_ms >= ?
1902
+ ${input.serviceName ? "AND ts.service_name = ?" : ""}
1903
+ ) AS scoped
1904
+ GROUP BY scoped.key
1756
1905
  ORDER BY (CASE WHEN distinct_values = 1 THEN 1 ELSE 0 END) ASC,
1757
1906
  distinct_values DESC,
1758
1907
  count DESC,
@@ -6,9 +6,9 @@ export class TraceQueryService extends Context.Service<
6
6
  TraceQueryService,
7
7
  {
8
8
  readonly listServices: Effect.Effect<readonly string[], Error>
9
- readonly listRecentTraces: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceItem[], Error>
10
- readonly listTraceSummaries: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceSummaryItem[], Error>
11
- readonly searchTraceSummaries: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>>; readonly aiText?: string | null }) => Effect.Effect<readonly TraceSummaryItem[], Error>
9
+ readonly listRecentTraces: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceItem[], Error>
10
+ readonly listTraceSummaries: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
11
+ readonly searchTraceSummaries: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>>; readonly aiText?: string | null; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
12
12
  readonly listFacets: (input: { readonly type: "traces" | "logs"; readonly field: string; readonly serviceName?: string | null; readonly key?: string | null; readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly { readonly value: string; readonly count: number }[], Error>
13
13
  readonly searchTraces: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly TraceItem[], Error>
14
14
  readonly traceStats: (input: { readonly groupBy: string; readonly agg: "count" | "avg_duration" | "p95_duration" | "error_rate"; readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly { readonly group: string; readonly value: number; readonly count: number }[], Error>
@@ -16,7 +16,7 @@ export class TraceQueryService extends Context.Service<
16
16
  readonly getSpan: (spanId: string) => Effect.Effect<SpanItem | null, Error>
17
17
  readonly getAiCall: (spanId: string) => Effect.Effect<AiCallDetail | null, Error>
18
18
  readonly listTraceSpans: (traceId: string) => Effect.Effect<readonly SpanItem[], Error>
19
- readonly searchSpans: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly parentOperation?: string | null; readonly status?: "ok" | "error" | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly SpanItem[], Error>
19
+ readonly searchSpans: (input: { readonly serviceName?: string | null; readonly traceId?: string | null; readonly operation?: string | null; readonly parentOperation?: string | null; readonly status?: "ok" | "error" | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>>; readonly attributeContainsFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly SpanItem[], Error>
20
20
  }
21
21
  >()("motel/TraceQueryService") {}
22
22
 
@@ -31,7 +31,7 @@ export const TraceQueryServiceLive = Layer.effect(
31
31
  return services
32
32
  })()
33
33
 
34
- const listRecentTraces = Effect.fn("motel/TraceQueryService.listRecentTraces")(function* (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) {
34
+ const listRecentTraces = Effect.fn("motel/TraceQueryService.listRecentTraces")(function* (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) {
35
35
  yield* Effect.annotateCurrentSpan({
36
36
  "trace.service_name": serviceName,
37
37
  })
@@ -40,7 +40,7 @@ export const TraceQueryServiceLive = Layer.effect(
40
40
  return traces
41
41
  })
42
42
 
43
- const listTraceSummaries = Effect.fn("motel/TraceQueryService.listTraceSummaries")(function* (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) {
43
+ const listTraceSummaries = Effect.fn("motel/TraceQueryService.listTraceSummaries")(function* (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) {
44
44
  yield* Effect.annotateCurrentSpan({
45
45
  "trace.service_name": serviceName,
46
46
  })