@kitlangton/motel 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +11 -8
- package/README.md +13 -2
- package/package.json +31 -19
- package/skills/motel-debug/SKILL.md +203 -0
- package/skills/motel-debug/references/effect.md +38 -0
- package/src/App.tsx +3 -5
- package/src/StartupGate.tsx +8 -10
- package/src/cli.ts +15 -16
- package/src/config.ts +7 -1
- package/src/daemon.test.ts +332 -51
- package/src/daemon.ts +103 -152
- package/src/httpApi.ts +1 -0
- package/src/httpListPolicy.test.ts +76 -0
- package/src/httpListPolicy.ts +129 -0
- package/src/localServer.ts +194 -323
- package/src/mcp.ts +2 -1
- package/src/opentui-jsx.d.ts +11 -0
- package/src/otlp.test.ts +65 -0
- package/src/otlp.ts +20 -0
- package/src/otlpProtobuf.ts +35 -0
- package/src/registry.ts +37 -11
- package/src/runtime.ts +2 -6
- package/src/services/AsyncIngest.ts +20 -8
- package/src/services/LogQueryService.ts +11 -25
- package/src/services/TelemetryQuery.ts +62 -0
- package/src/services/TelemetryStore.ts +433 -249
- package/src/services/TraceQueryService.ts +18 -52
- package/src/services/ingestRpc.ts +2 -4
- package/src/services/queryRpc.ts +15 -0
- package/src/services/telemetryQueryWorker.ts +32 -0
- package/src/services/telemetryWorker.ts +5 -8
- package/src/storybook/aiChatStory.tsx +1 -1
- package/src/telemetry.test.ts +307 -41
- package/src/ui/AiChatView.tsx +1 -1
- package/src/ui/AttrFilterModal.tsx +1 -1
- package/src/ui/ServiceLogs.tsx +10 -7
- package/src/ui/SpanContentView.tsx +24 -21
- package/src/ui/TraceDetailsPane.tsx +1 -1
- package/src/ui/TraceList.tsx +1 -1
- package/src/ui/aiState.ts +10 -22
- package/src/ui/app/TraceWorkspace.tsx +2 -1
- package/src/ui/app/useAppLayout.ts +1 -1
- package/src/ui/app/useTraceScreenData.ts +22 -18
- package/src/ui/cachedLoader.test.ts +23 -0
- package/src/ui/cachedLoader.ts +60 -0
- package/src/ui/loaders.ts +34 -53
- package/src/ui/primitives.tsx +1 -1
- package/src/ui/state.ts +2 -0
- package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
- package/src/ui/traceSortNav.repro.seed.ts +1 -1
- package/src/ui/traceSortNav.repro.test.ts +12 -2
- package/src/ui/useAttrFilterPicker.ts +10 -8
- package/src/ui/useKeyboardNav.ts +3 -6
- package/src/ui/waterfallNav.repro.seed.ts +1 -1
- package/src/ui/waterfallNav.repro.test.ts +16 -8
- package/web/dist/assets/index-B01z9BaO.css +2 -0
- package/web/dist/assets/index-M86tcih5.js +22 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-DnyVo03x.js +0 -27
- package/web/dist/assets/index-DzuHNBGV.css +0 -2
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite"
|
|
2
|
-
import
|
|
2
|
+
import * as BunFileSystem from "@effect/platform-bun/BunFileSystem"
|
|
3
3
|
import { dirname } from "node:path"
|
|
4
|
-
import { Clock, Effect, Layer, Schedule, Context } from "effect"
|
|
4
|
+
import { Cause, Clock, Effect, FileSystem, Layer, Schedule, Context } from "effect"
|
|
5
5
|
import { config } from "../config.js"
|
|
6
6
|
import type { AiCallDetail, AiCallSummary, FacetItem, LogItem, SpanItem, StatsItem, TraceItem, TraceSummaryItem, TraceSpanEvent, TraceSpanItem } from "../domain.js"
|
|
7
7
|
import { AI_ATTR_MAP, AI_FTS_KEYS, AI_TEXT_SEARCH_KEYS, truncatePreview } from "../domain.js"
|
|
8
|
-
import { attributeMap, nanosToMilliseconds, parseAnyValue, spanKindLabel, spanStatusLabel, stringifyValue, type OtlpLogExportRequest, type OtlpTraceExportRequest } from "../otlp.js"
|
|
8
|
+
import { attributeMap, nanosToMilliseconds, normalizeOtlpBinaryId, parseAnyValue, spanKindLabel, spanStatusLabel, stringifyValue, type OtlpLogExportRequest, type OtlpTraceExportRequest } from "../otlp.js"
|
|
9
9
|
|
|
10
10
|
const isSqliteLockError = (error: unknown) =>
|
|
11
11
|
error instanceof Error && /(database is locked|database table is locked|SQLITE_BUSY)/i.test(error.message)
|
|
@@ -196,18 +196,40 @@ const TRACE_SUMMARY_SELECT_SQL = `
|
|
|
196
196
|
FROM spans
|
|
197
197
|
`
|
|
198
198
|
|
|
199
|
+
// Memoize small repeated JSON records. Resource attributes are the primary
|
|
200
|
+
// beneficiary because many spans share the same serialized value; compact
|
|
201
|
+
// repeated span attributes also benefit while large unique payloads bypass
|
|
202
|
+
// the cache to keep memory bounded for long-running daemons.
|
|
203
|
+
const RECORD_PARSE_CACHE_MAX_VALUE_LEN = 1024
|
|
204
|
+
const RECORD_PARSE_CACHE_LIMIT = 256
|
|
205
|
+
const recordParseCache = new Map<string, Record<string, string>>()
|
|
206
|
+
const EMPTY_RECORD: Record<string, string> = {}
|
|
207
|
+
|
|
199
208
|
const parseRecord = (value: string): Record<string, string> => {
|
|
209
|
+
if (value === "" || value === "{}") return EMPTY_RECORD
|
|
210
|
+
const cacheable = value.length <= RECORD_PARSE_CACHE_MAX_VALUE_LEN
|
|
211
|
+
if (cacheable) {
|
|
212
|
+
const cached = recordParseCache.get(value)
|
|
213
|
+
if (cached !== undefined) return cached
|
|
214
|
+
}
|
|
215
|
+
let parsed: Record<string, string>
|
|
200
216
|
try {
|
|
201
|
-
const
|
|
202
|
-
|
|
217
|
+
const json = JSON.parse(value) as Record<string, unknown>
|
|
218
|
+
parsed = Object.fromEntries(Object.entries(json).map(([key, entry]) => [key, stringifyValue(entry)]))
|
|
203
219
|
} catch {
|
|
204
|
-
|
|
220
|
+
parsed = EMPTY_RECORD
|
|
221
|
+
}
|
|
222
|
+
if (cacheable && recordParseCache.size < RECORD_PARSE_CACHE_LIMIT) {
|
|
223
|
+
recordParseCache.set(value, parsed)
|
|
205
224
|
}
|
|
225
|
+
return parsed
|
|
206
226
|
}
|
|
207
227
|
|
|
208
228
|
const parseEvents = (value: string): readonly TraceSpanEvent[] => {
|
|
229
|
+
if (value === "" || value === "[]") return []
|
|
209
230
|
try {
|
|
210
231
|
const parsed = JSON.parse(value) as Array<{ name: string; timestamp: number; attributes: Record<string, string> }>
|
|
232
|
+
if (parsed.length === 0) return []
|
|
211
233
|
return parsed.map((event) => ({
|
|
212
234
|
name: event.name,
|
|
213
235
|
timestamp: new Date(event.timestamp),
|
|
@@ -423,29 +445,41 @@ const buildContainsAttributeMatchSubquery = (
|
|
|
423
445
|
}
|
|
424
446
|
}
|
|
425
447
|
|
|
448
|
+
// Read-only surface of the telemetry store. Pulled out so a readonly
|
|
449
|
+
// SQLite connection (TUI / HTTP query handlers) can be expressed as a
|
|
450
|
+
// distinct service identifier from the writer, without re-declaring
|
|
451
|
+
// every query in a wrapper layer. The writer's value still satisfies
|
|
452
|
+
// this shape — TelemetryStoreLive can provide both identifiers from
|
|
453
|
+
// one underlying object if needed.
|
|
454
|
+
export interface TelemetryStoreReader {
|
|
455
|
+
readonly listServices: Effect.Effect<readonly string[], Error>
|
|
456
|
+
readonly listRecentTraces: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceItem[], Error>
|
|
457
|
+
readonly listTraceSummaries: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
458
|
+
readonly searchTraces: (input: TraceSearch) => Effect.Effect<readonly TraceItem[], Error>
|
|
459
|
+
readonly searchTraceSummaries: (input: TraceSearch) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
460
|
+
readonly traceStats: (input: TraceStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
461
|
+
readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
|
|
462
|
+
readonly getSpan: (spanId: string) => Effect.Effect<SpanItem | null, Error>
|
|
463
|
+
readonly listTraceSpans: (traceId: string) => Effect.Effect<readonly SpanItem[], Error>
|
|
464
|
+
readonly searchSpans: (input: SpanSearch) => Effect.Effect<readonly SpanItem[], Error>
|
|
465
|
+
readonly searchLogs: (input: LogSearch) => Effect.Effect<readonly LogItem[], Error>
|
|
466
|
+
readonly logStats: (input: LogStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
467
|
+
readonly listFacets: (input: FacetSearch) => Effect.Effect<readonly FacetItem[], Error>
|
|
468
|
+
readonly listRecentLogs: (serviceName: string) => Effect.Effect<readonly LogItem[], Error>
|
|
469
|
+
readonly listTraceLogs: (traceId: string) => Effect.Effect<readonly LogItem[], Error>
|
|
470
|
+
readonly searchAiCalls: (input: AiCallSearch) => Effect.Effect<readonly AiCallSummary[], Error>
|
|
471
|
+
readonly getAiCall: (spanId: string) => Effect.Effect<AiCallDetail | null, Error>
|
|
472
|
+
readonly aiCallStats: (input: AiCallStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
export class TelemetryStoreReadonly extends Context.Service<TelemetryStoreReadonly, TelemetryStoreReader>()("motel/TelemetryStoreReadonly") {}
|
|
476
|
+
|
|
426
477
|
export class TelemetryStore extends Context.Service<
|
|
427
478
|
TelemetryStore,
|
|
428
|
-
{
|
|
479
|
+
TelemetryStoreReader & {
|
|
429
480
|
readonly ingestTraces: (payload: OtlpTraceExportRequest) => Effect.Effect<{ readonly insertedSpans: number }, Error>
|
|
430
481
|
readonly ingestLogs: (payload: OtlpLogExportRequest) => Effect.Effect<{ readonly insertedLogs: number }, Error>
|
|
431
|
-
readonly
|
|
432
|
-
readonly listRecentTraces: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceItem[], Error>
|
|
433
|
-
readonly listTraceSummaries: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
434
|
-
readonly searchTraces: (input: TraceSearch) => Effect.Effect<readonly TraceItem[], Error>
|
|
435
|
-
readonly searchTraceSummaries: (input: TraceSearch) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
436
|
-
readonly traceStats: (input: TraceStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
437
|
-
readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
|
|
438
|
-
readonly getSpan: (spanId: string) => Effect.Effect<SpanItem | null, Error>
|
|
439
|
-
readonly listTraceSpans: (traceId: string) => Effect.Effect<readonly SpanItem[], Error>
|
|
440
|
-
readonly searchSpans: (input: SpanSearch) => Effect.Effect<readonly SpanItem[], Error>
|
|
441
|
-
readonly searchLogs: (input: LogSearch) => Effect.Effect<readonly LogItem[], Error>
|
|
442
|
-
readonly logStats: (input: LogStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
443
|
-
readonly listFacets: (input: FacetSearch) => Effect.Effect<readonly FacetItem[], Error>
|
|
444
|
-
readonly listRecentLogs: (serviceName: string) => Effect.Effect<readonly LogItem[], Error>
|
|
445
|
-
readonly listTraceLogs: (traceId: string) => Effect.Effect<readonly LogItem[], Error>
|
|
446
|
-
readonly searchAiCalls: (input: AiCallSearch) => Effect.Effect<readonly AiCallSummary[], Error>
|
|
447
|
-
readonly getAiCall: (spanId: string) => Effect.Effect<AiCallDetail | null, Error>
|
|
448
|
-
readonly aiCallStats: (input: AiCallStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
|
|
482
|
+
readonly runRetentionNow: Effect.Effect<void, Error>
|
|
449
483
|
}
|
|
450
484
|
>()("motel/TelemetryStore") {}
|
|
451
485
|
|
|
@@ -462,18 +496,17 @@ export class TelemetryStore extends Context.Service<
|
|
|
462
496
|
*
|
|
463
497
|
* - `runRetention` — fork the background cleanup loop (age + size cap
|
|
464
498
|
* eviction, WAL checkpoint). Only one process should own this at a
|
|
465
|
-
* time.
|
|
466
|
-
* worker and the TUI skip it.
|
|
499
|
+
* time. The ingest worker owns it; the HTTP thread and TUI skip it.
|
|
467
500
|
*/
|
|
468
501
|
export interface TelemetryStoreOptions {
|
|
469
502
|
readonly readonly: boolean
|
|
470
503
|
readonly runRetention: boolean
|
|
471
504
|
}
|
|
472
505
|
|
|
473
|
-
|
|
474
|
-
TelemetryStore,
|
|
506
|
+
const makeTelemetryStoreEffect = (opts: TelemetryStoreOptions) =>
|
|
475
507
|
Effect.gen(function* () {
|
|
476
|
-
|
|
508
|
+
const fileSystem = yield* FileSystem.FileSystem
|
|
509
|
+
yield* fileSystem.makeDirectory(dirname(config.otel.databasePath), { recursive: true })
|
|
477
510
|
const db = yield* Effect.acquireRelease(
|
|
478
511
|
Effect.sync(() => new Database(config.otel.databasePath, {
|
|
479
512
|
create: !opts.readonly,
|
|
@@ -517,6 +550,13 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
517
550
|
-- SQLite silently caps at actual file size for smaller DBs.
|
|
518
551
|
PRAGMA mmap_size = 268435456;
|
|
519
552
|
`)
|
|
553
|
+
// auto_vacuum is a header-level setting: it only takes effect on
|
|
554
|
+
// an empty DB, or on the next VACUUM after a change. Setting it
|
|
555
|
+
// here, BEFORE the first CREATE TABLE, is the only path that
|
|
556
|
+
// makes incremental_vacuum work without a full VACUUM.
|
|
557
|
+
// Existing DBs that predate this setting keep their current mode;
|
|
558
|
+
// Motel never performs a surprise full-file VACUUM at startup.
|
|
559
|
+
try { db.exec(`PRAGMA auto_vacuum = INCREMENTAL;`) } catch { /* ignore */ }
|
|
520
560
|
try {
|
|
521
561
|
db.exec(`
|
|
522
562
|
PRAGMA journal_mode = WAL;
|
|
@@ -526,6 +566,13 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
526
566
|
-- this the WAL happily runs into the hundreds of MB and queries
|
|
527
567
|
-- start paying the cost of walking the WAL on every read.
|
|
528
568
|
PRAGMA wal_autocheckpoint = 4000;
|
|
569
|
+
-- Hard cap for the WAL file. Auto-checkpoint controls *when*
|
|
570
|
+
-- pages move out of the WAL; size_limit controls how much the
|
|
571
|
+
-- WAL file is allowed to grow on disk. 128MB is generous enough
|
|
572
|
+
-- to absorb a long write burst without blocking on truncation,
|
|
573
|
+
-- tight enough that a wedged retention loop can't hide a 20GB
|
|
574
|
+
-- WAL the way a default no-limit configuration can.
|
|
575
|
+
PRAGMA journal_size_limit = 134217728;
|
|
529
576
|
|
|
530
577
|
CREATE TABLE IF NOT EXISTS spans (
|
|
531
578
|
trace_id TEXT NOT NULL,
|
|
@@ -604,6 +651,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
604
651
|
|
|
605
652
|
CREATE INDEX IF NOT EXISTS idx_log_attributes_key_value ON log_attributes(key, value, log_id);
|
|
606
653
|
CREATE INDEX IF NOT EXISTS idx_log_attributes_log_id ON log_attributes(log_id);
|
|
654
|
+
|
|
655
|
+
CREATE TABLE IF NOT EXISTS motel_maintenance (
|
|
656
|
+
key TEXT PRIMARY KEY,
|
|
657
|
+
value TEXT NOT NULL
|
|
658
|
+
);
|
|
607
659
|
`)
|
|
608
660
|
} catch (err) {
|
|
609
661
|
if (!isSqliteLockError(err)) throw err
|
|
@@ -624,7 +676,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
624
676
|
} catch { hasFts = false }
|
|
625
677
|
try {
|
|
626
678
|
const row = db.query(`SELECT name FROM sqlite_master WHERE type='table' AND name='span_attr_fts'`).get()
|
|
627
|
-
|
|
679
|
+
const backfill = db.query(`SELECT value FROM motel_maintenance WHERE key = 'span_attr_fts_v1'`).get() as { value: string } | null
|
|
680
|
+
hasAttrFts = row !== null && backfill?.value === "complete"
|
|
628
681
|
} catch { hasAttrFts = false }
|
|
629
682
|
}
|
|
630
683
|
|
|
@@ -723,10 +776,6 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
723
776
|
// pay 3-4s on cold open instead of 400ms.
|
|
724
777
|
try {
|
|
725
778
|
db.exec(`PRAGMA analysis_limit = 1000; PRAGMA optimize;`)
|
|
726
|
-
// First-time databases won't have sqlite_stat1 until we run a
|
|
727
|
-
// real ANALYZE. Force it once if stats haven't been collected.
|
|
728
|
-
const hasStats = db.query(`SELECT 1 FROM sqlite_master WHERE name = 'sqlite_stat1' LIMIT 1`).get() !== null
|
|
729
|
-
if (!hasStats) db.exec(`ANALYZE;`)
|
|
730
779
|
} catch {
|
|
731
780
|
// ANALYZE / optimize failures are never fatal — queries still work,
|
|
732
781
|
// they just run with default row estimates.
|
|
@@ -777,22 +826,19 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
777
826
|
)
|
|
778
827
|
`)
|
|
779
828
|
|
|
780
|
-
const rebuildTraceSummaries = db.query(`
|
|
781
|
-
INSERT INTO trace_summaries (
|
|
782
|
-
trace_id, service_name, root_operation_name, started_at_ms, ended_at_ms, active_span_count, duration_ms, span_count, error_count
|
|
783
|
-
)
|
|
784
|
-
${TRACE_SUMMARY_SELECT_SQL}
|
|
785
|
-
GROUP BY trace_id
|
|
786
|
-
`)
|
|
787
|
-
|
|
788
829
|
const reconcileTraceSummaries = Effect.sync(() => {
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
830
|
+
const marker = db.query(`SELECT value FROM motel_maintenance WHERE key = 'trace_summary_cursor'`).get() as { value: string } | null
|
|
831
|
+
const cursor = Number(marker?.value ?? 0)
|
|
832
|
+
const rows = db.query(`SELECT rowid, trace_id FROM spans WHERE rowid > ? ORDER BY rowid ASC LIMIT ?`).all(cursor, config.otel.retentionTraceBatch) as Array<{ rowid: number; trace_id: string }>
|
|
833
|
+
if (rows.length === 0) {
|
|
834
|
+
db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('trace_summary_cursor', '0')`).run()
|
|
835
|
+
return
|
|
795
836
|
}
|
|
837
|
+
const transaction = db.transaction(() => {
|
|
838
|
+
for (const traceId of new Set(rows.map((row) => row.trace_id))) upsertTraceSummary.run(traceId)
|
|
839
|
+
db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('trace_summary_cursor', ?)`).run(String(rows.at(-1)!.rowid))
|
|
840
|
+
})
|
|
841
|
+
transaction()
|
|
796
842
|
})
|
|
797
843
|
|
|
798
844
|
const deleteSpanAttributes = db.query(`DELETE FROM span_attributes WHERE trace_id = ? AND span_id = ?`)
|
|
@@ -876,6 +922,61 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
876
922
|
|
|
877
923
|
const maxDbSizeBytes = config.otel.maxDbSizeMb * 1024 * 1024
|
|
878
924
|
|
|
925
|
+
// Freelist-ratio thresholds for the adaptive reclaim loop. Below the
|
|
926
|
+
// LOW threshold there's nothing worth doing; above HIGH we are in the
|
|
927
|
+
// 17GB-DB-with-10GB-freelist failure mode and need to reclaim aggressively
|
|
928
|
+
// even if it costs writer-lock time.
|
|
929
|
+
const FREELIST_LOW_RATIO = 0.05
|
|
930
|
+
const FREELIST_MID_RATIO = 0.20
|
|
931
|
+
const FREELIST_HIGH_RATIO = 0.50
|
|
932
|
+
const VACUUM_PAGES_NORMAL = 2000 // ~8MB/pass
|
|
933
|
+
const VACUUM_PAGES_BUSY = 20000 // ~80MB/pass — used when freelist > 20%
|
|
934
|
+
const VACUUM_PAGES_PANIC = 50000 // ~200MB/pass — only when ratio > 50%
|
|
935
|
+
|
|
936
|
+
const ftsTableNames = ["span_attr_fts", "log_body_fts", "span_operation_fts"] as const
|
|
937
|
+
|
|
938
|
+
/**
 * Runs one bounded, online FTS5 merge step on every full-text table named in
 * `ftsTableNames`.
 *
 * @param pages Approximate page budget for the merge work done per table in
 *   this call; passed to the FTS5 `merge` command as its `rank` argument.
 */
const incrementalFtsMerge = (pages: number) => {
  // FTS5 segment merges drop tombstone rows that DELETE leaves behind.
  // Without periodic merges, deleted FTS rows stay on disk indefinitely
  // — a major source of freelist pages on a heavy-deletion workload.
  //
  // FTS5 takes the merge budget through the hidden `rank` column:
  //   INSERT INTO ft(ft, rank) VALUES('merge', N)
  // The FTS3/FTS4 spelling (`VALUES('merge=N')`) is not an FTS5 command; it
  // raises an error that the catch below would swallow, so no merge would
  // ever run.
  for (const name of ftsTableNames) {
    try {
      db.query(`INSERT INTO ${name}(${name}, rank) VALUES ('merge', ?)`).run(pages)
    } catch {
      // The statement throws when the table is missing; swallow it because
      // not every DB has every FTS table (table absent or older schema).
    }
  }
}
|
|
949
|
+
|
|
950
|
+
const reclaimSpace = Effect.fn("motel/TelemetryStore.reclaimSpace")(function* () {
|
|
951
|
+
yield* Effect.sync(() => {
|
|
952
|
+
const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
|
|
953
|
+
const freePages = (db.query(`PRAGMA freelist_count`).get() as { freelist_count: number }).freelist_count
|
|
954
|
+
if (pageCount === 0) return
|
|
955
|
+
const ratio = freePages / pageCount
|
|
956
|
+
if (ratio < FREELIST_LOW_RATIO) return
|
|
957
|
+
|
|
958
|
+
// Adaptive vacuum sizing — fixed 2000 pages/min could not keep
|
|
959
|
+
// up with sustained deletions, leaking 10GB of freelist over
|
|
960
|
+
// time. Scale the per-pass work to the size of the backlog so
|
|
961
|
+
// we stay roughly proportional to the deficit.
|
|
962
|
+
const pages =
|
|
963
|
+
ratio >= FREELIST_HIGH_RATIO ? VACUUM_PAGES_PANIC :
|
|
964
|
+
ratio >= FREELIST_MID_RATIO ? VACUUM_PAGES_BUSY :
|
|
965
|
+
VACUUM_PAGES_NORMAL
|
|
966
|
+
|
|
967
|
+
try { db.exec(`PRAGMA incremental_vacuum(${pages});`) } catch { /* ignore */ }
|
|
968
|
+
|
|
969
|
+
// In WAL mode incremental_vacuum only moves pages — the file
|
|
970
|
+
// shrinks on the next checkpoint. PASSIVE silently skips when
|
|
971
|
+
// readers are active (the failure mode known as checkpoint
|
|
972
|
+
// starvation). Use RESTART normally and
|
|
973
|
+
// TRUNCATE in panic mode to physically shrink the WAL when it
|
|
974
|
+
// has grown.
|
|
975
|
+
const mode = ratio >= FREELIST_HIGH_RATIO ? "TRUNCATE" : "RESTART"
|
|
976
|
+
try { db.exec(`PRAGMA wal_checkpoint(${mode});`) } catch { /* ignore */ }
|
|
977
|
+
})
|
|
978
|
+
})
|
|
979
|
+
|
|
879
980
|
const cleanupExpired = Effect.fn("motel/TelemetryStore.cleanupExpired")(function* () {
|
|
880
981
|
const now = yield* Clock.currentTimeMillis
|
|
881
982
|
|
|
@@ -891,12 +992,12 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
891
992
|
|
|
892
993
|
// Time-based: completed traces whose last span ended before cutoff.
|
|
893
994
|
const timeExpired = db.query(
|
|
894
|
-
`SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ?`,
|
|
895
|
-
).all(cutoff) as readonly { trace_id: string }[]
|
|
995
|
+
`SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ? ORDER BY ended_at_ms ASC LIMIT ?`,
|
|
996
|
+
).all(cutoff, config.otel.retentionTraceBatch) as readonly { trace_id: string }[]
|
|
896
997
|
for (const row of timeExpired) toEvict.add(row.trace_id)
|
|
897
998
|
|
|
898
|
-
// Size-based: if actual data exceeds
|
|
899
|
-
//
|
|
999
|
+
// Size-based: if actual data exceeds the target, drop one bounded
|
|
1000
|
+
// batch of the oldest completed traces. `(page_count - freelist_count)`
|
|
900
1001
|
// ignores freed-but-not-vacuumed pages so a large freelist doesn't
|
|
901
1002
|
// trigger a deletion death spiral.
|
|
902
1003
|
const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
|
|
@@ -904,22 +1005,22 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
904
1005
|
const pageSize = (db.query(`PRAGMA page_size`).get() as { page_size: number }).page_size
|
|
905
1006
|
const dbSize = (pageCount - freePages) * pageSize
|
|
906
1007
|
if (dbSize > maxDbSizeBytes) {
|
|
907
|
-
const completedCount = (db.query(
|
|
908
|
-
`SELECT COUNT(*) AS c FROM trace_summaries WHERE active_span_count = 0`,
|
|
909
|
-
).get() as { c: number }).c
|
|
910
|
-
const traceCutCount = Math.max(1, Math.floor(completedCount * 0.2))
|
|
911
1008
|
const oldest = db.query(
|
|
912
1009
|
`SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 ORDER BY started_at_ms ASC LIMIT ?`,
|
|
913
|
-
).all(
|
|
1010
|
+
).all(config.otel.retentionTraceBatch) as readonly { trace_id: string }[]
|
|
914
1011
|
// Set.add dedupes overlap with the time-expired batch above.
|
|
915
1012
|
for (const row of oldest) toEvict.add(row.trace_id)
|
|
916
1013
|
}
|
|
917
1014
|
|
|
918
|
-
//
|
|
919
|
-
//
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
1015
|
+
// Logs have their own retention boundary. A correlated log may refer
|
|
1016
|
+
// to a trace that was sampled elsewhere or never reached Motel, so
|
|
1017
|
+
// tying log eviction to trace_summaries lets those rows grow forever.
|
|
1018
|
+
const expiredLogs = db.query(`DELETE FROM logs WHERE id IN (SELECT id FROM logs WHERE timestamp_ms < ? ORDER BY timestamp_ms ASC LIMIT ?)`).run(cutoff, config.otel.retentionLogBatch)
|
|
1019
|
+
let deletedLogs = Number(expiredLogs.changes) > 0
|
|
1020
|
+
if (dbSize > maxDbSizeBytes) {
|
|
1021
|
+
const oversizedLogs = db.query(`DELETE FROM logs WHERE id IN (SELECT id FROM logs ORDER BY timestamp_ms ASC LIMIT ?)`).run(config.otel.retentionLogBatch)
|
|
1022
|
+
deletedLogs = deletedLogs || Number(oversizedLogs.changes) > 0
|
|
1023
|
+
}
|
|
923
1024
|
|
|
924
1025
|
// Batch the trace-id list so the IN placeholders stay under
|
|
925
1026
|
// SQLite's default limit (~999). Each batch wipes every row
|
|
@@ -942,48 +1043,54 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
942
1043
|
|
|
943
1044
|
// Log-side orphans (log_attributes + FTS) are keyed by log.id,
|
|
944
1045
|
// so prune what no longer has a parent log row.
|
|
945
|
-
db.query(`DELETE FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id)`).run()
|
|
1046
|
+
const orphanAttributes = db.query(`DELETE FROM log_attributes WHERE rowid IN (SELECT log_attributes.rowid FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id) LIMIT ?)`).run(config.otel.retentionLogBatch)
|
|
1047
|
+
let deletedOrphans = Number(orphanAttributes.changes) > 0
|
|
946
1048
|
try {
|
|
947
|
-
db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
|
|
1049
|
+
const orphanFts = db.query(`DELETE FROM log_body_fts WHERE rowid IN (SELECT rowid FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER)) LIMIT ?)`).run(config.otel.retentionLogBatch)
|
|
1050
|
+
deletedOrphans = deletedOrphans || Number(orphanFts.changes) > 0
|
|
948
1051
|
} catch {
|
|
949
1052
|
// FTS table may not exist on old DBs.
|
|
950
1053
|
}
|
|
951
1054
|
|
|
952
|
-
//
|
|
953
|
-
//
|
|
954
|
-
//
|
|
955
|
-
//
|
|
956
|
-
//
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
//
|
|
963
|
-
//
|
|
964
|
-
//
|
|
965
|
-
//
|
|
966
|
-
|
|
1055
|
+
// Checkpoint after a big delete pass so the freed pages land
|
|
1056
|
+
// in the main DB file and become eligible for incremental
|
|
1057
|
+
// vacuum. Use RESTART (not PASSIVE): PASSIVE silently no-ops
|
|
1058
|
+
// when readers are active, which is the documented mechanism
|
|
1059
|
+
// behind WAL/freelist starvation when ingest is busy.
|
|
1060
|
+
if (toEvict.size === 0 && !deletedLogs && !deletedOrphans) return
|
|
1061
|
+
try { db.exec(`PRAGMA wal_checkpoint(RESTART);`) } catch { /* ignore */ }
|
|
1062
|
+
|
|
1063
|
+
// Incremental FTS5 merge — DELETE on an FTS5-indexed row
|
|
1064
|
+
// leaves a tombstone in the segment tree that only `merge`
|
|
1065
|
+
// reclaims. Skipping this is the second compounding cause
|
|
1066
|
+
// (after fixed-size vacuum) of the slow freelist accretion
|
|
1067
|
+
// that took the DB to 17GB. 100 pages of merge work per
|
|
1068
|
+
// retention tick is bounded and runs in milliseconds.
|
|
1069
|
+
incrementalFtsMerge(100)
|
|
1070
|
+
|
|
1071
|
+
// Actual page reclamation lives in `reclaimSpace`, which
|
|
1072
|
+
// runs on its own faster cadence so the file shrinks even
|
|
1073
|
+
// when no traces are evicted in a given retention tick (e.g.
|
|
1074
|
+
// after a large historical eviction has already happened).
|
|
967
1075
|
})
|
|
968
1076
|
})
|
|
969
1077
|
|
|
970
|
-
// Retention only runs in
|
|
971
|
-
//
|
|
972
|
-
// competing for the write lock with overlapping DELETE passes.
|
|
1078
|
+
// Retention only runs in the ingest worker so maintenance never blocks
|
|
1079
|
+
// the HTTP event loop and no second writer duplicates cleanup work.
|
|
973
1080
|
if (opts.runRetention) {
|
|
974
|
-
//
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
//
|
|
981
|
-
//
|
|
982
|
-
//
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
//
|
|
986
|
-
yield* Effect.forkScoped(Effect.repeat(
|
|
1081
|
+
// Cleanup runs on the telemetry worker, never the HTTP event loop.
|
|
1082
|
+
yield* Effect.forkScoped(Effect.repeat(
|
|
1083
|
+
Effect.andThen(reconcileTraceSummaries, cleanupExpired()).pipe(Effect.catchCause((cause) => Effect.logWarning(`motel: maintenance pass failed: ${Cause.pretty(cause)}`))),
|
|
1084
|
+
Schedule.spaced(`${config.otel.retentionIntervalSeconds} seconds`),
|
|
1085
|
+
))
|
|
1086
|
+
|
|
1087
|
+
// Page reclamation runs on a separate, faster cadence (10s) and
|
|
1088
|
+
// is independent of the eviction loop. The reason: a single sweep
|
|
1089
|
+
// at 60s intervals can move only ~8MB of pages before the next
|
|
1090
|
+
// burst of deletions grows the freelist again. Decoupling lets us
|
|
1091
|
+
// catch up adaptively (see VACUUM_PAGES_BUSY/PANIC) without
|
|
1092
|
+
// changing the cost of the heavier delete sweep.
|
|
1093
|
+
yield* Effect.forkScoped(Effect.repeat(reclaimSpace(), Schedule.spaced("10 seconds")))
|
|
987
1094
|
|
|
988
1095
|
// Periodically refresh query planner stats. `PRAGMA optimize` is a
|
|
989
1096
|
// no-op when nothing has changed, so this is essentially free on idle
|
|
@@ -996,35 +1103,48 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
996
1103
|
yield* Effect.forkScoped(Effect.repeat(refreshPlannerStats, Schedule.spaced("15 minutes")))
|
|
997
1104
|
}
|
|
998
1105
|
|
|
999
|
-
//
|
|
1000
|
-
//
|
|
1001
|
-
// Runs forked so server startup isn't blocked; queries hitting the
|
|
1002
|
-
// FTS will just return empty until the fill lands. On a 2 GB DB with
|
|
1003
|
-
// ~400 matching rows this takes ~3-8 seconds. Writer-only because
|
|
1004
|
-
// it does INSERT INTO ... — readonly connections would error.
|
|
1106
|
+
// Incrementally rebuild historical AI attributes in bounded batches.
|
|
1107
|
+
// Queries fall back to LIKE until the persistent marker is complete.
|
|
1005
1108
|
if (hasAttrFts && !opts.readonly) {
|
|
1006
|
-
const
|
|
1109
|
+
const backfillAttrFtsBatch = Effect.sync(() => {
|
|
1007
1110
|
try {
|
|
1008
|
-
const ftsCount = (db.query(`SELECT COUNT(*) AS c FROM span_attr_fts`).get() as { c: number }).c
|
|
1009
|
-
if (ftsCount > 0) return
|
|
1010
1111
|
const keyList = AI_FTS_KEYS.map((k) => `'${k.replace(/'/g, "''")}'`).join(", ")
|
|
1011
|
-
const
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
INSERT INTO span_attr_fts(
|
|
1020
|
-
|
|
1021
|
-
|
|
1112
|
+
const marker = db.query(`SELECT value FROM motel_maintenance WHERE key = 'span_attr_fts_v1'`).get() as { value: string } | null
|
|
1113
|
+
if (marker?.value === "complete") return false
|
|
1114
|
+
let cursor = 0
|
|
1115
|
+
let maxRowId = 0
|
|
1116
|
+
if (marker) {
|
|
1117
|
+
[cursor, maxRowId] = marker.value.split(":").map(Number)
|
|
1118
|
+
} else {
|
|
1119
|
+
maxRowId = (db.query(`SELECT COALESCE(MAX(rowid), 0) AS value FROM span_attributes`).get() as { value: number }).value
|
|
1120
|
+
db.query(`INSERT INTO span_attr_fts(span_attr_fts) VALUES ('delete-all')`).run()
|
|
1121
|
+
db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('span_attr_fts_v1', ?)`).run(`0:${maxRowId}`)
|
|
1122
|
+
}
|
|
1123
|
+
const rows = db.query(`SELECT rowid, value FROM span_attributes WHERE key IN (${keyList}) AND rowid > ? AND rowid <= ? ORDER BY rowid ASC LIMIT 500`).all(cursor, maxRowId) as Array<{ rowid: number; value: string }>
|
|
1124
|
+
if (rows.length === 0) {
|
|
1125
|
+
db.query(`UPDATE motel_maintenance SET value = 'complete' WHERE key = 'span_attr_fts_v1'`).run()
|
|
1126
|
+
hasAttrFts = true
|
|
1127
|
+
return false
|
|
1128
|
+
}
|
|
1129
|
+
const insert = db.query(`INSERT INTO span_attr_fts(rowid, value) VALUES (?, ?)`)
|
|
1130
|
+
const transaction = db.transaction(() => {
|
|
1131
|
+
for (const row of rows) insert.run(row.rowid, row.value)
|
|
1132
|
+
db.query(`UPDATE motel_maintenance SET value = ? WHERE key = 'span_attr_fts_v1'`).run(`${rows.at(-1)!.rowid}:${maxRowId}`)
|
|
1133
|
+
})
|
|
1134
|
+
transaction()
|
|
1135
|
+
return true
|
|
1022
1136
|
} catch {
|
|
1023
1137
|
// Backfill failure is never fatal — new ingests still
|
|
1024
1138
|
// populate FTS via the trigger, and queries fall back to
|
|
1025
1139
|
// LIKE when FTS lookups return empty.
|
|
1140
|
+
return true
|
|
1026
1141
|
}
|
|
1027
1142
|
})
|
|
1143
|
+
const backfillAttrFts: Effect.Effect<void> = Effect.suspend(() =>
|
|
1144
|
+
Effect.flatMap(backfillAttrFtsBatch, (pending) =>
|
|
1145
|
+
pending ? Effect.andThen(Effect.sleep("100 millis"), backfillAttrFts) : Effect.void,
|
|
1146
|
+
),
|
|
1147
|
+
)
|
|
1028
1148
|
yield* Effect.forkScoped(backfillAttrFts)
|
|
1029
1149
|
}
|
|
1030
1150
|
|
|
@@ -1042,6 +1162,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1042
1162
|
const scopeName = scopeSpans.scope?.name ?? null
|
|
1043
1163
|
|
|
1044
1164
|
for (const span of scopeSpans.spans ?? []) {
|
|
1165
|
+
const traceId = normalizeOtlpBinaryId(span.traceId, 16)
|
|
1166
|
+
const spanId = normalizeOtlpBinaryId(span.spanId, 8)
|
|
1167
|
+
if (!traceId || !spanId) continue
|
|
1168
|
+
const parentSpanId = normalizeOtlpBinaryId(span.parentSpanId, 8)
|
|
1045
1169
|
const spanAttributes = attributeMap(span.attributes)
|
|
1046
1170
|
const mergedAttributes = { ...resourceAttributes, ...spanAttributes }
|
|
1047
1171
|
const startTimeMs = nanosToMilliseconds(span.startTimeUnixNano)
|
|
@@ -1053,9 +1177,9 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1053
1177
|
}))
|
|
1054
1178
|
|
|
1055
1179
|
insertSpan.run(
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1180
|
+
traceId,
|
|
1181
|
+
spanId,
|
|
1182
|
+
parentSpanId,
|
|
1059
1183
|
serviceName,
|
|
1060
1184
|
scopeName,
|
|
1061
1185
|
span.name ?? "unknown",
|
|
@@ -1068,10 +1192,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1068
1192
|
JSON.stringify(resourceAttributes),
|
|
1069
1193
|
JSON.stringify(events),
|
|
1070
1194
|
)
|
|
1071
|
-
deleteSpanAttributes.run(
|
|
1072
|
-
insertSpanAttributesMany(
|
|
1073
|
-
touchedOperations.push([
|
|
1074
|
-
touchedTraceIds.add(
|
|
1195
|
+
deleteSpanAttributes.run(traceId, spanId)
|
|
1196
|
+
insertSpanAttributesMany(traceId, spanId, mergedAttributes)
|
|
1197
|
+
touchedOperations.push([traceId, spanId, span.name ?? "unknown"])
|
|
1198
|
+
touchedTraceIds.add(traceId)
|
|
1075
1199
|
insertedSpans += 1
|
|
1076
1200
|
}
|
|
1077
1201
|
}
|
|
@@ -1111,9 +1235,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1111
1235
|
const mergedAttributes = { ...resourceAttributes, ...attributes }
|
|
1112
1236
|
const timestampMs = nanosToMilliseconds(record.timeUnixNano ?? record.observedTimeUnixNano)
|
|
1113
1237
|
const body = stringifyValue(parseAnyValue(record.body))
|
|
1238
|
+
const rawTraceId = attributes.traceId || attributes.trace_id || record.traceId || null
|
|
1239
|
+
const rawSpanId = attributes.spanId || attributes.span_id || record.spanId || null
|
|
1114
1240
|
const result = insertLog.run(
|
|
1115
|
-
|
|
1116
|
-
|
|
1241
|
+
normalizeOtlpBinaryId(rawTraceId, 16),
|
|
1242
|
+
normalizeOtlpBinaryId(rawSpanId, 8),
|
|
1117
1243
|
serviceName,
|
|
1118
1244
|
scopeName,
|
|
1119
1245
|
record.severityText ?? "INFO",
|
|
@@ -1145,9 +1271,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1145
1271
|
})
|
|
1146
1272
|
|
|
1147
1273
|
const listServices = Effect.fn("motel/TelemetryStore.listServices")(function* () {
|
|
1148
|
-
|
|
1149
1274
|
const cutoff = (yield* Clock.currentTimeMillis) - config.otel.traceLookbackMinutes * 60 * 1000
|
|
1150
|
-
|
|
1275
|
+
const services = yield* Effect.sync(() => {
|
|
1276
|
+
// Discover recent activity from span rows, not trace starts: a
|
|
1277
|
+
// long-running trace can emit a current child after its root ages
|
|
1278
|
+
// outside the lookback window.
|
|
1151
1279
|
const rows = db.query(`
|
|
1152
1280
|
SELECT service_name FROM spans WHERE start_time_ms >= ?
|
|
1153
1281
|
UNION
|
|
@@ -1156,6 +1284,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1156
1284
|
`).all(cutoff, cutoff) as Array<{ service_name: string }>
|
|
1157
1285
|
return rows.map((row) => row.service_name)
|
|
1158
1286
|
})
|
|
1287
|
+
yield* Effect.annotateCurrentSpan("trace.service_count", services.length)
|
|
1288
|
+
return services
|
|
1159
1289
|
})()
|
|
1160
1290
|
|
|
1161
1291
|
const loadTracesByIds = (traceIds: readonly string[]) => {
|
|
@@ -1181,15 +1311,19 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1181
1311
|
}
|
|
1182
1312
|
|
|
1183
1313
|
const listRecentTraces = Effect.fn("motel/TelemetryStore.listRecentTraces")(function* (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) {
|
|
1314
|
+
yield* Effect.annotateCurrentSpan("trace.service_name", serviceName ?? "all")
|
|
1184
1315
|
const summaries = yield* listTraceSummaries(serviceName, options)
|
|
1185
|
-
|
|
1316
|
+
const traces = yield* Effect.sync(() => loadTracesByIds(summaries.map((summary) => summary.traceId)))
|
|
1317
|
+
yield* Effect.annotateCurrentSpan("trace.result_count", traces.length)
|
|
1318
|
+
return traces
|
|
1186
1319
|
})
|
|
1187
1320
|
|
|
1188
1321
|
const listTraceSummaries = Effect.fn("motel/TelemetryStore.listTraceSummaries")(function* (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) {
|
|
1322
|
+
yield* Effect.annotateCurrentSpan("trace.service_name", serviceName ?? "all")
|
|
1189
1323
|
const cutoff = (yield* Clock.currentTimeMillis) - (options?.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
|
|
1190
1324
|
const limit = options?.limit ?? config.otel.traceFetchLimit
|
|
1191
1325
|
|
|
1192
|
-
|
|
1326
|
+
const summaries = yield* Effect.sync(() => {
|
|
1193
1327
|
const clauses = ["started_at_ms >= ?"]
|
|
1194
1328
|
const params: Array<string | number> = [cutoff]
|
|
1195
1329
|
|
|
@@ -1211,6 +1345,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1211
1345
|
LIMIT ?
|
|
1212
1346
|
`).all(...params, limit) as TraceSummaryRow[]
|
|
1213
1347
|
}).pipe(Effect.map((rows) => rows.map(parseSummaryRow)))
|
|
1348
|
+
yield* Effect.annotateCurrentSpan("trace.result_count", summaries.length)
|
|
1349
|
+
return summaries
|
|
1214
1350
|
})
|
|
1215
1351
|
|
|
1216
1352
|
const searchTraceSummaries = Effect.fn("motel/TelemetryStore.searchTraceSummaries")(function* (input: TraceSearch) {
|
|
@@ -1289,6 +1425,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1289
1425
|
})
|
|
1290
1426
|
|
|
1291
1427
|
const getTrace = Effect.fn("motel/TelemetryStore.getTrace")(function* (traceId: string) {
|
|
1428
|
+
yield* Effect.annotateCurrentSpan("trace.trace_id", traceId)
|
|
1292
1429
|
return yield* Effect.sync(() => {
|
|
1293
1430
|
const rows = db.query(`
|
|
1294
1431
|
SELECT * FROM spans WHERE trace_id = ? ORDER BY start_time_ms ASC
|
|
@@ -1298,6 +1435,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1298
1435
|
})
|
|
1299
1436
|
|
|
1300
1437
|
const getSpan = Effect.fn("motel/TelemetryStore.getSpan")(function* (spanId: string) {
|
|
1438
|
+
yield* Effect.annotateCurrentSpan("trace.span_id", spanId)
|
|
1301
1439
|
return yield* Effect.sync(() => {
|
|
1302
1440
|
// Fetch only the target span row (uses idx_spans_span_id)
|
|
1303
1441
|
const spanRow = db.query(`SELECT * FROM spans WHERE span_id = ? LIMIT 1`).get(spanId) as SpanRow | null
|
|
@@ -1305,7 +1443,28 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1305
1443
|
|
|
1306
1444
|
const traceId = spanRow.trace_id
|
|
1307
1445
|
|
|
1308
|
-
//
|
|
1446
|
+
// Walk the parent chain in one recursive CTE instead of one query
|
|
1447
|
+
// per hop. Root context remains the earliest root in the trace,
|
|
1448
|
+
// matching full trace hydration even when input has multiple roots.
|
|
1449
|
+
let parentOperationName: string | null = null
|
|
1450
|
+
let depth = 0
|
|
1451
|
+
if (spanRow.parent_span_id) {
|
|
1452
|
+
const ancestors = db.query(`
|
|
1453
|
+
WITH RECURSIVE ancestors(span_id, parent_span_id, operation_name, hop) AS (
|
|
1454
|
+
SELECT span_id, parent_span_id, operation_name, 1
|
|
1455
|
+
FROM spans WHERE trace_id = ? AND span_id = ?
|
|
1456
|
+
UNION ALL
|
|
1457
|
+
SELECT s.span_id, s.parent_span_id, s.operation_name, a.hop + 1
|
|
1458
|
+
FROM ancestors a
|
|
1459
|
+
JOIN spans s ON s.trace_id = ? AND s.span_id = a.parent_span_id
|
|
1460
|
+
)
|
|
1461
|
+
SELECT span_id, parent_span_id, operation_name, hop FROM ancestors ORDER BY hop ASC
|
|
1462
|
+
`).all(traceId, spanRow.parent_span_id, traceId) as Array<{ span_id: string; parent_span_id: string | null; operation_name: string; hop: number }>
|
|
1463
|
+
|
|
1464
|
+
parentOperationName = ancestors[0]?.operation_name ?? null
|
|
1465
|
+
depth = ancestors.length
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1309
1468
|
const rootRow = db.query(`
|
|
1310
1469
|
SELECT operation_name FROM spans
|
|
1311
1470
|
WHERE trace_id = ? AND parent_span_id IS NULL
|
|
@@ -1313,28 +1472,6 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1313
1472
|
`).get(traceId) as { operation_name: string } | null
|
|
1314
1473
|
const rootOperationName = rootRow?.operation_name ?? "unknown"
|
|
1315
1474
|
|
|
1316
|
-
// Get parent operation name if span has a parent (PK lookup)
|
|
1317
|
-
let parentOperationName: string | null = null
|
|
1318
|
-
if (spanRow.parent_span_id) {
|
|
1319
|
-
const parentRow = db.query(`
|
|
1320
|
-
SELECT operation_name FROM spans
|
|
1321
|
-
WHERE trace_id = ? AND span_id = ?
|
|
1322
|
-
`).get(traceId, spanRow.parent_span_id) as { operation_name: string } | null
|
|
1323
|
-
parentOperationName = parentRow?.operation_name ?? null
|
|
1324
|
-
}
|
|
1325
|
-
|
|
1326
|
-
// Compute depth by walking up parent chain (typically 3-5 hops)
|
|
1327
|
-
let depth = 0
|
|
1328
|
-
let currentParentId = spanRow.parent_span_id
|
|
1329
|
-
while (currentParentId) {
|
|
1330
|
-
const parentRow = db.query(`
|
|
1331
|
-
SELECT parent_span_id FROM spans WHERE trace_id = ? AND span_id = ?
|
|
1332
|
-
`).get(traceId, currentParentId) as { parent_span_id: string | null } | null
|
|
1333
|
-
if (!parentRow) break
|
|
1334
|
-
depth++
|
|
1335
|
-
currentParentId = parentRow.parent_span_id
|
|
1336
|
-
}
|
|
1337
|
-
|
|
1338
1475
|
const parsed = parseSpanRow(spanRow)
|
|
1339
1476
|
return {
|
|
1340
1477
|
traceId,
|
|
@@ -1356,9 +1493,22 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1356
1493
|
const cutoff = (yield* Clock.currentTimeMillis) - (input.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
|
|
1357
1494
|
const limit = input.limit ?? 100
|
|
1358
1495
|
const hasContainsFilters = Object.keys(input.attributeContainsFilters ?? {}).length > 0
|
|
1359
|
-
|
|
1496
|
+
// Only over-fetch when post-filtering will discard rows. Without
|
|
1497
|
+
// a parentOperation filter the SQL `LIMIT` already returns the
|
|
1498
|
+
// final set, and over-fetching just makes us parse JSON blobs
|
|
1499
|
+
// for rows we'll throw away.
|
|
1500
|
+
const needsPostFilter = !!input.parentOperation
|
|
1501
|
+
const candidateLimit = !needsPostFilter
|
|
1502
|
+
? limit
|
|
1503
|
+
: hasContainsFilters
|
|
1504
|
+
? Math.max(limit * 20, 500)
|
|
1505
|
+
: Math.max(limit * 10, 200)
|
|
1360
1506
|
|
|
1361
1507
|
return yield* Effect.sync(() => {
|
|
1508
|
+
// First pass: fetch only the columns needed to filter and
|
|
1509
|
+
// to drive the parent-context lookup. Parsing the heavy
|
|
1510
|
+
// `*_json` blobs is deferred until after we've sliced down
|
|
1511
|
+
// to the final `limit`.
|
|
1362
1512
|
let fromSql = "FROM spans AS s"
|
|
1363
1513
|
const joinParams: Array<string | number> = []
|
|
1364
1514
|
const clauses: string[] = ["s.start_time_ms >= ?"]
|
|
@@ -1399,60 +1549,47 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1399
1549
|
params.push(...containsAttrMatch.params)
|
|
1400
1550
|
}
|
|
1401
1551
|
|
|
1402
|
-
const
|
|
1403
|
-
SELECT
|
|
1552
|
+
const candidateRows = db.query(`
|
|
1553
|
+
SELECT s.trace_id, s.span_id, s.parent_span_id, s.operation_name, s.start_time_ms
|
|
1404
1554
|
${fromSql}
|
|
1405
1555
|
WHERE ${clauses.join(" AND ")}
|
|
1406
1556
|
ORDER BY s.start_time_ms DESC
|
|
1407
1557
|
LIMIT ?
|
|
1408
|
-
`).all(...joinParams, ...params, candidateLimit) as
|
|
1558
|
+
`).all(...joinParams, ...params, candidateLimit) as Array<{ trace_id: string; span_id: string; parent_span_id: string | null; operation_name: string; start_time_ms: number }>
|
|
1409
1559
|
|
|
1410
|
-
const traceIds = [...new Set(
|
|
1560
|
+
const traceIds = [...new Set(candidateRows.map((row) => row.trace_id))]
|
|
1411
1561
|
if (traceIds.length === 0) return [] as readonly SpanItem[]
|
|
1412
1562
|
|
|
1413
1563
|
const keyOf = (traceId: string, spanId: string) => `${traceId}:${spanId}`
|
|
1414
1564
|
const spanContextById = new Map<string, { readonly parentSpanId: string | null; readonly operationName: string }>()
|
|
1415
|
-
for (const row of rows) {
|
|
1416
|
-
spanContextById.set(keyOf(row.trace_id, row.span_id), {
|
|
1417
|
-
parentSpanId: row.parent_span_id,
|
|
1418
|
-
operationName: row.operation_name,
|
|
1419
|
-
})
|
|
1420
|
-
}
|
|
1421
1565
|
|
|
1566
|
+
// Bulk-prefetch parent metadata for every span in every trace
|
|
1567
|
+
// touched by the candidate set. One indexed scan per trace_id
|
|
1568
|
+
// is much cheaper than a per-span lookup loop while computing
|
|
1569
|
+
// depth, and we get the trace-root lookup in the same pass.
|
|
1422
1570
|
const placeholders = traceIds.map(() => "?").join(", ")
|
|
1423
|
-
const
|
|
1424
|
-
SELECT trace_id, operation_name
|
|
1571
|
+
const allSpanRows = db.query(`
|
|
1572
|
+
SELECT trace_id, span_id, parent_span_id, operation_name, start_time_ms
|
|
1425
1573
|
FROM spans
|
|
1426
|
-
WHERE trace_id IN (${placeholders})
|
|
1427
|
-
|
|
1428
|
-
`).all(...traceIds) as Array<{ trace_id: string; operation_name: string }>
|
|
1429
|
-
const rootOperationByTraceId = new Map<string, string>()
|
|
1430
|
-
for (const row of rootRows) {
|
|
1431
|
-
if (!rootOperationByTraceId.has(row.trace_id)) {
|
|
1432
|
-
rootOperationByTraceId.set(row.trace_id, row.operation_name)
|
|
1433
|
-
}
|
|
1434
|
-
}
|
|
1574
|
+
WHERE trace_id IN (${placeholders})
|
|
1575
|
+
`).all(...traceIds) as Array<{ trace_id: string; span_id: string; parent_span_id: string | null; operation_name: string; start_time_ms: number }>
|
|
1435
1576
|
|
|
1436
|
-
const
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
WHERE trace_id = ? AND span_id = ?
|
|
1440
|
-
`)
|
|
1441
|
-
|
|
1442
|
-
const getSpanContext = (traceId: string, spanId: string) => {
|
|
1443
|
-
const key = keyOf(traceId, spanId)
|
|
1444
|
-
const cached = spanContextById.get(key)
|
|
1445
|
-
if (cached !== undefined) return cached
|
|
1446
|
-
const row = spanContextLookup.get(traceId, spanId) as { parent_span_id: string | null; operation_name: string } | null
|
|
1447
|
-
if (!row) return null
|
|
1448
|
-
const value = {
|
|
1577
|
+
const rootOperationByTraceId = new Map<string, { operationName: string; startTimeMs: number }>()
|
|
1578
|
+
for (const row of allSpanRows) {
|
|
1579
|
+
spanContextById.set(keyOf(row.trace_id, row.span_id), {
|
|
1449
1580
|
parentSpanId: row.parent_span_id,
|
|
1450
1581
|
operationName: row.operation_name,
|
|
1582
|
+
})
|
|
1583
|
+
if (row.parent_span_id === null) {
|
|
1584
|
+
const existing = rootOperationByTraceId.get(row.trace_id)
|
|
1585
|
+
if (!existing || row.start_time_ms < existing.startTimeMs) {
|
|
1586
|
+
rootOperationByTraceId.set(row.trace_id, { operationName: row.operation_name, startTimeMs: row.start_time_ms })
|
|
1587
|
+
}
|
|
1451
1588
|
}
|
|
1452
|
-
spanContextById.set(key, value)
|
|
1453
|
-
return value
|
|
1454
1589
|
}
|
|
1455
1590
|
|
|
1591
|
+
const getSpanContext = (traceId: string, spanId: string) => spanContextById.get(keyOf(traceId, spanId)) ?? null
|
|
1592
|
+
|
|
1456
1593
|
const depthById = new Map<string, number>()
|
|
1457
1594
|
const getDepth = (traceId: string, spanId: string, visiting = new Set<string>()): number => {
|
|
1458
1595
|
const key = keyOf(traceId, spanId)
|
|
@@ -1466,32 +1603,57 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1466
1603
|
return depth
|
|
1467
1604
|
}
|
|
1468
1605
|
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1606
|
+
// Apply parentOperation post-filter on the lite candidate set
|
|
1607
|
+
// (cheap — string compare against cached parent op) and then
|
|
1608
|
+
// slice down to the final result size before parsing any JSON.
|
|
1609
|
+
const parentOperationNeedle = input.parentOperation?.toLowerCase() ?? null
|
|
1610
|
+
const filteredLite: typeof candidateRows = []
|
|
1611
|
+
for (const row of candidateRows) {
|
|
1612
|
+
if (parentOperationNeedle) {
|
|
1613
|
+
const parent = row.parent_span_id ? getSpanContext(row.trace_id, row.parent_span_id) : null
|
|
1614
|
+
if (!parent?.operationName.toLowerCase().includes(parentOperationNeedle)) continue
|
|
1615
|
+
}
|
|
1616
|
+
filteredLite.push(row)
|
|
1617
|
+
if (filteredLite.length >= limit) break
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
if (filteredLite.length === 0) return [] as readonly SpanItem[]
|
|
1621
|
+
|
|
1622
|
+
// Hydrate only the kept rows: one batched fetch of the full
|
|
1623
|
+
// SpanRow (with resource_json / attributes_json / events_json)
|
|
1624
|
+
// using SQLite's row-value `IN` syntax, then parseSpanRow per
|
|
1625
|
+
// kept row. Result order follows `filteredLite` so the caller
|
|
1626
|
+
// sees the same ordering the candidate scan produced.
|
|
1627
|
+
const keptValues = filteredLite.map(() => "(?, ?)").join(", ")
|
|
1628
|
+
const fullRows = db.query(`
|
|
1629
|
+
SELECT * FROM spans WHERE (trace_id, span_id) IN (VALUES ${keptValues})
|
|
1630
|
+
`).all(...filteredLite.flatMap((row) => [row.trace_id, row.span_id])) as SpanRow[]
|
|
1631
|
+
const fullRowByKey = new Map<string, SpanRow>()
|
|
1632
|
+
for (const row of fullRows) {
|
|
1633
|
+
fullRowByKey.set(keyOf(row.trace_id, row.span_id), row)
|
|
1634
|
+
}
|
|
1635
|
+
|
|
1636
|
+
const items: SpanItem[] = []
|
|
1637
|
+
for (const lite of filteredLite) {
|
|
1638
|
+
const row = fullRowByKey.get(keyOf(lite.trace_id, lite.span_id))
|
|
1639
|
+
if (!row) continue
|
|
1640
|
+
const parentContext = row.parent_span_id ? getSpanContext(row.trace_id, row.parent_span_id) : null
|
|
1641
|
+
const parsedSpan = parseSpanRow(row)
|
|
1642
|
+
const span = {
|
|
1643
|
+
...parsedSpan,
|
|
1644
|
+
depth: getDepth(row.trace_id, row.span_id),
|
|
1645
|
+
warnings: row.parent_span_id && !parentContext
|
|
1646
|
+
? [`missing span ${row.parent_span_id} (1 child)`]
|
|
1647
|
+
: parsedSpan.warnings,
|
|
1648
|
+
}
|
|
1649
|
+
items.push({
|
|
1650
|
+
traceId: row.trace_id,
|
|
1651
|
+
rootOperationName: rootOperationByTraceId.get(row.trace_id)?.operationName ?? span.operationName,
|
|
1652
|
+
parentOperationName: parentContext?.operationName ?? null,
|
|
1653
|
+
span,
|
|
1493
1654
|
})
|
|
1494
|
-
|
|
1655
|
+
}
|
|
1656
|
+
return items
|
|
1495
1657
|
})
|
|
1496
1658
|
})
|
|
1497
1659
|
|
|
@@ -1789,7 +1951,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1789
1951
|
})
|
|
1790
1952
|
|
|
1791
1953
|
const listRecentLogs = Effect.fn("motel/TelemetryStore.listRecentLogs")(function* (serviceName: string) {
|
|
1792
|
-
|
|
1954
|
+
yield* Effect.annotateCurrentSpan("log.service_name", serviceName)
|
|
1955
|
+
const logs = yield* searchLogs({ serviceName, limit: config.otel.logFetchLimit })
|
|
1956
|
+
yield* Effect.annotateCurrentSpan("log.result_count", logs.length)
|
|
1957
|
+
return logs
|
|
1793
1958
|
})
|
|
1794
1959
|
|
|
1795
1960
|
const listFacets = Effect.fn("motel/TelemetryStore.listFacets")(function* (input: FacetSearch) {
|
|
@@ -1882,26 +2047,30 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1882
2047
|
// FACET_VALUE_MAX_LEN. For opencode this hides `ai.prompt`,
|
|
1883
2048
|
// `ai.prompt.messages`, and `ai.prompt.tools` — which are 1-6MB text
|
|
1884
2049
|
// blobs that you'd never want to filter by exact match anyway. The
|
|
1885
|
-
// WHERE clause lets SQLite skip reading those pages from disk.
|
|
1886
|
-
//
|
|
1887
|
-
//
|
|
1888
|
-
//
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
params
|
|
2050
|
+
// WHERE clause lets SQLite skip reading those pages from disk.
|
|
2051
|
+
// COUNT(DISTINCT ...) does its own per-group dedup via a temp B-tree,
|
|
2052
|
+
// so the outer query needs no DISTINCT subquery in front of it. We
|
|
2053
|
+
// pre-filter trace_ids through trace_summaries (an indexed lookup) so
|
|
2054
|
+
// the planner can use a SEMI JOIN against the small in-window set
|
|
2055
|
+
// instead of joining every span_attributes row to trace_summaries.
|
|
2056
|
+
const params: Array<string | number> = []
|
|
2057
|
+
let traceFilter: string
|
|
2058
|
+
if (input.serviceName) {
|
|
2059
|
+
traceFilter = `(SELECT trace_id FROM trace_summaries WHERE started_at_ms >= ? AND service_name = ?)`
|
|
2060
|
+
params.push(cutoff, input.serviceName)
|
|
2061
|
+
} else {
|
|
2062
|
+
traceFilter = `(SELECT trace_id FROM trace_summaries WHERE started_at_ms >= ?)`
|
|
2063
|
+
params.push(cutoff)
|
|
2064
|
+
}
|
|
2065
|
+
params.push(FACET_VALUE_MAX_LEN, limit)
|
|
1892
2066
|
const rows = db.query(`
|
|
1893
|
-
SELECT
|
|
1894
|
-
COUNT(DISTINCT
|
|
1895
|
-
COUNT(DISTINCT
|
|
1896
|
-
FROM
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
WHERE LENGTH(sa.value) < ?
|
|
1901
|
-
AND ts.started_at_ms >= ?
|
|
1902
|
-
${input.serviceName ? "AND ts.service_name = ?" : ""}
|
|
1903
|
-
) AS scoped
|
|
1904
|
-
GROUP BY scoped.key
|
|
2067
|
+
SELECT key AS value,
|
|
2068
|
+
COUNT(DISTINCT trace_id) AS count,
|
|
2069
|
+
COUNT(DISTINCT value) AS distinct_values
|
|
2070
|
+
FROM span_attributes
|
|
2071
|
+
WHERE trace_id IN ${traceFilter}
|
|
2072
|
+
AND LENGTH(value) < ?
|
|
2073
|
+
GROUP BY key
|
|
1905
2074
|
ORDER BY (CASE WHEN distinct_values = 1 THEN 1 ELSE 0 END) ASC,
|
|
1906
2075
|
distinct_values DESC,
|
|
1907
2076
|
count DESC,
|
|
@@ -1938,7 +2107,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
1938
2107
|
})
|
|
1939
2108
|
|
|
1940
2109
|
const listTraceLogs = Effect.fn("motel/TelemetryStore.listTraceLogs")(function* (traceId: string) {
|
|
1941
|
-
|
|
2110
|
+
yield* Effect.annotateCurrentSpan("log.trace_id", traceId)
|
|
2111
|
+
const logs = yield* searchLogs({ traceId, limit: config.otel.logFetchLimit })
|
|
2112
|
+
yield* Effect.annotateCurrentSpan("log.result_count", logs.length)
|
|
2113
|
+
return logs
|
|
1942
2114
|
})
|
|
1943
2115
|
|
|
1944
2116
|
// ---------------------------------------------------------------------------
|
|
@@ -2343,28 +2515,40 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
|
|
|
2343
2515
|
searchAiCalls,
|
|
2344
2516
|
getAiCall,
|
|
2345
2517
|
aiCallStats,
|
|
2518
|
+
runRetentionNow: cleanupExpired(),
|
|
2346
2519
|
})
|
|
2347
|
-
})
|
|
2348
|
-
|
|
2520
|
+
})
|
|
2521
|
+
|
|
2522
|
+
/** Compatibility factory for callers constructing a writer/query-capable store layer. */
|
|
2523
|
+
export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) =>
|
|
2524
|
+
Layer.effect(TelemetryStore, makeTelemetryStoreEffect(opts)).pipe(Layer.provide(BunFileSystem.layer))
|
|
2349
2525
|
|
|
2350
2526
|
/**
|
|
2351
|
-
* Default writer
|
|
2352
|
-
* migrations, FTS backfill, and the retention loop.
|
|
2527
|
+
* Default writer runtime used by tests and direct store consumers.
|
|
2353
2528
|
*/
|
|
2354
2529
|
export const TelemetryStoreLive = makeTelemetryStoreLayer({ readonly: false, runRetention: true })
|
|
2355
2530
|
|
|
2356
2531
|
/**
|
|
2357
|
-
*
|
|
2358
|
-
*
|
|
2359
|
-
* the same time (they'd just serialise behind the write lock and
|
|
2360
|
-
* duplicate work).
|
|
2532
|
+
* The ingest worker's writer. It is the managed daemon's sole owner of
|
|
2533
|
+
* schema migrations, FTS backfill, retention, and page reclamation.
|
|
2361
2534
|
*/
|
|
2362
|
-
export const TelemetryStoreWorkerLive =
|
|
2535
|
+
export const TelemetryStoreWorkerLive = TelemetryStoreLive
|
|
2363
2536
|
|
|
2364
2537
|
/**
|
|
2365
|
-
* Read-only instance for query-only processes (currently the TUI
|
|
2366
|
-
* Skips every DDL/DML statement at startup so
|
|
2367
|
-
* opened while a writer is mid-transaction
|
|
2368
|
-
*
|
|
2538
|
+
* Read-only instance for query-only processes (currently the TUI and
|
|
2539
|
+
* HTTP query handlers). Skips every DDL/DML statement at startup so
|
|
2540
|
+
* the connection can be opened while a writer is mid-transaction
|
|
2541
|
+
* without racing for the write lock. Provided as TelemetryStoreReadonly
|
|
2542
|
+
* — a distinct service identifier so it can coexist with the writer
|
|
2543
|
+
* TelemetryStore in the same runtime.
|
|
2369
2544
|
*/
|
|
2370
|
-
export const TelemetryStoreReadonlyLive =
|
|
2545
|
+
export const TelemetryStoreReadonlyLive = Layer.effect(TelemetryStoreReadonly, makeTelemetryStoreEffect({ readonly: true, runRetention: false })).pipe(Layer.provide(BunFileSystem.layer))
|
|
2546
|
+
|
|
2547
|
+
/** Query-worker reader that waits for the sole writer to finish schema bootstrap. */
|
|
2548
|
+
export const TelemetryStoreQueryWorkerLive = Layer.effect(
|
|
2549
|
+
TelemetryStoreReadonly,
|
|
2550
|
+
makeTelemetryStoreEffect({ readonly: true, runRetention: false }).pipe(
|
|
2551
|
+
Effect.map((store) => TelemetryStoreReadonly.of(store)),
|
|
2552
|
+
Effect.retry(Schedule.spaced("50 millis")),
|
|
2553
|
+
),
|
|
2554
|
+
).pipe(Layer.provide(BunFileSystem.layer))
|