baselineos 0.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/LICENSE +17 -0
  2. package/README.md +198 -0
  3. package/dist/__evals__/runner.d.ts +2 -0
  4. package/dist/__evals__/runner.js +14687 -0
  5. package/dist/__evals__/runner.js.map +1 -0
  6. package/dist/api/server.d.ts +21 -0
  7. package/dist/api/server.js +1007 -0
  8. package/dist/api/server.js.map +1 -0
  9. package/dist/cli/bin.d.ts +1 -0
  10. package/dist/cli/bin.js +8427 -0
  11. package/dist/cli/bin.js.map +1 -0
  12. package/dist/core/agent-bus.d.ts +110 -0
  13. package/dist/core/agent-bus.js +242 -0
  14. package/dist/core/agent-bus.js.map +1 -0
  15. package/dist/core/cache.d.ts +66 -0
  16. package/dist/core/cache.js +160 -0
  17. package/dist/core/cache.js.map +1 -0
  18. package/dist/core/config.d.ts +1002 -0
  19. package/dist/core/config.js +429 -0
  20. package/dist/core/config.js.map +1 -0
  21. package/dist/core/indexer.d.ts +152 -0
  22. package/dist/core/indexer.js +481 -0
  23. package/dist/core/indexer.js.map +1 -0
  24. package/dist/core/llm-tracer.d.ts +2 -0
  25. package/dist/core/llm-tracer.js +241 -0
  26. package/dist/core/llm-tracer.js.map +1 -0
  27. package/dist/core/memory.d.ts +86 -0
  28. package/dist/core/memory.js +346 -0
  29. package/dist/core/memory.js.map +1 -0
  30. package/dist/core/opa-client.d.ts +51 -0
  31. package/dist/core/opa-client.js +157 -0
  32. package/dist/core/opa-client.js.map +1 -0
  33. package/dist/core/opa-policy-gate.d.ts +133 -0
  34. package/dist/core/opa-policy-gate.js +454 -0
  35. package/dist/core/opa-policy-gate.js.map +1 -0
  36. package/dist/core/orchestrator.d.ts +14 -0
  37. package/dist/core/orchestrator.js +1297 -0
  38. package/dist/core/orchestrator.js.map +1 -0
  39. package/dist/core/pii-detector.d.ts +82 -0
  40. package/dist/core/pii-detector.js +126 -0
  41. package/dist/core/pii-detector.js.map +1 -0
  42. package/dist/core/rag-engine.d.ts +121 -0
  43. package/dist/core/rag-engine.js +504 -0
  44. package/dist/core/rag-engine.js.map +1 -0
  45. package/dist/core/task-queue.d.ts +69 -0
  46. package/dist/core/task-queue.js +124 -0
  47. package/dist/core/task-queue.js.map +1 -0
  48. package/dist/core/telemetry.d.ts +56 -0
  49. package/dist/core/telemetry.js +94 -0
  50. package/dist/core/telemetry.js.map +1 -0
  51. package/dist/core/types.d.ts +328 -0
  52. package/dist/core/types.js +24 -0
  53. package/dist/core/types.js.map +1 -0
  54. package/dist/index.d.ts +21 -0
  55. package/dist/index.js +12444 -0
  56. package/dist/index.js.map +1 -0
  57. package/dist/llm-tracer-CIIujuO-.d.ts +493 -0
  58. package/dist/mcp/server.d.ts +2651 -0
  59. package/dist/mcp/server.js +676 -0
  60. package/dist/mcp/server.js.map +1 -0
  61. package/dist/orchestrator-DF89k_AK.d.ts +506 -0
  62. package/package.json +157 -0
  63. package/templates/README.md +7 -0
  64. package/templates/baseline.config.ts +207 -0
@@ -0,0 +1,493 @@
1
+ import { AgentBus } from './core/agent-bus.js';
2
+
3
+ /**
4
+ * ModelVersionRegistry — SIGNAL-048
5
+ *
6
+ * Tracks which model version handled each task execution. Provides a
7
+ * queryable registry of deployed model versions, their metadata, and
8
+ * per-task attribution for audit and regression analysis.
9
+ *
10
+ * Capabilities:
11
+ * - Register model versions with metadata (provider, family, deployment date)
12
+ * - Set the active version per provider
13
+ * - Record per-task model attribution
14
+ * - List all versions / versions by provider
15
+ * - Emit governance:model-version-changed on active version change
16
+ *
17
+ * Usage:
18
+ * const registry = new ModelVersionRegistry({ bus });
19
+ * registry.register('claude-sonnet-4-6', { provider: 'anthropic', family: 'claude-4' });
20
+ * registry.setActive('anthropic', 'claude-sonnet-4-6');
21
+ * registry.recordTaskModel('task-123', 'claude-sonnet-4-6');
22
+ * registry.getForTask('task-123'); // → 'claude-sonnet-4-6'
23
+ *
24
+ * @license Apache-2.0
25
+ */
26
+
27
+ /** One registered model version with its descriptive metadata; the entry type returned by ModelVersionRegistry.get() and listVersions(). */ interface ModelVersionEntry {
28
+ /** Model version identifier, e.g. 'claude-sonnet-4-6' */ modelId: string;
29
+ /** Provider key this version belongs to, e.g. 'anthropic' */ provider: string;
30
+ /** Optional model family label, e.g. 'claude-4' */ family?: string;
31
+ /** ISO date string when this version was first registered */
32
+ registeredAt: string;
33
+ /** ISO date string when this version was last set as active; absent when it has never been activated (NOTE(review): presumably written by setActive() — confirm) */
34
+ activatedAt?: string;
35
+ /** Arbitrary metadata: context window, pricing tier, capabilities, etc. Values are `unknown` and must be narrowed before use. */
36
+ metadata?: Record<string, unknown>;
37
+ }
38
+ /** Constructor options for ModelVersionRegistry. */ interface ModelVersionRegistryConfig {
39
+ /** Optional AgentBus; setActive() is documented to publish governance:model-version-changed on it */ bus?: AgentBus;
40
+ }
41
+ /** Registry of model versions, the active version per provider, and per-task model attribution (SIGNAL-048). */ declare class ModelVersionRegistry {
42
+ /** Registered version entries; the container type is erased in this declaration output */ private readonly versions;
43
+ /** provider → active modelId */
44
+ private readonly activeVersions;
45
+ /** taskId → modelId */
46
+ private readonly taskModels;
47
+ /** Optional AgentBus (NOTE(review): presumably taken from config.bus — confirm in implementation) */ private readonly bus?;
48
+ /** @param config Optional settings; `bus` is the only field declared on ModelVersionRegistryConfig */ constructor(config?: ModelVersionRegistryConfig);
49
+ /**
50
+ * Register a model version. Idempotent — re-registering updates metadata. `meta` accepts every ModelVersionEntry field except `modelId` and `registeredAt`, so `provider` is required.
51
+ */
52
+ register(modelId: string, meta: Omit<ModelVersionEntry, 'modelId' | 'registeredAt'>): void;
53
+ /**
54
+ * Set the active model version for a provider.
55
+ * Publishes governance:model-version-changed on the bus. NOTE(review): whether `modelId` must already be registered is not visible from this declaration — confirm.
56
+ */
57
+ setActive(provider: string, modelId: string): void;
58
+ /**
59
+ * Get the modelId of the currently active model version for a provider.
60
+ * Returns undefined if none has been set.
61
+ */
62
+ getActive(provider: string): string | undefined;
63
+ /**
64
+ * Record which model version (by modelId) was used for a given task.
65
+ */
66
+ recordTaskModel(taskId: string, modelId: string): void;
67
+ /**
68
+ * Return the modelId of the model version used for a given task.
69
+ * Returns undefined if no attribution has been recorded.
70
+ */
71
+ getForTask(taskId: string): string | undefined;
72
+ /**
73
+ * Return the entry for a specific model version, or undefined if unknown.
74
+ */
75
+ get(modelId: string): ModelVersionEntry | undefined;
76
+ /**
77
+ * List all registered model versions, optionally filtered by provider.
78
+ */
79
+ listVersions(provider?: string): ModelVersionEntry[];
80
+ /**
81
+ * List all providers that have an active model version set, each paired with its active modelId.
82
+ */
83
+ listActiveProviders(): Array<{
84
+ provider: string;
85
+ modelId: string;
86
+ }>;
87
+ /**
88
+ * Return a summary of per-model task counts (how many tasks used each model), as a Map from modelId to count.
89
+ */
90
+ getTaskCounts(): Map<string, number>;
91
+ /**
92
+ * Remove all task attribution records (e.g., on rotation period boundary).
93
+ */
94
+ clearTaskRecords(): void;
95
+ }
96
+
97
+ /** One ground-truth evaluation case: the element type returned by ProductionEvalPipeline.loadDataset() and reported in CurationResult.newCases. */ interface GroundTruthCase {
98
+ /** Unique identifier for this eval case */
99
+ id: string;
100
+ /** Human-readable description */
101
+ description: string;
102
+ /** Simulated input text / task description */
103
+ input: string;
104
+ /** Expected model output (verbatim or representative) */
105
+ expectedOutput: string;
106
+ /** Acceptance criteria labels for this case */
107
+ criteria: string[];
108
+ /** Expected token range as a [min, max] tuple for the cost SLO check; optional */
109
+ expectedTokenRange?: [number, number];
110
+ /** Whether this is a benign (non-refusal) request; optional */
111
+ benign?: boolean;
112
+ }
113
+ /** Argument passed to a ProdEvalCheck: one case together with the output to judge. */ interface ProdEvalCheckContext {
114
+ /** The ground-truth case being checked */ case: GroundTruthCase;
115
+ /** Simulated output — either expectedOutput or a probe value */
116
+ output: string;
117
+ }
118
+ /** Outcome of one check; a ProdEvalCheck resolves to an array of these and ProdEvalReport.results collects them. */ interface ProdEvalCheckResult {
119
+ /** Identifier of the check that produced this result */ checkId: string;
120
+ /** Identifier of the evaluated case (NOTE(review): presumably GroundTruthCase.id — confirm) */ caseId: string;
121
+ /** True when the check passed */ passed: boolean;
122
+ /** Optional explanation of the outcome */ reason?: string;
123
+ }
124
+ /** Async check function: receives one ProdEvalCheckContext and resolves to an array of results. Registered via ProductionEvalPipeline.addCheck(). */ type ProdEvalCheck = (ctx: ProdEvalCheckContext) => Promise<ProdEvalCheckResult[]>;
125
+ /** Report resolved by ProductionEvalPipeline.run(). */ interface ProdEvalReport {
126
+ /** Identifier of this eval run */ runId: string;
127
+ /** Time of the run (NOTE(review): presumably an ISO timestamp like VersionEvalResult.timestamp — confirm) */ timestamp: string;
128
+ /** Number of ground-truth cases evaluated */ totalCases: number;
129
+ /** Number of check results produced across all cases */ totalChecks: number;
130
+ /** Count of passing results (NOTE(review): presumably counted per check result, not per case — confirm) */ passed: number;
131
+ /** Count of failing results (same caveat as `passed`) */ failed: number;
132
+ /** Overall score (NOTE(review): presumably on the same 0–100 scale as VersionEvalResult.score — confirm) */ score: number;
133
+ /** Individual check results */ results: ProdEvalCheckResult[];
134
+ /** Run duration (NOTE(review): milliseconds, judging by the name — confirm) */ durationMs: number;
135
+ }
136
+ /**
137
+ * Per-model evaluation result stored in prod-eval-history.json (SIGNAL-053). Resolved by ProductionEvalPipeline.runForModel() and accepted by compareVersions() and appendHistory().
138
+ */
139
+ interface VersionEvalResult {
140
+ /** Unique run identifier */
141
+ runId: string;
142
+ /** Model ID that was evaluated (e.g. 'claude-sonnet-4-6') */
143
+ modelId: string;
144
+ /** ISO timestamp of the eval run */
145
+ timestamp: string;
146
+ /** Overall score 0–100 */
147
+ score: number;
148
+ /** Fraction of checks that passed (0–1); note the different scale from `score` */
149
+ passRate: number;
150
+ /** Check IDs that failed */
151
+ failedChecks: string[];
152
+ /** Total number of cases evaluated */
153
+ totalCases: number;
154
+ }
155
+ /**
156
+ * Result of comparing two VersionEvalResults (SIGNAL-053), as returned by ProductionEvalPipeline.compareVersions(a, b), where `a` is the baseline (current production) and `b` is the candidate.
157
+ */
158
+ interface VersionComparison {
159
+ /** Score delta: b.score − a.score (positive = b is better) */
160
+ deltaScore: number;
161
+ /** PassRate delta: b.passRate − a.passRate */
162
+ deltaPassRate: number;
163
+ /** Check IDs that failed in b but not in a (new regressions) */
164
+ newRegressions: string[];
165
+ /** Check IDs that failed in a but not in b (improvements) */
166
+ improvements: string[];
167
+ /** True if b is strictly better than a */
168
+ improved: boolean;
169
+ /**
170
+ * Promotion recommendation:
171
+ * 'promote' — b is measurably better with no new regressions
172
+ * 'hold' — marginal improvement or insufficient delta
173
+ * 'rollback' — b is worse than a
174
+ */
175
+ recommend: 'promote' | 'hold' | 'rollback';
176
+ /**
177
+ * 'high' when at least 10 cases were evaluated; 'low' otherwise, because fewer cases may produce noisy results.
178
+ */
179
+ confidence: 'high' | 'low';
180
+ }
181
+ /** Constructor options for ProductionEvalPipeline; every field is optional. */ interface ProductionEvalPipelineConfig {
182
+ /** Optional AgentBus; the pipeline's core run is documented to publish a bus event */ bus?: AgentBus;
183
+ /** Path to ground-truth JSON dataset. Default: src/data/prod-eval-dataset.json */
184
+ datasetPath?: string;
185
+ /** Path to write the report JSON. Default: packages/baselineos/prod-eval-report.json */
186
+ reportPath?: string;
187
+ /** Path to the eval history JSON file (SIGNAL-053). Default: src/data/prod-eval-history.json */
188
+ historyPath?: string;
189
+ /**
190
+ * Score delta a candidate must exceed to be recommended for promotion (default: 3).
191
+ * compareVersions() only recommends 'promote' when deltaScore exceeds this threshold.
192
+ */
193
+ promotionThreshold?: number;
194
+ /**
195
+ * ModelVersionRegistry for auto model attribution (SIGNAL-056).
196
+ * When set, every run() call resolves the active model and appends a
197
+ * VersionEvalResult to history automatically — no need to call runForModel().
198
+ * Provider defaults to 'anthropic' unless modelAttributionProvider is set.
199
+ */
200
+ modelVersionRegistry?: ModelVersionRegistry;
201
+ /** Provider key used to look up the active model in modelVersionRegistry. Default: 'anthropic' */
202
+ modelAttributionProvider?: string;
203
+ }
204
+ /** Runs registered ProdEvalCheck functions against ground-truth cases and keeps a per-model eval history for version comparison (SIGNAL-053, SIGNAL-056). */ declare class ProductionEvalPipeline {
205
+ /** Registered check functions; the container type is erased in this declaration output */ private readonly checks;
206
+ /** Configuration (NOTE(review): presumably the constructor argument — confirm) */ private readonly config;
207
+ /** Optional AgentBus used for publishing events */ private readonly bus?;
208
+ /** Threshold used for the 'promote' recommendation; see ProductionEvalPipelineConfig.promotionThreshold */ private readonly promotionThreshold;
209
+ /** @param config Optional settings; all fields of ProductionEvalPipelineConfig are optional */ constructor(config?: ProductionEvalPipelineConfig);
210
+ /** Register a custom check function. Returns `this` so calls can be chained. */
211
+ addCheck(check: ProdEvalCheck): this;
212
+ /**
213
+ * Register all five built-in production checks. Returns `this` so calls can be chained.
214
+ */
215
+ addBuiltinChecks(): this;
216
+ /**
217
+ * Load the ground-truth dataset from disk. Synchronous: returns the cases directly rather than a Promise.
218
+ */
219
+ loadDataset(): GroundTruthCase[];
220
+ /**
221
+ * Run all checks against all ground-truth cases.
222
+ * Returns a ProdEvalReport. NOTE(review): presumably falls back to loadDataset() when `cases` is omitted — confirm.
223
+ *
224
+ * When modelVersionRegistry is configured (SIGNAL-056), automatically
225
+ * resolves the active model and appends a VersionEvalResult to history.
226
+ */
227
+ run(cases?: GroundTruthCase[]): Promise<ProdEvalReport>;
228
+ /**
229
+ * Run the eval suite and record the result under a specific modelId.
230
+ * Appends to prod-eval-history.json for trend tracking.
231
+ *
232
+ * Uses _runCore() directly to avoid double-appending when modelVersionRegistry
233
+ * is also configured.
234
+ */
235
+ runForModel(modelId: string, cases?: GroundTruthCase[]): Promise<VersionEvalResult>;
236
+ /**
237
+ * Core eval execution — runs checks, persists report, publishes bus event.
238
+ * Called by both run() and runForModel() to avoid code duplication.
239
+ */
240
+ private _runCore;
241
+ /**
242
+ * Compare two VersionEvalResults and return a promotion recommendation.
243
+ * `a` is the baseline (current production); `b` is the candidate. Argument order matters: deltas are computed as b − a.
244
+ */
245
+ compareVersions(a: VersionEvalResult, b: VersionEvalResult): VersionComparison;
246
+ /**
247
+ * Append a VersionEvalResult to the eval history file.
248
+ */
249
+ appendHistory(result: VersionEvalResult): void;
250
+ /**
251
+ * Return all VersionEvalResults, optionally filtered by modelId.
252
+ */
253
+ getHistory(modelId?: string): VersionEvalResult[];
254
+ /**
255
+ * Return the rolling average eval score and delta from windowSize entries ago, as { current, rollingAvg, delta }.
256
+ * Returns undefined when insufficient history exists. NOTE(review): the default for `windowSize` is not visible from this declaration.
257
+ */
258
+ getQualityTrend(modelId: string, windowSize?: number): {
259
+ current: number;
260
+ rollingAvg: number;
261
+ delta: number;
262
+ } | undefined;
263
+ /** NOTE(review): private member, presumably resolves config.historyPath or its default — confirm in implementation */ private _historyPath;
264
+ }
265
+
266
+ /**
267
+ * TraceCurator — SIGNAL-047
268
+ *
269
+ * Converts sampled production LLM traces into GroundTruthCase entries
270
+ * that feed the ProductionEvalPipeline dataset. Enables continuous
271
+ * ground-truth expansion from real inference without manual labelling.
272
+ *
273
+ * Architecture:
274
+ * 1. Accept trace records (from Langfuse callbacks or a local trace buffer)
275
+ * 2. Apply sampling strategy (reservoir / recency / confidence-weighted)
276
+ * 3. Score and filter traces using configurable quality criteria
277
+ * 4. Append accepted cases to the ground-truth dataset on disk
278
+ * 5. Publish governance:trace-curated on AgentBus
279
+ *
280
+ * Sampling strategies:
281
+ * reservoir — uniform random sample, bounded by maxDatasetSize
282
+ * recency — keeps the N most recent accepted traces
283
+ * confidence — preferentially retains high-confidence traces
284
+ *
285
+ * Usage:
286
+ * const curator = new TraceCurator({ datasetPath, bus });
287
+ * curator.ingest(traceRecord); // real-time ingestion
288
+ * const result = await curator.flush(); // write accepted cases to disk
289
+ *
290
+ * @license Apache-2.0
291
+ */
292
+
293
+ /** Sampling strategy for TraceCurator: 'reservoir' = uniform random sample bounded by maxDatasetSize; 'recency' = keeps the N most recent accepted traces; 'confidence' = preferentially retains high-confidence traces. */ type SamplingStrategy = 'reservoir' | 'recency' | 'confidence';
294
+ /** One production LLM trace as accepted by TraceCurator.ingest() / ingestBatch(). */ interface TraceRecord {
295
+ /** Unique trace identifier (task ID in Langfuse) */
296
+ traceId: string;
297
+ /** Human-readable task title */
298
+ title: string;
299
+ /** The input prompt / task description sent to the model */
300
+ input: string;
301
+ /** The model's final output for this trace */
302
+ output: string;
303
+ /** Model confidence signal (0–1). Use self-verify score when available. Compared against TraceCuratorConfig.minConfidence. */
304
+ confidence: number;
305
+ /** Whether this was a benign (non-refusal) request; optional */
306
+ benign?: boolean;
307
+ /** Approximate token count for cost SLO; optional */
308
+ tokens?: number;
309
+ /** ISO timestamp of the trace */
310
+ timestamp: string;
311
+ /** Whether the trace passed self-verification; optional, relevant when TraceCuratorConfig.requireVerified is set */
312
+ verified?: boolean;
313
+ }
314
+ /** Summary resolved by TraceCurator.flush(). */ interface CurationResult {
315
+ /** Number of traces considered (NOTE(review): presumably the buffer size at flush time — confirm) */ ingested: number;
316
+ /** Number of traces accepted */ accepted: number;
317
+ /** Number of traces rejected */ rejected: number;
318
+ /** Dataset size (NOTE(review): presumably measured after the flush — confirm) */ datasetSize: number;
319
+ /** Ground-truth cases newly produced from accepted traces */ newCases: GroundTruthCase[];
320
+ }
321
+ /** Constructor options for TraceCurator; every field is optional. */ interface TraceCuratorConfig {
322
+ /** Optional AgentBus; the file header documents governance:trace-curated being published on it */ bus?: AgentBus;
323
+ /** Path to ground-truth JSON dataset. Default: src/data/prod-eval-dataset.json */
324
+ datasetPath?: string;
325
+ /** Sampling strategy. Default: reservoir */
326
+ strategy?: SamplingStrategy;
327
+ /** Maximum dataset size (oldest entries pruned when exceeded). Default: 500. NOTE(review): "oldest pruned" may only describe the 'recency' strategy — confirm for 'reservoir' and 'confidence'. */
328
+ maxDatasetSize?: number;
329
+ /** Minimum confidence (on the 0–1 scale of TraceRecord.confidence) to accept a trace. Default: 0.7 */
330
+ minConfidence?: number;
331
+ /** Only accept traces where self-verify passed. Default: false */
332
+ requireVerified?: boolean;
333
+ /** Minimum input length in characters. Default: 20 */
334
+ minInputLength?: number;
335
+ }
336
+ /** Buffers production traces, filters and samples them, and persists accepted ones as ground-truth cases (SIGNAL-047). */ declare class TraceCurator {
337
+ /** Optional AgentBus used for publishing events */ private readonly bus?;
338
+ /** Dataset location; see TraceCuratorConfig.datasetPath */ private readonly datasetPath;
339
+ /** Sampling strategy; see TraceCuratorConfig.strategy */ private readonly strategy;
340
+ /** Dataset size cap; see TraceCuratorConfig.maxDatasetSize */ private readonly maxDatasetSize;
341
+ /** Confidence floor; see TraceCuratorConfig.minConfidence */ private readonly minConfidence;
342
+ /** Verification requirement; see TraceCuratorConfig.requireVerified */ private readonly requireVerified;
343
+ /** Input length floor; see TraceCuratorConfig.minInputLength */ private readonly minInputLength;
344
+ /** In-memory buffer of traces ingested since last flush */
345
+ private readonly buffer;
346
+ /** @param config Optional settings; all fields of TraceCuratorConfig are optional */ constructor(config?: TraceCuratorConfig);
347
+ /**
348
+ * Ingest a trace record into the buffer.
349
+ * Call flush() to persist accepted cases to disk.
350
+ */
351
+ ingest(trace: TraceRecord): void;
352
+ /**
353
+ * Ingest multiple traces at once.
354
+ */
355
+ ingestBatch(traces: TraceRecord[]): void;
356
+ /**
357
+ * Apply quality filters to the traces passed in `traces` (NOTE(review): the original wording said "buffered traces", but the input is a parameter — confirm the buffer is not read here).
358
+ * Returns accepted TraceRecords without writing to disk.
359
+ */
360
+ filter(traces: TraceRecord[]): TraceRecord[];
361
+ /**
362
+ * Flush buffered traces to the ground-truth dataset.
363
+ * Applies sampling strategy, deduplicates by traceId, and respects maxDatasetSize. Resolves to a CurationResult summary.
364
+ */
365
+ flush(): Promise<CurationResult>;
366
+ /** Return the current buffer without flushing. */
367
+ peekBuffer(): TraceRecord[];
368
+ /** Clear the buffer without writing to disk. */
369
+ clearBuffer(): void;
370
+ /** Return the number of cases currently in the on-disk dataset. */
371
+ getDatasetSize(): number;
372
+ /**
373
+ * Export the curated dataset to a fine-tuning JSONL file.
374
+ *
375
+ * Each line is a JSON object in the messages format expected by the
376
+ * target API. PII-free by construction — upstream LlmTracer masking
377
+ * ensures no PII reaches the dataset. NOTE(review): this holds only while LlmTracerConfig.enablePiiMasking is left enabled — confirm.
378
+ *
379
+ * Formats:
380
+ * 'openai' — { "messages": [system?, user, assistant] }
381
+ * 'anthropic' — { "system": "...", "messages": [user, assistant] }
382
+ *
383
+ * @param format Target fine-tuning API format
384
+ * @param outputPath Absolute path to write the .jsonl file
385
+ * @param options.systemPrompt Optional system prompt to include per example
386
+ * @param options.minExamples Minimum dataset size before warning (default 10)
387
+ * @returns `exported` and `skipped` example counts plus `path` (NOTE(review): presumably the file written — confirm) */
388
+ exportFineTuneDataset(format: 'openai' | 'anthropic', outputPath: string, options?: {
389
+ systemPrompt?: string;
390
+ minExamples?: number;
391
+ }): {
392
+ exported: number;
393
+ skipped: number;
394
+ path: string;
395
+ };
396
+ /** NOTE(review): private helper, presumably maps a TraceRecord to a GroundTruthCase — confirm */ private toCuratedCase;
397
+ /** NOTE(review): private helper, presumably reads the dataset at datasetPath — confirm */ private loadDataset;
398
+ /** NOTE(review): private helper, presumably applies the configured SamplingStrategy — confirm */ private applyStrategy;
399
+ }
400
+
401
+ /**
402
+ * LLM Tracer — SIGNAL-014
403
+ *
404
+ * Logs every LLM call made by AnthropicEngine to Langfuse for prompt-level
405
+ * observability. Captures model, token usage, latency, input/output, and
406
+ * task metadata. Traces are keyed by task.id, so a multi-step task (execute
407
+ * → self-verify → review) produces one Langfuse trace with multiple
408
+ * generation spans.
409
+ *
410
+ * Self-hosted Langfuse:
411
+ * docker compose -f docker/docker-compose.monitoring.yml up -d
412
+ * → http://localhost:3001 (admin@baselineos.dev / baseline)
413
+ *
414
+ * Configuration (env or constructor):
415
+ * LANGFUSE_PUBLIC_KEY — project public key (default: baseline-public-key)
416
+ * LANGFUSE_SECRET_KEY — project secret key (default: baseline-secret-key)
417
+ * LANGFUSE_BASE_URL — Langfuse server URL (default: http://localhost:3001)
418
+ *
419
+ * The tracer is fail-safe: any Langfuse error is swallowed silently so it
420
+ * can never interrupt task execution.
421
+ *
422
+ * @license Apache-2.0
423
+ */
424
+
425
+ /** Constructor options for LlmTracer; every field is optional. NOTE(review): the documented default keys look like development placeholders for the self-hosted Langfuse — confirm they are overridden outside local setups. */ interface LlmTracerConfig {
426
+ /** Langfuse project public key. Default: LANGFUSE_PUBLIC_KEY env or 'baseline-public-key' */
427
+ publicKey?: string;
428
+ /** Langfuse project secret key. Default: LANGFUSE_SECRET_KEY env or 'baseline-secret-key' */
429
+ secretKey?: string;
430
+ /** Langfuse server base URL. Default: LANGFUSE_BASE_URL env or http://localhost:3001 */
431
+ baseUrl?: string;
432
+ /** Flush batch size (default: 15) */
433
+ flushAt?: number;
434
+ /** Flush interval in ms (default: 30_000) */
435
+ flushInterval?: number;
436
+ /**
437
+ * Scan and redact PII from system prompts, input messages, and output
438
+ * before sending to Langfuse. Default: true (SIGNAL-037).
439
+ */
440
+ enablePiiMasking?: boolean;
441
+ /**
442
+ * TraceCurator to ingest sampled traces into the ground-truth dataset.
443
+ * When provided, each logGeneration() call feeds a TraceRecord into the
444
+ * curator buffer for downstream curation and eval dataset expansion (SIGNAL-049).
445
+ */
446
+ curator?: TraceCurator;
447
+ }
448
+ /** Details of a single LLM generation, passed as `options` to LlmTracer.logGeneration(). */ interface LlmGenerationOptions {
449
+ /** Span name within the trace, e.g. 'execute', 'self-verify', 'review', 'agent-loop:3' */
450
+ name: string;
451
+ /** Model identifier, e.g. 'claude-sonnet-4-6' */
452
+ model: string;
453
+ /** System prompt sent to the model */
454
+ systemPrompt?: string;
455
+ /** User + assistant turns passed to the model; `content` is typed `unknown` and must be narrowed before use */
456
+ inputMessages: Array<{
457
+ role: string;
458
+ content: unknown;
459
+ }>;
460
+ /** Raw text output from the model */
461
+ output: string;
462
+ /** Input token count for this generation */ inputTokens: number;
463
+ /** Output token count for this generation */ outputTokens: number;
464
+ /** When the generation started */ startTime: Date;
465
+ /** When the generation finished */ endTime: Date;
466
+ /** Arbitrary metadata attached to the generation (taskId, agentId, etc.) */
467
+ metadata?: Record<string, unknown>;
468
+ }
469
+ /** Logs LLM generations to Langfuse, grouped per task trace (SIGNAL-014); documented as fail-safe, with Langfuse errors swallowed. */ declare class LlmTracer {
470
+ /** Langfuse client; its type is erased in this declaration output */ private readonly langfuse;
471
+ /** PII scanner used by maskPii; null when masking is disabled */ private readonly piiScanner;
472
+ /** Optional TraceCurator; see LlmTracerConfig.curator */ private readonly curator;
473
+ /** @param config Optional settings; unset fields fall back to the env vars / defaults documented on LlmTracerConfig */ constructor(config?: LlmTracerConfig);
474
+ /** Redact PII from a string if masking is enabled. No-op when scanner is null. */
475
+ private maskPii;
476
+ /**
477
+ * Log a single LLM generation to Langfuse. Per LlmTracerConfig.curator, each call also feeds a TraceRecord to the curator when one is configured.
478
+ *
479
+ * Multiple calls with the same `traceId` accumulate under one trace,
480
+ * so all generations for a task are grouped together automatically.
481
+ *
482
+ * @param traceId task.id — the top-level trace identifier
483
+ * @param traceName task.title — human-readable trace label in the UI
484
+ * @param options generation details
485
+ */
486
+ logGeneration(traceId: string, traceName: string, options: LlmGenerationOptions): void;
487
+ /** Flush pending spans. Call on graceful shutdown. Returns a Promise, so it can be awaited. */
488
+ flush(): Promise<void>;
489
+ /** Shut down the Langfuse client (flushes pending spans). NOTE(review): returns void, so completion of that flush cannot be awaited — call `await flush()` first if delivery must be confirmed. */
490
+ shutdown(): void;
491
+ }
492
+
493
+ /* Aliased re-exports: each declaration above is exported under a one-letter alias. NOTE(review): the aliases look bundler-generated, so consumers should presumably import via the package entry points rather than these names — confirm. */ export { type CurationResult as C, type GroundTruthCase as G, LlmTracer as L, ModelVersionRegistry as M, ProductionEvalPipeline as P, type SamplingStrategy as S, TraceCurator as T, type VersionEvalResult as V, type VersionComparison as a, type LlmGenerationOptions as b, type LlmTracerConfig as c, type ModelVersionEntry as d, type ModelVersionRegistryConfig as e, type ProdEvalCheck as f, type ProdEvalCheckContext as g, type ProdEvalCheckResult as h, type ProdEvalReport as i, type ProductionEvalPipelineConfig as j, type TraceCuratorConfig as k, type TraceRecord as l };