@tangle-network/agent-runtime 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
2
- import { FindingSubject, TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
2
+ import { FindingSubject, TraceAnalystKindSpec, AnalystFinding, TraceStore, RunCompleteHook, FeedbackLabel, FeedbackTrajectoryStore } from '@tangle-network/agent-eval';
3
+ import { R as RuntimeStreamEvent } from './types-afLuHk1G.js';
3
4
  import { I as ImprovementAdapter, K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-D_MXrmJP.js';
4
5
 
5
6
  /**
@@ -230,16 +231,61 @@ interface JudgeConfig<TRunOutput> {
230
231
  }
231
232
  interface AgentRuntime<TPersona, TRunOutput> {
232
233
  /**
233
- * Invoke the agent against one persona. Returns the structured run
234
- * output the rubric will score.
234
+ * Invoke the agent against one persona. Returns BOTH:
235
+ * - `events`: an `AsyncIterable<RuntimeStreamEvent>` the chat-centric
236
+ * product consumes verbatim (SSE / WebSocket / inline render).
237
+ * **Streaming is mandatory — never collapse this to a single Promise.**
238
+ * The agent's existing `runChatTurn` (or equivalent async generator)
239
+ * plugs in here directly.
240
+ * - `output`: a `Promise<TRunOutput>` resolved AFTER the event stream
241
+ * drains. The eval substrate awaits this for rubric scoring; chat
242
+ * products usually ignore it (they already rendered incrementally).
235
243
  *
236
- * `ctx.emitter` is the substrate-threaded `TraceEmitter` — agents
237
- * SHOULD record their LLM calls / tool calls through it for capture
238
- * integrity. `ctx.deadlineMs` is wall-clock; the runtime SHOULD
239
- * honour it for graceful cancel.
244
+ * Implementation contract:
245
+ * 1. `act` MUST return immediately (synchronous construction of the
246
+ * `events` iterator + the `output` promise).
247
+ * 2. Iterating `events` drives the underlying LLM/tool calls — the
248
+ * caller chooses when to consume.
249
+ * 3. `output` resolves only after the iterator yields its terminal
250
+ * event (typically `task_end`); see `collectAgentRun` helper.
251
+ *
252
+ * `ctx.emitter` is the substrate-threaded `TraceEmitter` — runtimes
253
+ * SHOULD record LLM/tool spans through it for capture integrity.
254
+ * `ctx.deadlineMs` is wall-clock; the runtime SHOULD honour for graceful
255
+ * cancel. `ctx.signal` is the standard abort signal.
240
256
  */
241
- act: (persona: TPersona, ctx: AgentRunContext) => Promise<TRunOutput>;
257
+ act: (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput>;
258
+ }
259
+ interface AgentRunInvocation<TRunOutput> {
260
+ /** Live stream of typed runtime events. Consumed by chat UX directly. */
261
+ events: AsyncIterable<RuntimeStreamEvent>;
262
+ /** Final structured output the rubric scores. Resolves after `events` drains. */
263
+ output: Promise<TRunOutput>;
242
264
  }
265
+ /**
266
+ * Stub for agents whose `runtime.act` is not yet wired to the substrate's
267
+ * eval path. Preserves the streaming contract (empty event stream + a
268
+ * rejected `output` promise that tells the caller exactly what to fix).
269
+ *
270
+ * Per-vertical manifests usually start with this stub and replace it with
271
+ * the agent's real streaming runtime (`runChatTurn` or equivalent) once
272
+ * the eval path consumes the manifest end-to-end.
273
+ */
274
+ declare function unimplementedAgentRun<TRunOutput = unknown>(reason?: string): AgentRunInvocation<TRunOutput>;
275
+ /**
276
+ * Drain `act`'s `events` into an array AND await its `output`. Useful for
277
+ * eval / outcome-measurement code paths that don't care about live
278
+ * rendering. The events array is preserved so the substrate can inspect
279
+ * tool calls / readiness / questions retrospectively.
280
+ *
281
+ * IMPORTANT: chat-centric UX MUST NOT call this — it defeats streaming
282
+ * (no incremental render). Use `for await (const ev of invocation.events)`
283
+ * directly in the chat surface.
284
+ */
285
+ declare function collectAgentRun<TRunOutput>(invocation: AgentRunInvocation<TRunOutput>): Promise<{
286
+ events: ReadonlyArray<RuntimeStreamEvent>;
287
+ output: TRunOutput;
288
+ }>;
243
289
  interface AgentRunContext {
244
290
  /** Substrate-managed trace emitter. */
245
291
  emitter: _tangle_network_agent_eval.TraceEmitter;
@@ -534,4 +580,166 @@ declare function measureOutcome<TProposal, TEdit>(result: RunAnalystLoopResult<T
534
580
  outcome: OutcomeMeasurement;
535
581
  }>;
536
582
 
537
- export { type AgentManifest, AgentManifestError, type AgentRubric, type AgentRunContext, type AgentRuntime, type AgentSurfaces, type AnalystConfig, type AutoApplyPolicy, type CreateSurfaceImprovementAdapterOpts, type CreateSurfaceKnowledgeAdapterOpts, type DraftPatchInput, type DraftPatchOutput, type JudgeConfig, type KnowledgeAdapterDeps, type OutcomeMeasurement, type OutcomeMeasurementOpts, type ResolvedSurface, type RubricDimension, type SurfaceImprovementEdit, type SurfaceValidationIssue, createSurfaceImprovementAdapter, createSurfaceKnowledgeAdapter, defineAgent, measureOutcome, renderSurfaceIssues, resolveSubjectPath, validateSurfaces };
583
+ /**
584
+ * `createProductionTraceSink` — the production-side capture primitive
585
+ * every vertical agent's chat handler wires in once.
586
+ *
587
+ * Closes the data-leak: until now, every production chat session emitted
588
+ * zero replayable trace data. Eval runs captured everything; production
589
+ * captured nothing. RL training corpora, research analyses, and the
590
+ * self-improvement loop all ran on synthetic personas. This primitive
591
+ * makes every real user conversation a piece of data the downstream
592
+ * channels (Prime Intellect, GEPA, research, canaries, analyst loop)
593
+ * can consume.
594
+ *
595
+ * Wiring (per agent, ~10 lines in the production chat handler):
596
+ *
597
+ * ```ts
598
+ * const sink = createProductionTraceSink({
599
+ * projectId: 'tax-agent',
600
+ * otlp: { endpoint: env.LANGFUSE_OTEL_ENDPOINT, authHeader: env.LANGFUSE_OTEL_AUTH },
601
+ * runRecordStore: drizzleRunRecordStore(db),
602
+ * feedbackStore: drizzleFeedbackStore(db),
603
+ * })
604
+ *
605
+ * const emitter = new TraceEmitter(sink.traceStore, {
606
+ * onRunComplete: [sink.onRunComplete],
607
+ * })
608
+ * await emitter.startRun({
609
+ * scenarioId: sessionId,
610
+ * projectId: 'tax-agent',
611
+ * layer: 'app-runtime',
612
+ * })
613
+ * // ... existing chat flow, with LLM/tool spans emitted ...
614
+ * await emitter.endRun({ pass, score })
615
+ * // sink.onRunComplete fires automatically:
616
+ * // 1. composes RunRecord, persists to runRecordStore
617
+ * // 2. exports run as OTLP, POSTs to Langfuse
618
+ * // 3. logs failures (does NOT throw — never crashes the chat handler)
619
+ * ```
620
+ *
621
+ * Separately, the agent's feedback endpoint calls `sink.recordFeedback`
622
+ * to write user thumbs-up/thumbs-down (or richer labels) into the
623
+ * FeedbackTrajectory store — the corpus DPO/KTO trainers consume.
624
+ *
625
+ * Cloudflare Worker semantics: the sink buffers spans in memory through
626
+ * the request lifetime (via agent-eval's `InMemoryTraceStore`).
627
+ * `onRunComplete` is awaited (typically inside `ctx.waitUntil`) so the
628
+ * worker stays alive long enough to flush. The OTLP POST is fire-and-
629
+ * forget — failures are logged but never surface to the chat user.
630
+ */
631
+
632
+ interface ProductionTraceSinkOpts {
633
+ /**
634
+ * Stable agent identifier — appears in OTLP `service.name`, every
635
+ * RunRecord row, every FeedbackTrajectory row. MUST match the
636
+ * agent's repo name to keep cross-repo telemetry joinable.
637
+ */
638
+ projectId: string;
639
+ /**
640
+ * OTLP forwarding target. Typically Langfuse's HTTP receiver. Omit to
641
+ * disable OTLP export (RunRecord persistence still works).
642
+ *
643
+ * `authHeader` is the full header value (e.g. `Basic <base64>`); the
644
+ * sink does NOT base64-encode for you.
645
+ */
646
+ otlp?: {
647
+ endpoint: string;
648
+ authHeader?: string;
649
+ /** Optional resource attributes merged into every span batch. */
650
+ resourceAttributes?: Record<string, string | number | boolean>;
651
+ };
652
+ /**
653
+ * Durable RunRecord persistence. Per-vertical agents implement this
654
+ * over their own DB (Drizzle / D1 / Postgres). Optional — when omitted,
655
+ * RunRecords stay in-memory and are lost when the worker isolate ends.
656
+ */
657
+ runRecordStore?: ProductionRunRecordStore;
658
+ /**
659
+ * Durable feedback persistence. Used by `recordFeedback`; agents wire
660
+ * their thumbs-up/down + free-text feedback endpoints to call into the
661
+ * sink, which writes a `FeedbackLabel` into a `FeedbackTrajectory`.
662
+ *
663
+ * Optional — when omitted, `recordFeedback` is a no-op.
664
+ */
665
+ feedbackStore?: FeedbackTrajectoryStore;
666
+ /**
667
+ * Pluggable fetch — defaults to globalThis.fetch. Tests inject a
668
+ * mocked fetch.
669
+ */
670
+ fetch?: typeof fetch;
671
+ /**
672
+ * Pluggable structured logger — defaults to console.warn for failures.
673
+ * The sink NEVER throws on flush failure; it logs and returns.
674
+ */
675
+ log?: (msg: string, fields?: Record<string, unknown>) => void;
676
+ }
677
+ /**
678
+ * Durable per-agent RunRecord persistence. Each vertical implements over
679
+ * its own DB. The sink calls `append` once per `endRun`.
680
+ */
681
+ interface ProductionRunRecordStore {
682
+ append(record: ProductionRunRecord): Promise<void>;
683
+ }
684
+ /**
685
+ * Minimal canonical row the sink composes on `endRun`. Per-agent DB
686
+ * adapters extend with their own fields; the sink only writes what
687
+ * the runtime canonically captures.
688
+ */
689
+ interface ProductionRunRecord {
690
+ runId: string;
691
+ projectId: string;
692
+ scenarioId: string;
693
+ variantId?: string;
694
+ startedAt: string;
695
+ endedAt: string;
696
+ status: 'completed' | 'failed' | 'aborted';
697
+ pass?: boolean;
698
+ score?: number;
699
+ failureClass?: string;
700
+ notes?: string;
701
+ /** Echoed back from `emitter.startRun({ tags })`. */
702
+ tags?: Record<string, string>;
703
+ /** Span row count — useful for diagnostics. */
704
+ spanCount: number;
705
+ }
706
+ interface ProductionTraceSink {
707
+ /**
708
+ * The TraceStore the agent's `TraceEmitter` writes to. In-memory by
709
+ * design: spans accumulate through the chat session, flush at
710
+ * `onRunComplete`. The runtime never reads from this store directly —
711
+ * the sink reads from it during the flush step.
712
+ */
713
+ traceStore: TraceStore;
714
+ /**
715
+ * Hook the agent passes into
716
+ * `new TraceEmitter(store, { onRunComplete: [sink.onRunComplete] })`.
717
+ * Fires once per chat session at `endRun` time. Composes the
718
+ * RunRecord, persists, and ships OTLP. Errors are logged, never thrown.
719
+ */
720
+ onRunComplete: RunCompleteHook;
721
+ /**
722
+ * Append a user feedback label (thumbs-up/down, correction, severity)
723
+ * to the FeedbackTrajectory for a completed run. Creates a minimal
724
+ * trajectory anchored to the run if one doesn't exist; appends the
725
+ * label if it does. No-op when `feedbackStore` is undefined.
726
+ *
727
+ * Returns the trajectory id (existing or freshly created) for the
728
+ * agent's API to link back to the session, or `null` on no-op /
729
+ * error.
730
+ */
731
+ recordFeedback(input: RecordFeedbackInput): Promise<string | null>;
732
+ }
733
+ interface RecordFeedbackInput {
734
+ /** Run id from the original `emitter.startRun`. */
735
+ runId: string;
736
+ /** The user-supplied feedback label. */
737
+ label: FeedbackLabel;
738
+ /** Optional scenario id (mirrors the run's). */
739
+ scenarioId?: string;
740
+ /** Optional pre-existing trajectory id if the agent tracks them separately. */
741
+ trajectoryId?: string;
742
+ }
743
+ declare function createProductionTraceSink(opts: ProductionTraceSinkOpts): ProductionTraceSink;
744
+
745
+ export { type AgentManifest, AgentManifestError, type AgentRubric, type AgentRunContext, type AgentRunInvocation, type AgentRuntime, type AgentSurfaces, type AnalystConfig, type AutoApplyPolicy, type CreateSurfaceImprovementAdapterOpts, type CreateSurfaceKnowledgeAdapterOpts, type DraftPatchInput, type DraftPatchOutput, type JudgeConfig, type KnowledgeAdapterDeps, type OutcomeMeasurement, type OutcomeMeasurementOpts, type ProductionRunRecord, type ProductionRunRecordStore, type ProductionTraceSink, type ProductionTraceSinkOpts, type RecordFeedbackInput, type ResolvedSurface, type RubricDimension, type SurfaceImprovementEdit, type SurfaceValidationIssue, collectAgentRun, createProductionTraceSink, createSurfaceImprovementAdapter, createSurfaceKnowledgeAdapter, defineAgent, measureOutcome, renderSurfaceIssues, resolveSubjectPath, unimplementedAgentRun, validateSurfaces };
package/dist/agent.js CHANGED
@@ -6,49 +6,67 @@ import {
6
6
  import { existsSync } from "fs";
7
7
  import { isAbsolute, join } from "path";
8
8
  function resolveSubjectPath(subject, surfaces, repoRoot) {
9
- const rel = relativePathForSubject(subject, surfaces);
10
- if (rel === null) return null;
11
- const abs = isAbsolute(rel) ? rel : join(repoRoot, rel);
12
- const exists = existsSync(abs);
9
+ const candidates = candidatePathsForSubject(subject, surfaces);
10
+ if (candidates.length === 0) return null;
11
+ for (const rel of candidates) {
12
+ const abs = isAbsolute(rel) ? rel : join(repoRoot, rel);
13
+ if (existsSync(abs)) {
14
+ return { absolutePath: abs, repoRelativePath: rel, exists: true, intent: "edit-existing" };
15
+ }
16
+ }
17
+ const fallback = candidates[0];
18
+ const fallbackAbs = isAbsolute(fallback) ? fallback : join(repoRoot, fallback);
13
19
  return {
14
- absolutePath: abs,
15
- repoRelativePath: rel,
16
- exists,
17
- intent: exists ? "edit-existing" : "create-new"
20
+ absolutePath: fallbackAbs,
21
+ repoRelativePath: fallback,
22
+ exists: false,
23
+ intent: "create-new"
18
24
  };
19
25
  }
20
- function relativePathForSubject(subject, surfaces) {
26
+ function candidatePathsForSubject(subject, surfaces) {
21
27
  switch (subject.kind) {
22
28
  case "knowledge.wiki":
23
29
  case "knowledge.stale":
24
- return join(surfaces.knowledge, `${subject.slug}.md`);
30
+ return [join(surfaces.knowledge, `${subject.slug}.md`)];
25
31
  case "knowledge.claim":
26
- return join(surfaces.knowledge, "claims", `${slugify(subject.topic)}.md`);
32
+ return [join(surfaces.knowledge, "claims", `${slugify(subject.topic)}.md`)];
27
33
  case "knowledge.raw":
28
- return join(surfaces.knowledge, "raw", `${subject.sourceId}.md`);
29
- case "system-prompt":
30
- return join(surfaces.systemPrompt, `${slugify(subject.section)}.md`);
34
+ return [join(surfaces.knowledge, "raw", `${subject.sourceId}.md`)];
35
+ case "system-prompt": {
36
+ const slug = slugify(subject.section);
37
+ return [
38
+ join(surfaces.systemPrompt, `${slug}.md`),
39
+ join(surfaces.systemPrompt, slug, "SKILL.md"),
40
+ join(surfaces.systemPrompt, slug, "index.md")
41
+ ];
42
+ }
31
43
  case "tool-doc":
32
- return subject.aspect ? join(surfaces.tools, subject.tool, `${slugify(subject.aspect)}.md`) : join(surfaces.tools, subject.tool, "README.md");
44
+ if (subject.aspect) {
45
+ return [join(surfaces.tools, subject.tool, `${slugify(subject.aspect)}.md`)];
46
+ }
47
+ return [
48
+ join(surfaces.tools, subject.tool, "README.md"),
49
+ join(surfaces.tools, `${subject.tool}.md`)
50
+ ];
33
51
  case "new-tool":
34
- return join(surfaces.tools, subject.name, "README.md");
52
+ return [join(surfaces.tools, subject.name, "README.md")];
35
53
  case "rag":
36
- if (!surfaces.rag) return null;
37
- return join(surfaces.rag, subject.corpus, `${subject.docId}.md`);
54
+ if (!surfaces.rag) return [];
55
+ return [join(surfaces.rag, subject.corpus, `${subject.docId}.md`)];
38
56
  case "memory":
39
- if (!surfaces.memory) return null;
40
- return join(surfaces.memory, `${slugify(subject.key)}.json`);
57
+ if (!surfaces.memory) return [];
58
+ return [join(surfaces.memory, `${slugify(subject.key)}.json`)];
41
59
  case "scaffolding":
42
- if (!surfaces.scaffolding) return null;
43
- return join(surfaces.scaffolding, `${slugify(subject.concern)}.md`);
60
+ if (!surfaces.scaffolding) return [];
61
+ return [join(surfaces.scaffolding, `${slugify(subject.concern)}.md`)];
44
62
  case "output-schema":
45
- if (!surfaces.outputSchema) return null;
46
- return surfaces.outputSchema;
63
+ if (!surfaces.outputSchema) return [];
64
+ return [surfaces.outputSchema];
47
65
  case "websearch.outdated":
48
66
  case "prior-run-summary":
49
- return null;
67
+ return [];
50
68
  case "cluster":
51
- return null;
69
+ return [];
52
70
  }
53
71
  }
54
72
  function slugify(s) {
@@ -63,11 +81,7 @@ function validateSurfaces(surfaces, repoRoot) {
63
81
  "knowledge"
64
82
  ];
65
83
  const fileSurfaces = ["rubric"];
66
- const optionalDirSurfaces = [
67
- "scaffolding",
68
- "memory",
69
- "rag"
70
- ];
84
+ const optionalDirSurfaces = ["scaffolding", "memory", "rag"];
71
85
  const optionalFileSurfaces = ["outputSchema"];
72
86
  for (const key of dirSurfaces) {
73
87
  const p = surfaces[key];
@@ -119,6 +133,19 @@ function renderSurfaceIssues(issues, repoRoot) {
119
133
  }
120
134
 
121
135
  // src/agent/define-agent.ts
136
+ function unimplementedAgentRun(reason = "AgentRuntime.act is not yet wired for this manifest") {
137
+ return {
138
+ events: (async function* empty() {
139
+ })(),
140
+ output: Promise.reject(new Error(reason))
141
+ };
142
+ }
143
+ async function collectAgentRun(invocation) {
144
+ const events = [];
145
+ for await (const ev of invocation.events) events.push(ev);
146
+ const output = await invocation.output;
147
+ return { events, output };
148
+ }
122
149
  var AgentManifestError = class extends Error {
123
150
  constructor(message, agentId, issues = []) {
124
151
  super(message);
@@ -155,8 +182,8 @@ function defineAgent(manifest) {
155
182
  }
156
183
 
157
184
  // src/agent/improvement-adapter.ts
158
- import { readFileSync } from "fs";
159
185
  import { spawnSync } from "child_process";
186
+ import { readFileSync } from "fs";
160
187
  import {
161
188
  parseFindingSubject
162
189
  } from "@tangle-network/agent-eval";
@@ -254,7 +281,9 @@ function createSurfaceImprovementAdapter(opts) {
254
281
  return { applied, warnings };
255
282
  }
256
283
  if (mode === "open-pr" && !opts.ghRepo) {
257
- warnings.push("createSurfaceImprovementAdapter: mode=open-pr requires `ghRepo`; falling back to no-op");
284
+ warnings.push(
285
+ "createSurfaceImprovementAdapter: mode=open-pr requires `ghRepo`; falling back to no-op"
286
+ );
258
287
  return { applied, warnings };
259
288
  }
260
289
  for (const edit of edits) {
@@ -306,7 +335,9 @@ function openPullRequest(paths, edits, repoRoot, ghRepo, baseBranch) {
306
335
  `Automated analyst-loop edits \u2014 review carefully before merge.`,
307
336
  "",
308
337
  `Source findings:`,
309
- ...edits.map((e) => ` - ${e.sourceFindingId} (confidence ${e.confidence.toFixed(2)}, severity ${e.severity})`),
338
+ ...edits.map(
339
+ (e) => ` - ${e.sourceFindingId} (confidence ${e.confidence.toFixed(2)}, severity ${e.severity})`
340
+ ),
310
341
  "",
311
342
  "Rationales:",
312
343
  ...edits.map((e) => `
@@ -462,14 +493,124 @@ function meanComposite(rows) {
462
493
  if (rows.length === 0) return 0;
463
494
  return rows.reduce((acc, r) => acc + r.composite, 0) / rows.length;
464
495
  }
496
+
497
+ // src/agent/production-trace-sink.ts
498
+ import {
499
+ exportRunAsOtlp,
500
+ InMemoryTraceStore
501
+ } from "@tangle-network/agent-eval";
502
+ function createProductionTraceSink(opts) {
503
+ const log = opts.log ?? defaultLog;
504
+ const fetchImpl = opts.fetch ?? globalThis.fetch;
505
+ const traceStore = new InMemoryTraceStore();
506
+ const onRunComplete = async (ctx) => {
507
+ if (opts.runRecordStore) {
508
+ try {
509
+ const record = await composeRunRecord(traceStore, ctx, opts.projectId);
510
+ await opts.runRecordStore.append(record);
511
+ } catch (err) {
512
+ log("runRecordStore.append failed", {
513
+ runId: ctx.runId,
514
+ error: err instanceof Error ? err.message : String(err)
515
+ });
516
+ }
517
+ }
518
+ if (opts.otlp) {
519
+ try {
520
+ const resourceAttrs = {
521
+ "service.name": opts.projectId,
522
+ ...opts.otlp.resourceAttributes ?? {}
523
+ };
524
+ const otlpPayload = await exportRunAsOtlp(traceStore, ctx.runId, resourceAttrs);
525
+ const headers = { "content-type": "application/json" };
526
+ if (opts.otlp.authHeader) headers.authorization = opts.otlp.authHeader;
527
+ const res = await fetchImpl(opts.otlp.endpoint, {
528
+ method: "POST",
529
+ headers,
530
+ body: JSON.stringify(otlpPayload)
531
+ });
532
+ if (!res.ok) {
533
+ log("OTLP POST non-2xx", {
534
+ runId: ctx.runId,
535
+ status: res.status,
536
+ endpoint: opts.otlp.endpoint
537
+ });
538
+ }
539
+ } catch (err) {
540
+ log("OTLP POST threw", {
541
+ runId: ctx.runId,
542
+ error: err instanceof Error ? err.message : String(err),
543
+ endpoint: opts.otlp.endpoint
544
+ });
545
+ }
546
+ }
547
+ };
548
+ const recordFeedback = async (input) => {
549
+ if (!opts.feedbackStore) return null;
550
+ const trajectoryId = input.trajectoryId ?? `traj-${input.runId}`;
551
+ try {
552
+ const existing = await opts.feedbackStore.get(trajectoryId);
553
+ if (existing) {
554
+ await opts.feedbackStore.appendLabel(trajectoryId, input.label);
555
+ return trajectoryId;
556
+ }
557
+ await opts.feedbackStore.save({
558
+ id: trajectoryId,
559
+ projectId: opts.projectId,
560
+ scenarioId: input.scenarioId ?? input.runId,
561
+ task: { intent: "chat", context: { runId: input.runId } },
562
+ attempts: [],
563
+ labels: [input.label],
564
+ createdAt: (/* @__PURE__ */ new Date()).toISOString()
565
+ });
566
+ return trajectoryId;
567
+ } catch (err) {
568
+ log("feedbackStore write failed", {
569
+ runId: input.runId,
570
+ error: err instanceof Error ? err.message : String(err)
571
+ });
572
+ return null;
573
+ }
574
+ };
575
+ return { traceStore, onRunComplete, recordFeedback };
576
+ }
577
+ async function composeRunRecord(store, ctx, projectId) {
578
+ const run = await store.getRun(ctx.runId);
579
+ const spans = await store.spans({ runId: ctx.runId });
580
+ const now = Date.now();
581
+ const startedAtMs = run?.startedAt ?? now;
582
+ const endedAtMs = run?.endedAt ?? now;
583
+ return {
584
+ runId: ctx.runId,
585
+ projectId,
586
+ scenarioId: run?.scenarioId ?? ctx.runId,
587
+ variantId: run?.variantId,
588
+ startedAt: new Date(startedAtMs).toISOString(),
589
+ endedAt: new Date(endedAtMs).toISOString(),
590
+ status: ctx.status,
591
+ pass: ctx.outcome?.pass,
592
+ score: ctx.outcome?.score,
593
+ failureClass: ctx.outcome?.failureClass,
594
+ notes: ctx.outcome?.notes,
595
+ tags: run?.tags,
596
+ spanCount: spans.length
597
+ };
598
+ }
599
+ function defaultLog(msg, fields) {
600
+ if (fields) console.warn(`[production-trace-sink] ${msg}`, fields);
601
+ else console.warn(`[production-trace-sink] ${msg}`);
602
+ }
465
603
  export {
466
604
  AgentManifestError,
605
+ collectAgentRun,
606
+ createProductionTraceSink,
467
607
  createSurfaceImprovementAdapter,
468
608
  createSurfaceKnowledgeAdapter,
469
609
  defineAgent,
470
610
  measureOutcome,
471
611
  renderSurfaceIssues,
472
612
  resolveSubjectPath,
613
+ unimplementedAgentRun,
473
614
  validateSurfaces
474
615
  };
475
616
  //# sourceMappingURL=agent.js.map