@tangle-network/agent-runtime 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +217 -9
- package/dist/agent.js +175 -34
- package/dist/agent.js.map +1 -1
- package/dist/index.d.ts +4 -389
- package/dist/types-afLuHk1G.d.ts +390 -0
- package/package.json +1 -1
package/dist/agent.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
|
|
2
|
-
import { FindingSubject, TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
|
|
2
|
+
import { FindingSubject, TraceAnalystKindSpec, AnalystFinding, TraceStore, RunCompleteHook, FeedbackLabel, FeedbackTrajectoryStore } from '@tangle-network/agent-eval';
|
|
3
|
+
import { R as RuntimeStreamEvent } from './types-afLuHk1G.js';
|
|
3
4
|
import { I as ImprovementAdapter, K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-D_MXrmJP.js';
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -230,16 +231,61 @@ interface JudgeConfig<TRunOutput> {
|
|
|
230
231
|
}
|
|
231
232
|
interface AgentRuntime<TPersona, TRunOutput> {
|
|
232
233
|
/**
|
|
233
|
-
* Invoke the agent against one persona. Returns
|
|
234
|
-
*
|
|
234
|
+
* Invoke the agent against one persona. Returns BOTH:
|
|
235
|
+
* - `events`: an `AsyncIterable<RuntimeStreamEvent>` the chat-centric
|
|
236
|
+
* product consumes verbatim (SSE / WebSocket / inline render).
|
|
237
|
+
* **Streaming is mandatory — never collapse this to a single Promise.**
|
|
238
|
+
* The agent's existing `runChatTurn` (or equivalent async generator)
|
|
239
|
+
* plugs in here directly.
|
|
240
|
+
* - `output`: a `Promise<TRunOutput>` resolved AFTER the event stream
|
|
241
|
+
* drains. The eval substrate awaits this for rubric scoring; chat
|
|
242
|
+
* products usually ignore it (they already rendered incrementally).
|
|
235
243
|
*
|
|
236
|
-
*
|
|
237
|
-
*
|
|
238
|
-
*
|
|
239
|
-
*
|
|
244
|
+
* Implementation contract:
|
|
245
|
+
* 1. `act` MUST return immediately (synchronous construction of the
|
|
246
|
+
* `events` iterator + the `output` promise).
|
|
247
|
+
* 2. Iterating `events` drives the underlying LLM/tool calls — the
|
|
248
|
+
* caller chooses when to consume.
|
|
249
|
+
* 3. `output` resolves only after the iterator yields its terminal
|
|
250
|
+
* event (typically `task_end`); see `collectAgentRun` helper.
|
|
251
|
+
*
|
|
252
|
+
* `ctx.emitter` is the substrate-threaded `TraceEmitter` — runtimes
|
|
253
|
+
* SHOULD record LLM/tool spans through it for capture integrity.
|
|
254
|
+
* `ctx.deadlineMs` is wall-clock; the runtime SHOULD honour it for graceful
|
|
255
|
+
* cancel. `ctx.signal` is the standard abort signal.
|
|
240
256
|
*/
|
|
241
|
-
act: (persona: TPersona, ctx: AgentRunContext) =>
|
|
257
|
+
act: (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput>;
|
|
258
|
+
}
|
|
259
|
+
interface AgentRunInvocation<TRunOutput> {
|
|
260
|
+
/** Live stream of typed runtime events. Consumed by chat UX directly. */
|
|
261
|
+
events: AsyncIterable<RuntimeStreamEvent>;
|
|
262
|
+
/** Final structured output the rubric scores. Resolves after `events` drains. */
|
|
263
|
+
output: Promise<TRunOutput>;
|
|
242
264
|
}
|
|
265
|
+
/**
|
|
266
|
+
* Stub for agents whose `runtime.act` is not yet wired to the substrate's
|
|
267
|
+
* eval path. Preserves the streaming contract (empty event stream + a
|
|
268
|
+
* rejected `output` promise that tells the caller exactly what to fix).
|
|
269
|
+
*
|
|
270
|
+
* Per-vertical manifests usually start with this stub and replace it with
|
|
271
|
+
* the agent's real streaming runtime (`runChatTurn` or equivalent) once
|
|
272
|
+
* the eval path consumes the manifest end-to-end.
|
|
273
|
+
*/
|
|
274
|
+
declare function unimplementedAgentRun<TRunOutput = unknown>(reason?: string): AgentRunInvocation<TRunOutput>;
|
|
275
|
+
/**
|
|
276
|
+
* Drain `act`'s `events` into an array AND await its `output`. Useful for
|
|
277
|
+
* eval / outcome-measurement code paths that don't care about live
|
|
278
|
+
* rendering. The events array is preserved so the substrate can inspect
|
|
279
|
+
* tool calls / readiness / questions retrospectively.
|
|
280
|
+
*
|
|
281
|
+
* IMPORTANT: chat-centric UX MUST NOT call this — it defeats streaming
|
|
282
|
+
* (no incremental render). Use `for await (const ev of invocation.events)`
|
|
283
|
+
* directly in the chat surface.
|
|
284
|
+
*/
|
|
285
|
+
declare function collectAgentRun<TRunOutput>(invocation: AgentRunInvocation<TRunOutput>): Promise<{
|
|
286
|
+
events: ReadonlyArray<RuntimeStreamEvent>;
|
|
287
|
+
output: TRunOutput;
|
|
288
|
+
}>;
|
|
243
289
|
interface AgentRunContext {
|
|
244
290
|
/** Substrate-managed trace emitter. */
|
|
245
291
|
emitter: _tangle_network_agent_eval.TraceEmitter;
|
|
@@ -534,4 +580,166 @@ declare function measureOutcome<TProposal, TEdit>(result: RunAnalystLoopResult<T
|
|
|
534
580
|
outcome: OutcomeMeasurement;
|
|
535
581
|
}>;
|
|
536
582
|
|
|
537
|
-
|
|
583
|
+
/**
|
|
584
|
+
* `createProductionTraceSink` — the production-side capture primitive
|
|
585
|
+
* every vertical agent's chat handler wires in once.
|
|
586
|
+
*
|
|
587
|
+
* Closes the data-leak: until now, every production chat session emitted
|
|
588
|
+
* zero replayable trace data. Eval runs captured everything; production
|
|
589
|
+
* captured nothing. RL training corpora, research analyses, and the
|
|
590
|
+
* self-improvement loop all ran on synthetic personas. This primitive
|
|
591
|
+
* makes every real user conversation a piece of data the downstream
|
|
592
|
+
* channels (Prime Intellect, GEPA, research, canaries, analyst loop)
|
|
593
|
+
* can consume.
|
|
594
|
+
*
|
|
595
|
+
* Wiring (per agent, ~10 lines in the production chat handler):
|
|
596
|
+
*
|
|
597
|
+
* ```ts
|
|
598
|
+
* const sink = createProductionTraceSink({
|
|
599
|
+
* projectId: 'tax-agent',
|
|
600
|
+
* otlp: { endpoint: env.LANGFUSE_OTEL_ENDPOINT, authHeader: env.LANGFUSE_OTEL_AUTH },
|
|
601
|
+
* runRecordStore: drizzleRunRecordStore(db),
|
|
602
|
+
* feedbackStore: drizzleFeedbackStore(db),
|
|
603
|
+
* })
|
|
604
|
+
*
|
|
605
|
+
* const emitter = new TraceEmitter(sink.traceStore, {
|
|
606
|
+
* onRunComplete: [sink.onRunComplete],
|
|
607
|
+
* })
|
|
608
|
+
* await emitter.startRun({
|
|
609
|
+
* scenarioId: sessionId,
|
|
610
|
+
* projectId: 'tax-agent',
|
|
611
|
+
* layer: 'app-runtime',
|
|
612
|
+
* })
|
|
613
|
+
* // ... existing chat flow, with LLM/tool spans emitted ...
|
|
614
|
+
* await emitter.endRun({ pass, score })
|
|
615
|
+
* // sink.onRunComplete fires automatically:
|
|
616
|
+
* // 1. composes RunRecord, persists to runRecordStore
|
|
617
|
+
* // 2. exports run as OTLP, POSTs to Langfuse
|
|
618
|
+
* // 3. logs failures (does NOT throw — never crashes the chat handler)
|
|
619
|
+
* ```
|
|
620
|
+
*
|
|
621
|
+
* Separately, the agent's feedback endpoint calls `sink.recordFeedback`
|
|
622
|
+
* to write user thumbs-up/thumbs-down (or richer labels) into the
|
|
623
|
+
* FeedbackTrajectory store — the corpus DPO/KTO trainers consume.
|
|
624
|
+
*
|
|
625
|
+
* Cloudflare Worker semantics: the sink buffers spans in memory through
|
|
626
|
+
* the request lifetime (via agent-eval's `InMemoryTraceStore`).
|
|
627
|
+
* `onRunComplete` is awaited (typically inside `ctx.waitUntil`) so the
|
|
628
|
+
* worker stays alive long enough to flush. The OTLP POST is awaited but
|
|
629
|
+
* best-effort — failures are logged but never surface to the chat user.
|
|
630
|
+
*/
|
|
631
|
+
|
|
632
|
+
interface ProductionTraceSinkOpts {
|
|
633
|
+
/**
|
|
634
|
+
* Stable agent identifier — appears in OTLP `service.name`, every
|
|
635
|
+
* RunRecord row, every FeedbackTrajectory row. MUST match the
|
|
636
|
+
* agent's repo name to keep cross-repo telemetry joinable.
|
|
637
|
+
*/
|
|
638
|
+
projectId: string;
|
|
639
|
+
/**
|
|
640
|
+
* OTLP forwarding target. Typically Langfuse's HTTP receiver. Omit to
|
|
641
|
+
* disable OTLP export (RunRecord persistence still works).
|
|
642
|
+
*
|
|
643
|
+
* `authHeader` is the full header value (e.g. `Basic <base64>`); the
|
|
644
|
+
* sink does NOT base64-encode for you.
|
|
645
|
+
*/
|
|
646
|
+
otlp?: {
|
|
647
|
+
endpoint: string;
|
|
648
|
+
authHeader?: string;
|
|
649
|
+
/** Optional resource attributes merged into every span batch. */
|
|
650
|
+
resourceAttributes?: Record<string, string | number | boolean>;
|
|
651
|
+
};
|
|
652
|
+
/**
|
|
653
|
+
* Durable RunRecord persistence. Per-vertical agents implement this
|
|
654
|
+
* over their own DB (Drizzle / D1 / Postgres). Optional — when omitted,
|
|
655
|
+
* no RunRecord is composed; the in-memory spans are lost when the worker isolate ends.
|
|
656
|
+
*/
|
|
657
|
+
runRecordStore?: ProductionRunRecordStore;
|
|
658
|
+
/**
|
|
659
|
+
* Durable feedback persistence. Used by `recordFeedback`; agents wire
|
|
660
|
+
* their thumbs-up/down + free-text feedback endpoints to call into the
|
|
661
|
+
* sink, which writes a `FeedbackLabel` into a `FeedbackTrajectory`.
|
|
662
|
+
*
|
|
663
|
+
* Optional — when omitted, `recordFeedback` is a no-op.
|
|
664
|
+
*/
|
|
665
|
+
feedbackStore?: FeedbackTrajectoryStore;
|
|
666
|
+
/**
|
|
667
|
+
* Pluggable fetch — defaults to globalThis.fetch. Tests inject a
|
|
668
|
+
* mocked fetch.
|
|
669
|
+
*/
|
|
670
|
+
fetch?: typeof fetch;
|
|
671
|
+
/**
|
|
672
|
+
* Pluggable structured logger — defaults to console.warn for failures.
|
|
673
|
+
* The sink NEVER throws on flush failure; it logs and returns.
|
|
674
|
+
*/
|
|
675
|
+
log?: (msg: string, fields?: Record<string, unknown>) => void;
|
|
676
|
+
}
|
|
677
|
+
/**
|
|
678
|
+
* Durable per-agent RunRecord persistence. Each vertical implements over
|
|
679
|
+
* its own DB. The sink calls `append` once per `endRun`.
|
|
680
|
+
*/
|
|
681
|
+
interface ProductionRunRecordStore {
|
|
682
|
+
append(record: ProductionRunRecord): Promise<void>;
|
|
683
|
+
}
|
|
684
|
+
/**
|
|
685
|
+
* Minimal canonical row the sink composes on `endRun`. Per-agent DB
|
|
686
|
+
* adapters extend with their own fields; the sink only writes what
|
|
687
|
+
* the runtime canonically captures.
|
|
688
|
+
*/
|
|
689
|
+
interface ProductionRunRecord {
|
|
690
|
+
runId: string;
|
|
691
|
+
projectId: string;
|
|
692
|
+
scenarioId: string;
|
|
693
|
+
variantId?: string;
|
|
694
|
+
startedAt: string;
|
|
695
|
+
endedAt: string;
|
|
696
|
+
status: 'completed' | 'failed' | 'aborted';
|
|
697
|
+
pass?: boolean;
|
|
698
|
+
score?: number;
|
|
699
|
+
failureClass?: string;
|
|
700
|
+
notes?: string;
|
|
701
|
+
/** Echoed back from `emitter.startRun({ tags })`. */
|
|
702
|
+
tags?: Record<string, string>;
|
|
703
|
+
/** Span row count — useful for diagnostics. */
|
|
704
|
+
spanCount: number;
|
|
705
|
+
}
|
|
706
|
+
interface ProductionTraceSink {
|
|
707
|
+
/**
|
|
708
|
+
* The TraceStore the agent's `TraceEmitter` writes to. In-memory by
|
|
709
|
+
* design: spans accumulate through the chat session, flush at
|
|
710
|
+
* `onRunComplete`. The runtime never reads from this store directly —
|
|
711
|
+
* the sink reads from it during the flush step.
|
|
712
|
+
*/
|
|
713
|
+
traceStore: TraceStore;
|
|
714
|
+
/**
|
|
715
|
+
* Hook the agent passes into
|
|
716
|
+
* `new TraceEmitter(store, { onRunComplete: [sink.onRunComplete] })`.
|
|
717
|
+
* Fires once per chat session at `endRun` time. Composes the
|
|
718
|
+
* RunRecord, persists, and ships OTLP. Errors are logged, never thrown.
|
|
719
|
+
*/
|
|
720
|
+
onRunComplete: RunCompleteHook;
|
|
721
|
+
/**
|
|
722
|
+
* Append a user feedback label (thumbs-up/down, correction, severity)
|
|
723
|
+
* to the FeedbackTrajectory for a completed run. Creates a minimal
|
|
724
|
+
* trajectory anchored to the run if one doesn't exist; appends the
|
|
725
|
+
* label if it does. No-op when `feedbackStore` is undefined.
|
|
726
|
+
*
|
|
727
|
+
* Returns the trajectory id (existing or freshly created) for the
|
|
728
|
+
* agent's API to link back to the session, or `null` on no-op /
|
|
729
|
+
* error.
|
|
730
|
+
*/
|
|
731
|
+
recordFeedback(input: RecordFeedbackInput): Promise<string | null>;
|
|
732
|
+
}
|
|
733
|
+
interface RecordFeedbackInput {
|
|
734
|
+
/** Run id from the original `emitter.startRun`. */
|
|
735
|
+
runId: string;
|
|
736
|
+
/** The user-supplied feedback label. */
|
|
737
|
+
label: FeedbackLabel;
|
|
738
|
+
/** Optional scenario id (mirrors the run's). */
|
|
739
|
+
scenarioId?: string;
|
|
740
|
+
/** Optional pre-existing trajectory id if the agent tracks them separately. */
|
|
741
|
+
trajectoryId?: string;
|
|
742
|
+
}
|
|
743
|
+
declare function createProductionTraceSink(opts: ProductionTraceSinkOpts): ProductionTraceSink;
|
|
744
|
+
|
|
745
|
+
export { type AgentManifest, AgentManifestError, type AgentRubric, type AgentRunContext, type AgentRunInvocation, type AgentRuntime, type AgentSurfaces, type AnalystConfig, type AutoApplyPolicy, type CreateSurfaceImprovementAdapterOpts, type CreateSurfaceKnowledgeAdapterOpts, type DraftPatchInput, type DraftPatchOutput, type JudgeConfig, type KnowledgeAdapterDeps, type OutcomeMeasurement, type OutcomeMeasurementOpts, type ProductionRunRecord, type ProductionRunRecordStore, type ProductionTraceSink, type ProductionTraceSinkOpts, type RecordFeedbackInput, type ResolvedSurface, type RubricDimension, type SurfaceImprovementEdit, type SurfaceValidationIssue, collectAgentRun, createProductionTraceSink, createSurfaceImprovementAdapter, createSurfaceKnowledgeAdapter, defineAgent, measureOutcome, renderSurfaceIssues, resolveSubjectPath, unimplementedAgentRun, validateSurfaces };
|
package/dist/agent.js
CHANGED
|
@@ -6,49 +6,67 @@ import {
|
|
|
6
6
|
import { existsSync } from "fs";
|
|
7
7
|
import { isAbsolute, join } from "path";
|
|
8
8
|
/**
 * Resolve a finding subject to a concrete file location under the agent's
 * surfaces.
 *
 * Candidate paths come from `candidatePathsForSubject`, in priority order.
 * The first candidate that exists on disk is returned with intent
 * `"edit-existing"`. When none exists, the first candidate is returned as the
 * `"create-new"` target.
 *
 * @param subject  Finding subject; `subject.kind` selects the surface.
 * @param surfaces Surface locations from the agent manifest.
 * @param repoRoot Directory that relative candidates are joined onto.
 * @returns `null` when the subject maps to no path; otherwise
 *   `{ absolutePath, repoRelativePath, exists, intent }`.
 */
function resolveSubjectPath(subject, surfaces, repoRoot) {
  // An unrecognised `subject.kind` can fall out of the candidate switch as
  // `undefined`; treat that exactly like "no candidates" instead of crashing
  // on `.length`.
  const candidates = candidatePathsForSubject(subject, surfaces) ?? [];
  if (candidates.length === 0) return null;

  // Candidates may already be absolute; only relative ones are rooted at `repoRoot`.
  const toAbsolute = (rel) => (isAbsolute(rel) ? rel : join(repoRoot, rel));

  for (const rel of candidates) {
    const abs = toAbsolute(rel);
    if (existsSync(abs)) {
      return { absolutePath: abs, repoRelativePath: rel, exists: true, intent: "edit-existing" };
    }
  }

  // Nothing on disk yet: propose the highest-priority candidate as a new file.
  const fallback = candidates[0];
  return {
    absolutePath: toAbsolute(fallback),
    repoRelativePath: fallback,
    exists: false,
    intent: "create-new"
  };
}
|
|
20
|
-
function
|
|
26
|
+
function candidatePathsForSubject(subject, surfaces) {
|
|
21
27
|
switch (subject.kind) {
|
|
22
28
|
case "knowledge.wiki":
|
|
23
29
|
case "knowledge.stale":
|
|
24
|
-
return join(surfaces.knowledge, `${subject.slug}.md`);
|
|
30
|
+
return [join(surfaces.knowledge, `${subject.slug}.md`)];
|
|
25
31
|
case "knowledge.claim":
|
|
26
|
-
return join(surfaces.knowledge, "claims", `${slugify(subject.topic)}.md`);
|
|
32
|
+
return [join(surfaces.knowledge, "claims", `${slugify(subject.topic)}.md`)];
|
|
27
33
|
case "knowledge.raw":
|
|
28
|
-
return join(surfaces.knowledge, "raw", `${subject.sourceId}.md`);
|
|
29
|
-
case "system-prompt":
|
|
30
|
-
|
|
34
|
+
return [join(surfaces.knowledge, "raw", `${subject.sourceId}.md`)];
|
|
35
|
+
case "system-prompt": {
|
|
36
|
+
const slug = slugify(subject.section);
|
|
37
|
+
return [
|
|
38
|
+
join(surfaces.systemPrompt, `${slug}.md`),
|
|
39
|
+
join(surfaces.systemPrompt, slug, "SKILL.md"),
|
|
40
|
+
join(surfaces.systemPrompt, slug, "index.md")
|
|
41
|
+
];
|
|
42
|
+
}
|
|
31
43
|
case "tool-doc":
|
|
32
|
-
|
|
44
|
+
if (subject.aspect) {
|
|
45
|
+
return [join(surfaces.tools, subject.tool, `${slugify(subject.aspect)}.md`)];
|
|
46
|
+
}
|
|
47
|
+
return [
|
|
48
|
+
join(surfaces.tools, subject.tool, "README.md"),
|
|
49
|
+
join(surfaces.tools, `${subject.tool}.md`)
|
|
50
|
+
];
|
|
33
51
|
case "new-tool":
|
|
34
|
-
return join(surfaces.tools, subject.name, "README.md");
|
|
52
|
+
return [join(surfaces.tools, subject.name, "README.md")];
|
|
35
53
|
case "rag":
|
|
36
|
-
if (!surfaces.rag) return
|
|
37
|
-
return join(surfaces.rag, subject.corpus, `${subject.docId}.md`);
|
|
54
|
+
if (!surfaces.rag) return [];
|
|
55
|
+
return [join(surfaces.rag, subject.corpus, `${subject.docId}.md`)];
|
|
38
56
|
case "memory":
|
|
39
|
-
if (!surfaces.memory) return
|
|
40
|
-
return join(surfaces.memory, `${slugify(subject.key)}.json`);
|
|
57
|
+
if (!surfaces.memory) return [];
|
|
58
|
+
return [join(surfaces.memory, `${slugify(subject.key)}.json`)];
|
|
41
59
|
case "scaffolding":
|
|
42
|
-
if (!surfaces.scaffolding) return
|
|
43
|
-
return join(surfaces.scaffolding, `${slugify(subject.concern)}.md`);
|
|
60
|
+
if (!surfaces.scaffolding) return [];
|
|
61
|
+
return [join(surfaces.scaffolding, `${slugify(subject.concern)}.md`)];
|
|
44
62
|
case "output-schema":
|
|
45
|
-
if (!surfaces.outputSchema) return
|
|
46
|
-
return surfaces.outputSchema;
|
|
63
|
+
if (!surfaces.outputSchema) return [];
|
|
64
|
+
return [surfaces.outputSchema];
|
|
47
65
|
case "websearch.outdated":
|
|
48
66
|
case "prior-run-summary":
|
|
49
|
-
return
|
|
67
|
+
return [];
|
|
50
68
|
case "cluster":
|
|
51
|
-
return
|
|
69
|
+
return [];
|
|
52
70
|
}
|
|
53
71
|
}
|
|
54
72
|
function slugify(s) {
|
|
@@ -63,11 +81,7 @@ function validateSurfaces(surfaces, repoRoot) {
|
|
|
63
81
|
"knowledge"
|
|
64
82
|
];
|
|
65
83
|
const fileSurfaces = ["rubric"];
|
|
66
|
-
const optionalDirSurfaces = [
|
|
67
|
-
"scaffolding",
|
|
68
|
-
"memory",
|
|
69
|
-
"rag"
|
|
70
|
-
];
|
|
84
|
+
const optionalDirSurfaces = ["scaffolding", "memory", "rag"];
|
|
71
85
|
const optionalFileSurfaces = ["outputSchema"];
|
|
72
86
|
for (const key of dirSurfaces) {
|
|
73
87
|
const p = surfaces[key];
|
|
@@ -119,6 +133,19 @@ function renderSurfaceIssues(issues, repoRoot) {
|
|
|
119
133
|
}
|
|
120
134
|
|
|
121
135
|
// src/agent/define-agent.ts
|
|
136
|
+
function unimplementedAgentRun(reason = "AgentRuntime.act is not yet wired for this manifest") {
|
|
137
|
+
return {
|
|
138
|
+
events: (async function* empty() {
|
|
139
|
+
})(),
|
|
140
|
+
output: Promise.reject(new Error(reason))
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/**
 * Drain an invocation's event stream into an array, then await its output.
 *
 * Meant for eval-style callers that want the whole run at once; it buffers
 * every event, so it gives no incremental rendering.
 *
 * @param invocation `{ events, output }` as returned by `act`.
 * @returns `{ events, output }` where `events` is the collected array and
 *   `output` is the resolved value of `invocation.output`.
 * @throws Whatever the event stream throws while iterating, or the rejection
 *   reason of `invocation.output`.
 */
async function collectAgentRun(invocation) {
  const output = invocation.output;
  // Observe `output` before iterating. If the stream throws, or `output`
  // rejects while the stream is still waiting on I/O, nobody else would be
  // attached and the rejection would be reported as unhandled. The later
  // `await output` still surfaces the rejection to the caller.
  Promise.resolve(output).catch(() => {});

  const events = [];
  for await (const ev of invocation.events) events.push(ev);

  return { events, output: await output };
}
|
|
122
149
|
var AgentManifestError = class extends Error {
|
|
123
150
|
constructor(message, agentId, issues = []) {
|
|
124
151
|
super(message);
|
|
@@ -155,8 +182,8 @@ function defineAgent(manifest) {
|
|
|
155
182
|
}
|
|
156
183
|
|
|
157
184
|
// src/agent/improvement-adapter.ts
|
|
158
|
-
import { readFileSync } from "fs";
|
|
159
185
|
import { spawnSync } from "child_process";
|
|
186
|
+
import { readFileSync } from "fs";
|
|
160
187
|
import {
|
|
161
188
|
parseFindingSubject
|
|
162
189
|
} from "@tangle-network/agent-eval";
|
|
@@ -254,7 +281,9 @@ function createSurfaceImprovementAdapter(opts) {
|
|
|
254
281
|
return { applied, warnings };
|
|
255
282
|
}
|
|
256
283
|
if (mode === "open-pr" && !opts.ghRepo) {
|
|
257
|
-
warnings.push(
|
|
284
|
+
warnings.push(
|
|
285
|
+
"createSurfaceImprovementAdapter: mode=open-pr requires `ghRepo`; falling back to no-op"
|
|
286
|
+
);
|
|
258
287
|
return { applied, warnings };
|
|
259
288
|
}
|
|
260
289
|
for (const edit of edits) {
|
|
@@ -306,7 +335,9 @@ function openPullRequest(paths, edits, repoRoot, ghRepo, baseBranch) {
|
|
|
306
335
|
`Automated analyst-loop edits \u2014 review carefully before merge.`,
|
|
307
336
|
"",
|
|
308
337
|
`Source findings:`,
|
|
309
|
-
...edits.map(
|
|
338
|
+
...edits.map(
|
|
339
|
+
(e) => ` - ${e.sourceFindingId} (confidence ${e.confidence.toFixed(2)}, severity ${e.severity})`
|
|
340
|
+
),
|
|
310
341
|
"",
|
|
311
342
|
"Rationales:",
|
|
312
343
|
...edits.map((e) => `
|
|
@@ -462,14 +493,124 @@ function meanComposite(rows) {
|
|
|
462
493
|
if (rows.length === 0) return 0;
|
|
463
494
|
return rows.reduce((acc, r) => acc + r.composite, 0) / rows.length;
|
|
464
495
|
}
|
|
496
|
+
|
|
497
|
+
// src/agent/production-trace-sink.ts
|
|
498
|
+
import {
|
|
499
|
+
exportRunAsOtlp,
|
|
500
|
+
InMemoryTraceStore
|
|
501
|
+
} from "@tangle-network/agent-eval";
|
|
502
|
+
/**
 * Build the production trace sink: an in-memory trace store plus the two
 * callbacks an agent wires into its chat handler.
 *
 * - `onRunComplete(ctx)`: when `opts.runRecordStore` is set, composes a run
 *   record and appends it; when `opts.otlp` is set, exports the run as OTLP
 *   and POSTs it as JSON to `opts.otlp.endpoint`. Every failure is logged via
 *   `opts.log` (default: `defaultLog`) and swallowed.
 * - `recordFeedback(input)`: appends `input.label` to an existing trajectory
 *   or saves a new one. Resolves to the trajectory id, or `null` when no
 *   `feedbackStore` is configured or the write fails.
 *
 * @param opts Sink options: `projectId`, optional `otlp`, `runRecordStore`,
 *   `feedbackStore`, `fetch`, `log`.
 * @returns `{ traceStore, onRunComplete, recordFeedback }`.
 */
function createProductionTraceSink(opts) {
  const log = opts.log ?? defaultLog;
  const fetchImpl = opts.fetch ?? globalThis.fetch;
  // NOTE(review): nothing in this function removes spans from the store after
  // a flush. If a sink outlives a single run, confirm how InMemoryTraceStore
  // bounds its memory.
  const traceStore = new InMemoryTraceStore();

  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  // Compose and persist the run record. The two steps are reported separately
  // so a compose failure is not misattributed to the store adapter.
  const persistRunRecord = async (ctx) => {
    let record;
    try {
      record = await composeRunRecord(traceStore, ctx, opts.projectId);
    } catch (err) {
      log("composeRunRecord failed", {
        runId: ctx.runId,
        error: describeError(err)
      });
      return;
    }
    try {
      await opts.runRecordStore.append(record);
    } catch (err) {
      log("runRecordStore.append failed", {
        runId: ctx.runId,
        error: describeError(err)
      });
    }
  };

  // Export the run as OTLP and POST it. Non-2xx responses and thrown errors
  // are logged, never rethrown.
  const exportOtlp = async (ctx) => {
    const { endpoint, authHeader, resourceAttributes } = opts.otlp;
    if (typeof fetchImpl !== "function") {
      // No injected fetch and no global one: say so plainly instead of
      // surfacing "fetchImpl is not a function".
      log("OTLP export skipped: no fetch implementation available", {
        runId: ctx.runId,
        endpoint
      });
      return;
    }
    try {
      // Caller-supplied resource attributes win over the default service.name.
      const resourceAttrs = {
        "service.name": opts.projectId,
        ...resourceAttributes ?? {}
      };
      const otlpPayload = await exportRunAsOtlp(traceStore, ctx.runId, resourceAttrs);
      // `authHeader` is sent verbatim as the Authorization header value.
      const headers = authHeader
        ? { "content-type": "application/json", authorization: authHeader }
        : { "content-type": "application/json" };
      const res = await fetchImpl(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(otlpPayload)
      });
      if (!res.ok) {
        log("OTLP POST non-2xx", {
          runId: ctx.runId,
          status: res.status,
          endpoint
        });
      }
    } catch (err) {
      log("OTLP POST threw", {
        runId: ctx.runId,
        error: describeError(err),
        endpoint
      });
    }
  };

  const onRunComplete = async (ctx) => {
    if (opts.runRecordStore) await persistRunRecord(ctx);
    if (opts.otlp) await exportOtlp(ctx);
  };

  const recordFeedback = async (input) => {
    if (!opts.feedbackStore) return null;
    // Without an explicit trajectory id, derive a stable one from the run id
    // so repeated feedback on the same run lands on the same trajectory.
    const trajectoryId = input.trajectoryId ?? `traj-${input.runId}`;
    try {
      const existing = await opts.feedbackStore.get(trajectoryId);
      if (existing) {
        await opts.feedbackStore.appendLabel(trajectoryId, input.label);
        return trajectoryId;
      }
      // First feedback for this run: create a minimal trajectory around it.
      await opts.feedbackStore.save({
        id: trajectoryId,
        projectId: opts.projectId,
        scenarioId: input.scenarioId ?? input.runId,
        task: { intent: "chat", context: { runId: input.runId } },
        attempts: [],
        labels: [input.label],
        createdAt: new Date().toISOString()
      });
      return trajectoryId;
    } catch (err) {
      log("feedbackStore write failed", {
        runId: input.runId,
        error: describeError(err)
      });
      return null;
    }
  };

  return { traceStore, onRunComplete, recordFeedback };
}
|
|
577
|
+
async function composeRunRecord(store, ctx, projectId) {
|
|
578
|
+
const run = await store.getRun(ctx.runId);
|
|
579
|
+
const spans = await store.spans({ runId: ctx.runId });
|
|
580
|
+
const now = Date.now();
|
|
581
|
+
const startedAtMs = run?.startedAt ?? now;
|
|
582
|
+
const endedAtMs = run?.endedAt ?? now;
|
|
583
|
+
return {
|
|
584
|
+
runId: ctx.runId,
|
|
585
|
+
projectId,
|
|
586
|
+
scenarioId: run?.scenarioId ?? ctx.runId,
|
|
587
|
+
variantId: run?.variantId,
|
|
588
|
+
startedAt: new Date(startedAtMs).toISOString(),
|
|
589
|
+
endedAt: new Date(endedAtMs).toISOString(),
|
|
590
|
+
status: ctx.status,
|
|
591
|
+
pass: ctx.outcome?.pass,
|
|
592
|
+
score: ctx.outcome?.score,
|
|
593
|
+
failureClass: ctx.outcome?.failureClass,
|
|
594
|
+
notes: ctx.outcome?.notes,
|
|
595
|
+
tags: run?.tags,
|
|
596
|
+
spanCount: spans.length
|
|
597
|
+
};
|
|
598
|
+
}
|
|
599
|
+
/**
 * Fallback logger: writes a warning prefixed with `[production-trace-sink]`.
 *
 * @param msg    Message text.
 * @param fields Optional structured fields, passed to `console.warn` as a
 *   second argument when truthy.
 */
function defaultLog(msg, fields) {
  const line = `[production-trace-sink] ${msg}`;
  if (!fields) {
    console.warn(line);
    return;
  }
  console.warn(line, fields);
}
|
|
465
603
|
export {
|
|
466
604
|
AgentManifestError,
|
|
605
|
+
collectAgentRun,
|
|
606
|
+
createProductionTraceSink,
|
|
467
607
|
createSurfaceImprovementAdapter,
|
|
468
608
|
createSurfaceKnowledgeAdapter,
|
|
469
609
|
defineAgent,
|
|
470
610
|
measureOutcome,
|
|
471
611
|
renderSurfaceIssues,
|
|
472
612
|
resolveSubjectPath,
|
|
613
|
+
unimplementedAgentRun,
|
|
473
614
|
validateSurfaces
|
|
474
615
|
};
|
|
475
616
|
//# sourceMappingURL=agent.js.map
|