npm - @tangle-network/agent-eval - Versions diffs - 0.23.1 → 0.25.0 - Mend

@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148)

package/CHANGELOG.md +145 -0
package/README.md +212 -79
package/dist/baseline-4R5deP0N.d.ts +108 -0
package/dist/benchmarks/index.d.ts +3 -2
package/dist/benchmarks/index.js +1 -1
package/dist/builder-eval/index.d.ts +249 -0
package/dist/builder-eval/index.js +391 -0
package/dist/builder-eval/index.js.map +1 -0
package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
package/dist/chunk-2A5XJB43.js.map +1 -0
package/dist/chunk-47X6LRCE.js +76 -0
package/dist/chunk-47X6LRCE.js.map +1 -0
package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
package/dist/chunk-4F5DQN55.js.map +1 -0
package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
package/dist/chunk-4S4BM3QQ.js.map +1 -0
package/dist/chunk-5BKGXME7.js +65 -0
package/dist/chunk-5BKGXME7.js.map +1 -0
package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
package/dist/chunk-5LBB5B3Z.js.map +1 -0
package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
package/dist/chunk-6QDKWHLS.js.map +1 -0
package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
package/dist/chunk-EDUKQ5AM.js.map +1 -0
package/dist/chunk-I4MBDTY5.js +272 -0
package/dist/chunk-I4MBDTY5.js.map +1 -0
package/dist/chunk-JLZQWFV3.js +618 -0
package/dist/chunk-JLZQWFV3.js.map +1 -0
package/dist/chunk-K2TPS5LB.js +569 -0
package/dist/chunk-K2TPS5LB.js.map +1 -0
package/dist/chunk-KKHDIONI.js +414 -0
package/dist/chunk-KKHDIONI.js.map +1 -0
package/dist/chunk-KMPRBJK4.js +74 -0
package/dist/chunk-KMPRBJK4.js.map +1 -0
package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
package/dist/chunk-KTGTIOFD.js.map +1 -0
package/dist/chunk-LSH4MMOZ.js +838 -0
package/dist/chunk-LSH4MMOZ.js.map +1 -0
package/dist/chunk-NG236HPC.js +57 -0
package/dist/chunk-NG236HPC.js.map +1 -0
package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
package/dist/chunk-NLMNWKVM.js.map +1 -0
package/dist/chunk-NU65VQ7M.js +99 -0
package/dist/chunk-NU65VQ7M.js.map +1 -0
package/dist/chunk-OWLAAMME.js +250 -0
package/dist/chunk-OWLAAMME.js.map +1 -0
package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
package/dist/chunk-PC4UYEBM.js.map +1 -0
package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
package/dist/chunk-RAF443UI.js.map +1 -0
package/dist/chunk-RZTMDUO7.js +49 -0
package/dist/chunk-RZTMDUO7.js.map +1 -0
package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
package/dist/chunk-SESZDQPX.js.map +1 -0
package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
package/dist/chunk-TVVP3ZZQ.js.map +1 -0
package/dist/chunk-WWYCWKUM.js +196 -0
package/dist/chunk-WWYCWKUM.js.map +1 -0
package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
package/dist/chunk-YRZ4M5GS.js.map +1 -0
package/dist/chunk-ZN274SWR.js +613 -0
package/dist/chunk-ZN274SWR.js.map +1 -0
package/dist/cli.js +10 -6
package/dist/cli.js.map +1 -1
package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
package/dist/control.d.ts +8 -6
package/dist/control.js +10 -7
package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
package/dist/errors-BZ9sTdz7.d.ts +70 -0
package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
package/dist/governance/index.d.ts +5 -0
package/dist/governance/index.js +18 -0
package/dist/governance/index.js.map +1 -0
package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
package/dist/index-Oj9fAPPN.d.ts +270 -0
package/dist/index.d.ts +2018 -3003
package/dist/index.js +7443 -9102
package/dist/index.js.map +1 -1
package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
package/dist/knowledge/index.d.ts +102 -0
package/dist/knowledge/index.js +18 -0
package/dist/knowledge/index.js.map +1 -0
package/dist/meta-eval/index.d.ts +99 -0
package/dist/meta-eval/index.js +324 -0
package/dist/meta-eval/index.js.map +1 -0
package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
package/dist/openapi.json +491 -1
package/dist/optimization.d.ts +11 -8
package/dist/optimization.js +11 -9
package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
package/dist/pipelines/index.d.ts +172 -0
package/dist/pipelines/index.js +345 -0
package/dist/pipelines/index.js.map +1 -0
package/dist/prm/index.d.ts +99 -0
package/dist/prm/index.js +222 -0
package/dist/prm/index.js.map +1 -0
package/dist/query-DODUYdPg.d.ts +30 -0
package/dist/release-report-BNgMdqPF.d.ts +292 -0
package/dist/replay-BL96gCEP.d.ts +226 -0
package/dist/reporting.d.ts +10 -295
package/dist/reporting.js +10 -6
package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
package/dist/rl.d.ts +1762 -8
package/dist/rl.js +2035 -58
package/dist/rl.js.map +1 -1
package/dist/rubric-D5tjHNJQ.d.ts +72 -0
package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
package/dist/sequential-Dgz1n51-.d.ts +139 -0
package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
package/dist/telemetry/file.js +4 -1
package/dist/telemetry/file.js.map +1 -1
package/dist/telemetry/index.js +57 -57
package/dist/telemetry/index.js.map +1 -1
package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
package/dist/traces.d.ts +142 -387
package/dist/traces.js +1302 -40
package/dist/traces.js.map +1 -1
package/dist/trajectory-CnoBo-JY.d.ts +32 -0
package/dist/wire/index.d.ts +369 -25
package/dist/wire/index.js +22 -3
package/package.json +44 -18
package/dist/chunk-42I2QC2L.js.map +0 -1
package/dist/chunk-5IIQKMD5.js.map +0 -1
package/dist/chunk-6KQG5HAH.js.map +0 -1
package/dist/chunk-6M774GY6.js.map +0 -1
package/dist/chunk-7EAUOUQS.js.map +0 -1
package/dist/chunk-AXHNWLIX.js.map +0 -1
package/dist/chunk-EXGR4XEM.js.map +0 -1
package/dist/chunk-IOXMGMHQ.js.map +0 -1
package/dist/chunk-KAO3Q65R.js.map +0 -1
package/dist/chunk-LZKIOBG2.js +0 -2026
package/dist/chunk-LZKIOBG2.js.map +0 -1
package/dist/chunk-QBW3YBTR.js.map +0 -1
package/dist/chunk-QUKKGHTZ.js.map +0 -1
package/dist/chunk-SQQLHODJ.js.map +0 -1
package/dist/chunk-V5QSWN7L.js +0 -1310
package/dist/chunk-V5QSWN7L.js.map +0 -1
package/dist/chunk-VQQSPGSM.js.map +0 -1
package/dist/chunk-XPHOZPOM.js +0 -1947
package/dist/chunk-XPHOZPOM.js.map +0 -1
package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
package/dist/index-ekBXweiQ.d.ts +0 -1894
package/dist/sequential-DgU2mFsE.d.ts +0 -304

package/dist/multi-layer-verifier-LkP3LVKj.d.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Multi-layer verifier — ordered pipeline of verification layers.
+ *
+ * Different contract from {@link JudgeRunner} (which runs parallel
+ * specs against a sandbox). MultiLayerVerifier is a DAG of layers
+ * (install → typecheck → build → lint → serve → semantic → …) with
+ * dependency-based skip, per-layer findings, soft-fail semantics, and
+ * an aggregated `blendedScore` across all passed layers.
+ *
+ * Use when you want:
+ *   - ordered stages where a failing upstream stage skips downstream ones
+ *   - each stage produces rich `findings` (severity + message + evidence)
+ *   - a single composite score across stages with per-stage weights
+ *   - soft-fail stages whose failure doesn't abort the pipeline
+ *
+ * Use {@link JudgeRunner} when you want:
+ *   - N independent judges running in parallel against the same artifact
+ *   - no inter-judge dependencies
+ *   - boolean `passed` per judge + overall
+ *
+ * Both primitives compose — JudgeRunner can be invoked as a single
+ * layer inside a MultiLayerVerifier if that suits the caller.
+ */
+type LayerStatus = 'pass' | 'fail' | 'skipped' | 'error' | 'timeout';
+type Severity = 'critical' | 'major' | 'minor' | 'info';
+interface Finding {
+    severity: Severity;
+    message: string;
+    evidence?: string;
+    /** Optional layer name the finding belongs to (set by the verifier if omitted). */
+    layer?: string;
+    /**
+     * Free-form structured payload — used by `multiToolchainLayer` to attach
+     * `{ adapter: 'pnpm' }`, by judges to attach evidence pointers, etc.
+     * Renderers MAY interrogate; agent-eval primitives never assume shape.
+     */
+    detail?: Record<string, unknown>;
+}
+interface LayerResult {
+    layer: string;
+    status: LayerStatus;
+    /** 0..1 score, optional — layers that don't produce a numeric score omit. */
+    score?: number;
+    durationMs: number;
+    findings: Finding[];
+    /** Short human-readable summary (one line). */
+    reason?: string;
+    /**
+     * Numeric layer-level diagnostics: error counts, warning counts,
+     * cyclomatic complexity, total adapter wall-time, etc. Keyed by
+     * diagnostic name; null = "diagnostic not applicable / not measured."
+     * Renderers that know the keys can display them; ones that don't,
+     * ignore. Free-form on purpose — consumers type the value shape in
+     * their own namespace. Added in 0.10.
+     */
+    diagnostics?: Record<string, number | null>;
+    /** Any rich per-layer detail — rendered as-is by consumers that know the layer. */
+    detail?: Record<string, unknown>;
+}
+interface VerifyContext<Env = unknown> {
+    /** Per-run opaque context the caller provides. Layers destructure what they need. */
+    env: Env;
+    /** Previously-computed results from layers that already ran. */
+    prior: Record<string, LayerResult>;
+    /** Abort signal — once aborted, layers MUST bail out within a reasonable wall-clock time. */
+    signal: AbortSignal;
+}
+interface Layer<Env = unknown> {
+    name: string;
+    /** Stages that must have `status: 'pass'` before this layer runs. */
+    dependsOn?: string[];
+    /**
+     * Weight in the composite `blendedScore`. Default 1.0. Layers with weight 0
+     * contribute findings but not score.
+     */
+    weight?: number;
+    /**
+     * If true, a `fail` status contributes to `blendedScore` (as 0) instead of
+     * being dropped — use for layers whose failure is a real signal. Default:
+     * fail drops from numerator + denominator, matching VB's existing semantics.
+     */
+    failContributesToScore?: boolean;
+    /** Optional per-layer wall-cap in ms. Honored by the verifier (AbortSignal). */
+    capMs?: number;
+    run: (ctx: VerifyContext<Env>) => Promise<LayerResult> | LayerResult;
+}
+interface VerifyOptions<Env = unknown> {
+    env: Env;
+    /**
+     * Overall wall-clock cap in ms. Default: sum of layer `capMs`, or Infinity if any
+     * layer omits a cap. Once the overall cap is hit, the verifier short-circuits remaining layers.
+     */
+    overallCapMs?: number;
+    /** Called with each layer result as it completes. */
+    onLayer?: (result: LayerResult) => void;
+}
+interface VerificationReport {
+    layers: LayerResult[];
+    passCount: number;
+    failCount: number;
+    skippedCount: number;
+    errorCount: number;
+    /** True iff at least one scored layer ran AND every scored layer passed. */
+    allPass: boolean;
+    /**
+     * Weighted mean of `score` across contributing layers. 0 when no layers
+     * contributed. See {@link Layer.failContributesToScore} for fail semantics.
+     */
+    blendedScore: number;
+    durationMs: number;
+    startedAt: string;
+    finishedAt: string;
+}
+/**
+ * Grade a semantic-concept-style judge result into a single layer status.
+ *
+ * Pass when overall score >= threshold AND no critical-severity concept gap.
+ * Fail otherwise. Use inside a `Layer.run` when wrapping a concept judge.
+ *
+ * Generalized from VerticalBench H3 fix: `failingConcepts.length === 0` was
+ * too strict — a single concept at 6/10 failed the entire layer despite
+ * overall score being >= 0.7. Now we trust the judge's own `severity` field:
+ * `critical` findings veto; `major`/`minor` reduce the score but don't veto.
+ */
+declare function gradeSemanticStatus(input: {
+    score: number;
+    findings: Array<{
+        severity: Severity;
+        present?: boolean;
+        score?: number;
+    }>;
+    available: boolean;
+    threshold?: number;
+}): LayerStatus;
+declare class MultiLayerVerifier<Env = unknown> {
+    private readonly layers;
+    constructor(layers: Layer<Env>[]);
+    run(opts: VerifyOptions<Env>): Promise<VerificationReport>;
+}
+export { type Finding as F, type Layer as L, MultiLayerVerifier as M, type Severity as S, type VerificationReport as V, type LayerResult as a, type VerifyContext as b, type LayerStatus as c, type VerifyOptions as d, gradeSemanticStatus as g };

package/dist/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "@tangle-network/agent-eval — wire protocol",
-    "version": "0.23.1",
+    "version": "0.25.0",
     "description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
     "contact": {
       "name": "Tangle Network",
@@ -382,6 +382,377 @@
         "required": [
           "error"
         ]
+      },
+      "TracesIngestRequest": {
+        "type": "object",
+        "properties": {
+          "events": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/TraceEvent"
+            },
+            "minItems": 1,
+            "maxItems": 10000,
+            "description": "Batch of events. Max 10k per call — bigger streams should be chunked."
+          }
+        },
+        "required": [
+          "events"
+        ]
+      },
+      "TraceEvent": {
+        "type": "object",
+        "properties": {
+          "eventId": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Stable id for the event. Use ULID or UUID."
+          },
+          "runId": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Run this event belongs to."
+          },
+          "spanId": {
+            "type": "string",
+            "description": "Span that emitted the event, if any."
+          },
+          "kind": {
+            "type": "string",
+            "enum": [
+              "log",
+              "error",
+              "budget_decrement",
+              "budget_breach",
+              "state_mutation",
+              "policy_violation",
+              "redaction_applied",
+              "custom"
+            ],
+            "description": "Coarse event category — matches the TraceSchema v1 EventKind enum."
+          },
+          "timestamp": {
+            "type": "integer",
+            "minimum": 0,
+            "description": "Unix millis. Must be monotonically non-decreasing within a span."
+          },
+          "payload": {
+            "type": "object",
+            "additionalProperties": {},
+            "description": "Free-form payload — the runtime owns the shape."
+          }
+        },
+        "required": [
+          "eventId",
+          "runId",
+          "kind",
+          "timestamp",
+          "payload"
+        ]
+      },
+      "TracesIngestResponse": {
+        "type": "object",
+        "properties": {
+          "accepted": {
+            "type": "integer",
+            "minimum": 0,
+            "description": "Number of events persisted."
+          },
+          "rejected": {
+            "type": "integer",
+            "minimum": 0,
+            "description": "Number of events the store refused — see `errors[]` for reasons."
+          },
+          "errors": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "eventId": {
+                  "type": "string",
+                  "description": "Event id this error applies to."
+                },
+                "message": {
+                  "type": "string",
+                  "description": "Why the event was rejected."
+                }
+              },
+              "required": [
+                "eventId",
+                "message"
+              ]
+            },
+            "default": []
+          }
+        },
+        "required": [
+          "accepted",
+          "rejected"
+        ]
+      },
+      "FeedbackTrajectory": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Stable id; idempotency key for the trajectory."
+          },
+          "projectId": {
+            "type": "string"
+          },
+          "scenarioId": {
+            "type": "string"
+          },
+          "task": {
+            "type": "object",
+            "properties": {
+              "intent": {
+                "type": "string",
+                "minLength": 1
+              },
+              "context": {}
+            },
+            "required": [
+              "intent"
+            ]
+          },
+          "attempts": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/FeedbackAttempt"
+            },
+            "default": []
+          },
+          "labels": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/FeedbackLabel"
+            },
+            "default": []
+          },
+          "outcome": {
+            "type": "object",
+            "properties": {
+              "success": {
+                "type": "boolean"
+              },
+              "score": {
+                "type": "number"
+              },
+              "metrics": {
+                "type": "object",
+                "additionalProperties": {
+                  "type": "number"
+                }
+              },
+              "costUsd": {
+                "type": "number"
+              },
+              "detail": {
+                "type": "string"
+              },
+              "observedAt": {
+                "type": "string"
+              },
+              "metadata": {
+                "type": "object",
+                "additionalProperties": {}
+              }
+            }
+          },
+          "split": {
+            "type": "string",
+            "enum": [
+              "train",
+              "dev",
+              "test",
+              "holdout"
+            ]
+          },
+          "tags": {
+            "type": "object",
+            "additionalProperties": {
+              "type": "string"
+            }
+          },
+          "createdAt": {
+            "type": "string",
+            "description": "ISO-8601 UTC."
+          },
+          "updatedAt": {
+            "type": "string"
+          },
+          "metadata": {
+            "type": "object",
+            "additionalProperties": {}
+          }
+        },
+        "required": [
+          "id",
+          "task",
+          "createdAt"
+        ]
+      },
+      "FeedbackAttempt": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "minLength": 1
+          },
+          "stepIndex": {
+            "type": "integer",
+            "minimum": 0
+          },
+          "artifactType": {
+            "type": "string",
+            "enum": [
+              "text",
+              "code",
+              "plan",
+              "research",
+              "action",
+              "ui",
+              "decision",
+              "data",
+              "other"
+            ]
+          },
+          "artifact": {},
+          "options": {
+            "type": "array",
+            "items": {}
+          },
+          "proposedAction": {
+            "type": "object",
+            "properties": {
+              "type": {
+                "type": "string"
+              },
+              "risk": {
+                "type": "string",
+                "enum": [
+                  "low",
+                  "medium",
+                  "high"
+                ]
+              },
+              "costUsd": {
+                "type": "number"
+              },
+              "externalSideEffect": {
+                "type": "boolean"
+              },
+              "requiresApproval": {
+                "type": "boolean"
+              },
+              "metadata": {
+                "type": "object",
+                "additionalProperties": {}
+              }
+            },
+            "required": [
+              "type"
+            ]
+          },
+          "feedback": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/FeedbackLabel"
+            }
+          },
+          "createdAt": {
+            "type": "string"
+          },
+          "metadata": {
+            "type": "object",
+            "additionalProperties": {}
+          }
+        },
+        "required": [
+          "id",
+          "stepIndex",
+          "artifactType",
+          "createdAt"
+        ]
+      },
+      "FeedbackLabel": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string"
+          },
+          "source": {
+            "type": "string",
+            "enum": [
+              "user",
+              "judge",
+              "environment",
+              "metric",
+              "policy",
+              "system"
+            ]
+          },
+          "kind": {
+            "type": "string",
+            "enum": [
+              "approve",
+              "reject",
+              "select",
+              "edit",
+              "rank",
+              "rate",
+              "comment",
+              "metric_outcome",
+              "policy_block",
+              "revision_request"
+            ]
+          },
+          "value": {},
+          "reason": {
+            "type": "string"
+          },
+          "severity": {
+            "type": "string",
+            "enum": [
+              "info",
+              "warning",
+              "error",
+              "critical"
+            ]
+          },
+          "createdAt": {
+            "type": "string",
+            "description": "ISO-8601 UTC."
+          },
+          "metadata": {
+            "type": "object",
+            "additionalProperties": {}
+          }
+        },
+        "required": [
+          "source",
+          "kind",
+          "createdAt"
+        ]
+      },
+      "FeedbackIngestResponse": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "Trajectory id that was persisted."
+          },
+          "persisted": {
+            "type": "boolean",
+            "description": "True when the trajectory was saved (idempotent on id)."
+          }
+        },
+        "required": [
+          "id",
+          "persisted"
+        ]
       }
     },
     "parameters": {}
@@ -496,6 +867,125 @@
           }
         }
       }
+    },
+    "/v1/traces/ingest": {
+      "post": {
+        "summary": "Ingest a batch of production TraceEvents",
+        "description": "Append a batch of TraceEvents to the configured TraceStore. Accepts application/json ({events:[...]}) or application/x-ndjson (one event per line). Returns counts of accepted + rejected events.",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/TracesIngestRequest"
+              }
+            },
+            "application/x-ndjson": {
+              "schema": {
+                "$ref": "#/components/schemas/TracesIngestRequest"
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "Ingestion summary",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/TracesIngestResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          },
+          "401": {
+            "description": "Unauthorized (when bearer auth is configured)",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "No trace store configured",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/feedback": {
+      "post": {
+        "summary": "Ingest a FeedbackTrajectory from production",
+        "description": "Persist a single FeedbackTrajectory. Idempotent on trajectory.id — re-posting replaces the prior record. Used by production runtimes to forward user 👍/👎/edits into the eval substrate.",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/FeedbackTrajectory"
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "Persisted",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/FeedbackIngestResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          },
+          "401": {
+            "description": "Unauthorized (when bearer auth is configured)",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "No feedback store configured",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          }
+        }
+      }
     }
   },
   "webhooks": {}

package/dist/optimization.d.ts CHANGED Viewed

@@ -1,8 +1,11 @@
-export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './eval-campaign-Ds5QljIh.js';
-export { A as ActionableSideInfo, a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, E as EvolvableVariant, G as GenerationReport, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, c as MultiShotMutateAdapter, d as MultiShotOptimizationConfig, e as MultiShotOptimizationResult, f as MultiShotRun, g as MultiShotRunInput, h as MultiShotRunner, i as MultiShotScore, j as MultiShotScorer, k as MultiShotSplit, l as MultiShotTrace, m as MultiShotTrialResult, n as MultiShotVariant, o as MutateAdapter, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, R as ReflectionContext, r as ReflectionProposal, S as ScenarioAggregate, s as ScoreAdapter, T as TrialCache, t as TrialResult, u as TrialTrace, V as VariantAggregate, v as buildReflectionPrompt, w as defaultMultiShotObjectives, x as parseReflectionResponse, y as runMultiShotOptimization, z as runPromptEvolution, B as trialTraceFromMultiShotTrial } from './summary-report-Ce1r4EYo.js';
-export { F as FeedbackArtifactType, a as FeedbackAttempt, b as FeedbackLabel, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, l as FeedbackTrajectory, m as FeedbackTrajectoryFilter, n as FeedbackTrajectoryStore, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-c43WGtTX.js';
-import './run-record-DNiOMBrZ.js';
-import './integrity-Cr5YodSY.js';
-import './store-u47QaJ9G.js';
-import './emitter-B2XqDKFU.js';
-import './dataset-B9qvlm_o.js';
+export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './researcher-BPT8x_NT.js';
+export { F as FeedbackArtifactType, a as FeedbackAttempt, b as FeedbackLabel, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, l as FeedbackTrajectory, m as FeedbackTrajectoryFilter, n as FeedbackTrajectoryStore, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-DfFdrraJ.js';
+export { A as ActionableSideInfo, a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, E as EvolvableVariant, G as GenerationReport, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, c as MultiShotMutateAdapter, d as MultiShotOptimizationConfig, e as MultiShotOptimizationResult, f as MultiShotRun, g as MultiShotRunInput, h as MultiShotRunner, i as MultiShotScore, j as MultiShotScorer, k as MultiShotSplit, l as MultiShotTrace, m as MultiShotTrialResult, n as MultiShotVariant, o as MutateAdapter, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, R as ReflectionContext, r as ReflectionProposal, S as ScenarioAggregate, s as ScoreAdapter, T as TrialCache, t as TrialResult, u as TrialTrace, V as VariantAggregate, v as buildReflectionPrompt, w as defaultMultiShotObjectives, x as parseReflectionResponse, y as runMultiShotOptimization, z as runPromptEvolution, B as trialTraceFromMultiShotTrial } from './summary-report-C7VPYEj2.js';
+import './errors-BZ9sTdz7.js';
+import './integrity-DK2EBVZC.js';
+import './store-Db2Bv8Cf.js';
+import './run-record-CqzahIbx.js';
+import './emitter-DP_cSSiw.js';
+import './control-runtime-BuJHoLg0.js';
+import './dataset-CiK_3LDr.js';
+import './failure-cluster-C2EGSDiT.js';