@vauban-org/agent-sdk 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ /**
2
+ * quality/index.ts — Outcome quality scoring (ADR-ECO-039).
3
+ *
4
+ * Promoted from forge consumer in SDK 1.8.0 — see Brief 2026-05-17.
5
+ *
6
+ * Companion to {@link OutcomeRecord.quality}. Each agent's
7
+ * `outcomeMapping(feedback)` returns a normalized quality score (0..1)
8
+ * computed from feedback signals. The Command Center backend reads this
9
+ * into the `agent_run.outcome_quality` column (NUMERIC(5,4)), which
10
+ * powers the EconomicObserver ROI weighting and the Quality Foreman
11
+ * regression checks.
12
+ *
13
+ * Design notes:
14
+ * - Pure function (no I/O, no SDK imports beyond types) — safe to import
15
+ * from any agent and cheap to unit-test.
16
+ * - Signals are agent-agnostic — each agent maps its own feedback fields
17
+ * into a shared {@link QualityInputs} shape.
18
+ * - Default score is `0.5` ("neutral, no information"). Signals nudge it
19
+ * up (positive outcomes) or down (errors). Final value is clamped to
20
+ * `[0, 1]`.
21
+ * - `customScore` lets an agent provide its own pre-computed score (e.g.
22
+ * a model-based eval) and bypass the heuristic.
23
+ * - Prod-validated on 33 forge agents (ADR-ECO-039 rollout 2026-05).
24
+ *
25
+ * @public
26
+ */
27
+
28
/**
 * Bag of feedback signals that {@link computeQuality} turns into a score.
 *
 * Every field is optional: an agent supplies only the signals it actually
 * emits and leaves the rest undefined. For the count-style fields the
 * heuristic only checks whether the value is greater than zero — the
 * magnitude itself is not weighted.
 *
 * @public
 */
export interface QualityInputs {
  /** Threats blocked, e.g. paused contracts (Sentinel-style agents). */
  readonly threatsBlocked?: number;
  /** Alerts sent to operators (Sentinel/ops-style agents). */
  readonly alertsSent?: number;
  /** Posts or articles successfully published (content-style agents). */
  readonly postsPublished?: number;
  /** Posts blocked by HITL or by the constitutional gate (content-style agents). */
  readonly postsRejectedByHITL?: number;
  /** Invoices or proposals processed successfully (finance/treasury-style agents). */
  readonly invoicesProcessed?: number;
  /** Errors encountered during the cycle (generic, any agent). */
  readonly errorsEncountered?: number;
  /**
   * Lessons / retrospective bullets extracted — a proxy for reflection
   * depth. Only the array length is inspected; elements are opaque.
   */
  readonly lessons?: readonly unknown[];
  /**
   * Explicit override. When this is a number the heuristic is skipped and
   * this value (clamped) becomes the score.
   */
  readonly customScore?: number;
}
52
+
53
/**
 * A single signal's effect on the final score, as reported by
 * {@link computeQualityWithBreakdown}.
 *
 * @public
 */
export interface QualityContribution {
  /** Name of the originating signal; corresponds to a {@link QualityInputs} field. */
  readonly signal: string;
  /** Signed amount added to the running score (negative for penalties). */
  readonly delta: number;
  /** Explanation meant for humans, e.g. `"1 post published"`. */
  readonly reason: string;
}
67
+
68
/**
 * Explained result of the quality heuristic: the same score that
 * {@link computeQuality} yields, together with the contributions that
 * produced it. Intended for debug-grade observability surfaces
 * (e.g. the `/agents/[id]` quality trend tooltip).
 *
 * @public
 */
export interface QualityBreakdown {
  /** Final score after clamping to `[0, 1]`. */
  readonly score: number;
  /**
   * Contributions listed in the order they were evaluated; empty when no
   * signal applies. With a `customScore` override there is exactly one
   * entry whose delta is measured against the `0.5` baseline (and can
   * therefore be `0`).
   */
  readonly contributions: readonly QualityContribution[];
}
81
+
82
/** Score used when no signal carries any information ("neutral"). */
const NEUTRAL_SCORE = 0.5;

/**
 * One row of the scoring heuristic. Keeping the rules in a single table
 * guarantees that {@link computeQuality} and
 * {@link computeQualityWithBreakdown} can never disagree on weights or
 * evaluation order.
 */
interface SignalRule {
  /** Name of the {@link QualityInputs} field the rule reads. */
  readonly signal: string;
  /** Signed amount added to the running score when the signal is present. */
  readonly delta: number;
  /** Singular noun for the breakdown reason; pluralized by appending "s". */
  readonly noun: string;
  /** Text that follows the noun in the breakdown reason. */
  readonly outcome: string;
  /** Reads the signal's count from the inputs; a missing signal counts as 0. */
  readonly count: (inputs: QualityInputs) => number;
}

/**
 * The heuristic, in evaluation order (penalties first, then rewards).
 * The order is significant: it fixes both the order of the reported
 * contributions and the order of the floating-point additions.
 */
const SIGNAL_RULES: readonly SignalRule[] = [
  {
    signal: "errorsEncountered",
    delta: -0.2,
    noun: "error",
    outcome: "encountered",
    count: (inputs) => inputs.errorsEncountered ?? 0,
  },
  {
    signal: "postsRejectedByHITL",
    delta: -0.1,
    noun: "post",
    outcome: "rejected by HITL",
    count: (inputs) => inputs.postsRejectedByHITL ?? 0,
  },
  {
    signal: "postsPublished",
    delta: 0.2,
    noun: "post",
    outcome: "published",
    count: (inputs) => inputs.postsPublished ?? 0,
  },
  {
    signal: "threatsBlocked",
    delta: 0.3,
    noun: "threat",
    outcome: "blocked",
    count: (inputs) => inputs.threatsBlocked ?? 0,
  },
  {
    signal: "alertsSent",
    delta: 0.1,
    noun: "alert",
    outcome: "sent",
    count: (inputs) => inputs.alertsSent ?? 0,
  },
  {
    signal: "invoicesProcessed",
    delta: 0.2,
    noun: "invoice",
    outcome: "processed",
    count: (inputs) => inputs.invoicesProcessed ?? 0,
  },
  {
    signal: "lessons",
    delta: 0.05,
    noun: "lesson",
    outcome: "extracted",
    count: (inputs) => inputs.lessons?.length ?? 0,
  },
];

/**
 * Clamp a number into `[0, 1]`.
 *
 * Non-finite input (`NaN`, `Infinity`, `-Infinity`) is treated as
 * "no information" and maps to the neutral score rather than to a bound.
 *
 * @param n - Raw score.
 * @returns `n` limited to `[0, 1]`, or `0.5` when `n` is not finite.
 */
function clamp01(n: number): number {
  if (!Number.isFinite(n)) return NEUTRAL_SCORE;
  return Math.max(0, Math.min(1, n));
}

/**
 * Compute a 0..1 quality score from agent feedback signals.
 *
 * Starts at `0.5` and applies each rule whose signal count is greater than
 * zero: errors and HITL rejections lower the score, positive outcomes raise
 * it. Only the presence of a signal matters, not its magnitude. The result
 * is clamped to `[0, 1]`.
 *
 * When `customScore` is a number the heuristic is skipped and the clamped
 * override is returned (a non-finite override yields `0.5`).
 *
 * @param inputs - Signals emitted by the calling agent.
 * @returns The quality score in `[0, 1]`.
 *
 * @example
 * computeQuality({ postsPublished: 1 })     // → 0.7
 * computeQuality({ errorsEncountered: 1 })  // → 0.3
 * computeQuality({ customScore: 0.95 })     // → 0.95
 * computeQuality({})                        // → 0.5
 *
 * @public
 */
export function computeQuality(inputs: QualityInputs): number {
  if (typeof inputs.customScore === "number") {
    return clamp01(inputs.customScore);
  }

  let score = NEUTRAL_SCORE;
  for (const rule of SIGNAL_RULES) {
    if (rule.count(inputs) > 0) score += rule.delta;
  }
  return clamp01(score);
}

/**
 * Same heuristic as {@link computeQuality}, additionally returning the
 * ordered list of contributions for debug-grade observability.
 *
 * When `customScore` is a number, the heuristic is bypassed and the
 * breakdown holds a single `"customScore"` entry whose delta is the clamped
 * override minus the `0.5` baseline. Otherwise each applied rule produces
 * one entry; deltas are reported unclamped, so they may sum past the
 * clamped final score.
 *
 * @param inputs - Signals emitted by the calling agent.
 * @returns The clamped score and the contributions in evaluation order.
 *
 * @example
 * computeQualityWithBreakdown({ postsPublished: 1, lessons: ["a"] })
 * // → {
 * //     score: 0.75,
 * //     contributions: [
 * //       { signal: "postsPublished", delta: 0.2, reason: "1 post published" },
 * //       { signal: "lessons", delta: 0.05, reason: "1 lesson extracted" },
 * //     ],
 * //   }
 *
 * @public
 */
export function computeQualityWithBreakdown(
  inputs: QualityInputs
): QualityBreakdown {
  if (typeof inputs.customScore === "number") {
    const clamped = clamp01(inputs.customScore);
    return {
      score: clamped,
      contributions: [
        {
          signal: "customScore",
          delta: clamped - NEUTRAL_SCORE,
          // Report the raw override so an out-of-range value stays visible.
          reason: `customScore override = ${inputs.customScore}`,
        },
      ],
    };
  }

  let score = NEUTRAL_SCORE;
  const contributions: QualityContribution[] = [];

  for (const rule of SIGNAL_RULES) {
    const count = rule.count(inputs);
    if (count > 0) {
      score += rule.delta;
      contributions.push({
        signal: rule.signal,
        delta: rule.delta,
        reason: `${count} ${rule.noun}${count === 1 ? "" : "s"} ${rule.outcome}`,
      });
    }
  }

  return { score: clamp01(score), contributions };
}