@vauban-org/agent-sdk 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRACT.md +67 -0
- package/dist/alerts/digest.d.ts +113 -0
- package/dist/alerts/digest.d.ts.map +1 -0
- package/dist/alerts/digest.js +160 -0
- package/dist/alerts/digest.js.map +1 -0
- package/dist/alerts/index.d.ts +12 -0
- package/dist/alerts/index.d.ts.map +1 -0
- package/dist/alerts/index.js +11 -0
- package/dist/alerts/index.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/orchestration/ooda/types.d.ts +3 -2
- package/dist/orchestration/ooda/types.d.ts.map +1 -1
- package/dist/orchestration/ooda/types.js.map +1 -1
- package/dist/quality/index.d.ts +114 -0
- package/dist/quality/index.d.ts.map +1 -0
- package/dist/quality/index.js +168 -0
- package/dist/quality/index.js.map +1 -0
- package/package.json +1 -1
- package/src/alerts/digest.ts +260 -0
- package/src/alerts/index.ts +12 -0
- package/src/index.ts +15 -0
- package/src/orchestration/ooda/types.ts +3 -2
- package/src/quality/index.ts +231 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* quality/index.ts — Outcome quality scoring (ADR-ECO-039).
|
|
3
|
+
*
|
|
4
|
+
* Promoted from forge consumer in SDK 1.8.0 — see Brief 2026-05-17.
|
|
5
|
+
*
|
|
6
|
+
* Companion to {@link OutcomeRecord.quality}. Each agent's
|
|
7
|
+
* `outcomeMapping(feedback)` returns a normalized quality score (0..1)
|
|
8
|
+
* computed from feedback signals. The Command Center backend reads this
|
|
9
|
+
* into the `agent_run.outcome_quality` column (NUMERIC(5,4)), which
|
|
10
|
+
* powers the EconomicObserver ROI weighting and the Quality Foreman
|
|
11
|
+
* regression checks.
|
|
12
|
+
*
|
|
13
|
+
* Design notes:
|
|
14
|
+
* - Pure function (no I/O, no SDK imports beyond types) — safe to import
|
|
15
|
+
* from any agent and cheap to unit-test.
|
|
16
|
+
* - Signals are agent-agnostic — each agent maps its own feedback fields
|
|
17
|
+
* into a shared {@link QualityInputs} shape.
|
|
18
|
+
* - Default score is `0.5` ("neutral, no information"). Signals nudge it
|
|
19
|
+
* up (positive outcomes) or down (errors, HITL rejections). Final value is clamped to
|
|
20
|
+
* `[0, 1]`.
|
|
21
|
+
* - `customScore` lets an agent provide its own pre-computed score (e.g.
|
|
22
|
+
* a model-based eval) and bypass the heuristic.
|
|
23
|
+
* - Prod-validated on 33 forge agents (ADR-ECO-039 rollout 2026-05).
|
|
24
|
+
*
|
|
25
|
+
* @public
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/**
 * Bag of feedback signals that {@link computeQuality} converts into a score.
 *
 * Every member is optional: an agent fills in only the counters it actually
 * emits and leaves the rest undefined.
 *
 * @public
 */
export interface QualityInputs {
  /** Threats blocked by a sentinel-style agent (for example, paused contracts). */
  readonly threatsBlocked?: number;

  /** Alerts delivered to operators by a sentinel- or ops-style agent. */
  readonly alertsSent?: number;

  /** Posts or articles a content-style agent published successfully. */
  readonly postsPublished?: number;

  /** Posts a content-style agent had blocked by HITL or the constitutional gate. */
  readonly postsRejectedByHITL?: number;

  /** Invoices or proposals a finance/treasury-style agent processed successfully. */
  readonly invoicesProcessed?: number;

  /** Errors hit during the cycle, whatever the kind of agent. */
  readonly errorsEncountered?: number;

  /** Lessons or retrospective bullets extracted; used as a proxy for reflection depth. */
  readonly lessons?: readonly unknown[];

  /** Pre-computed score that, when defined, replaces the heuristic entirely. */
  readonly customScore?: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * A single signal's effect on the final score, as reported by
 * {@link computeQualityWithBreakdown}.
 *
 * @public
 */
export interface QualityContribution {
  /** Name of the originating signal; corresponds to a {@link QualityInputs} field. */
  readonly signal: string;

  /** Amount added to the running score; negative values lower it. */
  readonly delta: number;

  /** Short explanation meant for humans, such as `"1 post published"`. */
  readonly reason: string;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Explained result of {@link computeQuality}: the score together with the
 * contributions that produced it, in order. Intended for debug-grade
 * observability surfaces (e.g. the `/agents/[id]` quality trend tooltip).
 *
 * @public
 */
export interface QualityBreakdown {
  /** The resulting score, already clamped to `[0, 1]`. */
  readonly score: number;

  /** Contributions listed in evaluation order; empty if no signal applied. */
  readonly contributions: readonly QualityContribution[];
}
|
|
81
|
+
|
|
82
|
+
/** Neutral starting score: "no information either way". */
const BASELINE_SCORE = 0.5;

/**
 * One row of the scoring heuristic: which signal to read, how far it moves
 * the score when present, and the words used to describe it in a breakdown.
 */
interface QualityRule {
  /** Signal name reported in {@link QualityContribution.signal}. */
  readonly signal: string;
  /** Signed amount added to the running score when the signal is positive. */
  readonly delta: number;
  /** Singular noun used in the reason text; an `"s"` is appended unless the count is exactly 1. */
  readonly noun: string;
  /** Trailing phrase of the reason text, e.g. `"published"`. */
  readonly outcome: string;
  /** Reads the signal's count from the inputs; a missing signal counts as 0. */
  readonly count: (inputs: QualityInputs) => number;
}

/**
 * The heuristic, in evaluation order (penalties first, then rewards).
 * Both public entry points are driven by this single table so they cannot
 * drift apart.
 */
const QUALITY_RULES: readonly QualityRule[] = [
  {
    signal: "errorsEncountered",
    delta: -0.2,
    noun: "error",
    outcome: "encountered",
    count: (inputs) => inputs.errorsEncountered ?? 0,
  },
  {
    signal: "postsRejectedByHITL",
    delta: -0.1,
    noun: "post",
    outcome: "rejected by HITL",
    count: (inputs) => inputs.postsRejectedByHITL ?? 0,
  },
  {
    signal: "postsPublished",
    delta: 0.2,
    noun: "post",
    outcome: "published",
    count: (inputs) => inputs.postsPublished ?? 0,
  },
  {
    signal: "threatsBlocked",
    delta: 0.3,
    noun: "threat",
    outcome: "blocked",
    count: (inputs) => inputs.threatsBlocked ?? 0,
  },
  {
    signal: "alertsSent",
    delta: 0.1,
    noun: "alert",
    outcome: "sent",
    count: (inputs) => inputs.alertsSent ?? 0,
  },
  {
    signal: "invoicesProcessed",
    delta: 0.2,
    noun: "invoice",
    outcome: "processed",
    count: (inputs) => inputs.invoicesProcessed ?? 0,
  },
  {
    signal: "lessons",
    delta: 0.05,
    noun: "lesson",
    outcome: "extracted",
    count: (inputs) => inputs.lessons?.length ?? 0,
  },
];

/**
 * Clamp `n` to `[0, 1]`. Non-finite input (`NaN`, `±Infinity`) yields the
 * neutral `0.5` rather than an extreme.
 */
function clamp01(n: number): number {
  if (!Number.isFinite(n)) return 0.5;
  return Math.max(0, Math.min(1, n));
}

/**
 * Round to 4 decimal places so that sums of decimal deltas come out as the
 * decimals they represent (`0.2`, not `0.19999999999999998`).
 */
function roundScore(n: number): number {
  return Math.round(n * 1e4) / 1e4;
}

/**
 * Run the heuristic over `inputs` and return the clamped, rounded score.
 * When `sink` is supplied, one contribution is appended to it for every
 * rule that fired, in evaluation order.
 */
function applyHeuristic(
  inputs: QualityInputs,
  sink?: QualityContribution[]
): number {
  let q = BASELINE_SCORE;

  for (const rule of QUALITY_RULES) {
    const count = rule.count(inputs);
    // Only the presence of a signal matters, not its magnitude. Written as a
    // negated `> 0` so NaN counts are skipped as well.
    if (!(count > 0)) continue;

    q += rule.delta;
    sink?.push({
      signal: rule.signal,
      delta: rule.delta,
      reason: `${count} ${rule.noun}${count === 1 ? "" : "s"} ${rule.outcome}`,
    });
  }

  return roundScore(clamp01(q));
}

/**
 * Compute a 0..1 quality score from agent feedback signals.
 *
 * Starts at `0.5` when no signal is provided. Each positive signal nudges the
 * score upward; errors and HITL rejections nudge it downward. Each signal
 * counts once however large its value. The result is clamped to `[0, 1]` and
 * rounded to 4 decimal places, so combined signals never leak binary
 * floating-point noise.
 *
 * A numeric `customScore` bypasses the heuristic and is returned clamped to
 * `[0, 1]` (a non-finite value becomes `0.5`).
 *
 * @param inputs - Feedback signals emitted by the calling agent.
 * @returns The quality score in `[0, 1]`.
 *
 * @example
 * computeQuality({ postsPublished: 1 })                 // → 0.7
 * computeQuality({ errorsEncountered: 1 })              // → 0.3
 * computeQuality({ postsPublished: 1, alertsSent: 1 })  // → 0.8
 * computeQuality({ customScore: 0.95 })                 // → 0.95
 * computeQuality({})                                    // → 0.5
 *
 * @public
 */
export function computeQuality(inputs: QualityInputs): number {
  if (typeof inputs.customScore === "number") {
    return clamp01(inputs.customScore);
  }
  return applyHeuristic(inputs);
}

/**
 * Same heuristic as {@link computeQuality}, but also returns the ordered
 * list of contributions for debug-grade observability. The `score` field
 * always equals what {@link computeQuality} returns for the same inputs.
 *
 * When `customScore` is a number, the heuristic is bypassed and the
 * breakdown contains a single entry tagged `"customScore"` whose delta is
 * the clamped score's distance from the neutral `0.5`.
 *
 * @param inputs - Feedback signals emitted by the calling agent.
 * @returns The score plus one contribution per signal that fired.
 *
 * @example
 * computeQualityWithBreakdown({ postsPublished: 1, lessons: ["a"] })
 * // → {
 * //     score: 0.75,
 * //     contributions: [
 * //       { signal: "postsPublished", delta: 0.2, reason: "1 post published" },
 * //       { signal: "lessons", delta: 0.05, reason: "1 lesson extracted" },
 * //     ],
 * //   }
 *
 * @public
 */
export function computeQualityWithBreakdown(
  inputs: QualityInputs
): QualityBreakdown {
  if (typeof inputs.customScore === "number") {
    const clamped = clamp01(inputs.customScore);
    return {
      score: clamped,
      contributions: [
        {
          signal: "customScore",
          delta: clamped - 0.5,
          reason: `customScore override = ${inputs.customScore}`,
        },
      ],
    };
  }

  const contributions: QualityContribution[] = [];
  const score = applyHeuristic(inputs, contributions);
  return { score, contributions };
}
|