@agwab/pi-workflow 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -15
- package/agents/researcher.md +17 -7
- package/dist/artifact-graph-runtime.js +1 -0
- package/dist/compiler.d.ts +2 -0
- package/dist/compiler.js +29 -4
- package/dist/dynamic-generated-task-runtime.js +4 -3
- package/dist/dynamic-runtime-bundle.js +3 -2
- package/dist/engine.d.ts +2 -0
- package/dist/engine.js +3 -2
- package/dist/extension.js +240 -16
- package/dist/store.js +1 -0
- package/dist/subagent-backend.js +82 -27
- package/dist/tool-metadata.d.ts +1 -0
- package/dist/tool-metadata.js +13 -1
- package/dist/types.d.ts +3 -0
- package/dist/workflow-artifact-extension.js +3 -2
- package/dist/workflow-artifact-tool.js +84 -4
- package/dist/workflow-progress-health.d.ts +37 -0
- package/dist/workflow-progress-health.js +296 -0
- package/dist/workflow-runtime.d.ts +6 -0
- package/dist/workflow-runtime.js +33 -10
- package/dist/workflow-view.d.ts +2 -0
- package/dist/workflow-view.js +97 -18
- package/dist/workflow-web-source-extension.d.ts +43 -0
- package/dist/workflow-web-source-extension.js +1194 -0
- package/dist/workflow-web-source.d.ts +171 -0
- package/dist/workflow-web-source.js +915 -0
- package/docs/usage.md +32 -18
- package/node_modules/@agwab/pi-subagent/package.json +1 -1
- package/node_modules/@agwab/pi-subagent/src/api.ts +245 -132
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +243 -163
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +117 -90
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +728 -475
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +305 -209
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +750 -439
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +422 -268
- package/package.json +7 -7
- package/skills/workflow-guide/scaffolds/object-tool-fallback/schemas/fetch-control.schema.json +1 -1
- package/skills/workflow-guide/scaffolds/object-tool-fallback/spec.json +4 -3
- package/src/artifact-graph-runtime.ts +1 -0
- package/src/compiler.ts +43 -3
- package/src/dynamic-generated-task-runtime.ts +4 -2
- package/src/dynamic-runtime-bundle.ts +3 -2
- package/src/engine.ts +7 -16
- package/src/extension.ts +299 -22
- package/src/store.ts +1 -0
- package/src/subagent-backend.ts +121 -37
- package/src/tool-metadata.ts +22 -1
- package/src/types.ts +4 -0
- package/src/workflow-artifact-extension.ts +3 -2
- package/src/workflow-artifact-tool.ts +96 -4
- package/src/workflow-progress-health.ts +461 -0
- package/src/workflow-runtime.ts +50 -13
- package/src/workflow-view.ts +186 -41
- package/src/workflow-web-source-extension.ts +1411 -0
- package/src/workflow-web-source.ts +1294 -0
- package/workflows/README.md +1 -1
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +552 -44
- package/workflows/deep-research/helpers/final-audit-packet.mjs +396 -0
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +545 -0
- package/workflows/deep-research/helpers/render-executive.mjs +1199 -192
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +624 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +37 -8
- package/workflows/deep-research/schemas/deep-research-final-synthesis-control.schema.json +110 -0
- package/workflows/deep-research/schemas/deep-research-normalize-claims-control.schema.json +45 -4
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +0 -2
- package/workflows/deep-research/spec.json +71 -26
- package/workflows/deep-review/helpers/render-review-report.mjs +502 -0
- package/workflows/deep-review/schemas/deep-review-render-control.schema.json +50 -0
- package/workflows/deep-review/spec.json +22 -1
|
@@ -50,8 +50,11 @@ function collectUrls(value, urls = new Set()) {
|
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
function looksLikeLocalSourceRef(value) {
|
|
53
|
-
const text = String(value ?? "")
|
|
54
|
-
|
|
53
|
+
const text = String(value ?? "")
|
|
54
|
+
.trim()
|
|
55
|
+
.replace(/^(?:file|repo):/i, "")
|
|
56
|
+
.replace(/#L\d+(?:-L?\d+)?$/i, "");
|
|
57
|
+
return /^(?:\.?[\w.-]+\/)?[\w./-]+\.(?:md|json|ya?ml|ts|tsx|js|mjs|cjs|py|go|rs|zig|txt|sol|java|kt|swift|rb|php|c|cc|cpp|h|hpp)$/i.test(
|
|
55
58
|
text,
|
|
56
59
|
);
|
|
57
60
|
}
|
|
@@ -60,32 +63,180 @@ function collectEvidenceRefs(claim) {
|
|
|
60
63
|
const refs = new Set([...collectUrls(claim)]);
|
|
61
64
|
for (const row of Array.isArray(claim?.evidence) ? claim.evidence : []) {
|
|
62
65
|
if (!row || typeof row !== "object") continue;
|
|
63
|
-
for (const value of [
|
|
66
|
+
for (const value of [
|
|
67
|
+
row.url,
|
|
68
|
+
row.source,
|
|
69
|
+
row.file,
|
|
70
|
+
row.path,
|
|
71
|
+
row.sourceRef,
|
|
72
|
+
]) {
|
|
64
73
|
if (typeof value !== "string") continue;
|
|
65
|
-
if (
|
|
74
|
+
if (
|
|
75
|
+
/^https?:\/\//i.test(value) ||
|
|
76
|
+
isWorkflowSourceRef(value) ||
|
|
77
|
+
looksLikeLocalSourceRef(value)
|
|
78
|
+
)
|
|
66
79
|
refs.add(value.trim());
|
|
67
80
|
}
|
|
68
81
|
}
|
|
69
82
|
return refs;
|
|
70
83
|
}
|
|
71
84
|
|
|
85
|
+
/**
 * Recursively gathers every workflow source ref token (`wsrc_` followed by 32
 * lowercase hex characters) found in strings nested anywhere inside `value`.
 *
 * @param {unknown} value - String, array, or object to scan; any other type is ignored.
 * @param {Set<string>} [refs] - Accumulator; it is mutated and returned.
 * @returns {Set<string>} The accumulator holding every matched token.
 */
function collectWorkflowSourceRefs(value, refs = new Set()) {
  if (typeof value === "string") {
    const tokens = value.match(/\bwsrc_[a-f0-9]{32}\b/g) ?? [];
    tokens.forEach((token) => refs.add(token));
    return refs;
  }
  if (value === null || typeof value !== "object") return refs;
  // Arrays are walked element by element, other objects by their own values.
  const children = Array.isArray(value) ? value : Object.values(value);
  children.forEach((child) => collectWorkflowSourceRefs(child, refs));
  return refs;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Tells whether `value`, once stringified and trimmed, is exactly one workflow
 * source ref token: `wsrc_` followed by 32 lowercase hex characters.
 *
 * @param {unknown} value - Candidate reference; null/undefined count as empty.
 * @returns {boolean} True only when the whole trimmed text is a single token.
 */
function isWorkflowSourceRef(value) {
  const candidate = String(value ?? "").trim();
  return /^wsrc_[a-f0-9]{32}$/.test(candidate);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Normalizes a candidate list of source URLs into trimmed, non-blank strings.
 *
 * @param {unknown} value - Expected to be an array; anything else yields [].
 * @returns {string[]} Trimmed string entries in their original order.
 */
function sourceUrlArray(value) {
  if (!Array.isArray(value)) return [];
  const urls = [];
  for (const item of value) {
    if (typeof item !== "string") continue;
    const trimmed = item.trim();
    if (trimmed) urls.push(trimmed);
  }
  return urls;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Stringifies and trims `value`, then drops any trailing run of `.`, `,`, `;`
 * or `:` characters.
 *
 * @param {unknown} value - Raw URL-like text; null/undefined become "".
 * @returns {string} The text without surrounding whitespace or trailing punctuation.
 */
function stripCitationUrlPunctuation(value) {
  const trimmed = String(value ?? "").trim();
  return trimmed.replace(/[.,;:]+$/u, "");
}
|
|
118
|
+
|
|
119
|
+
/**
 * Produces the lookup keys under which an HTTP(S) URL may be registered: the
 * punctuation-stripped raw text, its parsed form with the fragment removed,
 * and (for non-root paths ending in "/") the form without trailing slashes.
 *
 * @param {unknown} value - URL-like text.
 * @returns {string[]} Distinct non-empty keys; [] when the text is not http(s).
 */
function canonicalUrlKeys(value) {
  const raw = stripCitationUrlPunctuation(value);
  if (!/^https?:\/\//i.test(raw)) return [];
  const variants = [raw];
  try {
    const parsed = new URL(raw);
    parsed.protocol = parsed.protocol.toLowerCase();
    parsed.hostname = parsed.hostname.toLowerCase();
    parsed.hash = "";
    variants.push(stripCitationUrlPunctuation(parsed.toString()));
    const { pathname } = parsed;
    if (pathname !== "/" && pathname.endsWith("/")) {
      parsed.pathname = pathname.replace(/\/+$/u, "");
      variants.push(stripCitationUrlPunctuation(parsed.toString()));
    }
  } catch {
    // Keep whatever keys were collected so far; malformed strings should not
    // throw from the evidence gate.
  }
  return [...new Set(variants)].filter(Boolean);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Registers `sourceRef` under every canonical key of `url`, without
 * overwriting keys that already have a mapping (first writer wins).
 *
 * @param {Map<string, string>} urlToSourceRef - Lookup being populated.
 * @param {unknown} url - URL whose canonical keys are registered.
 * @param {unknown} sourceRef - Ignored unless it is a workflow source ref.
 * @returns {void}
 */
function addUrlSourceRef(urlToSourceRef, url, sourceRef) {
  if (isWorkflowSourceRef(sourceRef)) {
    canonicalUrlKeys(url)
      .filter((key) => !urlToSourceRef.has(key))
      .forEach((key) => urlToSourceRef.set(key, sourceRef.trim()));
  }
}
|
|
147
|
+
|
|
148
|
+
/**
 * Builds a URL-key -> sourceRef map from the normalize-input packet, reading
 * `packet.research.sources` first and `packet.research.sourceRefIndex` second,
 * so mappings from `sources` take precedence for a shared key.
 *
 * @param {object|null|undefined} normalizeInputPacket - Stage output; may be absent.
 * @returns {Map<string, string>} Lookup from canonical URL key to source ref.
 */
function buildUrlSourceRefLookup(normalizeInputPacket) {
  const lookup = new Map();
  const research = normalizeInputPacket?.packet?.research;
  for (const field of ["sources", "sourceRefIndex"]) {
    for (const entry of asArray(research?.[field])) {
      if (entry && typeof entry === "object") {
        addUrlSourceRef(lookup, entry.url, entry.sourceRef);
      }
    }
  }
  return lookup;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Resolves URLs to the source refs registered for any of their canonical keys.
 *
 * @param {Iterable<unknown>} urls - URLs to resolve.
 * @param {Map<string, string>} urlToSourceRef - Lookup from canonical key to source ref.
 * @returns {string[]} Distinct source refs in first-seen order.
 */
function sourceRefsForUrls(urls, urlToSourceRef) {
  // A Set keeps insertion order, which gives first-seen ordering plus dedupe.
  const ordered = new Set();
  for (const url of urls) {
    for (const key of canonicalUrlKeys(url)) {
      const ref = urlToSourceRef.get(key);
      if (ref) ordered.add(ref);
    }
  }
  return [...ordered];
}
|
|
178
|
+
|
|
72
179
|
// Structured evidence check: at least one evidence row carrying both a source
|
|
73
180
|
// reference (HTTP URL, wsrc_ workflow source ref, or line-located local file path) and a quote/excerpt. Unlike
|
|
74
181
|
// a keyword scan over the serialized claim, this cannot be satisfied by merely
|
|
75
182
|
// mentioning a URL/path in prose.
|
|
76
183
|
function hasFetchedEvidence(claim) {
  // Only a real evidence array can qualify; each row is judged individually.
  const rows = claim?.evidence;
  if (!Array.isArray(rows)) return false;
  return rows.some(hasStrongEvidenceRow);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Decides whether one evidence row is strong: it must carry a usable source
 * reference and a non-blank quote, and must not be a candidate-only row.
 * A usable reference is an http(s) URL, a workflow source ref, or a local
 * source path that is also located by a line fragment or location field.
 *
 * @param {unknown} row - Evidence row from a claim.
 * @returns {boolean} True when the row satisfies all three conditions.
 */
function hasStrongEvidenceRow(row) {
  if (!row || typeof row !== "object") return false;
  const candidates = [row.url, row.source, row.file, row.path, row.sourceRef];
  const refs = candidates.filter((value) => typeof value === "string");
  const external = refs.some(
    (value) => /^https?:\/\//i.test(value) || isWorkflowSourceRef(value),
  );
  // A bare local path is not enough; it must point at a location in the file.
  const locatedLocal =
    refs.some((value) => looksLikeLocalSourceRef(value)) &&
    (refs.some(hasLineFragment) || hasLocalEvidenceLocation(row));
  if (!external && !locatedLocal) return false;
  const hasQuote = typeof row.quote === "string" && row.quote.trim().length > 0;
  if (!hasQuote) return false;
  return !isCandidateEvidenceRow(row);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Checks whether the trimmed text ends with a line fragment such as `#L10`,
 * `#L10-20` or `#L10-L20` (case-insensitive).
 *
 * @param {unknown} value - Reference text; null/undefined count as empty.
 * @returns {boolean} True when a trailing line fragment is present.
 */
function hasLineFragment(value) {
  const text = String(value ?? "").trim();
  return /#L\d+(?:-L?\d+)?$/i.test(text);
}
|
|
211
|
+
|
|
212
|
+
/**
 * Reports whether an evidence row pins its quote to a location through any of
 * the `line`, `lineStart`, `lineEnd`, `lines` or `excerptLocation` fields.
 *
 * A field counts when it is a finite number or a non-blank string. `NaN` and
 * `±Infinity` are rejected: they are `typeof "number"` but identify no line,
 * so accepting them would let an unlocated local reference pass as located.
 *
 * @param {object|null|undefined} row - Evidence row; a missing row has no location.
 * @returns {boolean} True when at least one location field is usable.
 */
function hasLocalEvidenceLocation(row) {
  const locators = [
    row?.line,
    row?.lineStart,
    row?.lineEnd,
    row?.lines,
    row?.excerptLocation,
  ];
  return locators.some((value) =>
    typeof value === "number"
      ? Number.isFinite(value)
      : typeof value === "string" && value.trim().length > 0,
  );
}
|
|
225
|
+
|
|
226
|
+
/**
 * Flags evidence rows that are only candidates: rows marked
 * `candidateOnly: true`, or whose `matchType` (on the row or on its
 * `sourceRead`) is exactly "terms".
 *
 * @param {object|null|undefined} row - Evidence row.
 * @returns {boolean} True when the row is candidate-only.
 */
function isCandidateEvidenceRow(row) {
  if (row?.candidateOnly === true) return true;
  return [row?.matchType, row?.sourceRead?.matchType].includes("terms");
}
|
|
233
|
+
|
|
234
|
+
function strongEvidenceIssue(claim) {
|
|
77
235
|
const rows = Array.isArray(claim?.evidence) ? claim.evidence : [];
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
);
|
|
83
|
-
const sourceRef = refs.some(
|
|
84
|
-
(value) => /^https?:\/\//i.test(value) || looksLikeLocalSourceRef(value),
|
|
85
|
-
);
|
|
86
|
-
const quote = typeof row.quote === "string" && row.quote.trim().length > 0;
|
|
87
|
-
return sourceRef && quote;
|
|
88
|
-
});
|
|
236
|
+
if (rows.length === 0) return "missing_structured_evidence_rows";
|
|
237
|
+
if (rows.some(isCandidateEvidenceRow))
|
|
238
|
+
return "candidate_only_evidence_not_strong";
|
|
239
|
+
return "evidence_rows_missing_source_or_quote";
|
|
89
240
|
}
|
|
90
241
|
|
|
91
242
|
function hasExactQuantitativeClaim(value) {
|
|
@@ -105,9 +256,9 @@ function verdictOf(claim) {
|
|
|
105
256
|
);
|
|
106
257
|
}
|
|
107
258
|
|
|
108
|
-
function withVerdict(claim, verdict, reason) {
|
|
259
|
+
function withVerdict(claim, verdict, reason, details = {}) {
|
|
109
260
|
const previous = verdictOf(claim);
|
|
110
|
-
const gate = { previous, verdict, reason };
|
|
261
|
+
const gate = { previous, verdict, reason, ...details };
|
|
111
262
|
return {
|
|
112
263
|
...claim,
|
|
113
264
|
status: verdict,
|
|
@@ -122,6 +273,132 @@ function withVerdict(claim, verdict, reason) {
|
|
|
122
273
|
};
|
|
123
274
|
}
|
|
124
275
|
|
|
276
|
+
/**
 * Extracts a usable claim id from `claim.id`, falling back to `claim.claimId`.
 *
 * @param {unknown} claim - Claim or candidate record.
 * @returns {{id: string, field: string} | {id: null, reason: string, field?: string}}
 *   On success, the trimmed id and the field it came from. Otherwise `id` is
 *   null with a reason of "not_an_object", "missing_claim_id",
 *   "non_string_claim_id" or "blank_claim_id"; the last two report the first
 *   offending field.
 */
function claimIdOf(claim) {
  if (!claim || typeof claim !== "object") {
    return { id: null, reason: "not_an_object" };
  }
  let firstProblem = null;
  for (const field of ["id", "claimId"]) {
    if (!(field in claim)) continue;
    const raw = claim[field];
    const isString = typeof raw === "string";
    const id = isString ? raw.trim() : "";
    if (id) return { id, field };
    // Remember only the first bad field; a later field may still be usable.
    firstProblem ??= {
      id: null,
      reason: isString ? "blank_claim_id" : "non_string_claim_id",
      field,
    };
  }
  return firstProblem ?? { id: null, reason: "missing_claim_id" };
}
|
|
295
|
+
|
|
296
|
+
/**
 * Trims the string entries of `values`, dropping non-strings, blanks and
 * repeats while keeping first-seen order.
 *
 * @param {Iterable<unknown>} values - Mixed values.
 * @returns {string[]} Distinct trimmed strings.
 */
function compactStrings(values) {
  // Set insertion order provides both the dedupe and the stable ordering.
  const unique = new Set();
  for (const value of values) {
    if (typeof value !== "string") continue;
    const text = value.trim();
    if (text) unique.add(text);
  }
  return [...unique];
}
|
|
308
|
+
|
|
309
|
+
/**
 * Maps the camelCase bucket spelling "partiallySupported" to the canonical
 * "partially_supported"; every other value is returned unchanged.
 *
 * @param {unknown} status - Verifier status.
 * @returns {unknown} Canonical status.
 */
function canonicalVerifierStatus(status) {
  return status === "partiallySupported" ? "partially_supported" : status;
}

/**
 * Picks the most conservative status among several verifier verdicts for the
 * same claim. Priority: "conflicting" > "unsupported" > "partially_supported"
 * > "unverified". "verified" is returned only when there is at least one
 * input and every input is "verified".
 *
 * @param {unknown[]} statuses - Raw verifier statuses (either spelling of partially supported).
 * @returns {string} The selected status; "unverified" when nothing better can be justified.
 */
function conservativeVerifierStatus(statuses) {
  const normalized = statuses.map((status) => canonicalVerifierStatus(status));
  // `[].every(...)` is vacuously true, so an empty list must be handled
  // explicitly or it would be reported as "verified".
  if (normalized.length === 0) return "unverified";
  for (const status of [
    "conflicting",
    "unsupported",
    "partially_supported",
    "unverified",
  ]) {
    if (normalized.includes(status)) return status;
  }
  if (normalized.every((status) => status === "verified")) return "verified";
  // Some input is neither known nor "verified". Surface the first unrecognized
  // string rather than a "verified" sibling; missing or non-string verdicts
  // fall back to "unverified".
  return (
    normalized.find(
      (status) =>
        typeof status === "string" && status !== "" && status !== "verified",
    ) ?? "unverified"
  );
}
|
|
329
|
+
|
|
330
|
+
/**
 * Builds the diagnostic record for a verifier row that cannot be counted.
 *
 * @param {object} args
 * @param {string} args.sourceId - Source the row came from.
 * @param {unknown} args.claim - The offending row; its verdict is recorded as `status`.
 * @param {string} args.reason - Reason code; "unknown_claim_id" selects the quarantine guidance.
 * @param {string} [args.claimId] - Included only when truthy.
 * @param {number} [args.index] - Included only when it is an integer.
 * @returns {object} Issue with sourceId, optional index/claimId, reason, status and nextStep.
 */
function issueForVerifierRow({ sourceId, claim, reason, claimId, index }) {
  const issue = { sourceId };
  if (Number.isInteger(index)) issue.index = index;
  if (claimId) issue.claimId = claimId;
  issue.reason = reason;
  issue.status = verdictOf(claim);
  if (reason === "unknown_claim_id") {
    issue.nextStep =
      "Verify-claims output did not match any normalized verification candidate; quarantine it from claim counts.";
  } else {
    issue.nextStep =
      "Verifier output is missing a usable string id/claimId; rerun or repair the verifier row before counting it.";
  }
  return issue;
}
|
|
343
|
+
|
|
344
|
+
/**
 * Converts a verifier-row issue into a remaining-gap entry. The issue's
 * reason is used both as `evidenceState` and as `reason`.
 *
 * @param {{claimId?: string, reason: string, nextStep: string}} issue - Verifier-row issue.
 * @returns {object} Gap entry; `claimId` is present only when the issue has a truthy one.
 */
function gapForVerifierIssue(issue) {
  const { claimId, reason, nextStep } = issue;
  const gap = { evidenceState: reason, reason, nextStep };
  return claimId ? { claimId, ...gap } : gap;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Collapses all verifier rows that share one claim id into a single claim.
 *
 * A lone row is passed through with `duplicate: null`. For several rows, the
 * conservative status is selected, the first row carrying that status (or the
 * first row overall) supplies the base claim, evidence rows are concatenated,
 * and `sourceRefs` / `sourceUrls` / `factSlotIds` are unioned as trimmed
 * distinct strings. The merge is recorded in `verdictDigest.duplicateVerifierRows`
 * and in the returned `duplicate` summary.
 *
 * @param {Array<{sourceId: string, claimId: string, claim: object}>} rows - Rows for one claim id; callers pass at least one.
 * @returns {{sourceId: string, claim: object, duplicate: object|null}} Merged row.
 */
function mergeVerifierRows(rows) {
  const first = rows[0];
  if (rows.length === 1) {
    return { sourceId: first.sourceId, claim: first.claim, duplicate: null };
  }

  const sourceIds = rows.map(({ sourceId }) => sourceId);
  const statusInputs = rows.map(({ claim }) => verdictOf(claim));
  const selectedStatus = conservativeVerifierStatus(statusInputs);
  const selectedRow =
    rows.find(
      ({ claim }) => canonicalVerifierStatus(verdictOf(claim)) === selectedStatus,
    ) ?? first;

  const merged = { ...selectedRow.claim };

  // Evidence from every duplicate row is kept, not just the selected row's.
  const pooledEvidence = rows.flatMap(({ claim }) =>
    Array.isArray(claim?.evidence) ? claim.evidence : [],
  );
  if (pooledEvidence.length > 0) merged.evidence = pooledEvidence;

  for (const field of ["sourceRefs", "sourceUrls", "factSlotIds"]) {
    const pooled = compactStrings(rows.flatMap(({ claim }) => claim?.[field] ?? []));
    if (pooled.length > 0) merged[field] = pooled;
  }

  const summary = {
    rowCount: rows.length,
    sourceIds,
    statusInputs,
    selectedStatus,
  };
  merged.status = selectedStatus;
  merged.verdict = selectedStatus;
  merged.verdictDigest = {
    ...(merged.verdictDigest ?? {}),
    status: selectedStatus,
    verdict: selectedStatus,
    duplicateVerifierRows: { ...summary },
  };

  return {
    sourceId: selectedRow.sourceId,
    claim: merged,
    duplicate: {
      claimId: first.claimId,
      ...summary,
      action: "merged_evidence_and_selected_conservative_status",
    },
  };
}
|
|
401
|
+
|
|
125
402
|
const STATUS_BUCKETS = {
|
|
126
403
|
verified: "verified",
|
|
127
404
|
partially_supported: "partiallySupported",
|
|
@@ -138,18 +415,42 @@ function findSource(sources, stageId) {
|
|
|
138
415
|
|
|
139
416
|
export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
140
417
|
const plan = findSource(sources, "plan");
|
|
141
|
-
const
|
|
418
|
+
const normalizeClaims = findSource(sources, "normalize-claims");
|
|
419
|
+
const sanitizedCandidates =
|
|
420
|
+
findSource(sources, "sanitize-claims") ??
|
|
421
|
+
findSource(sources, "sanitize-verification-candidates");
|
|
422
|
+
const normalized = sanitizedCandidates ?? normalizeClaims;
|
|
423
|
+
const normalizeInputPacket = findSource(sources, "normalize-input-packet");
|
|
424
|
+
const urlToSourceRef = buildUrlSourceRefLookup(normalizeInputPacket);
|
|
425
|
+
const candidateRecords = [];
|
|
142
426
|
const candidatesById = new Map();
|
|
143
|
-
|
|
427
|
+
const invalidNormalizedCandidates = [];
|
|
428
|
+
for (const [index, candidate] of asArray(
|
|
144
429
|
normalized?.claimInventory?.verificationCandidates,
|
|
145
|
-
)) {
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
430
|
+
).entries()) {
|
|
431
|
+
const idCheck = claimIdOf(candidate);
|
|
432
|
+
if (!idCheck.id) {
|
|
433
|
+
invalidNormalizedCandidates.push({
|
|
434
|
+
index,
|
|
435
|
+
reason: idCheck.reason,
|
|
436
|
+
nextStep:
|
|
437
|
+
"normalize-claims emitted a verification candidate without a usable string id; it cannot be deterministically joined.",
|
|
438
|
+
});
|
|
439
|
+
continue;
|
|
440
|
+
}
|
|
441
|
+
if (candidatesById.has(idCheck.id)) {
|
|
442
|
+
invalidNormalizedCandidates.push({
|
|
443
|
+
index,
|
|
444
|
+
claimId: idCheck.id,
|
|
445
|
+
reason: "duplicate_normalized_candidate_id",
|
|
446
|
+
nextStep:
|
|
447
|
+
"normalize-claims emitted duplicate candidate ids; only the first candidate is canonical for verifier joins.",
|
|
448
|
+
});
|
|
449
|
+
continue;
|
|
152
450
|
}
|
|
451
|
+
const normalizedCandidate = { ...candidate, id: idCheck.id };
|
|
452
|
+
candidateRecords.push(normalizedCandidate);
|
|
453
|
+
candidatesById.set(idCheck.id, normalizedCandidate);
|
|
153
454
|
}
|
|
154
455
|
|
|
155
456
|
const claims = Object.entries(sources ?? {})
|
|
@@ -158,11 +459,12 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
158
459
|
specId === "verify-claims" || specId.startsWith("verify-claims."),
|
|
159
460
|
)
|
|
160
461
|
.flatMap(([sourceId, source]) =>
|
|
161
|
-
asArray(source).map((claim) => ({ sourceId, claim })),
|
|
462
|
+
asArray(source).map((claim, index) => ({ sourceId, claim, index })),
|
|
162
463
|
);
|
|
163
464
|
// Legacy layout: when no verify-claims.* source ids exist (for example a
|
|
164
465
|
// single from: string dependency), fall back to every non-plan/non-normalize
|
|
165
|
-
// source.
|
|
466
|
+
// source. Exclude sanitizer sources because they are canonicalizer inputs, not
|
|
467
|
+
// verifier verdict rows.
|
|
166
468
|
const verifierClaims =
|
|
167
469
|
claims.length > 0
|
|
168
470
|
? claims
|
|
@@ -170,40 +472,114 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
170
472
|
.filter(
|
|
171
473
|
([specId]) =>
|
|
172
474
|
!specId.startsWith("plan") &&
|
|
173
|
-
!specId.startsWith("normalize-claims")
|
|
475
|
+
!specId.startsWith("normalize-claims") &&
|
|
476
|
+
!specId.startsWith("normalize-input-packet") &&
|
|
477
|
+
!specId.startsWith("sanitize-claims") &&
|
|
478
|
+
!specId.startsWith("sanitize-verification-candidates"),
|
|
174
479
|
)
|
|
175
480
|
.flatMap(([sourceId, source]) =>
|
|
176
|
-
asArray(source).map((claim) => ({ sourceId, claim })),
|
|
481
|
+
asArray(source).map((claim, index) => ({ sourceId, claim, index })),
|
|
177
482
|
);
|
|
178
483
|
|
|
179
484
|
const auditedClaims = [];
|
|
180
485
|
const remainingGaps = [];
|
|
181
486
|
const identityJoinNotes = [];
|
|
487
|
+
const sourceRefJoinFailures = [];
|
|
488
|
+
const invalidVerifierRows = [];
|
|
489
|
+
const duplicateVerifierRows = [];
|
|
182
490
|
const gateSummary = {
|
|
183
491
|
total: 0,
|
|
184
492
|
unchanged: 0,
|
|
185
493
|
downgraded: 0,
|
|
186
494
|
identityRejoined: 0,
|
|
495
|
+
sourceRefsRejoined: 0,
|
|
496
|
+
sourceRefsBackfilledFromUrls: 0,
|
|
497
|
+
sourceRefJoinFailures: 0,
|
|
498
|
+
verifierRowsTotal: verifierClaims.length,
|
|
499
|
+
validVerifierRows: 0,
|
|
500
|
+
invalidVerifierRows: 0,
|
|
501
|
+
missingVerifierResults: 0,
|
|
502
|
+
duplicateVerifierClaims: 0,
|
|
503
|
+
duplicateVerifierRows: 0,
|
|
504
|
+
duplicateStatusConflicts: 0,
|
|
505
|
+
invalidNormalizedCandidates: invalidNormalizedCandidates.length,
|
|
187
506
|
};
|
|
507
|
+
const verifierRowsById = new Map();
|
|
508
|
+
const legacyVerifierRows = [];
|
|
509
|
+
for (const { sourceId, claim, index } of verifierClaims) {
|
|
510
|
+
const idCheck = claimIdOf(claim);
|
|
511
|
+
if (!idCheck.id) {
|
|
512
|
+
const issue = issueForVerifierRow({
|
|
513
|
+
sourceId,
|
|
514
|
+
claim,
|
|
515
|
+
index,
|
|
516
|
+
reason: idCheck.reason,
|
|
517
|
+
});
|
|
518
|
+
invalidVerifierRows.push(issue);
|
|
519
|
+
remainingGaps.push(gapForVerifierIssue(issue));
|
|
520
|
+
gateSummary.invalidVerifierRows += 1;
|
|
521
|
+
continue;
|
|
522
|
+
}
|
|
523
|
+
if (candidateRecords.length > 0 && !candidatesById.has(idCheck.id)) {
|
|
524
|
+
const issue = issueForVerifierRow({
|
|
525
|
+
sourceId,
|
|
526
|
+
claim,
|
|
527
|
+
index,
|
|
528
|
+
claimId: idCheck.id,
|
|
529
|
+
reason: "unknown_claim_id",
|
|
530
|
+
});
|
|
531
|
+
invalidVerifierRows.push(issue);
|
|
532
|
+
remainingGaps.push(gapForVerifierIssue(issue));
|
|
533
|
+
gateSummary.invalidVerifierRows += 1;
|
|
534
|
+
continue;
|
|
535
|
+
}
|
|
536
|
+
const row = {
|
|
537
|
+
sourceId,
|
|
538
|
+
claimId: idCheck.id,
|
|
539
|
+
claim: { ...claim, [idCheck.field ?? "id"]: idCheck.id },
|
|
540
|
+
};
|
|
541
|
+
gateSummary.validVerifierRows += 1;
|
|
542
|
+
if (candidateRecords.length > 0) {
|
|
543
|
+
const rows = verifierRowsById.get(idCheck.id) ?? [];
|
|
544
|
+
rows.push(row);
|
|
545
|
+
verifierRowsById.set(idCheck.id, rows);
|
|
546
|
+
} else {
|
|
547
|
+
legacyVerifierRows.push(row);
|
|
548
|
+
}
|
|
549
|
+
}
|
|
188
550
|
|
|
189
|
-
|
|
190
|
-
|
|
551
|
+
function auditClaim({
|
|
552
|
+
sourceId,
|
|
553
|
+
claim,
|
|
554
|
+
candidate,
|
|
555
|
+
claimId,
|
|
556
|
+
missingVerifierResult = false,
|
|
557
|
+
}) {
|
|
558
|
+
if (!claim || typeof claim !== "object") return;
|
|
191
559
|
gateSummary.total += 1;
|
|
192
|
-
const
|
|
560
|
+
const evidenceRefs = [...collectEvidenceRefs(claim)];
|
|
561
|
+
const workflowSourceRefs = new Set([...collectWorkflowSourceRefs(claim)]);
|
|
193
562
|
const exactQuantitative = hasExactQuantitativeClaim(claim);
|
|
194
563
|
const fetched = hasFetchedEvidence(claim);
|
|
195
|
-
let next = {
|
|
564
|
+
let next = {
|
|
565
|
+
...claim,
|
|
566
|
+
...(claimId ? { id: claimId } : {}),
|
|
567
|
+
...(sourceId ? { sourceId } : {}),
|
|
568
|
+
sourceUrls: evidenceRefs,
|
|
569
|
+
evidenceRefs,
|
|
570
|
+
};
|
|
571
|
+
if (missingVerifierResult) {
|
|
572
|
+
next = withVerdict(
|
|
573
|
+
next,
|
|
574
|
+
"unverified",
|
|
575
|
+
"normalized verification candidate had no verifier result",
|
|
576
|
+
);
|
|
577
|
+
}
|
|
196
578
|
|
|
197
579
|
// Identity join: the normalizer's candidate record is authoritative for
|
|
198
580
|
// claim id, claim text, and factSlotIds. Verifier echoes drift.
|
|
199
|
-
const claimId =
|
|
200
|
-
typeof next.id === "string"
|
|
201
|
-
? next.id
|
|
202
|
-
: typeof next.claimId === "string"
|
|
203
|
-
? next.claimId
|
|
204
|
-
: null;
|
|
205
|
-
const candidate = claimId ? candidatesById.get(claimId) : null;
|
|
206
581
|
if (candidate) {
|
|
582
|
+
if (claimId) next.id = claimId;
|
|
207
583
|
if (
|
|
208
584
|
typeof candidate.claim === "string" &&
|
|
209
585
|
candidate.claim &&
|
|
@@ -218,30 +594,84 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
218
594
|
}
|
|
219
595
|
if (Array.isArray(candidate.factSlotIds))
|
|
220
596
|
next.factSlotIds = [...candidate.factSlotIds];
|
|
597
|
+
const beforeSourceRefCount = workflowSourceRefs.size;
|
|
598
|
+
for (const sourceRef of collectWorkflowSourceRefs(candidate))
|
|
599
|
+
workflowSourceRefs.add(sourceRef);
|
|
600
|
+
if (workflowSourceRefs.size > beforeSourceRefCount)
|
|
601
|
+
gateSummary.sourceRefsRejoined += 1;
|
|
602
|
+
}
|
|
603
|
+
const beforeUrlBackfillSourceRefCount = workflowSourceRefs.size;
|
|
604
|
+
for (const sourceRef of sourceRefsForUrls(
|
|
605
|
+
[
|
|
606
|
+
...sourceUrlArray(candidate?.sourceUrls),
|
|
607
|
+
...evidenceRefs.filter((ref) => /^https?:\/\//i.test(ref)),
|
|
608
|
+
],
|
|
609
|
+
urlToSourceRef,
|
|
610
|
+
))
|
|
611
|
+
workflowSourceRefs.add(sourceRef);
|
|
612
|
+
if (workflowSourceRefs.size > beforeUrlBackfillSourceRefCount) {
|
|
613
|
+
gateSummary.sourceRefsRejoined += 1;
|
|
614
|
+
gateSummary.sourceRefsBackfilledFromUrls +=
|
|
615
|
+
workflowSourceRefs.size - beforeUrlBackfillSourceRefCount;
|
|
616
|
+
}
|
|
617
|
+
if (workflowSourceRefs.size > 0) next.sourceRefs = [...workflowSourceRefs];
|
|
618
|
+
const httpSourceUrls = [
|
|
619
|
+
...new Set([
|
|
620
|
+
...sourceUrlArray(candidate?.sourceUrls).filter((ref) =>
|
|
621
|
+
/^https?:\/\//i.test(ref),
|
|
622
|
+
),
|
|
623
|
+
...evidenceRefs.filter((ref) => /^https?:\/\//i.test(ref)),
|
|
624
|
+
]),
|
|
625
|
+
];
|
|
626
|
+
if (
|
|
627
|
+
claimId &&
|
|
628
|
+
candidate &&
|
|
629
|
+
workflowSourceRefs.size === 0 &&
|
|
630
|
+
httpSourceUrls.length > 0
|
|
631
|
+
) {
|
|
632
|
+
const failure = {
|
|
633
|
+
claimId,
|
|
634
|
+
evidenceState: "source_ref_not_available",
|
|
635
|
+
sourceUrls: httpSourceUrls,
|
|
636
|
+
nextStep:
|
|
637
|
+
"Preserve sourceRefs from workflow_web_fetch_source through research and normalization when available.",
|
|
638
|
+
};
|
|
639
|
+
sourceRefJoinFailures.push(failure);
|
|
640
|
+
gateSummary.sourceRefJoinFailures += 1;
|
|
221
641
|
}
|
|
222
642
|
|
|
223
643
|
const verdict = verdictOf(next);
|
|
644
|
+
const exactQuantitativeForGate =
|
|
645
|
+
exactQuantitative || hasExactQuantitativeClaim(next);
|
|
224
646
|
if (
|
|
225
647
|
verdict === "verified" &&
|
|
226
648
|
options.requireFetchedEvidenceForVerified !== false &&
|
|
227
649
|
!fetched
|
|
228
650
|
) {
|
|
651
|
+
const reasonCode =
|
|
652
|
+
options.downgradeExactQuantitativeWithoutSource !== false &&
|
|
653
|
+
exactQuantitativeForGate &&
|
|
654
|
+
evidenceRefs.length === 0
|
|
655
|
+
? "exact_quantitative_without_source_reference"
|
|
656
|
+
: strongEvidenceIssue(next);
|
|
229
657
|
next = withVerdict(
|
|
230
658
|
next,
|
|
231
659
|
"partially_supported",
|
|
232
660
|
"verified claim lacked structured evidence rows with both source reference and quote",
|
|
661
|
+
{ reasonCode },
|
|
233
662
|
);
|
|
234
663
|
}
|
|
235
664
|
if (
|
|
236
665
|
verdictOf(next) === "verified" &&
|
|
237
666
|
options.downgradeExactQuantitativeWithoutSource !== false &&
|
|
238
667
|
exactQuantitative &&
|
|
239
|
-
|
|
668
|
+
evidenceRefs.length === 0
|
|
240
669
|
) {
|
|
241
670
|
next = withVerdict(
|
|
242
671
|
next,
|
|
243
672
|
"partially_supported",
|
|
244
673
|
"exact quantitative claim lacked structured source reference evidence",
|
|
674
|
+
{ reasonCode: "exact_quantitative_without_source_reference" },
|
|
245
675
|
);
|
|
246
676
|
}
|
|
247
677
|
|
|
@@ -249,8 +679,10 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
249
679
|
gateSummary.downgraded += 1;
|
|
250
680
|
remainingGaps.push({
|
|
251
681
|
claimId: next.id ?? next.claimId,
|
|
252
|
-
evidenceState:
|
|
253
|
-
|
|
682
|
+
evidenceState:
|
|
683
|
+
next.evidenceGate?.reasonCode ?? "insufficient_for_verified",
|
|
684
|
+
reason: next.evidenceGate?.reason,
|
|
685
|
+
sourceUrls: evidenceRefs,
|
|
254
686
|
nextStep:
|
|
255
687
|
"Fetch or inspect primary source evidence for the exact claim before using it as verified.",
|
|
256
688
|
});
|
|
@@ -260,6 +692,74 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
260
692
|
auditedClaims.push(next);
|
|
261
693
|
}
|
|
262
694
|
|
|
695
|
+
if (candidateRecords.length > 0) {
|
|
696
|
+
for (const candidate of candidateRecords) {
|
|
697
|
+
const rows = verifierRowsById.get(candidate.id) ?? [];
|
|
698
|
+
if (rows.length === 0) {
|
|
699
|
+
gateSummary.missingVerifierResults += 1;
|
|
700
|
+
remainingGaps.push({
|
|
701
|
+
claimId: candidate.id,
|
|
702
|
+
evidenceState: "missing_verifier_result",
|
|
703
|
+
reason: "normalized verification candidate had no verifier result",
|
|
704
|
+
sourceUrls: sourceUrlArray(candidate.sourceUrls),
|
|
705
|
+
relatedFactSlotIds: Array.isArray(candidate.factSlotIds)
|
|
706
|
+
? [...candidate.factSlotIds]
|
|
707
|
+
: [],
|
|
708
|
+
nextStep:
|
|
709
|
+
"Run or repair the verifier for this normalized candidate before treating the claim as supported.",
|
|
710
|
+
});
|
|
711
|
+
auditClaim({
|
|
712
|
+
sourceId: null,
|
|
713
|
+
claim: candidate,
|
|
714
|
+
candidate,
|
|
715
|
+
claimId: candidate.id,
|
|
716
|
+
missingVerifierResult: true,
|
|
717
|
+
});
|
|
718
|
+
continue;
|
|
719
|
+
}
|
|
720
|
+
const merged = mergeVerifierRows(rows);
|
|
721
|
+
if (merged.duplicate) {
|
|
722
|
+
const statuses = merged.duplicate.statusInputs.map((status) =>
|
|
723
|
+
status === "partiallySupported" ? "partially_supported" : status,
|
|
724
|
+
);
|
|
725
|
+
const hasStatusConflict = new Set(statuses).size > 1;
|
|
726
|
+
const duplicate = {
|
|
727
|
+
...merged.duplicate,
|
|
728
|
+
statusConflict: hasStatusConflict,
|
|
729
|
+
};
|
|
730
|
+
duplicateVerifierRows.push(duplicate);
|
|
731
|
+
gateSummary.duplicateVerifierClaims += 1;
|
|
732
|
+
gateSummary.duplicateVerifierRows += rows.length - 1;
|
|
733
|
+
if (hasStatusConflict) {
|
|
734
|
+
gateSummary.duplicateStatusConflicts += 1;
|
|
735
|
+
remainingGaps.push({
|
|
736
|
+
claimId: candidate.id,
|
|
737
|
+
evidenceState: "duplicate_verifier_rows_conflicting",
|
|
738
|
+
reason:
|
|
739
|
+
"multiple verifier rows for the same normalized candidate disagreed; the gate selected a conservative status",
|
|
740
|
+
nextStep:
|
|
741
|
+
"Inspect duplicate verify-claims outputs before using this claim as a hard decision threshold.",
|
|
742
|
+
});
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
auditClaim({
|
|
746
|
+
sourceId: merged.sourceId,
|
|
747
|
+
claim: merged.claim,
|
|
748
|
+
candidate,
|
|
749
|
+
claimId: candidate.id,
|
|
750
|
+
});
|
|
751
|
+
}
|
|
752
|
+
} else {
|
|
753
|
+
for (const row of legacyVerifierRows) {
|
|
754
|
+
auditClaim({
|
|
755
|
+
sourceId: row.sourceId,
|
|
756
|
+
claim: row.claim,
|
|
757
|
+
candidate: null,
|
|
758
|
+
claimId: row.claimId,
|
|
759
|
+
});
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
263
763
|
// Deterministic status partition + counts for the synthesis stage.
|
|
264
764
|
const statusPartitions = {
|
|
265
765
|
verified: [],
|
|
@@ -314,6 +814,8 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
314
814
|
factSlotIds: claim.factSlotIds,
|
|
315
815
|
status: verdictOf(claim),
|
|
316
816
|
confidence: claim.confidence,
|
|
817
|
+
sourceRefs: claim.sourceRefs,
|
|
818
|
+
sourceUrls: claim.sourceUrls,
|
|
317
819
|
verdictDigest: claim.verdictDigest,
|
|
318
820
|
correctionOrCounterclaim: claim.correctionOrCounterclaim,
|
|
319
821
|
}));
|
|
@@ -323,6 +825,10 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
323
825
|
claimDigests,
|
|
324
826
|
gateSummary,
|
|
325
827
|
remainingGaps,
|
|
828
|
+
sourceRefJoinFailures,
|
|
829
|
+
invalidVerifierRows,
|
|
830
|
+
duplicateVerifierRows,
|
|
831
|
+
invalidNormalizedCandidates,
|
|
326
832
|
statusPartitions,
|
|
327
833
|
verdictCounts,
|
|
328
834
|
slotCoverageCheck: {
|
|
@@ -331,5 +837,7 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
331
837
|
droppedSlotIds,
|
|
332
838
|
},
|
|
333
839
|
identityJoinNotes,
|
|
840
|
+
precisionGuardDiagnostics:
|
|
841
|
+
normalizeInputPacket?.packet?.precisionGuard?.summary,
|
|
334
842
|
};
|
|
335
843
|
}
|