@agwab/pi-workflow 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -3
- package/agents/researcher.md +17 -7
- package/dist/artifact-graph-runtime.js +1 -0
- package/dist/compiler.js +2 -2
- package/dist/dynamic-generated-task-runtime.js +4 -3
- package/dist/dynamic-runtime-bundle.js +3 -2
- package/dist/extension.js +40 -1
- package/dist/subagent-backend.js +82 -27
- package/dist/tool-metadata.d.ts +1 -0
- package/dist/tool-metadata.js +13 -1
- package/dist/workflow-artifact-extension.js +3 -2
- package/dist/workflow-artifact-tool.js +84 -4
- package/dist/workflow-web-source-extension.d.ts +43 -0
- package/dist/workflow-web-source-extension.js +1194 -0
- package/dist/workflow-web-source.d.ts +171 -0
- package/dist/workflow-web-source.js +897 -0
- package/docs/usage.md +32 -45
- package/node_modules/@agwab/pi-subagent/package.json +1 -1
- package/node_modules/@agwab/pi-subagent/src/api.ts +245 -132
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +243 -163
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +117 -90
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +728 -475
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +305 -209
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +750 -439
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +422 -268
- package/package.json +3 -4
- package/skills/workflow-guide/scaffolds/object-tool-fallback/schemas/fetch-control.schema.json +1 -1
- package/skills/workflow-guide/scaffolds/object-tool-fallback/spec.json +4 -3
- package/src/artifact-graph-runtime.ts +1 -0
- package/src/compiler.ts +2 -1
- package/src/dynamic-generated-task-runtime.ts +4 -2
- package/src/dynamic-runtime-bundle.ts +3 -2
- package/src/extension.ts +46 -1
- package/src/subagent-backend.ts +121 -37
- package/src/tool-metadata.ts +22 -1
- package/src/workflow-artifact-extension.ts +3 -2
- package/src/workflow-artifact-tool.ts +96 -4
- package/src/workflow-web-source-extension.ts +1411 -0
- package/src/workflow-web-source.ts +1171 -0
- package/workflows/README.md +1 -1
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +474 -40
- package/workflows/deep-research/helpers/final-audit-packet.mjs +219 -0
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +436 -0
- package/workflows/deep-research/helpers/render-executive.mjs +571 -198
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +35 -8
- package/workflows/deep-research/schemas/deep-research-normalize-claims-control.schema.json +45 -4
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +0 -2
- package/workflows/deep-research/spec.json +36 -21
- package/workflows/deep-review/helpers/render-review-report.mjs +502 -0
- package/workflows/deep-review/schemas/deep-review-render-control.schema.json +50 -0
- package/workflows/deep-review/spec.json +22 -1
- package/docs/release.md +0 -89
- package/node_modules/@pondwader/socks5-server/.DS_Store +0 -0
- package/node_modules/commander/.DS_Store +0 -0
- package/node_modules/jiti/.DS_Store +0 -0
- package/node_modules/node-forge/.DS_Store +0 -0
- package/node_modules/shell-quote/.DS_Store +0 -0
- package/node_modules/zod/.DS_Store +0 -0
package/workflows/README.md
CHANGED
|
@@ -71,4 +71,4 @@ export default async function helper({ sources, options, context }) {
|
|
|
71
71
|
|
|
72
72
|
Helper refs are intentionally directory-local only. Allowed refs start with `./` and point to `.mjs` files inside the workflow bundle directory. Parent-directory refs, absolute paths, home-relative paths, protocol refs (`file://`, `https://`), and `npm:` refs are rejected. This is containment and reproducibility, not a sandbox: helper code still runs inside the workflow process and is not constrained by subagent tool allowlists.
|
|
73
73
|
|
|
74
|
-
Workflow runs
|
|
74
|
+
Workflow runs now prefer normalized web-source tools: `workflow_web_search`, `workflow_web_fetch_source`, and `workflow_web_source_read`. Normalized web sources are stored inside `.pi/workflows/<run-id>/web-source-cache/`, model-visible tool results expose only compact cards/source refs/snippets, and agents should use `workflow_web_source_read` instead of reading cache files directly. Batch several source fetches with `urls: [...]` or `sources: [...]`, and batch several snippets from one sourceRef with `queries: [...]` or `reads: [...]` to reduce repeated tool turns; use `claim` plus distinctive `terms` for candidate quote windows with match metadata when the exact quote is unknown. Deep-research also uses support helpers to compact normalize inputs and preserve audited verdict/sourceRef ledgers before final synthesis. Custom extension `fetch_content` providers are disabled by default for normalized source fetches unless the workflow security policy explicitly trusts private-host behavior; this avoids accepting opaque provider network fetches as SSRF-safe. Legacy `fetch_content` tasks still use `.pi/workflows/<run-id>/source-cache/fetch-content/`; set `PI_WORKFLOW_FETCH_CONTENT_CACHE=0` to opt out for a run. Treat cache-enabled benchmark runs as a separate cohort from older uncached measurements.
|
|
@@ -60,32 +60,137 @@ function collectEvidenceRefs(claim) {
|
|
|
60
60
|
const refs = new Set([...collectUrls(claim)]);
|
|
61
61
|
for (const row of Array.isArray(claim?.evidence) ? claim.evidence : []) {
|
|
62
62
|
if (!row || typeof row !== "object") continue;
|
|
63
|
-
for (const value of [row.url, row.source, row.file, row.path]) {
|
|
63
|
+
for (const value of [row.url, row.source, row.file, row.path, row.sourceRef]) {
|
|
64
64
|
if (typeof value !== "string") continue;
|
|
65
|
-
if (/^https?:\/\//i.test(value) || looksLikeLocalSourceRef(value))
|
|
65
|
+
if (/^https?:\/\//i.test(value) || isWorkflowSourceRef(value) || looksLikeLocalSourceRef(value))
|
|
66
66
|
refs.add(value.trim());
|
|
67
67
|
}
|
|
68
68
|
}
|
|
69
69
|
return refs;
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
+
// Walks an arbitrarily nested value (strings, arrays, plain objects) and adds
// every `wsrc_` + 32 lowercase-hex token found inside any string to `refs`.
// Object keys are not scanned, only values. Returns the same Set it was given
// (or a fresh one), in first-seen order.
function collectWorkflowSourceRefs(value, refs = new Set()) {
  const visit = (node) => {
    if (typeof node === "string") {
      const found = node.match(/\bwsrc_[a-f0-9]{32}\b/g) ?? [];
      for (const ref of found) refs.add(ref);
      return;
    }
    // Numbers, booleans, null, undefined, functions: nothing to scan.
    if (node === null || typeof node !== "object") return;
    const children = Array.isArray(node) ? node : Object.values(node);
    for (const child of children) visit(child);
  };
  visit(value);
  return refs;
}
|
|
86
|
+
|
|
87
|
+
// True when `value` is a string that, after trimming, is exactly a workflow
// source ref: `wsrc_` followed by 32 lowercase hex characters.
//
// Non-strings are rejected outright instead of being coerced with String():
// coercion let values such as `["wsrc_…"]` pass the check, after which callers
// that treat the value as a string (e.g. calling `.trim()` on it) would throw.
function isWorkflowSourceRef(value) {
  return typeof value === "string" && /^wsrc_[a-f0-9]{32}$/.test(value.trim());
}
|
|
90
|
+
|
|
91
|
+
// Normalizes a possibly-missing list into an array of trimmed, non-empty
// strings. Anything that is not an array yields []; non-string and
// whitespace-only entries are dropped.
function sourceUrlArray(value) {
  if (!Array.isArray(value)) return [];
  const cleaned = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed) cleaned.push(trimmed);
  }
  return cleaned;
}
|
|
95
|
+
|
|
96
|
+
// Stringifies `value` (null/undefined become ""), trims surrounding
// whitespace, then removes any trailing run of `.`, `,`, `;` or `:`.
// The result is not re-trimmed after the punctuation is removed.
function stripCitationUrlPunctuation(value) {
  const text = value == null ? "" : String(value);
  const trimmed = text.trim();
  return trimmed.replace(/[.,;:]+$/u, "");
}
|
|
101
|
+
|
|
102
|
+
// Produces the lookup keys under which an http(s) URL may be registered or
// cited, in this order and de-duplicated:
//   1. the cited text with trailing citation punctuation stripped,
//   2. the parsed URL re-serialized with lowercased scheme/host and no fragment,
//   3. variant 2 without trailing slashes on the path (only when the path is
//      not just "/").
// Anything that does not start with http:// or https:// yields [].
function canonicalUrlKeys(value) {
  const cited = stripCitationUrlPunctuation(value);
  if (!/^https?:\/\//i.test(cited)) return [];
  const variants = [cited];
  try {
    const parsed = new URL(cited);
    parsed.protocol = parsed.protocol.toLowerCase();
    parsed.hostname = parsed.hostname.toLowerCase();
    parsed.hash = "";
    variants.push(stripCitationUrlPunctuation(parsed.toString()));
    const { pathname } = parsed;
    if (pathname !== "/" && pathname.endsWith("/")) {
      parsed.pathname = pathname.replace(/\/+$/u, "");
      variants.push(stripCitationUrlPunctuation(parsed.toString()));
    }
  } catch {
    // Unparseable input keeps only the stripped raw key collected so far; a
    // malformed string must not make the evidence gate throw.
  }
  return [...new Set(variants)].filter(Boolean);
}
|
|
123
|
+
|
|
124
|
+
// Registers `sourceRef` under every canonical key of `url` in the
// `urlToSourceRef` Map. The first ref registered for a key wins; later refs
// for the same key are ignored. Nothing is added when `sourceRef` is not a
// valid workflow source ref or `url` yields no canonical keys.
function addUrlSourceRef(urlToSourceRef, url, sourceRef) {
  // Explicit string check: a non-string that merely stringifies to a valid ref
  // (for example a one-element array) must be skipped, otherwise the
  // `.trim()` below would throw a TypeError.
  if (typeof sourceRef !== "string" || !isWorkflowSourceRef(sourceRef)) return;
  const ref = sourceRef.trim();
  for (const key of canonicalUrlKeys(url)) {
    if (!urlToSourceRef.has(key)) urlToSourceRef.set(key, ref);
  }
}
|
|
130
|
+
|
|
131
|
+
// Builds a Map from canonical URL key to workflow sourceRef using two lists
// read from `normalizeInputPacket.packet.research`: `sources` first, then
// `sourceRefIndex`. Because addUrlSourceRef keeps the first ref per key,
// entries from `sources` take precedence. Non-object entries are skipped.
// `asArray` is a helper defined elsewhere in this file.
function buildUrlSourceRefLookup(normalizeInputPacket) {
  const lookup = new Map();
  const research = normalizeInputPacket?.packet?.research;
  for (const listName of ["sources", "sourceRefIndex"]) {
    for (const entry of asArray(research?.[listName])) {
      if (entry && typeof entry === "object") {
        addUrlSourceRef(lookup, entry.url, entry.sourceRef);
      }
    }
  }
  return lookup;
}
|
|
145
|
+
|
|
146
|
+
// Resolves a list of URLs to the distinct sourceRefs registered for any of
// their canonical keys in `urlToSourceRef`. Order follows first discovery;
// URLs with no registered key contribute nothing.
function sourceRefsForUrls(urls, urlToSourceRef) {
  // A Set iterates in insertion order, so it covers both de-duplication and
  // the first-seen ordering of the result.
  const resolved = new Set();
  for (const url of urls) {
    for (const key of canonicalUrlKeys(url)) {
      const hit = urlToSourceRef.get(key);
      if (hit) resolved.add(hit);
    }
  }
  return [...resolved];
}
|
|
159
|
+
|
|
72
160
|
// Structured evidence check: at least one evidence row carrying both a source
|
|
73
161
|
// reference (HTTP URL, workflow sourceRef, or local repository file path) and a quote/excerpt. Unlike
|
|
74
162
|
// a keyword scan over the serialized claim, this cannot be satisfied by merely
|
|
75
163
|
// mentioning a URL/path in prose.
|
|
76
164
|
function hasFetchedEvidence(claim) {
  // Only a real array of evidence rows can qualify; the per-row rules live in
  // hasStrongEvidenceRow, and a single qualifying row is enough.
  const rows = claim?.evidence;
  if (!Array.isArray(rows)) return false;
  return rows.some((row) => hasStrongEvidenceRow(row));
}
|
|
167
|
+
|
|
168
|
+
// A row counts as strong evidence when all of the following hold:
//   - it is an object with a non-blank string `quote`,
//   - at least one of url/source/file/path/sourceRef is a string that is an
//     http(s) URL, a workflow sourceRef, or passes looksLikeLocalSourceRef
//     (defined elsewhere in this file),
//   - it is not flagged as a candidate-only row.
function hasStrongEvidenceRow(row) {
  if (!row || typeof row !== "object") return false;
  if (typeof row.quote !== "string" || row.quote.trim().length === 0) return false;
  const candidates = [row.url, row.source, row.file, row.path, row.sourceRef];
  const hasSourceReference = candidates.some(
    (candidate) =>
      typeof candidate === "string" &&
      (/^https?:\/\//i.test(candidate) ||
        isWorkflowSourceRef(candidate) ||
        looksLikeLocalSourceRef(candidate)),
  );
  if (!hasSourceReference) return false;
  return !isCandidateEvidenceRow(row);
}
|
|
184
|
+
|
|
185
|
+
// A row is "candidate only" when it is explicitly flagged with
// `candidateOnly: true`, or when its own `matchType` or its
// `sourceRead.matchType` is "terms".
function isCandidateEvidenceRow(row) {
  if (row == null) return false;
  if (row.candidateOnly === true) return true;
  return [row.matchType, row.sourceRead?.matchType].includes("terms");
}
|
|
188
|
+
|
|
189
|
+
// Returns a reason code describing the state of a claim's evidence rows:
//   - no evidence array, or an empty one  -> "missing_structured_evidence_rows"
//   - at least one candidate-only row     -> "candidate_only_evidence_not_strong"
//   - otherwise                           -> "evidence_rows_missing_source_or_quote"
function strongEvidenceIssue(claim) {
  const evidence = claim?.evidence;
  if (!Array.isArray(evidence) || evidence.length === 0) {
    return "missing_structured_evidence_rows";
  }
  const hasCandidateRow = evidence.some((row) => isCandidateEvidenceRow(row));
  return hasCandidateRow
    ? "candidate_only_evidence_not_strong"
    : "evidence_rows_missing_source_or_quote";
}
|
|
90
195
|
|
|
91
196
|
function hasExactQuantitativeClaim(value) {
|
|
@@ -105,9 +210,9 @@ function verdictOf(claim) {
|
|
|
105
210
|
);
|
|
106
211
|
}
|
|
107
212
|
|
|
108
|
-
function withVerdict(claim, verdict, reason) {
|
|
213
|
+
function withVerdict(claim, verdict, reason, details = {}) {
|
|
109
214
|
const previous = verdictOf(claim);
|
|
110
|
-
const gate = { previous, verdict, reason };
|
|
215
|
+
const gate = { previous, verdict, reason, ...details };
|
|
111
216
|
return {
|
|
112
217
|
...claim,
|
|
113
218
|
status: verdict,
|
|
@@ -122,6 +227,125 @@ function withVerdict(claim, verdict, reason) {
|
|
|
122
227
|
};
|
|
123
228
|
}
|
|
124
229
|
|
|
230
|
+
// Extracts a usable claim id from a claim-like object.
//
// Looks at `id` first, then `claimId`, and returns `{ id, field }` for the
// first one that is a non-blank string (trimmed). When neither is usable the
// result is `{ id: null, reason, field? }`, where `reason` is one of:
//   "not_an_object"       - input is not an object
//   "non_string_claim_id" - first problematic field holds a non-string
//   "blank_claim_id"      - first problematic field is empty after trimming
//   "missing_claim_id"    - neither field is present
function claimIdOf(claim) {
  if (claim === null || typeof claim !== "object") {
    return { id: null, reason: "not_an_object" };
  }
  // Only the first problem encountered is reported; a later usable field
  // still takes priority over it.
  let firstProblem = null;
  for (const field of ["id", "claimId"]) {
    if (!(field in claim)) continue;
    const raw = claim[field];
    const id = typeof raw === "string" ? raw.trim() : null;
    if (id) return { id, field };
    firstProblem ??= {
      id: null,
      reason: id === null ? "non_string_claim_id" : "blank_claim_id",
      field,
    };
  }
  return firstProblem ?? { id: null, reason: "missing_claim_id" };
}
|
|
249
|
+
|
|
250
|
+
// Returns the distinct, trimmed, non-empty strings from `values` in
// first-seen order. Non-string entries are ignored.
function compactStrings(values) {
  const unique = new Set();
  for (const value of values) {
    if (typeof value !== "string") continue;
    const text = value.trim();
    if (text) unique.add(text);
  }
  return [...unique];
}
|
|
262
|
+
|
|
263
|
+
// Maps the camelCase spelling "partiallySupported" to the snake_case
// "partially_supported"; every other value is returned unchanged.
function canonicalVerifierStatus(status) {
  if (status === "partiallySupported") return "partially_supported";
  return status;
}
|
|
266
|
+
|
|
267
|
+
// Picks the most pessimistic status from a list of verifier statuses.
//
// Precedence (worst first): "conflicting", "unsupported",
// "partially_supported", "unverified". "verified" is returned only when the
// list is non-empty and every entry is "verified". If none of the known
// pessimistic statuses is present but some entry is not "verified", the first
// non-"verified" string status is returned, or "unverified" when the odd
// entries are not usable strings.
//
// Two non-conservative outcomes are avoided here:
//   - an empty list is "unverified" (`every` on an empty array is vacuously
//     true and would otherwise report "verified");
//   - a "verified" entry next to a missing or unrecognized status can no
//     longer win the fallback and be reported as "verified".
function conservativeVerifierStatus(statuses) {
  const normalized = statuses.map((status) => canonicalVerifierStatus(status));
  if (normalized.length === 0) return "unverified";

  const worstFirst = ["conflicting", "unsupported", "partially_supported", "unverified"];
  const worst = worstFirst.find((status) => normalized.includes(status));
  if (worst) return worst;

  if (normalized.every((status) => status === "verified")) return "verified";

  const unrecognized = normalized.find(
    (status) => typeof status === "string" && status && status !== "verified",
  );
  return unrecognized ?? "unverified";
}
|
|
280
|
+
|
|
281
|
+
// Builds the diagnostic record for a verifier row that cannot be counted.
// `index` is included only when it is an integer and `claimId` only when
// truthy. `status` is the row's current verdict (via verdictOf, defined
// elsewhere in this file). The remediation text depends on whether the reason
// is "unknown_claim_id" or anything else.
function issueForVerifierRow({ sourceId, claim, reason, claimId, index }) {
  const issue = { sourceId };
  if (Number.isInteger(index)) issue.index = index;
  if (claimId) issue.claimId = claimId;
  issue.reason = reason;
  issue.status = verdictOf(claim);
  if (reason === "unknown_claim_id") {
    issue.nextStep =
      "Verify-claims output did not match any normalized verification candidate; quarantine it from claim counts.";
  } else {
    issue.nextStep =
      "Verifier output is missing a usable string id/claimId; rerun or repair the verifier row before counting it.";
  }
  return issue;
}
|
|
294
|
+
|
|
295
|
+
// Converts a verifier-row issue into a remaining-gap entry. The issue's
// reason is reported both as `evidenceState` and as `reason`; `claimId` is
// carried over only when the issue has a truthy one.
function gapForVerifierIssue(issue) {
  const { claimId, reason, nextStep } = issue;
  const gap = {};
  if (claimId) gap.claimId = claimId;
  gap.evidenceState = reason;
  gap.reason = reason;
  gap.nextStep = nextStep;
  return gap;
}
|
|
303
|
+
|
|
304
|
+
// Collapses the verifier rows that share one claim id into a single row.
//
// A single row is passed through untouched with `duplicate: null`. For several
// rows:
//   - the status is chosen by conservativeVerifierStatus over every row's
//     verdict;
//   - the base claim is a shallow copy of the first row whose canonical
//     verdict equals that status (falling back to the first row);
//   - evidence arrays from all rows are concatenated, and sourceRefs /
//     sourceUrls / factSlotIds are unioned as de-duplicated strings; each is
//     written only when non-empty;
//   - status, verdict and verdictDigest are overwritten with the chosen
//     status plus a `duplicateVerifierRows` audit record.
// `rows` is expected to be non-empty; callers in this file only pass
// non-empty lists.
function mergeVerifierRows(rows) {
  const first = rows[0];
  if (rows.length === 1) {
    return { sourceId: first.sourceId, claim: first.claim, duplicate: null };
  }

  const sourceIds = rows.map(({ sourceId }) => sourceId);
  const statusInputs = rows.map(({ claim }) => verdictOf(claim));
  const selectedStatus = conservativeVerifierStatus(statusInputs);
  const selectedRow =
    rows.find(({ claim }) => canonicalVerifierStatus(verdictOf(claim)) === selectedStatus) ??
    first;

  const merged = { ...selectedRow.claim };

  const evidence = rows.flatMap(({ claim }) =>
    Array.isArray(claim?.evidence) ? claim.evidence : [],
  );
  if (evidence.length > 0) merged.evidence = evidence;

  for (const field of ["sourceRefs", "sourceUrls", "factSlotIds"]) {
    const combined = compactStrings(rows.flatMap(({ claim }) => claim?.[field] ?? []));
    if (combined.length > 0) merged[field] = combined;
  }

  const rowCount = rows.length;
  merged.status = selectedStatus;
  merged.verdict = selectedStatus;
  merged.verdictDigest = {
    ...(merged.verdictDigest ?? {}),
    status: selectedStatus,
    verdict: selectedStatus,
    duplicateVerifierRows: { rowCount, sourceIds, statusInputs, selectedStatus },
  };

  return {
    sourceId: selectedRow.sourceId,
    claim: merged,
    duplicate: {
      claimId: first.claimId,
      rowCount,
      sourceIds,
      statusInputs,
      selectedStatus,
      action: "merged_evidence_and_selected_conservative_status",
    },
  };
}
|
|
348
|
+
|
|
125
349
|
const STATUS_BUCKETS = {
|
|
126
350
|
verified: "verified",
|
|
127
351
|
partially_supported: "partiallySupported",
|
|
@@ -139,17 +363,37 @@ function findSource(sources, stageId) {
|
|
|
139
363
|
export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
140
364
|
const plan = findSource(sources, "plan");
|
|
141
365
|
const normalized = findSource(sources, "normalize-claims");
|
|
366
|
+
const normalizeInputPacket = findSource(sources, "normalize-input-packet");
|
|
367
|
+
const urlToSourceRef = buildUrlSourceRefLookup(normalizeInputPacket);
|
|
368
|
+
const candidateRecords = [];
|
|
142
369
|
const candidatesById = new Map();
|
|
143
|
-
|
|
370
|
+
const invalidNormalizedCandidates = [];
|
|
371
|
+
for (const [index, candidate] of asArray(
|
|
144
372
|
normalized?.claimInventory?.verificationCandidates,
|
|
145
|
-
)) {
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
373
|
+
).entries()) {
|
|
374
|
+
const idCheck = claimIdOf(candidate);
|
|
375
|
+
if (!idCheck.id) {
|
|
376
|
+
invalidNormalizedCandidates.push({
|
|
377
|
+
index,
|
|
378
|
+
reason: idCheck.reason,
|
|
379
|
+
nextStep:
|
|
380
|
+
"normalize-claims emitted a verification candidate without a usable string id; it cannot be deterministically joined.",
|
|
381
|
+
});
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
if (candidatesById.has(idCheck.id)) {
|
|
385
|
+
invalidNormalizedCandidates.push({
|
|
386
|
+
index,
|
|
387
|
+
claimId: idCheck.id,
|
|
388
|
+
reason: "duplicate_normalized_candidate_id",
|
|
389
|
+
nextStep:
|
|
390
|
+
"normalize-claims emitted duplicate candidate ids; only the first candidate is canonical for verifier joins.",
|
|
391
|
+
});
|
|
392
|
+
continue;
|
|
152
393
|
}
|
|
394
|
+
const normalizedCandidate = { ...candidate, id: idCheck.id };
|
|
395
|
+
candidateRecords.push(normalizedCandidate);
|
|
396
|
+
candidatesById.set(idCheck.id, normalizedCandidate);
|
|
153
397
|
}
|
|
154
398
|
|
|
155
399
|
const claims = Object.entries(sources ?? {})
|
|
@@ -158,7 +402,7 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
158
402
|
specId === "verify-claims" || specId.startsWith("verify-claims."),
|
|
159
403
|
)
|
|
160
404
|
.flatMap(([sourceId, source]) =>
|
|
161
|
-
asArray(source).map((claim) => ({ sourceId, claim })),
|
|
405
|
+
asArray(source).map((claim, index) => ({ sourceId, claim, index })),
|
|
162
406
|
);
|
|
163
407
|
// Legacy layout: when no verify-claims.* source ids exist (for example a
|
|
164
408
|
// single from: string dependency), fall back to every non-plan/non-normalize
|
|
@@ -170,40 +414,106 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
170
414
|
.filter(
|
|
171
415
|
([specId]) =>
|
|
172
416
|
!specId.startsWith("plan") &&
|
|
173
|
-
!specId.startsWith("normalize-claims")
|
|
417
|
+
!specId.startsWith("normalize-claims") &&
|
|
418
|
+
!specId.startsWith("normalize-input-packet"),
|
|
174
419
|
)
|
|
175
420
|
.flatMap(([sourceId, source]) =>
|
|
176
|
-
asArray(source).map((claim) => ({ sourceId, claim })),
|
|
421
|
+
asArray(source).map((claim, index) => ({ sourceId, claim, index })),
|
|
177
422
|
);
|
|
178
423
|
|
|
179
424
|
const auditedClaims = [];
|
|
180
425
|
const remainingGaps = [];
|
|
181
426
|
const identityJoinNotes = [];
|
|
427
|
+
const sourceRefJoinFailures = [];
|
|
428
|
+
const invalidVerifierRows = [];
|
|
429
|
+
const duplicateVerifierRows = [];
|
|
182
430
|
const gateSummary = {
|
|
183
431
|
total: 0,
|
|
184
432
|
unchanged: 0,
|
|
185
433
|
downgraded: 0,
|
|
186
434
|
identityRejoined: 0,
|
|
435
|
+
sourceRefsRejoined: 0,
|
|
436
|
+
sourceRefsBackfilledFromUrls: 0,
|
|
437
|
+
sourceRefJoinFailures: 0,
|
|
438
|
+
verifierRowsTotal: verifierClaims.length,
|
|
439
|
+
validVerifierRows: 0,
|
|
440
|
+
invalidVerifierRows: 0,
|
|
441
|
+
missingVerifierResults: 0,
|
|
442
|
+
duplicateVerifierClaims: 0,
|
|
443
|
+
duplicateVerifierRows: 0,
|
|
444
|
+
duplicateStatusConflicts: 0,
|
|
445
|
+
invalidNormalizedCandidates: invalidNormalizedCandidates.length,
|
|
187
446
|
};
|
|
447
|
+
const verifierRowsById = new Map();
|
|
448
|
+
const legacyVerifierRows = [];
|
|
449
|
+
for (const { sourceId, claim, index } of verifierClaims) {
|
|
450
|
+
const idCheck = claimIdOf(claim);
|
|
451
|
+
if (!idCheck.id) {
|
|
452
|
+
const issue = issueForVerifierRow({
|
|
453
|
+
sourceId,
|
|
454
|
+
claim,
|
|
455
|
+
index,
|
|
456
|
+
reason: idCheck.reason,
|
|
457
|
+
});
|
|
458
|
+
invalidVerifierRows.push(issue);
|
|
459
|
+
remainingGaps.push(gapForVerifierIssue(issue));
|
|
460
|
+
gateSummary.invalidVerifierRows += 1;
|
|
461
|
+
continue;
|
|
462
|
+
}
|
|
463
|
+
if (candidateRecords.length > 0 && !candidatesById.has(idCheck.id)) {
|
|
464
|
+
const issue = issueForVerifierRow({
|
|
465
|
+
sourceId,
|
|
466
|
+
claim,
|
|
467
|
+
index,
|
|
468
|
+
claimId: idCheck.id,
|
|
469
|
+
reason: "unknown_claim_id",
|
|
470
|
+
});
|
|
471
|
+
invalidVerifierRows.push(issue);
|
|
472
|
+
remainingGaps.push(gapForVerifierIssue(issue));
|
|
473
|
+
gateSummary.invalidVerifierRows += 1;
|
|
474
|
+
continue;
|
|
475
|
+
}
|
|
476
|
+
const row = {
|
|
477
|
+
sourceId,
|
|
478
|
+
claimId: idCheck.id,
|
|
479
|
+
claim: { ...claim, [idCheck.field ?? "id"]: idCheck.id },
|
|
480
|
+
};
|
|
481
|
+
gateSummary.validVerifierRows += 1;
|
|
482
|
+
if (candidateRecords.length > 0) {
|
|
483
|
+
const rows = verifierRowsById.get(idCheck.id) ?? [];
|
|
484
|
+
rows.push(row);
|
|
485
|
+
verifierRowsById.set(idCheck.id, rows);
|
|
486
|
+
} else {
|
|
487
|
+
legacyVerifierRows.push(row);
|
|
488
|
+
}
|
|
489
|
+
}
|
|
188
490
|
|
|
189
|
-
|
|
190
|
-
if (!claim || typeof claim !== "object")
|
|
491
|
+
function auditClaim({ sourceId, claim, candidate, claimId, missingVerifierResult = false }) {
|
|
492
|
+
if (!claim || typeof claim !== "object") return;
|
|
191
493
|
gateSummary.total += 1;
|
|
192
|
-
const
|
|
494
|
+
const evidenceRefs = [...collectEvidenceRefs(claim)];
|
|
495
|
+
const workflowSourceRefs = new Set([...collectWorkflowSourceRefs(claim)]);
|
|
193
496
|
const exactQuantitative = hasExactQuantitativeClaim(claim);
|
|
194
497
|
const fetched = hasFetchedEvidence(claim);
|
|
195
|
-
let next = {
|
|
498
|
+
let next = {
|
|
499
|
+
...claim,
|
|
500
|
+
...(claimId ? { id: claimId } : {}),
|
|
501
|
+
...(sourceId ? { sourceId } : {}),
|
|
502
|
+
sourceUrls: evidenceRefs,
|
|
503
|
+
evidenceRefs,
|
|
504
|
+
};
|
|
505
|
+
if (missingVerifierResult) {
|
|
506
|
+
next = withVerdict(
|
|
507
|
+
next,
|
|
508
|
+
"unverified",
|
|
509
|
+
"normalized verification candidate had no verifier result",
|
|
510
|
+
);
|
|
511
|
+
}
|
|
196
512
|
|
|
197
513
|
// Identity join: the normalizer's candidate record is authoritative for
|
|
198
514
|
// claim id, claim text, and factSlotIds. Verifier echoes drift.
|
|
199
|
-
const claimId =
|
|
200
|
-
typeof next.id === "string"
|
|
201
|
-
? next.id
|
|
202
|
-
: typeof next.claimId === "string"
|
|
203
|
-
? next.claimId
|
|
204
|
-
: null;
|
|
205
|
-
const candidate = claimId ? candidatesById.get(claimId) : null;
|
|
206
515
|
if (candidate) {
|
|
516
|
+
if (claimId) next.id = claimId;
|
|
207
517
|
if (
|
|
208
518
|
typeof candidate.claim === "string" &&
|
|
209
519
|
candidate.claim &&
|
|
@@ -218,30 +528,81 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
218
528
|
}
|
|
219
529
|
if (Array.isArray(candidate.factSlotIds))
|
|
220
530
|
next.factSlotIds = [...candidate.factSlotIds];
|
|
531
|
+
const beforeSourceRefCount = workflowSourceRefs.size;
|
|
532
|
+
for (const sourceRef of collectWorkflowSourceRefs(candidate))
|
|
533
|
+
workflowSourceRefs.add(sourceRef);
|
|
534
|
+
if (workflowSourceRefs.size > beforeSourceRefCount)
|
|
535
|
+
gateSummary.sourceRefsRejoined += 1;
|
|
536
|
+
}
|
|
537
|
+
const beforeUrlBackfillSourceRefCount = workflowSourceRefs.size;
|
|
538
|
+
for (const sourceRef of sourceRefsForUrls(
|
|
539
|
+
[
|
|
540
|
+
...sourceUrlArray(candidate?.sourceUrls),
|
|
541
|
+
...evidenceRefs.filter((ref) => /^https?:\/\//i.test(ref)),
|
|
542
|
+
],
|
|
543
|
+
urlToSourceRef,
|
|
544
|
+
))
|
|
545
|
+
workflowSourceRefs.add(sourceRef);
|
|
546
|
+
if (workflowSourceRefs.size > beforeUrlBackfillSourceRefCount) {
|
|
547
|
+
gateSummary.sourceRefsRejoined += 1;
|
|
548
|
+
gateSummary.sourceRefsBackfilledFromUrls +=
|
|
549
|
+
workflowSourceRefs.size - beforeUrlBackfillSourceRefCount;
|
|
550
|
+
}
|
|
551
|
+
if (workflowSourceRefs.size > 0) next.sourceRefs = [...workflowSourceRefs];
|
|
552
|
+
if (
|
|
553
|
+
claimId &&
|
|
554
|
+
candidate &&
|
|
555
|
+
workflowSourceRefs.size === 0 &&
|
|
556
|
+
(sourceUrlArray(candidate.sourceUrls).length > 0 ||
|
|
557
|
+
evidenceRefs.some((ref) => /^https?:\/\//i.test(ref)))
|
|
558
|
+
) {
|
|
559
|
+
const failure = {
|
|
560
|
+
claimId,
|
|
561
|
+
evidenceState: "source_ref_not_available",
|
|
562
|
+
sourceUrls: [
|
|
563
|
+
...new Set([
|
|
564
|
+
...sourceUrlArray(candidate?.sourceUrls),
|
|
565
|
+
...evidenceRefs.filter((ref) => /^https?:\/\//i.test(ref)),
|
|
566
|
+
]),
|
|
567
|
+
],
|
|
568
|
+
nextStep:
|
|
569
|
+
"Preserve sourceRefs from workflow_web_fetch_source through research and normalization when available.",
|
|
570
|
+
};
|
|
571
|
+
sourceRefJoinFailures.push(failure);
|
|
572
|
+
gateSummary.sourceRefJoinFailures += 1;
|
|
221
573
|
}
|
|
222
574
|
|
|
223
575
|
const verdict = verdictOf(next);
|
|
576
|
+
const exactQuantitativeForGate = exactQuantitative || hasExactQuantitativeClaim(next);
|
|
224
577
|
if (
|
|
225
578
|
verdict === "verified" &&
|
|
226
579
|
options.requireFetchedEvidenceForVerified !== false &&
|
|
227
580
|
!fetched
|
|
228
581
|
) {
|
|
582
|
+
const reasonCode =
|
|
583
|
+
options.downgradeExactQuantitativeWithoutSource !== false &&
|
|
584
|
+
exactQuantitativeForGate &&
|
|
585
|
+
evidenceRefs.length === 0
|
|
586
|
+
? "exact_quantitative_without_source_reference"
|
|
587
|
+
: strongEvidenceIssue(next);
|
|
229
588
|
next = withVerdict(
|
|
230
589
|
next,
|
|
231
590
|
"partially_supported",
|
|
232
591
|
"verified claim lacked structured evidence rows with both source reference and quote",
|
|
592
|
+
{ reasonCode },
|
|
233
593
|
);
|
|
234
594
|
}
|
|
235
595
|
if (
|
|
236
596
|
verdictOf(next) === "verified" &&
|
|
237
597
|
options.downgradeExactQuantitativeWithoutSource !== false &&
|
|
238
598
|
exactQuantitative &&
|
|
239
|
-
|
|
599
|
+
evidenceRefs.length === 0
|
|
240
600
|
) {
|
|
241
601
|
next = withVerdict(
|
|
242
602
|
next,
|
|
243
603
|
"partially_supported",
|
|
244
604
|
"exact quantitative claim lacked structured source reference evidence",
|
|
605
|
+
{ reasonCode: "exact_quantitative_without_source_reference" },
|
|
245
606
|
);
|
|
246
607
|
}
|
|
247
608
|
|
|
@@ -249,8 +610,9 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
249
610
|
gateSummary.downgraded += 1;
|
|
250
611
|
remainingGaps.push({
|
|
251
612
|
claimId: next.id ?? next.claimId,
|
|
252
|
-
evidenceState: "insufficient_for_verified",
|
|
253
|
-
|
|
613
|
+
evidenceState: next.evidenceGate?.reasonCode ?? "insufficient_for_verified",
|
|
614
|
+
reason: next.evidenceGate?.reason,
|
|
615
|
+
sourceUrls: evidenceRefs,
|
|
254
616
|
nextStep:
|
|
255
617
|
"Fetch or inspect primary source evidence for the exact claim before using it as verified.",
|
|
256
618
|
});
|
|
@@ -260,6 +622,71 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
260
622
|
auditedClaims.push(next);
|
|
261
623
|
}
|
|
262
624
|
|
|
625
|
+
if (candidateRecords.length > 0) {
|
|
626
|
+
for (const candidate of candidateRecords) {
|
|
627
|
+
const rows = verifierRowsById.get(candidate.id) ?? [];
|
|
628
|
+
if (rows.length === 0) {
|
|
629
|
+
gateSummary.missingVerifierResults += 1;
|
|
630
|
+
remainingGaps.push({
|
|
631
|
+
claimId: candidate.id,
|
|
632
|
+
evidenceState: "missing_verifier_result",
|
|
633
|
+
reason: "normalized verification candidate had no verifier result",
|
|
634
|
+
sourceUrls: sourceUrlArray(candidate.sourceUrls),
|
|
635
|
+
relatedFactSlotIds: Array.isArray(candidate.factSlotIds)
|
|
636
|
+
? [...candidate.factSlotIds]
|
|
637
|
+
: [],
|
|
638
|
+
nextStep:
|
|
639
|
+
"Run or repair the verifier for this normalized candidate before treating the claim as supported.",
|
|
640
|
+
});
|
|
641
|
+
auditClaim({
|
|
642
|
+
sourceId: null,
|
|
643
|
+
claim: candidate,
|
|
644
|
+
candidate,
|
|
645
|
+
claimId: candidate.id,
|
|
646
|
+
missingVerifierResult: true,
|
|
647
|
+
});
|
|
648
|
+
continue;
|
|
649
|
+
}
|
|
650
|
+
const merged = mergeVerifierRows(rows);
|
|
651
|
+
if (merged.duplicate) {
|
|
652
|
+
const statuses = merged.duplicate.statusInputs.map((status) =>
|
|
653
|
+
status === "partiallySupported" ? "partially_supported" : status,
|
|
654
|
+
);
|
|
655
|
+
const hasStatusConflict = new Set(statuses).size > 1;
|
|
656
|
+
const duplicate = { ...merged.duplicate, statusConflict: hasStatusConflict };
|
|
657
|
+
duplicateVerifierRows.push(duplicate);
|
|
658
|
+
gateSummary.duplicateVerifierClaims += 1;
|
|
659
|
+
gateSummary.duplicateVerifierRows += rows.length - 1;
|
|
660
|
+
if (hasStatusConflict) {
|
|
661
|
+
gateSummary.duplicateStatusConflicts += 1;
|
|
662
|
+
remainingGaps.push({
|
|
663
|
+
claimId: candidate.id,
|
|
664
|
+
evidenceState: "duplicate_verifier_rows_conflicting",
|
|
665
|
+
reason:
|
|
666
|
+
"multiple verifier rows for the same normalized candidate disagreed; the gate selected a conservative status",
|
|
667
|
+
nextStep:
|
|
668
|
+
"Inspect duplicate verify-claims outputs before using this claim as a hard decision threshold.",
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
auditClaim({
|
|
673
|
+
sourceId: merged.sourceId,
|
|
674
|
+
claim: merged.claim,
|
|
675
|
+
candidate,
|
|
676
|
+
claimId: candidate.id,
|
|
677
|
+
});
|
|
678
|
+
}
|
|
679
|
+
} else {
|
|
680
|
+
for (const row of legacyVerifierRows) {
|
|
681
|
+
auditClaim({
|
|
682
|
+
sourceId: row.sourceId,
|
|
683
|
+
claim: row.claim,
|
|
684
|
+
candidate: null,
|
|
685
|
+
claimId: row.claimId,
|
|
686
|
+
});
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
|
|
263
690
|
// Deterministic status partition + counts for the synthesis stage.
|
|
264
691
|
const statusPartitions = {
|
|
265
692
|
verified: [],
|
|
@@ -314,6 +741,8 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
314
741
|
factSlotIds: claim.factSlotIds,
|
|
315
742
|
status: verdictOf(claim),
|
|
316
743
|
confidence: claim.confidence,
|
|
744
|
+
sourceRefs: claim.sourceRefs,
|
|
745
|
+
sourceUrls: claim.sourceUrls,
|
|
317
746
|
verdictDigest: claim.verdictDigest,
|
|
318
747
|
correctionOrCounterclaim: claim.correctionOrCounterclaim,
|
|
319
748
|
}));
|
|
@@ -323,6 +752,10 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
323
752
|
claimDigests,
|
|
324
753
|
gateSummary,
|
|
325
754
|
remainingGaps,
|
|
755
|
+
sourceRefJoinFailures,
|
|
756
|
+
invalidVerifierRows,
|
|
757
|
+
duplicateVerifierRows,
|
|
758
|
+
invalidNormalizedCandidates,
|
|
326
759
|
statusPartitions,
|
|
327
760
|
verdictCounts,
|
|
328
761
|
slotCoverageCheck: {
|
|
@@ -331,5 +764,6 @@ export default async function claimEvidenceGate({ sources, options = {} }) {
|
|
|
331
764
|
droppedSlotIds,
|
|
332
765
|
},
|
|
333
766
|
identityJoinNotes,
|
|
767
|
+
precisionGuardDiagnostics: normalizeInputPacket?.packet?.precisionGuard?.summary,
|
|
334
768
|
};
|
|
335
769
|
}
|