@agwab/pi-workflow 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +14 -3
  2. package/agents/researcher.md +17 -7
  3. package/dist/artifact-graph-runtime.js +1 -0
  4. package/dist/compiler.js +2 -2
  5. package/dist/dynamic-generated-task-runtime.js +4 -3
  6. package/dist/dynamic-runtime-bundle.js +3 -2
  7. package/dist/extension.js +40 -1
  8. package/dist/subagent-backend.js +82 -27
  9. package/dist/tool-metadata.d.ts +1 -0
  10. package/dist/tool-metadata.js +13 -1
  11. package/dist/workflow-artifact-extension.js +3 -2
  12. package/dist/workflow-artifact-tool.js +84 -4
  13. package/dist/workflow-web-source-extension.d.ts +43 -0
  14. package/dist/workflow-web-source-extension.js +1194 -0
  15. package/dist/workflow-web-source.d.ts +171 -0
  16. package/dist/workflow-web-source.js +897 -0
  17. package/docs/usage.md +32 -18
  18. package/node_modules/@agwab/pi-subagent/package.json +1 -1
  19. package/node_modules/@agwab/pi-subagent/src/api.ts +245 -132
  20. package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +243 -163
  21. package/node_modules/@agwab/pi-subagent/src/core/constants.ts +117 -90
  22. package/node_modules/@agwab/pi-subagent/src/core/validation.ts +728 -475
  23. package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +305 -209
  24. package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +750 -439
  25. package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +422 -268
  26. package/package.json +2 -2
  27. package/skills/workflow-guide/scaffolds/object-tool-fallback/schemas/fetch-control.schema.json +1 -1
  28. package/skills/workflow-guide/scaffolds/object-tool-fallback/spec.json +4 -3
  29. package/src/artifact-graph-runtime.ts +1 -0
  30. package/src/compiler.ts +2 -1
  31. package/src/dynamic-generated-task-runtime.ts +4 -2
  32. package/src/dynamic-runtime-bundle.ts +3 -2
  33. package/src/extension.ts +46 -1
  34. package/src/subagent-backend.ts +121 -37
  35. package/src/tool-metadata.ts +22 -1
  36. package/src/workflow-artifact-extension.ts +3 -2
  37. package/src/workflow-artifact-tool.ts +96 -4
  38. package/src/workflow-web-source-extension.ts +1411 -0
  39. package/src/workflow-web-source.ts +1171 -0
  40. package/workflows/README.md +1 -1
  41. package/workflows/deep-research/helpers/claim-evidence-gate.mjs +474 -40
  42. package/workflows/deep-research/helpers/final-audit-packet.mjs +219 -0
  43. package/workflows/deep-research/helpers/normalize-input-packet.mjs +436 -0
  44. package/workflows/deep-research/helpers/render-executive.mjs +571 -198
  45. package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +35 -8
  46. package/workflows/deep-research/schemas/deep-research-normalize-claims-control.schema.json +45 -4
  47. package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +0 -2
  48. package/workflows/deep-research/spec.json +36 -21
  49. package/workflows/deep-review/helpers/render-review-report.mjs +502 -0
  50. package/workflows/deep-review/schemas/deep-review-render-control.schema.json +50 -0
  51. package/workflows/deep-review/spec.json +22 -1
@@ -0,0 +1,897 @@
1
+ import { createHash } from "node:crypto";
2
+ import { appendFile, mkdir, readFile, readdir, rename, writeFile } from "node:fs/promises";
3
+ import { isIP } from "node:net";
4
+ import { dirname, resolve } from "node:path";
5
/** Schema tag written into, and required when reading, each cached source object. */
export const WORKFLOW_WEB_SOURCE_CACHE_SCHEMA = "workflow-web-source-cache-v1";
/** Schema tag of the consolidated `index.json` file. */
export const WORKFLOW_WEB_SOURCE_INDEX_SCHEMA = "workflow-web-source-index-v1";
/** Schema tag of each line appended to the `index-events.jsonl` ledger. */
export const WORKFLOW_WEB_SOURCE_INDEX_EVENT_SCHEMA = "workflow-web-source-index-event-v1";
/** Schema tag of each line appended to `events.jsonl`. */
export const WORKFLOW_WEB_SOURCE_EVENT_SCHEMA = "workflow-web-source-event-v1";
/** Tool names recognised by `isWorkflowWebSourceTool`. */
export const WORKFLOW_WEB_SOURCE_TOOLS = [
    "workflow_web_search",
    "workflow_web_fetch_source",
    "workflow_web_source_read",
];
/**
 * Default character limits for model-visible output.
 * `previewChars` / `duplicatePreviewChars` cap source-card previews and
 * `searchSnippetChars` caps search-candidate snippets in this file.
 * NOTE(review): `sourceReadMaxChars` and `perTaskVisibleCharBudget` are not
 * consumed in this module — presumably applied by the tool extension; confirm.
 */
export const DEFAULT_WORKFLOW_WEB_SOURCE_POLICY = {
    previewChars: 800,
    duplicatePreviewChars: 160,
    sourceReadMaxChars: 1_200,
    searchSnippetChars: 240,
    perTaskVisibleCharBudget: 12_000,
};
/**
 * Default security switches. `allowPrivateHosts: false` makes
 * `validateWorkflowWebUrl` reject private/loopback hosts.
 * NOTE(review): `cacheRawProviderPayloads` is not read in this module — confirm its consumer.
 */
export const DEFAULT_WORKFLOW_WEB_SECURITY_POLICY = {
    allowPrivateHosts: false,
    cacheRawProviderPayloads: false,
};
// Matches parameter NAMES (not values) that look credential-bearing; the keyword
// must be a whole `-`/`_`-delimited segment of the name, case-insensitively.
const SENSITIVE_QUERY_PARAM_PATTERN = /(^|[-_])(access[-_]?token|auth|code|credential|key|password|secret|session|signature|sig|token)([-_]|$)/i;
// Hostname patterns treated as private by `isPrivateHostname`, in addition to
// whatever `nonPublicIpReason` reports for IP literals.
const PRIVATE_HOST_PATTERNS = [
    /^localhost$/i,
    /^127\./,
    /^0\./,
    /^10\./,
    /^192\.168\./,
    /^169\.254\./,
    /^metadata\.google\.internal$/i,
];
35
/**
 * Overlay caller-supplied policy fields on the default source policy.
 *
 * @param {object} [policy] - Partial policy; `null`/`undefined` means "no overrides".
 * @returns {object} A new object: defaults first, then the overrides.
 */
export function normalizeWorkflowWebSourcePolicy(policy) {
    const overrides = policy ?? {};
    const merged = { ...DEFAULT_WORKFLOW_WEB_SOURCE_POLICY, ...overrides };
    return merged;
}
41
/**
 * Overlay caller-supplied security fields on the default security policy.
 *
 * @param {object} [policy] - Partial policy; `null`/`undefined` means "no overrides".
 * @returns {object} A new object: defaults first, then the overrides.
 */
export function normalizeWorkflowWebSecurityPolicy(policy) {
    const overrides = policy ?? {};
    const merged = { ...DEFAULT_WORKFLOW_WEB_SECURITY_POLICY, ...overrides };
    return merged;
}
47
/**
 * Report whether `tool` is one of the names listed in WORKFLOW_WEB_SOURCE_TOOLS.
 *
 * @param {unknown} tool - Candidate tool name.
 * @returns {boolean}
 */
export function isWorkflowWebSourceTool(tool) {
    return WORKFLOW_WEB_SOURCE_TOOLS.some((knownTool) => knownTool === tool);
}
50
/**
 * Create a fresh visible-character budget.
 *
 * @param {number} limit - Maximum number of characters that may be shown.
 *   Fractions are floored and negatives clamp to 0. A value that is not a
 *   number (NaN, `undefined`, non-numeric input) is treated as 0 rather than
 *   producing a NaN budget, which would make later consumption return empty
 *   text while reporting `truncated: false`.
 * @returns {{ limit: number, used: number }} Budget with nothing consumed yet.
 */
export function createWorkflowWebVisibleBudget(limit) {
    const numericLimit = Number(limit);
    const wholeLimit = Number.isNaN(numericLimit) ? 0 : Math.floor(numericLimit);
    return { limit: Math.max(0, wholeLimit), used: 0 };
}
53
/**
 * Spend part of a visible-character budget on `text`.
 *
 * The visible portion is the prefix of `text` no longer than `maxChars` and no
 * longer than what is left in the budget. `budget.used` is increased in place.
 *
 * @param {{ limit: number, used: number }} budget - Mutated: `used` grows.
 * @param {string} text - Text to expose.
 * @param {number} maxChars - Per-call cap.
 * @returns {{ text: string, truncated: boolean, remaining: number, used: number }}
 */
export function consumeWorkflowWebVisibleBudget(budget, text, maxChars) {
    const available = Math.max(0, budget.limit - budget.used);
    const cap = Math.max(0, Math.min(maxChars, available));
    const shown = text.slice(0, cap);
    budget.used += shown.length;
    return {
        text: shown,
        truncated: text.length > cap,
        remaining: Math.max(0, budget.limit - budget.used),
        used: budget.used,
    };
}
66
/**
 * Validate a URL before it is fetched or cached.
 *
 * @param {string} url - Candidate URL.
 * @param {{ allowPrivateHosts?: boolean }} [security] - Defaults to the module's default security policy.
 * @returns {{ ok: false, reason: string } | { ok: true, normalizedUrl: string, domain: string }}
 *   `reason` is `"invalid_url"`, `"unsafe_scheme"` (anything other than
 *   http/https) or `"private_host_blocked"`.
 */
export function validateWorkflowWebUrl(url, security = DEFAULT_WORKFLOW_WEB_SECURITY_POLICY) {
    const rejected = (reason) => ({ ok: false, reason });
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return rejected("invalid_url");
    }
    const isHttpFamily = target.protocol === "http:" || target.protocol === "https:";
    if (!isHttpFamily) {
        return rejected("unsafe_scheme");
    }
    // IPv6 literals come back bracketed from URL#hostname; strip the brackets.
    const domain = target.hostname.toLowerCase().replace(/^\[|\]$/g, "");
    if (!security.allowPrivateHosts && isPrivateHostname(domain)) {
        return rejected("private_host_blocked");
    }
    return { ok: true, normalizedUrl: target.href, domain };
}
83
/**
 * Produce a model-safe rendering of `url`.
 *
 * Parseable URLs go through `sanitizeParsedUrlForModel`; anything that fails
 * to parse is passed through `redactInlineSecrets` as plain text.
 *
 * @param {string} url
 * @returns {string}
 */
export function sanitizeUrlForModel(url) {
    let parsedUrl = null;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        parsedUrl = null;
    }
    if (parsedUrl === null) {
        return redactInlineSecrets(url);
    }
    return sanitizeParsedUrlForModel(parsedUrl);
}
93
/**
 * Strip credentials from an already-parsed URL and return its string form.
 *
 * Mutates `parsed`: clears userinfo, replaces the value of every query
 * parameter whose name matches SENSITIVE_QUERY_PARAM_PATTERN with "REDACTED",
 * and rewrites the fragment via `redactUrlFragment`.
 *
 * @param {URL} parsed - Mutated in place.
 * @returns {string} The resulting href after `redactInlineSecretsNoUrls`.
 */
function sanitizeParsedUrlForModel(parsed) {
    parsed.username = "";
    parsed.password = "";
    // Snapshot the names first: `set` rewrites the list being iterated.
    const paramNames = Array.from(parsed.searchParams.keys());
    paramNames.forEach((name) => {
        if (SENSITIVE_QUERY_PARAM_PATTERN.test(name)) {
            parsed.searchParams.set(name, "REDACTED");
        }
    });
    parsed.hash = redactUrlFragment(parsed.hash);
    return redactInlineSecretsNoUrls(parsed.href);
}
104
/**
 * Derive the content-addressed source reference for a (url, text) pair.
 *
 * @param {string} url
 * @param {string} text
 * @returns {string} `wsrc_` followed by the first 32 hex chars of the hash of
 *   the URL cache key, a NUL separator, and the text.
 */
export function sourceRefFor(url, text) {
    const fingerprintInput = `${sourceUrlCacheKey(url)}\0${text}`;
    const digest = hashString(fingerprintInput);
    return `wsrc_${digest.slice(0, 32)}`;
}
107
/**
 * Derive the opaque cache key for a URL from its canonical form.
 *
 * @param {string} url
 * @returns {string} `urlkey_` followed by the first 32 hex chars of the hash.
 */
export function sourceUrlCacheKey(url) {
    const canonical = canonicalUrlForCache(url);
    const digest = hashString(canonical);
    return `urlkey_${digest.slice(0, 32)}`;
}
110
/**
 * Canonicalise the REDACTED form of a URL for display-level comparison.
 *
 * Applies the same normalisation steps as `canonicalUrlForCache` (fragment
 * filtering, lower-cased host, trailing-slash removal, sorted query) but to
 * the output of `sanitizeUrlForModel`.
 *
 * @param {string} url
 * @returns {string} Canonical href, or the trimmed sanitized text when it does not parse.
 */
function sourceUrlDisplayCacheKey(url) {
    let target;
    try {
        target = new URL(sanitizeUrlForModel(url));
    }
    catch {
        return sanitizeUrlForModel(url).trim();
    }
    if (!shouldKeepFragmentForCache(target.hash)) {
        target.hash = "";
    }
    target.hostname = target.hostname.toLowerCase();
    const path = target.pathname;
    if (path.length > 1 && path.endsWith("/")) {
        target.pathname = path.slice(0, -1);
    }
    // Rebuild the query with parameters ordered by name.
    const orderedPairs = Array.from(target.searchParams.entries())
        .sort((a, b) => a[0].localeCompare(b[0]));
    target.search = "";
    orderedPairs.forEach(([name, value]) => {
        target.searchParams.append(name, value);
    });
    return target.href;
}
130
/**
 * Canonicalise a raw URL so equivalent spellings share one cache key.
 *
 * Lower-cases the host, drops the fragment unless `shouldKeepFragmentForCache`
 * keeps it, removes one trailing slash from non-root paths, and orders query
 * parameters by name.
 *
 * @param {string} url
 * @returns {string} Canonical href, or the trimmed input when it does not parse.
 */
function canonicalUrlForCache(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return url.trim();
    }
    target.hostname = target.hostname.toLowerCase();
    if (!shouldKeepFragmentForCache(target.hash)) {
        target.hash = "";
    }
    const path = target.pathname;
    if (path.length > 1 && path.endsWith("/")) {
        target.pathname = path.slice(0, -1);
    }
    const orderedPairs = Array.from(target.searchParams.entries())
        .sort((a, b) => a[0].localeCompare(b[0]));
    target.search = "";
    orderedPairs.forEach(([name, value]) => {
        target.searchParams.append(name, value);
    });
    return target.href;
}
150
/**
 * Build the cache record for one fetched source.
 *
 * The stored `url` and `redactedUrl` are both the sanitized URL; the raw URL
 * only feeds `sourceRef` and `urlKey`. Private hosts are allowed here because
 * validation is used only to derive `domain` ("unknown" when it fails).
 *
 * @param {object} options - Reads `url`, `text`, `config.runId`, `config.taskId`
 *   and the optional `title`, `provider`, `extractionLossy`, `metadata`.
 * @returns {object} Source record tagged with WORKFLOW_WEB_SOURCE_CACHE_SCHEMA.
 */
export function createWorkflowWebSource(options) {
    const { url, text, config } = options;
    const verdict = validateWorkflowWebUrl(url, {
        ...DEFAULT_WORKFLOW_WEB_SECURITY_POLICY,
        allowPrivateHosts: true,
    });
    const domain = verdict.ok ? verdict.domain : "unknown";
    const redactedUrl = sanitizeUrlForModel(url);
    const contentHash = hashString(text);
    // Fields are added in a fixed order so the serialized record keeps its layout.
    const record = {
        schema: WORKFLOW_WEB_SOURCE_CACHE_SCHEMA,
        sourceRef: sourceRefFor(url, text),
        createdAt: new Date().toISOString(),
        runId: config.runId,
        taskId: config.taskId,
        url: redactedUrl,
        redactedUrl,
        urlKey: sourceUrlCacheKey(url),
        domain,
    };
    if (options.title) {
        record.title = options.title;
    }
    if (options.provider) {
        record.provider = options.provider;
    }
    record.contentHash = contentHash;
    record.text = text;
    record.textChars = text.length;
    if (options.extractionLossy !== undefined) {
        record.extractionLossy = options.extractionLossy;
    }
    if (options.metadata) {
        record.metadata = options.metadata;
    }
    return record;
}
179
/**
 * Persist a source record and register it in the index.
 *
 * Order of effects: create `<cacheDir>/sources`, write the source object,
 * append an index ledger event, then rewrite `index.json` with this entry
 * replacing any earlier entry for the same `sourceRef`.
 *
 * @param {object} config - Reads `cacheDir` (other fields are used by the helpers).
 * @param {object} source - Record as produced by `createWorkflowWebSource`.
 * @returns {Promise<void>}
 */
export async function writeWorkflowWebSource(config, source) {
    const sourcesDir = resolve(config.cacheDir, "sources");
    await mkdir(sourcesDir, { recursive: true });
    await writeJsonAtomic(sourceObjectPath(config, source.sourceRef), source);
    const freshEntry = sourceToIndexEntry(source);
    await appendWorkflowWebSourceIndexEvent(config, freshEntry);
    const currentIndex = await readWorkflowWebSourceIndex(config);
    const retained = [];
    for (const indexEntry of currentIndex.sources) {
        if (indexEntry.sourceRef !== source.sourceRef) {
            retained.push(indexEntry);
        }
    }
    retained.push(freshEntry);
    const nextIndex = {
        ...currentIndex,
        updatedAt: new Date().toISOString(),
        sources: mergeSourceIndexEntries(retained),
    };
    await writeJsonAtomic(indexPath(config), nextIndex);
}
193
/**
 * Load one cached source object by reference.
 *
 * @param {object} config - Cache configuration (reads `cacheDir` via `sourceObjectPath`).
 * @param {string} sourceRef - Expected to match the `wsrc_<32 hex>` shape.
 * @returns {Promise<object | undefined>} The parsed record, or `undefined` when
 *   the ref is malformed, the file cannot be read or parsed, or the record's
 *   schema / sourceRef / text fields do not check out.
 */
export async function readWorkflowWebSource(config, sourceRef) {
    if (!isWorkflowWebSourceRef(sourceRef)) {
        return undefined;
    }
    try {
        const rawJson = await readFile(sourceObjectPath(config, sourceRef), "utf8");
        const candidate = JSON.parse(rawJson);
        const usable = isRecord(candidate)
            && candidate.schema === WORKFLOW_WEB_SOURCE_CACHE_SCHEMA
            && candidate.sourceRef === sourceRef
            && typeof candidate.text === "string";
        return usable ? candidate : undefined;
    }
    catch {
        return undefined;
    }
}
212
/**
 * Read the source index, folding in any entries recorded in the ledger.
 *
 * @param {object} config - Cache configuration.
 * @returns {Promise<object>} The index file contents as-is when the ledger
 *   yields no entries; otherwise a copy with a fresh `updatedAt` and the merged
 *   source list (ledger entries win over file entries with the same ref).
 */
export async function readWorkflowWebSourceIndex(config) {
    const fileIndex = await readWorkflowWebSourceIndexFile(config);
    const ledger = await readWorkflowWebSourceIndexLedger(config);
    if (ledger.length === 0) {
        return fileIndex;
    }
    const combined = fileIndex.sources.concat(ledger);
    return {
        ...fileIndex,
        updatedAt: new Date().toISOString(),
        sources: mergeSourceIndexEntries(combined),
    };
}
223
/**
 * Look up a cached source for `url`.
 *
 * Walks the index from the last entry backwards for the first match; when that
 * yields no readable source, falls back to scanning the source files.
 *
 * @param {object} config - Cache configuration.
 * @param {string} url - Raw (unredacted) URL.
 * @returns {Promise<object | undefined>}
 */
export async function findWorkflowWebSourceByUrl(config, url) {
    const redactedUrl = sanitizeUrlForModel(url);
    const targetKey = sourceUrlCacheKey(url);
    const targetDisplayKey = sourceUrlDisplayCacheKey(redactedUrl);
    const { sources } = await readWorkflowWebSourceIndex(config);
    let indexed;
    for (let position = sources.length - 1; position >= 0; position -= 1) {
        const candidate = sources[position];
        if (sourceIndexEntryMatchesUrl(candidate, url, redactedUrl, targetKey, targetDisplayKey)) {
            indexed = candidate;
            break;
        }
    }
    if (indexed) {
        const cached = await readWorkflowWebSource(config, indexed.sourceRef);
        if (cached) {
            return cached;
        }
    }
    return findWorkflowWebSourceByUrlFromSources(config, url, redactedUrl, targetKey, targetDisplayKey);
}
238
/**
 * Decide whether an index entry refers to the requested URL.
 *
 * Entries carrying a `urlKey` are compared by key only. Entries without one
 * are compared by their redacted/raw URL strings, but never when any of the
 * URLs involved looks redacted or credential-bearing, since such URLs cannot
 * identify a resource reliably.
 *
 * @returns {boolean}
 */
function sourceIndexEntryMatchesUrl(entry, url, redactedUrl, targetKey, targetDisplayKey) {
    if (entry.urlKey) {
        return entry.urlKey === targetKey;
    }
    const identityUnsafe = redactedUrlIdentityUnsafe(redactedUrl)
        || redactedUrlIdentityUnsafe(entry.redactedUrl)
        || redactedUrlIdentityUnsafe(entry.url);
    if (identityUnsafe) {
        return false;
    }
    if (entry.redactedUrl === redactedUrl || entry.url === url) {
        return true;
    }
    if (sourceUrlDisplayCacheKey(entry.redactedUrl) === targetDisplayKey) {
        return true;
    }
    return sourceUrlDisplayCacheKey(entry.url) === targetDisplayKey;
}
249
/**
 * Report whether a URL string is unsafe to use as an identity for matching:
 * either it already contains the literal "REDACTED", or a `?`/`&`/`#`
 * parameter name contains a credential-like keyword.
 *
 * @param {string} url
 * @returns {boolean}
 */
function redactedUrlIdentityUnsafe(url) {
    if (/REDACTED/.test(url)) {
        return true;
    }
    const credentialParam = /[?&#][^=]*(?:token|secret|password|signature|sig|key|auth|session|credential)[^=]*=/i;
    return credentialParam.test(url);
}
252
/**
 * Fallback lookup that scans `<cacheDir>/sources` directly.
 *
 * File names are visited in reverse directory-listing order. Sources with a
 * `urlKey` match on the key alone; older sources match on URL strings unless
 * any URL involved looks redacted or credential-bearing.
 *
 * @returns {Promise<object | undefined>} First matching source, or `undefined`
 *   (also when the directory cannot be listed).
 */
async function findWorkflowWebSourceByUrlFromSources(config, url, redactedUrl, targetKey, targetDisplayKey) {
    let fileNames;
    try {
        fileNames = await readdir(resolve(config.cacheDir, "sources"));
    }
    catch {
        return undefined;
    }
    const suffix = ".json";
    for (let position = fileNames.length - 1; position >= 0; position -= 1) {
        const fileName = fileNames[position];
        if (!fileName.endsWith(suffix)) {
            continue;
        }
        const candidateRef = fileName.slice(0, -suffix.length);
        const source = await readWorkflowWebSource(config, candidateRef);
        if (!source) {
            continue;
        }
        if (source.urlKey) {
            if (source.urlKey === targetKey) {
                return source;
            }
            continue;
        }
        const identityUnsafe = redactedUrlIdentityUnsafe(redactedUrl)
            || redactedUrlIdentityUnsafe(source.redactedUrl)
            || redactedUrlIdentityUnsafe(source.url);
        if (identityUnsafe) {
            continue;
        }
        const sameUrl = source.redactedUrl === redactedUrl
            || source.url === url
            || sourceUrlDisplayCacheKey(source.redactedUrl) === targetDisplayKey
            || sourceUrlDisplayCacheKey(source.url) === targetDisplayKey;
        if (sameUrl) {
            return source;
        }
    }
    return undefined;
}
284
/**
 * Append one event line to `<cacheDir>/events.jsonl`.
 *
 * The line carries the event schema tag, a timestamp, the run/task ids and the
 * event name, followed by the fields of `data` after `redactRecordForModel`.
 * Because the redacted data is spread last, its keys override the fixed ones.
 *
 * @param {object} config - Reads `cacheDir`, `runId`, `taskId`.
 * @param {string} event - Event name.
 * @param {object} [data] - Extra fields to record.
 * @returns {Promise<void>}
 */
export async function recordWorkflowWebSourceEvent(config, event, data = {}) {
    await mkdir(resolve(config.cacheDir), { recursive: true });
    const eventsFile = resolve(config.cacheDir, "events.jsonl");
    const record = {
        schema: WORKFLOW_WEB_SOURCE_EVENT_SCHEMA,
        at: new Date().toISOString(),
        runId: config.runId,
        taskId: config.taskId,
        event,
        ...redactRecordForModel(data),
    };
    const line = `${JSON.stringify(record)}\n`;
    await appendFile(eventsFile, line, "utf8");
}
295
/**
 * Build the compact, model-visible card describing a cached source.
 *
 * The preview is the redacted source text cut to the policy's preview length
 * (shorter for duplicates) and charged against `options.budget`.
 *
 * @param {object} options - Reads `source`, `policy`, `budget`, `duplicate`.
 * @returns {object} Card with the sourceRef, redacted URL, preview, budget
 *   accounting and a follow-up hint in `next`.
 */
export function buildWorkflowWebSourceCard(options) {
    const { source, policy, budget } = options;
    const previewLimit = options.duplicate ? policy.duplicatePreviewChars : policy.previewChars;
    const preview = consumeWorkflowWebVisibleBudget(budget, redactInlineSecrets(source.text), previewLimit);
    // Fields are added in a fixed order so the serialized card keeps its layout.
    const card = {
        sourceRef: source.sourceRef,
        url: source.redactedUrl,
        domain: source.domain,
    };
    if (source.title) {
        card.title = source.title;
    }
    card.preview = preview.text;
    card.textChars = source.textChars;
    card.fullContentCached = true;
    card.duplicate = Boolean(options.duplicate);
    card.budget = {
        limit: budget.limit,
        used: preview.used,
        remaining: preview.remaining,
        truncated: preview.truncated,
    };
    card.next = `Use workflow_web_source_read with sourceRef=${source.sourceRef} and an exact query for one quote, queries:[...] or reads:[...] to batch several quotes, or claim+terms when the exact quote is unknown. Do not read workflow cache files directly.`;
    return card;
}
318
/**
 * Single-request convenience wrapper around `readWorkflowWebSourceSnippets`.
 *
 * @param {object} options - Reads `source`, `query`, `claim`, `terms`, `maxChars`, `budget`.
 * @returns {object} The one snippet result, or a `not_found` result when none came back.
 */
export function readWorkflowWebSourceSnippet(options) {
    const { source, query, claim, terms, maxChars, budget } = options;
    const results = readWorkflowWebSourceSnippets({
        source,
        requests: [{ query, claim, terms, maxChars }],
        maxChars,
        budget,
    });
    const first = results[0];
    return first ?? { status: "not_found", visibleChars: 0 };
}
333
/**
 * Resolve several snippet requests against one source.
 *
 * The normalized form of the source text is computed at most once, on first
 * demand, and shared across all requests. All requests draw on the same budget.
 *
 * @param {object} options - Reads `source`, `requests`, `maxChars` (default per
 *   request when the request has no own `maxChars`), `budget`.
 * @returns {object[]} One result per request, in request order.
 */
export function readWorkflowWebSourceSnippets(options) {
    let memoizedNorm;
    const getNormalizedSource = () => {
        if (memoizedNorm === undefined || memoizedNorm === null) {
            memoizedNorm = normalizeForSearch(options.source.text);
        }
        return memoizedNorm;
    };
    const resolveOne = (request) => readWorkflowWebSourceSnippetWithCache({
        source: options.source,
        request,
        maxChars: request.maxChars ?? options.maxChars,
        budget: options.budget,
        getNormalizedSource,
    });
    return options.requests.map((request) => resolveOne(request));
}
347
/**
 * Concatenate the non-empty string `text` fields of a tool result's `content` entries.
 *
 * @param {unknown} result - Tool result; anything without an array `content` yields "".
 * @returns {string} Text parts joined by a blank line.
 */
export function extractTextFromToolResult(result) {
    if (!isRecord(result)) {
        return "";
    }
    const parts = result.content;
    if (!Array.isArray(parts)) {
        return "";
    }
    const texts = [];
    for (const part of parts) {
        if (!isRecord(part)) {
            continue;
        }
        const value = part.text;
        if (typeof value === "string" && value !== "") {
            texts.push(value);
        }
    }
    return texts.join("\n\n");
}
363
/**
 * Pick a title for a tool result.
 *
 * Prefers a string `details.title`; otherwise uses the first level-1 Markdown
 * heading found in the result text, capped at 200 characters.
 *
 * @param {unknown} result
 * @returns {string | undefined}
 */
export function extractTitleFromToolResult(result) {
    if (!isRecord(result)) {
        return undefined;
    }
    const { details } = result;
    if (isRecord(details) && typeof details.title === "string") {
        return details.title;
    }
    const headingMatch = /^#\s+(.+)$/m.exec(extractTextFromToolResult(result));
    const heading = headingMatch?.[1]?.trim();
    if (!heading) {
        return undefined;
    }
    return heading.slice(0, 200);
}
373
/**
 * Turn a search tool result into candidate entries for the model.
 *
 * With no URLs in the text, returns a single snippet-only candidate. Otherwise
 * returns up to ten distinct URLs (first-seen order), each sanitized and paired
 * with redacted text from around its first occurrence.
 *
 * @param {unknown} result - Search tool result.
 * @param {{ searchSnippetChars: number }} [policy] - Defaults to the module's default source policy.
 * @returns {Array<{ url?: string, domain?: string, snippet: string }>}
 */
export function extractSearchCandidates(result, policy = DEFAULT_WORKFLOW_WEB_SOURCE_POLICY) {
    const text = extractTextFromToolResult(result);
    if (!text.trim()) {
        return [];
    }
    const distinctUrls = new Set();
    for (const match of text.matchAll(/https?:\/\/[^\s)\]>"']+/g)) {
        distinctUrls.add(match[0]);
    }
    if (distinctUrls.size === 0) {
        const leadingText = text.trim().slice(0, policy.searchSnippetChars);
        return [{ snippet: redactInlineSecrets(leadingText) }];
    }
    // Validation is used only to derive the domain, so private hosts are allowed here.
    const lenientSecurity = {
        ...DEFAULT_WORKFLOW_WEB_SECURITY_POLICY,
        allowPrivateHosts: true,
    };
    return Array.from(distinctUrls).slice(0, 10).map((url) => {
        const verdict = validateWorkflowWebUrl(url, lenientSecurity);
        return {
            url: sanitizeUrlForModel(url),
            domain: verdict.ok ? verdict.domain : undefined,
            snippet: redactInlineSecrets(nearbySnippet(text, url, policy.searchSnippetChars)),
        };
    });
}
397
/**
 * Wrap a JSON-serializable value as a tool result with a single text part.
 *
 * @param {unknown} value - Serialized with `JSON.stringify`, newline-terminated.
 * @returns {{ content: Array<{ type: string, text: string }>, details: { workflowWebSource: boolean } }}
 */
export function toolResultFromJson(value) {
    const serialized = JSON.stringify(value);
    const textPart = { type: "text", text: `${serialized}\n` };
    return {
        content: [textPart],
        details: { workflowWebSource: true },
    };
}
403
/**
 * Build a "blocked" tool result.
 *
 * @param {string} code - Machine-readable reason.
 * @param {string} message - Human-readable explanation.
 * @param {object} [extra] - Extra fields; spread last, so they override the fixed ones.
 * @returns {object} Tool result as produced by `toolResultFromJson`.
 */
export function errorToolResult(code, message, extra = {}) {
    const payload = { status: "blocked", code, message, ...extra };
    return toolResultFromJson(payload);
}
406
/**
 * Redact secrets carried in a URL fragment.
 *
 * The fragment is first read as a parameter list; when any parameter name
 * matches SENSITIVE_QUERY_PARAM_PATTERN the re-serialized list is returned with
 * those values replaced. Otherwise the raw fragment goes through
 * `redactInlineSecrets`.
 *
 * @param {string} hash - Fragment with or without the leading "#".
 * @returns {string} Redacted fragment including "#", or "" when nothing remains.
 */
function redactUrlFragment(hash) {
    if (!hash) {
        return "";
    }
    const body = hash.startsWith("#") ? hash.slice(1) : hash;
    if (!body) {
        return "";
    }
    try {
        const fragmentParams = new URLSearchParams(body);
        const sensitiveNames = Array.from(fragmentParams.keys())
            .filter((name) => SENSITIVE_QUERY_PARAM_PATTERN.test(name));
        if (sensitiveNames.length > 0) {
            sensitiveNames.forEach((name) => fragmentParams.set(name, "REDACTED"));
            return `#${fragmentParams.toString()}`;
        }
    }
    catch {
        // Fall through to inline redaction.
    }
    const scrubbed = redactInlineSecrets(body);
    if (!scrubbed) {
        return "";
    }
    return `#${scrubbed}`;
}
430
/**
 * Decide whether a fragment takes part in the cache identity of a URL.
 *
 * Kept when the fragment body starts with "/" or "!" or contains "?";
 * every other fragment is dropped.
 *
 * @param {string} hash - Fragment with or without the leading "#".
 * @returns {boolean}
 */
function shouldKeepFragmentForCache(hash) {
    if (!hash) {
        return false;
    }
    const body = hash[0] === "#" ? hash.slice(1) : hash;
    const firstChar = body[0];
    if (firstChar === "/" || firstChar === "!") {
        return true;
    }
    return body.indexOf("?") !== -1;
}
436
/**
 * Project a full source record down to its index entry (no text body).
 *
 * @param {object} source - Source record.
 * @returns {object} Entry with the identifying fields; `urlKey`, `title` and
 *   `provider` are included only when truthy on the source.
 */
function sourceToIndexEntry(source) {
    // Fields are added in a fixed order so the serialized entry keeps its layout.
    const entry = {
        sourceRef: source.sourceRef,
        createdAt: source.createdAt,
        url: source.url,
        redactedUrl: source.redactedUrl,
    };
    if (source.urlKey) {
        entry.urlKey = source.urlKey;
    }
    entry.domain = source.domain;
    if (source.title) {
        entry.title = source.title;
    }
    entry.contentHash = source.contentHash;
    entry.textChars = source.textChars;
    if (source.provider) {
        entry.provider = source.provider;
    }
    return entry;
}
450
/**
 * Resolve one snippet request against a source.
 *
 * Strategy, in order: exact substring match of the trimmed query; match of the
 * normalized query in the normalized source; term-based window search using
 * the request's explicit terms or terms derived from its claim.
 *
 * @param {object} options - Reads `source`, `request`, `maxChars`, `budget`,
 *   and `getNormalizedSource` (lazy accessor for the normalized source).
 * @returns {object} A matched snippet result, or `{ status: "not_found", visibleChars: 0 }`.
 */
function readWorkflowWebSourceSnippetWithCache(options) {
    const { source, request, maxChars, budget } = options;
    const query = request.query?.trim() ?? "";
    if (query) {
        const exactAt = source.text.indexOf(query);
        if (exactAt >= 0) {
            return snippetForMatch({
                text: source.text,
                start: exactAt,
                end: exactAt + query.length,
                matchType: "exact",
                maxChars,
                budget,
            });
        }
        const haystack = options.getNormalizedSource();
        const needle = normalizeForSearch(query);
        const foldedAt = haystack.normalized.indexOf(needle.normalized);
        if (foldedAt >= 0) {
            // Map the normalized match span back to offsets in the original text.
            const lastFolded = Math.min(
                haystack.map.length - 1,
                foldedAt + Math.max(0, needle.normalized.length - 1),
            );
            const start = haystack.map[foldedAt] ?? 0;
            const end = (haystack.map[lastFolded] ?? start) + 1;
            return snippetForMatch({
                text: source.text,
                start,
                end,
                matchType: "normalized",
                maxChars,
                budget,
            });
        }
    }
    const termNeedles = prepareTermNeedles(request.terms, request.claim);
    if (termNeedles.length === 0) {
        return { status: "not_found", visibleChars: 0 };
    }
    return snippetForTerms({
        text: source.text,
        normalizedSource: options.getNormalizedSource(),
        terms: termNeedles,
        maxChars,
        budget,
    });
}
492
/**
 * Find the text window that best covers a set of search terms.
 *
 * Up to 20 occurrences of each term are located in the normalized source; a
 * window around each occurrence is scored by `scoreTermWindow`, and the
 * top-ranked window (score, then number of matched terms) is returned as a
 * redacted, budget-charged quote flagged `candidateOnly`.
 *
 * @param {object} options - Reads `text`, `normalizedSource`, `terms`, `maxChars`, `budget`.
 * @returns {object} A `"terms"` match result, or `{ status: "not_found", visibleChars: 0 }`.
 */
function snippetForTerms(options) {
    const needles = [];
    for (const term of options.terms) {
        const normalized = normalizeForSearch(term).normalized;
        if (normalized.length > 0) {
            needles.push({ raw: term, normalized });
        }
    }
    if (needles.length === 0) {
        return { status: "not_found", visibleChars: 0 };
    }
    const haystack = options.normalizedSource;
    const windows = [];
    for (const needle of needles) {
        let cursor = 0;
        for (let seen = 0; seen < 20; seen += 1) {
            const foldedAt = haystack.normalized.indexOf(needle.normalized, cursor);
            if (foldedAt < 0) {
                break;
            }
            const lastFolded = Math.min(
                haystack.map.length - 1,
                foldedAt + Math.max(0, needle.normalized.length - 1),
            );
            const start = haystack.map[foldedAt] ?? 0;
            const end = (haystack.map[lastFolded] ?? start) + 1;
            windows.push(scoreTermWindow(options.text, start, end, options.maxChars, needles));
            cursor = foldedAt + Math.max(1, needle.normalized.length);
        }
    }
    if (windows.length === 0) {
        return { status: "not_found", visibleChars: 0 };
    }
    windows.sort((left, right) => {
        const byScore = right.score - left.score;
        if (right.score !== left.score) {
            return byScore;
        }
        return right.matchedTerms.length - left.matchedTerms.length;
    });
    const best = windows[0];
    const excerpt = redactInlineSecrets(options.text.slice(best.start, best.end));
    const consumed = consumeWorkflowWebVisibleBudget(options.budget, excerpt, options.maxChars);
    return {
        status: "matched",
        matchType: "terms",
        quote: consumed.text,
        startOffset: best.start,
        endOffset: best.end,
        visibleChars: consumed.text.length,
        matchedTerms: best.matchedTerms,
        missingTerms: best.missingTerms,
        coverageRatio: best.matchedTerms.length / Math.max(1, needles.length),
        candidateOnly: true,
    };
}
536
/**
 * Score a `maxChars`-wide window centred on a term occurrence.
 *
 * @param {string} text - Original source text.
 * @param {number} matchStart - Start offset of the occurrence in `text`.
 * @param {number} matchEnd - End offset (exclusive) of the occurrence.
 * @param {number} maxChars - Window width.
 * @param {Array<{ raw: string, normalized: string }>} terms - All search terms.
 * @returns {{ start: number, end: number, matchedTerms: string[], missingTerms: string[], score: number }}
 *   Score is 1000 per term present in the window plus the summed normalized
 *   lengths of those terms.
 */
function scoreTermWindow(text, matchStart, matchEnd, maxChars, terms) {
    const midpoint = Math.floor((matchStart + matchEnd) / 2);
    const start = Math.max(0, midpoint - Math.floor(maxChars / 2));
    const end = Math.min(text.length, start + maxChars);
    const windowNorm = normalizeForSearch(text.slice(start, end)).normalized;
    const matchedTerms = [];
    const missingTerms = [];
    let lengthBonus = 0;
    for (const term of terms) {
        if (windowNorm.includes(term.normalized)) {
            matchedTerms.push(term.raw);
            lengthBonus += term.normalized.length;
        }
        else {
            missingTerms.push(term.raw);
        }
    }
    return {
        start,
        end,
        matchedTerms,
        missingTerms,
        score: matchedTerms.length * 1_000 + lengthBonus,
    };
}
558
/**
 * Choose the search terms for a term-based read.
 *
 * Explicit terms (trimmed, non-empty, deduplicated) win; otherwise terms are
 * derived from the claim. At most 16 terms are returned.
 *
 * @param {string[] | undefined} terms - Caller-provided terms.
 * @param {string | undefined} claim - Claim text used when no explicit terms remain.
 * @returns {string[]}
 */
function prepareTermNeedles(terms, claim) {
    const cleaned = [];
    for (const term of terms ?? []) {
        const trimmed = term.trim();
        if (trimmed) {
            cleaned.push(trimmed);
        }
    }
    const explicitTerms = dedupeStrings(cleaned);
    if (explicitTerms.length > 0) {
        return explicitTerms.slice(0, 16);
    }
    const hasClaim = Boolean(claim?.trim());
    return hasClaim ? extractClaimTerms(claim).slice(0, 16) : [];
}
566
/**
 * Derive search terms from free-form claim text.
 *
 * Tokens are runs of at least three characters that start with a letter or
 * digit and continue with letters, digits, ".", "_", "/" or "-". They are
 * lower-cased, stopwords are removed, duplicates collapsed, and the result is
 * ordered longest first.
 *
 * @param {string} claim
 * @returns {string[]}
 */
function extractClaimTerms(claim) {
    const rawTokens = claim.match(/[\p{L}\p{N}][\p{L}\p{N}._/-]{2,}/gu) ?? [];
    const meaningful = [];
    for (const rawToken of rawTokens) {
        const token = rawToken.toLowerCase();
        if (!SOURCE_READ_STOPWORDS.has(token)) {
            meaningful.push(token);
        }
    }
    const unique = dedupeStrings(meaningful);
    unique.sort((left, right) => right.length - left.length);
    return unique;
}
573
/**
 * Remove duplicates that are equal after search normalization, keeping the
 * first spelling of each and preserving order. Values that normalize to the
 * empty string are dropped.
 *
 * @param {Iterable<string>} values
 * @returns {string[]}
 */
function dedupeStrings(values) {
    const firstSpellingByKey = new Map();
    for (const value of values) {
        const key = normalizeForSearch(value).normalized;
        if (key && !firstSpellingByKey.has(key)) {
            firstSpellingByKey.set(key, value);
        }
    }
    return [...firstSpellingByKey.values()];
}
585
// Lower-case tokens that `extractClaimTerms` discards when deriving search
// terms from a claim. Tokens are lower-cased before lookup, so entries here
// must stay lower-case. Words shorter than three characters never reach this
// filter because the tokenizer requires at least three characters.
const SOURCE_READ_STOPWORDS = new Set([
    "about",
    "across",
    "after",
    "against",
    "also",
    "because",
    "before",
    "between",
    "claim",
    "claims",
    "could",
    "does",
    "from",
    "have",
    "into",
    "more",
    "must",
    "only",
    "other",
    "over",
    "should",
    "source",
    "sources",
    "than",
    "that",
    "their",
    "there",
    "these",
    "this",
    "through",
    "under",
    "using",
    "when",
    "where",
    "which",
    "with",
    "without",
]);
624
/**
 * Cut a snippet of at most `maxChars` around a located match.
 *
 * Room left over after the match itself is split so that roughly half of it
 * precedes the match. The snippet is redacted and charged to the budget;
 * the reported offsets are those of the match, not of the snippet.
 *
 * @param {object} options - Reads `text`, `start`, `end`, `matchType`, `maxChars`, `budget`.
 * @returns {object} Result with `status: "matched"`.
 */
function snippetForMatch(options) {
    const { text, start, end, maxChars } = options;
    const matchLength = Math.max(0, end - start);
    const spare = Math.max(0, maxChars - matchLength);
    const lead = Math.floor(spare / 2);
    const from = Math.max(0, start - lead);
    const to = Math.min(text.length, from + maxChars);
    const excerpt = redactInlineSecrets(text.slice(from, to));
    const consumed = consumeWorkflowWebVisibleBudget(options.budget, excerpt, maxChars);
    return {
        status: "matched",
        matchType: options.matchType,
        quote: consumed.text,
        startOffset: start,
        endOffset: end,
        visibleChars: consumed.text.length,
    };
}
641
/**
 * Fold text for tolerant matching and record where each folded character
 * came from.
 *
 * Folding, applied per UTF-16 code unit: NFKC normalization, lower-casing,
 * curly quotes to straight quotes, dash variants to "-", and every run of
 * whitespace collapsed to one space. Leading and trailing whitespace is
 * omitted entirely.
 *
 * `map[i]` is the index in `text` of the code unit that produced
 * `normalized[i]`, so `map.length === normalized.length` always holds. The
 * previous implementation trimmed `normalized` after the fact without touching
 * `map`, which shifted every offset by one entry whenever the text began with
 * whitespace (and left a stale entry for trailing whitespace).
 *
 * @param {string} text
 * @returns {{ normalized: string, map: number[] }}
 */
function normalizeForSearch(text) {
    let normalized = "";
    const map = [];
    let previousWhitespace = false;
    for (let index = 0; index < text.length; index += 1) {
        const folded = text[index]
            .normalize("NFKC")
            .toLowerCase()
            .replace(/[\u2018\u2019\u201A\u201B]/g, "'")
            .replace(/[\u201C\u201D\u201E\u201F]/g, '"')
            .replace(/[\u2010-\u2015\u2212]/g, "-");
        if (/\s/.test(folded)) {
            // Emit one space per whitespace run, and none at the very start so
            // that `map` never holds an entry for trimmed leading whitespace.
            if (!previousWhitespace && normalized.length > 0) {
                normalized += " ";
                map.push(index);
            }
            previousWhitespace = true;
            continue;
        }
        previousWhitespace = false;
        normalized += folded;
        // One map entry per emitted code unit keeps string and map aligned even
        // when folding expands a character (for example a ligature).
        for (let unit = 0; unit < folded.length; unit += 1) {
            map.push(index);
        }
    }
    // Drop the single space a trailing whitespace run may have left behind,
    // together with its map entry.
    if (normalized.endsWith(" ")) {
        normalized = normalized.slice(0, -1);
        map.pop();
    }
    return { normalized, map };
}
668
/**
 * Extract up to `maxChars` of text surrounding the first occurrence of `needle`.
 *
 * @param {string} text
 * @param {string} needle
 * @param {number} maxChars
 * @returns {string} Trimmed window starting about half of `maxChars` before the
 *   needle; when the needle is absent, the first `maxChars` of the trimmed text.
 */
function nearbySnippet(text, needle, maxChars) {
    const foundAt = text.indexOf(needle);
    if (foundAt >= 0) {
        const from = Math.max(0, foundAt - Math.floor(maxChars / 2));
        const window = text.slice(from, from + maxChars);
        return window.trim();
    }
    return text.trim().slice(0, maxChars);
}
675
/**
 * Read and sanitize `index.json`.
 *
 * @param {object} config - Reads `cacheDir` (via `indexPath`) and `runId`.
 * @returns {Promise<object>} The index with only well-formed entries kept, or
 *   an empty index when the file is missing, unparsable, or carries another schema.
 */
async function readWorkflowWebSourceIndexFile(config) {
    try {
        const stored = JSON.parse(await readFile(indexPath(config), "utf8"));
        if (isRecord(stored) && stored.schema === WORKFLOW_WEB_SOURCE_INDEX_SCHEMA) {
            const wellFormed = [];
            if (Array.isArray(stored.sources)) {
                stored.sources.forEach((rawEntry) => {
                    const entry = sourceIndexEntryFromUnknown(rawEntry);
                    if (entry) {
                        wellFormed.push(entry);
                    }
                });
            }
            const hasTimestamp = typeof stored.updatedAt === "string";
            const hasRunId = typeof stored.runId === "string";
            return {
                schema: WORKFLOW_WEB_SOURCE_INDEX_SCHEMA,
                updatedAt: hasTimestamp ? stored.updatedAt : new Date().toISOString(),
                runId: hasRunId ? stored.runId : config.runId,
                sources: mergeSourceIndexEntries(wellFormed),
            };
        }
    }
    catch {
        // Unreadable or malformed index: fall back to an empty one below.
    }
    return emptyWorkflowWebSourceIndex(config);
}
698
/**
 * Append one index entry to the `index-events.jsonl` ledger.
 *
 * @param {object} config - Reads `cacheDir`, `runId`, `taskId`.
 * @param {object} entry - Index entry to record.
 * @returns {Promise<void>}
 */
async function appendWorkflowWebSourceIndexEvent(config, entry) {
    await mkdir(resolve(config.cacheDir), { recursive: true });
    const ledgerFile = indexEventsPath(config);
    const record = {
        schema: WORKFLOW_WEB_SOURCE_INDEX_EVENT_SCHEMA,
        at: new Date().toISOString(),
        runId: config.runId,
        taskId: config.taskId,
        entry,
    };
    const line = `${JSON.stringify(record)}\n`;
    await appendFile(ledgerFile, line, "utf8");
}
708
/**
 * Read every well-formed entry from the `index-events.jsonl` ledger.
 *
 * @param {object} config - Reads `cacheDir` via `indexEventsPath`.
 * @returns {Promise<object[]>} Entries in file order; `[]` when the ledger
 *   cannot be read. Blank, unparsable, wrong-schema or malformed lines are skipped.
 */
async function readWorkflowWebSourceIndexLedger(config) {
    let contents;
    try {
        contents = await readFile(indexEventsPath(config), "utf8");
    }
    catch {
        return [];
    }
    const collected = [];
    const lines = contents.split(/\r?\n/);
    for (const line of lines) {
        if (!line.trim()) {
            continue;
        }
        try {
            const record = JSON.parse(line);
            const isLedgerEvent = isRecord(record) && record.schema === WORKFLOW_WEB_SOURCE_INDEX_EVENT_SCHEMA;
            if (!isLedgerEvent) {
                continue;
            }
            const entry = sourceIndexEntryFromUnknown(record.entry);
            if (entry) {
                collected.push(entry);
            }
        }
        catch {
            // Ignore torn or corrupt ledger lines; source file scan still provides a final fallback.
        }
    }
    return collected;
}
734
/**
 * Validate untrusted data as an index entry and copy its known fields.
 *
 * Requires a well-formed `sourceRef`, string `createdAt`, `url`,
 * `redactedUrl`, `domain`, `contentHash`, and a `textChars` that converts to a
 * finite number. `urlKey`, `title` and `provider` are copied only when strings.
 *
 * @param {unknown} value
 * @returns {object | undefined} A fresh entry, or `undefined` when validation fails.
 */
function sourceIndexEntryFromUnknown(value) {
    if (!isRecord(value)) {
        return undefined;
    }
    const hasValidRef = typeof value.sourceRef === "string" && isWorkflowWebSourceRef(value.sourceRef);
    if (!hasValidRef) {
        return undefined;
    }
    const requiredStrings = ["createdAt", "url", "redactedUrl", "domain", "contentHash"];
    if (requiredStrings.some((field) => typeof value[field] !== "string")) {
        return undefined;
    }
    const textChars = Number(value.textChars);
    if (!Number.isFinite(textChars)) {
        return undefined;
    }
    // Fields are added in a fixed order so the serialized entry keeps its layout.
    const entry = {
        sourceRef: value.sourceRef,
        createdAt: value.createdAt,
        url: value.url,
        redactedUrl: value.redactedUrl,
    };
    if (typeof value.urlKey === "string") {
        entry.urlKey = value.urlKey;
    }
    entry.domain = value.domain;
    if (typeof value.title === "string") {
        entry.title = value.title;
    }
    entry.contentHash = value.contentHash;
    entry.textChars = textChars;
    if (typeof value.provider === "string") {
        entry.provider = value.provider;
    }
    return entry;
}
764
/**
 * Collapse index entries to one per `sourceRef` (the last occurrence wins) and
 * order the result by `createdAt`.
 *
 * @param {Iterable<object>} entries
 * @returns {object[]} New array; the input is not modified.
 */
function mergeSourceIndexEntries(entries) {
    const latestByRef = new Map(Array.from(entries, (entry) => [entry.sourceRef, entry]));
    const merged = Array.from(latestByRef.values());
    merged.sort((earlier, later) => earlier.createdAt.localeCompare(later.createdAt));
    return merged;
}
770
/**
 * Build an index with no sources for the configured run.
 *
 * @param {object} config - Reads `runId`.
 * @returns {object} Index stamped with the index schema and the current time.
 */
function emptyWorkflowWebSourceIndex(config) {
    const { runId } = config;
    const updatedAt = new Date().toISOString();
    return {
        schema: WORKFLOW_WEB_SOURCE_INDEX_SCHEMA,
        updatedAt,
        runId,
        sources: [],
    };
}
778
/**
 * Absolute path of the `index.json` file inside the cache directory.
 *
 * @param {{cacheDir: string}} config
 * @returns {string}
 */
function indexPath(config) {
    const indexFileName = "index.json";
    return resolve(config.cacheDir, indexFileName);
}
781
/**
 * Absolute path of the `index-events.jsonl` file inside the cache directory.
 *
 * @param {{cacheDir: string}} config
 * @returns {string}
 */
function indexEventsPath(config) {
    const eventsFileName = "index-events.jsonl";
    return resolve(config.cacheDir, eventsFileName);
}
784
/**
 * Resolve the on-disk JSON path for a cached source object.
 *
 * @param {{cacheDir: string}} config - Only `config.cacheDir` is read here.
 * @param {string} sourceRef - Must satisfy `isWorkflowWebSourceRef`.
 * @returns {string} Absolute path `<cacheDir>/sources/<sourceRef>.json`.
 * @throws {Error} If `sourceRef` is not a valid ref, or if the resolved path
 *   does not sit directly inside the `sources` directory.
 */
function sourceObjectPath(config, sourceRef) {
    if (!isWorkflowWebSourceRef(sourceRef)) {
        throw new Error("invalid workflow web sourceRef");
    }
    const sourcesDir = resolve(config.cacheDir, "sources");
    const path = resolve(sourcesDir, `${sourceRef}.json`);
    // Defense in depth: compare the parent directory instead of string-prefixing
    // with a hard-coded "/" so the containment check also holds on platforms
    // whose path separator is not "/" (e.g. Windows).
    if (dirname(path) !== sourcesDir) {
        throw new Error("workflow web sourceRef escaped source cache");
    }
    return path;
}
795
/**
 * Check whether a value has the shape of a workflow web source ref:
 * the literal prefix `wsrc_` followed by exactly 32 lowercase hex characters.
 *
 * @param {string} sourceRef
 * @returns {boolean}
 */
function isWorkflowWebSourceRef(sourceRef) {
    const refPattern = /^wsrc_[a-f0-9]{32}$/;
    return refPattern.test(sourceRef);
}
798
/**
 * Write `value` as pretty-printed JSON to `path` via a temporary sibling file.
 *
 * The parent directory is created if needed, the JSON (2-space indent plus a
 * trailing newline) is written to `<path>.<pid>.<timestamp>.tmp`, and that
 * file is then renamed onto `path`.
 *
 * @param {string} path - Destination file path.
 * @param {unknown} value - Value passed to `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeJsonAtomic(path, value) {
    const parentDir = dirname(path);
    await mkdir(parentDir, { recursive: true });
    const stagingPath = `${path}.${process.pid}.${Date.now()}.tmp`;
    const serialized = `${JSON.stringify(value, null, 2)}\n`;
    await writeFile(stagingPath, serialized, "utf8");
    await rename(stagingPath, path);
}
804
/**
 * Compute the SHA-256 digest of a value and return it as lowercase hex.
 *
 * @param {string} value - Data fed to the hash.
 * @returns {string} 64-character hexadecimal digest.
 */
function hashString(value) {
    const hasher = createHash("sha256");
    hasher.update(value);
    return hasher.digest("hex");
}
807
/**
 * Decide whether a host should be treated as private / non-public.
 *
 * @param {string} host - Hostname or IP literal.
 * @returns {boolean} `true` when the host matches any entry of
 *   `PRIVATE_HOST_PATTERNS` or when `nonPublicIpReason` reports a reason.
 */
function isPrivateHostname(host) {
    const matchesPrivatePattern = PRIVATE_HOST_PATTERNS.some((pattern) => pattern.test(host));
    if (matchesPrivatePattern) {
        return true;
    }
    const reason = nonPublicIpReason(host);
    return reason !== undefined;
}
812
/**
 * Classify an IP literal as non-public.
 *
 * Accepts IPv4 and IPv6 literals (optionally wrapped in `[...]`), including
 * IPv4-mapped IPv6 in dotted (`::ffff:a.b.c.d`) and hex (`::ffff:hhhh:hhhh`)
 * form, which are unwrapped and re-checked as IPv4.
 *
 * @param {string} address - Candidate IP literal; non-IP strings are allowed.
 * @returns {"non_public_ip_blocked" | undefined} The block reason when the
 *   address falls in a loopback, private, link-local, multicast, reserved, or
 *   documentation range; `undefined` when it is not an IP literal or no
 *   blocked range matches.
 */
function nonPublicIpReason(address) {
    const BLOCKED = "non_public_ip_blocked";
    const lower = address.toLowerCase().replace(/^\[|\]$/g, "");
    // IPv4-mapped IPv6, dotted tail: judge the embedded IPv4 address.
    const mappedIpv4 = lower.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/)?.[1];
    if (mappedIpv4) {
        return nonPublicIpReason(mappedIpv4);
    }
    // IPv4-mapped IPv6, hex tail (e.g. ::ffff:7f00:1): rebuild the dotted quad.
    const hexMapped = lower.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (hexMapped) {
        const high = Number.parseInt(hexMapped[1], 16);
        const low = Number.parseInt(hexMapped[2], 16);
        return nonPublicIpReason(`${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`);
    }
    const family = isIP(lower);
    if (family === 4) {
        const parts = lower.split(".").map((part) => Number(part));
        if (parts.length !== 4 || parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)) {
            return BLOCKED;
        }
        const [a, b, c] = parts;
        // 0/8, 10/8, 127/8, and everything from 224/4 upward (multicast,
        // reserved, and the 255.255.255.255 broadcast address).
        if (a === 0 || a === 10 || a === 127 || a >= 224) {
            return BLOCKED;
        }
        // 100.64/10 shared address space.
        if (a === 100 && b >= 64 && b <= 127) {
            return BLOCKED;
        }
        // 169.254/16 link-local.
        if (a === 169 && b === 254) {
            return BLOCKED;
        }
        // 172.16/12 and 192.168/16 private ranges.
        if (a === 172 && b >= 16 && b <= 31) {
            return BLOCKED;
        }
        if (a === 192 && b === 168) {
            return BLOCKED;
        }
        // 192.0.0/24 and 192.0.2/24.
        if (a === 192 && b === 0 && (c === 0 || c === 2)) {
            return BLOCKED;
        }
        // 198.18/15 benchmarking range.
        if (a === 198 && (b === 18 || b === 19)) {
            return BLOCKED;
        }
        // 198.51.100/24 and 203.0.113/24 documentation ranges.
        if (a === 198 && b === 51 && c === 100) {
            return BLOCKED;
        }
        if (a === 203 && b === 0 && c === 113) {
            return BLOCKED;
        }
        return undefined;
    }
    if (family === 6) {
        // Unspecified and loopback.
        if (lower === "::" || lower === "::1") {
            return BLOCKED;
        }
        // Unique-local prefixes.
        if (lower.startsWith("fc") || lower.startsWith("fd")) {
            return BLOCKED;
        }
        // Multicast (and the literal fe80 link-local prefix).
        if (lower.startsWith("fe80") || lower.startsWith("ff")) {
            return BLOCKED;
        }
        // Documentation prefix.
        if (lower.startsWith("2001:db8")) {
            return BLOCKED;
        }
        // Link-local is the whole fe80::/10 block (fe80..febf), not only the
        // literal "fe80" prefix; fec0::/10 is the deprecated site-local block.
        // Both are matched by masking the top 10 bits of the first hextet.
        const firstHextet = Number.parseInt(lower.split(":", 1)[0] || "0", 16);
        const top10Bits = firstHextet & 0xffc0;
        if (top10Bits === 0xfe80 || top10Bits === 0xfec0) {
            return BLOCKED;
        }
    }
    return undefined;
}
861
/**
 * Produce a copy of a record in which every property value has been passed
 * through `redactValueForModel`. Keys are kept as-is.
 *
 * @param {Record<string, unknown>} value
 * @returns {Record<string, unknown>}
 */
function redactRecordForModel(value) {
    const redactedPairs = [];
    for (const [key, item] of Object.entries(value)) {
        redactedPairs.push([key, redactValueForModel(item)]);
    }
    return Object.fromEntries(redactedPairs);
}
864
/**
 * Recursively redact a value before it is shown to a model.
 *
 * Strings go through `sanitizeUrlMaybe` and then `redactInlineSecrets`;
 * arrays are mapped element by element; records are handled by
 * `redactRecordForModel`; every other value is returned unchanged.
 *
 * @param {unknown} value
 * @returns {unknown}
 */
function redactValueForModel(value) {
    if (typeof value === "string") {
        const urlSanitized = sanitizeUrlMaybe(value);
        return redactInlineSecrets(urlSanitized);
    }
    if (Array.isArray(value)) {
        return value.map((item) => redactValueForModel(item));
    }
    return isRecord(value) ? redactRecordForModel(value) : value;
}
873
/**
 * Sanitize a string only when it begins with an `http://` or `https://`
 * scheme (case-insensitive); any other string is returned untouched.
 *
 * @param {string} value
 * @returns {string} Result of `sanitizeUrlForModel(value)` for HTTP(S) URLs,
 *   otherwise `value` itself.
 */
function sanitizeUrlMaybe(value) {
    const startsWithHttpScheme = /^https?:\/\//i.test(value);
    if (!startsWithHttpScheme) {
        return value;
    }
    return sanitizeUrlForModel(value);
}
876
/**
 * Redact secrets embedded in free-form text.
 *
 * Each `http(s)://…` run found in the text is parsed as a URL and replaced by
 * the output of `sanitizeParsedUrlForModel`; trailing punctuation
 * (`. , ; : ! ?`) is split off first and re-appended afterwards. Runs that
 * fail to parse or sanitize are left as they were. The result is then passed
 * through `redactInlineSecretsNoUrls`.
 *
 * @param {string} value
 * @returns {string}
 */
function redactInlineSecrets(value) {
    const sanitizeUrlRun = (match) => {
        const trailing = match.match(/[.,;:!?]+$/)?.[0] ?? "";
        const core = trailing.length > 0 ? match.slice(0, match.length - trailing.length) : match;
        try {
            return `${sanitizeParsedUrlForModel(new URL(core))}${trailing}`;
        }
        catch {
            // Not a parseable URL (or sanitizing failed): keep the original text.
            return match;
        }
    };
    const withSanitizedUrls = value.replace(/https?:\/\/[^\s)\]}>"']+/gi, sanitizeUrlRun);
    return redactInlineSecretsNoUrls(withSanitizedUrls);
}
889
/**
 * Apply pattern-based redaction to text, in three passes:
 *  1. `authorization:`, `cookie:` and `set-cookie:` header values (to end of line),
 *  2. `token=`, `secret=`, `password=` and `api_key=`-style assignments,
 *  3. paths under `/Users/`.
 *
 * @param {string} value
 * @returns {string} The text with matched portions replaced by `REDACTED`.
 */
function redactInlineSecretsNoUrls(value) {
    const withoutHeaderValues = value.replace(/(authorization|cookie|set-cookie):\s*[^\n\r]+/gi, "$1: REDACTED");
    const withoutAssignments = withoutHeaderValues.replace(/(token|secret|password|api[-_]?key)=([^\s&]+)/gi, "$1=REDACTED");
    const withoutUserPaths = withoutAssignments.replace(/\/Users\/[^\s:'")]+/g, "/Users/REDACTED");
    return withoutUserPaths;
}
895
/**
 * Report whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return !Array.isArray(value);
}