@agwab/pi-workflow 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +14 -3
  2. package/agents/researcher.md +17 -7
  3. package/dist/artifact-graph-runtime.js +1 -0
  4. package/dist/compiler.js +2 -2
  5. package/dist/dynamic-generated-task-runtime.js +4 -3
  6. package/dist/dynamic-runtime-bundle.js +3 -2
  7. package/dist/extension.js +40 -1
  8. package/dist/subagent-backend.js +82 -27
  9. package/dist/tool-metadata.d.ts +1 -0
  10. package/dist/tool-metadata.js +13 -1
  11. package/dist/workflow-artifact-extension.js +3 -2
  12. package/dist/workflow-artifact-tool.js +84 -4
  13. package/dist/workflow-web-source-extension.d.ts +43 -0
  14. package/dist/workflow-web-source-extension.js +1194 -0
  15. package/dist/workflow-web-source.d.ts +171 -0
  16. package/dist/workflow-web-source.js +897 -0
  17. package/docs/usage.md +32 -45
  18. package/node_modules/@agwab/pi-subagent/package.json +1 -1
  19. package/node_modules/@agwab/pi-subagent/src/api.ts +245 -132
  20. package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +243 -163
  21. package/node_modules/@agwab/pi-subagent/src/core/constants.ts +117 -90
  22. package/node_modules/@agwab/pi-subagent/src/core/validation.ts +728 -475
  23. package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +305 -209
  24. package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +750 -439
  25. package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +422 -268
  26. package/package.json +3 -4
  27. package/skills/workflow-guide/scaffolds/object-tool-fallback/schemas/fetch-control.schema.json +1 -1
  28. package/skills/workflow-guide/scaffolds/object-tool-fallback/spec.json +4 -3
  29. package/src/artifact-graph-runtime.ts +1 -0
  30. package/src/compiler.ts +2 -1
  31. package/src/dynamic-generated-task-runtime.ts +4 -2
  32. package/src/dynamic-runtime-bundle.ts +3 -2
  33. package/src/extension.ts +46 -1
  34. package/src/subagent-backend.ts +121 -37
  35. package/src/tool-metadata.ts +22 -1
  36. package/src/workflow-artifact-extension.ts +3 -2
  37. package/src/workflow-artifact-tool.ts +96 -4
  38. package/src/workflow-web-source-extension.ts +1411 -0
  39. package/src/workflow-web-source.ts +1171 -0
  40. package/workflows/README.md +1 -1
  41. package/workflows/deep-research/helpers/claim-evidence-gate.mjs +474 -40
  42. package/workflows/deep-research/helpers/final-audit-packet.mjs +219 -0
  43. package/workflows/deep-research/helpers/normalize-input-packet.mjs +436 -0
  44. package/workflows/deep-research/helpers/render-executive.mjs +571 -198
  45. package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +35 -8
  46. package/workflows/deep-research/schemas/deep-research-normalize-claims-control.schema.json +45 -4
  47. package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +0 -2
  48. package/workflows/deep-research/spec.json +36 -21
  49. package/workflows/deep-review/helpers/render-review-report.mjs +502 -0
  50. package/workflows/deep-review/schemas/deep-review-render-control.schema.json +50 -0
  51. package/workflows/deep-review/spec.json +22 -1
  52. package/docs/release.md +0 -89
  53. package/node_modules/@pondwader/socks5-server/.DS_Store +0 -0
  54. package/node_modules/commander/.DS_Store +0 -0
  55. package/node_modules/jiti/.DS_Store +0 -0
  56. package/node_modules/node-forge/.DS_Store +0 -0
  57. package/node_modules/shell-quote/.DS_Store +0 -0
  58. package/node_modules/zod/.DS_Store +0 -0
@@ -0,0 +1,1411 @@
1
+ import { lookup } from "node:dns/promises";
2
+ import { request as httpRequest } from "node:http";
3
+ import { request as httpsRequest } from "node:https";
4
+ import { mkdir, readFile, rm, stat, writeFile } from "node:fs/promises";
5
+ import { isIP } from "node:net";
6
+ import { dirname, extname, isAbsolute, resolve } from "node:path";
7
+ import { pathToFileURL } from "node:url";
8
+ import { Type } from "typebox";
9
+
10
+ import {
11
+ buildWorkflowWebSourceCard,
12
+ createWorkflowWebSource,
13
+ createWorkflowWebVisibleBudget,
14
+ DEFAULT_WORKFLOW_WEB_SECURITY_POLICY,
15
+ errorToolResult,
16
+ extractSearchCandidates,
17
+ extractTextFromToolResult,
18
+ extractTitleFromToolResult,
19
+ findWorkflowWebSourceByUrl,
20
+ normalizeWorkflowWebSecurityPolicy,
21
+ normalizeWorkflowWebSourcePolicy,
22
+ readWorkflowWebSource,
23
+ readWorkflowWebSourceSnippets,
24
+ recordWorkflowWebSourceEvent,
25
+ sanitizeUrlForModel,
26
+ sourceUrlCacheKey,
27
+ toolResultFromJson,
28
+ validateWorkflowWebUrl,
29
+ writeWorkflowWebSource,
30
+ type WorkflowWebSecurityPolicy,
31
+ type WorkflowWebSource,
32
+ type WorkflowWebSourceCacheConfig,
33
+ type WorkflowWebSourcePolicy,
34
+ type WorkflowWebSourceReadRequest,
35
+ type WorkflowWebSourceReadResult,
36
+ } from "./workflow-web-source.js";
37
+
38
/**
 * Schema identifier for workflow web-source launch configs. It is the literal
 * type of the `schema` field on `WorkflowWebSourceLaunchConfig`.
 */
export const WORKFLOW_WEB_SOURCE_LAUNCH_CONFIG_SCHEMA =
	"workflow-web-source-launch-config-v1" as const;
40
+
41
/**
 * Selects which web provider backs the workflow web-source tools.
 *
 * When `kind` is "pi-web-access", `workflow_web_fetch_source` uses the built-in
 * safe fetch path; for any other kind it delegates to the captured provider's
 * `fetch_content` tool.
 */
export interface WorkflowWebProviderLaunchConfig {
	kind: "pi-web-access" | "extension" | "none";
	// NOTE(review): presumably the module path of the provider extension — not read in this part of the file; confirm.
	extensionPath?: string;
}
45
+
46
/**
 * Launch configuration passed to `registerWorkflowWebSourceExtension` and
 * serialized into the generated wrapper module. Extends the cache config so the
 * same object can be handed to the cache/event helpers.
 */
export interface WorkflowWebSourceLaunchConfig extends WorkflowWebSourceCacheConfig {
	schema: typeof WORKFLOW_WEB_SOURCE_LAUNCH_CONFIG_SCHEMA;
	// NOTE(review): workflowName/stageId/taskKey/cwd are not read in the visible code; presumably consumed by the cache/event helpers — confirm.
	workflowName?: string;
	stageId?: string;
	taskKey?: string;
	cwd: string;
	provider: WorkflowWebProviderLaunchConfig;
	/** Partial overrides; normalized with `normalizeWorkflowWebSourcePolicy` at registration. */
	webSourcePolicy?: Partial<WorkflowWebSourcePolicy>;
	/** Partial overrides; normalized with `normalizeWorkflowWebSecurityPolicy` at registration. */
	securityPolicy?: Partial<WorkflowWebSecurityPolicy>;
	/** Coerced to a boolean and forwarded to `providerCapturePi` when a provider extension is supplied. */
	exposeLegacyTools?: boolean;
}
57
+
58
/** Inputs for generating and writing the wrapper extension module. */
export interface WorkflowWebSourceExtensionWrapperOptions {
	/** File path the generated wrapper is written to (resolved before writing). */
	wrapperPath: string;
	/** Location of the module exporting `registerWorkflowWebSourceExtension`; converted with `extensionImportSpecifier`. */
	importPath: string;
	/** Optional provider extension module; when set, its default export is imported by the wrapper. */
	providerExtensionPath?: string;
	/** Launch config embedded in the wrapper as a JSON literal. */
	config: WorkflowWebSourceLaunchConfig;
}
64
+
65
/** Loose shape of a tool execution result: optional `content` and `details`, plus any other keys. */
type ToolResult = {
	content?: Array<Record<string, unknown>>;
	details?: Record<string, unknown>;
	[key: string]: unknown;
};
70
+
71
/**
 * Loose shape of a tool definition passed to `registerTool`. Only `name` and
 * `execute` are typed; other keys (such as `description` and `parameters`) are
 * carried through the index signature.
 */
type ToolSpec = {
	name?: string;
	execute?: (
		toolCallId: string,
		params: unknown,
		signal?: AbortSignal,
		onUpdate?: unknown,
		ctx?: unknown,
	) => Promise<ToolResult>;
	[key: string]: unknown;
};
82
+
83
/**
 * Minimal structural view of the host extension API used by this module:
 * `registerTool` is required, `appendEntry` is optional, and any other
 * string/symbol-keyed members are untyped.
 */
type PiLike = Record<string | symbol, unknown> & {
	registerTool(tool: ToolSpec): void;
	appendEntry?(type: string, data: unknown): void;
};
87
+
88
/** A provider extension entry point: a function invoked with a `PiLike` host object. */
type ProviderExtension = (pi: PiLike) => void;
89
+
90
/** Provider tools keyed by tool name (looked up as "web_search" and "fetch_content" in this file). */
type CapturedProviderTools = Map<string, ToolSpec>;
91
+
92
/**
 * A remembered fetch failure. `code`, `message`, and `extra` are replayed through
 * `errorToolResult` on a negative-cache hit; `reason` and `createdAt` are filled
 * in when the failure is recorded by `cachedFetchFailureResult`.
 */
type FetchFailure = {
	code: string;
	message: string;
	extra: Record<string, unknown>;
	reason?: string;
	createdAt?: string;
};
99
+
100
// Names of the tools a web provider extension is expected to register.
// NOTE(review): not referenced in the visible part of the file; presumably used when capturing provider tools — confirm.
const PROVIDER_TOOL_NAMES = new Set([
	"web_search",
	"code_search",
	"fetch_content",
	"get_search_content",
]);
106
+
107
/**
 * Registers the workflow web-source tools on `pi`:
 * `workflow_web_search`, `workflow_web_fetch_source`, and `workflow_web_source_read`.
 *
 * All three tools share one visible-character budget, one in-memory source cache,
 * one in-flight fetch map, and one in-memory failure cache, all created per call
 * to this function.
 *
 * @param pi Host object the tools are registered on.
 * @param config Launch config; its policies are normalized once at registration.
 * @param providerExtension Optional provider extension; when given it is invoked
 *   with the object returned by `providerCapturePi` rather than with `pi` directly.
 */
export function registerWorkflowWebSourceExtension(
	pi: PiLike,
	config: WorkflowWebSourceLaunchConfig,
	providerExtension?: ProviderExtension,
): void {
	const policy = normalizeWorkflowWebSourcePolicy(config.webSourcePolicy);
	const security = normalizeWorkflowWebSecurityPolicy(config.securityPolicy);
	const budget = createWorkflowWebVisibleBudget(policy.perTaskVisibleCharBudget);
	const providerTools: CapturedProviderTools = new Map();
	const sourceCache: Map<string, WorkflowWebSource> = new Map();
	const fetchInFlight: Map<string, Promise<ReturnType<typeof toolResultFromJson>>> = new Map();
	const fetchFailures: Map<string, FetchFailure> = new Map();

	if (providerExtension) {
		// NOTE(review): providerCapturePi presumably records the provider's tools into providerTools — defined outside this view; confirm.
		providerExtension(providerCapturePi(pi, providerTools, Boolean(config.exposeLegacyTools)));
	}

	pi.registerTool({
		name: "workflow_web_search",
		description:
			"Search the web through the workflow web-source provider and return compact candidate cards only.",
		parameters: Type.Object({
			query: Type.Optional(Type.String({ description: "Single search query." })),
			queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple search queries." })),
			numResults: Type.Optional(Type.Number({ description: "Results per query." })),
		}),
		execute: async (toolCallId, params, signal, onUpdate, ctx) => {
			const providerTool = providerTools.get("web_search");
			if (!providerTool?.execute) {
				const missing = missingProviderStatus(providerTools, "search");
				await recordWorkflowWebSourceEvent(config, "missing_provider", {
					tool: "workflow_web_search",
					code: missing.code,
				});
				return errorToolResult(missing.code, missing.message);
			}
			// Default the provider's `workflow` parameter to "none" unless the caller supplied one.
			const providerParams = isRecord(params)
				? { ...params, workflow: params.workflow ?? "none" }
				: params;
			const result = await providerTool.execute(
				toolCallId,
				providerParams,
				signal,
				onUpdate,
				ctx,
			);
			// Each candidate snippet is clipped against the shared budget, so later
			// candidates may receive shorter (or empty) snippets.
			const candidates = extractSearchCandidates(result, policy).map((candidate) => {
				const consumed = consumeText(candidate.snippet, policy.searchSnippetChars);
				return {
					...candidate,
					snippet: consumed.text,
					budget: consumed.budget,
				};
			});
			await recordWorkflowWebSourceEvent(config, "search", {
				candidateCount: candidates.length,
				visibleChars: budget.used,
			});
			return toolResultFromJson({
				status: "ok",
				tool: "workflow_web_search",
				candidates,
				budget: budgetSnapshot(),
				next: "Use workflow_web_fetch_source for a promising URL, then workflow_web_source_read for exact evidence quotes.",
			});
		},
	});

	pi.registerTool({
		name: "workflow_web_fetch_source",
		description:
			"Fetch one or more URLs into the workflow web-source cache and return compact source cards with sourceRefs.",
		parameters: Type.Object({
			url: Type.Optional(Type.String({ description: "Single URL to fetch into the workflow web-source cache." })),
			urls: Type.Optional(Type.Array(Type.String(), { description: "Multiple URLs to fetch in one tool call. Prefer this over repeated fetch calls when caching several promising sources." })),
			sources: Type.Optional(Type.Array(Type.Object({
				url: Type.String({ description: "URL to fetch into the workflow web-source cache." }),
				title: Type.Optional(Type.String({ description: "Optional source title override." })),
			}), { description: "Multiple URL/title objects to fetch in one tool call." })),
			title: Type.Optional(Type.String({ description: "Optional source title override for single-url fetches." })),
			titles: Type.Optional(Type.Array(Type.String(), { description: "Optional title overrides paired by index with urls." })),
		}),
		execute: async (toolCallId, params, signal, onUpdate, ctx) => {
			const batchRequested = fetchSourceBatchRequested(params);
			if (batchRequested) {
				const requests = fetchSourceRequestsFromParams(params);
				if (requests.length === 0) {
					return errorToolResult(
						"invalid_params",
						"workflow_web_fetch_source requires url, urls, or sources parameters.",
					);
				}
				const results: Array<Record<string, unknown>> = [];
				const cards: Record<string, unknown>[] = [];
				// Batch items are fetched one after another; each gets a derived tool call id.
				for (const [index, request] of requests.entries()) {
					const result = await fetchWorkflowWebSourceOnce(
						`${toolCallId}-${index}`,
						request,
						signal,
						onUpdate,
						ctx,
					);
					const payload = payloadFromToolResult(result);
					const card = isRecord(payload.card) ? payload.card : null;
					if (card) cards.push(card);
					results.push({
						index,
						url: sanitizeUrlForModel(request.url),
						status: typeof payload.status === "string" ? payload.status : "unknown",
						...(typeof payload.code === "string" ? { code: payload.code } : {}),
						...(typeof payload.message === "string" ? { message: payload.message } : {}),
						...(typeof card?.sourceRef === "string" ? { sourceRef: card.sourceRef } : {}),
						// cardIndex points at this item's entry in `cards` (only present when a card was produced).
						...(card ? { cardIndex: cards.length - 1 } : {}),
					});
				}
				// "ok" when every item produced a card, "partial" when some did, "failed" when none did.
				const status =
					cards.length === results.length
						? "ok"
						: cards.length > 0
							? "partial"
							: "failed";
				await recordWorkflowWebSourceEvent(config, "fetch_batch", {
					requested: requests.length,
					succeeded: cards.length,
					visibleChars: budget.used,
				});
				return toolResultFromJson({
					status,
					tool: "workflow_web_fetch_source",
					cards,
					results,
					budget: budgetSnapshot(status !== "ok"),
					next: "Use returned sourceRefs with workflow_web_source_read; batch snippets with reads:[...] or queries:[...] when possible.",
				});
			}
			return await fetchWorkflowWebSourceOnce(toolCallId, params, signal, onUpdate, ctx);
		},
	});

	/**
	 * Fetches a single URL into the source cache and returns a tool result.
	 *
	 * Order of checks: URL policy validation, existing cached source, negative
	 * cache (in-memory, then durable), in-flight fetch for the same key, then a
	 * locked fetch that re-checks the cache and negative cache before fetching.
	 * Errors thrown inside the locked section are converted to an error tool
	 * result rather than propagated.
	 */
	async function fetchWorkflowWebSourceOnce(
		toolCallId: string,
		params: unknown,
		signal?: AbortSignal,
		onUpdate?: unknown,
		ctx?: unknown,
	): Promise<ToolResult> {
		const url = urlFromParams(params);
		if (!url) {
			return errorToolResult(
				"invalid_params",
				"workflow_web_fetch_source requires a url string parameter.",
			);
		}
		const checked = validateWorkflowWebUrl(url, security);
		if (!checked.ok) {
			await recordWorkflowWebSourceEvent(config, "blocked_url", {
				url: sanitizeUrlForModel(url),
				reason: checked.reason,
			});
			return errorToolResult("blocked_url", "URL blocked by workflow web-source security policy.", {
				reason: checked.reason,
				url: sanitizeUrlForModel(url),
			});
		}
		const fetchUrl = canonicalWorkflowWebFetchUrl(checked.normalizedUrl);
		const existing = await findWorkflowWebSourceByUrl(config, fetchUrl);
		if (existing) {
			sourceCache.set(existing.sourceRef, existing);
			const card = buildWorkflowWebSourceCard({
				source: existing,
				policy,
				budget,
				duplicate: true,
			});
			await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
				sourceRef: existing.sourceRef,
				url: existing.redactedUrl,
				visibleChars: budget.used,
			});
			return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
		}
		const fetchKey = sourceUrlCacheKey(fetchUrl);
		// In-memory failures are consulted first; the durable record is only read on a miss.
		const cachedFailure = fetchFailures.get(fetchKey) ?? await readDurableFetchFailure(config, fetchKey);
		if (cachedFailure) {
			fetchFailures.set(fetchKey, cachedFailure);
			await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
				url: sanitizeUrlForModel(fetchUrl),
				code: cachedFailure.code,
			});
			return errorToolResult(cachedFailure.code, cachedFailure.message, cachedFailure.extra);
		}
		// No await occurs between this lookup and the fetchInFlight.set below, so
		// two callers in this process cannot both miss and start the same fetch.
		const inFlight = fetchInFlight.get(fetchKey);
		if (inFlight) {
			const result = await inFlight;
			const source = await findWorkflowWebSourceByUrl(config, fetchUrl);
			// If the other fetch did not produce a cached source, pass its result through unchanged.
			if (!source) return result;
			sourceCache.set(source.sourceRef, source);
			const card = buildWorkflowWebSourceCard({ source, policy, budget, duplicate: true });
			await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
				sourceRef: source.sourceRef,
				url: source.redactedUrl,
				visibleChars: budget.used,
			});
			return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
		}
		const fetchPromise = withWorkflowWebFetchLock(config, fetchKey, signal, async () => {
			// Re-check under the lock: the source or a failure may have been recorded while waiting.
			const lockedExisting = await findWorkflowWebSourceByUrl(config, fetchUrl);
			if (lockedExisting) {
				sourceCache.set(lockedExisting.sourceRef, lockedExisting);
				const card = buildWorkflowWebSourceCard({ source: lockedExisting, policy, budget, duplicate: true });
				await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
					sourceRef: lockedExisting.sourceRef,
					url: lockedExisting.redactedUrl,
					visibleChars: budget.used,
				});
				return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
			}
			const lockedFailure = await readDurableFetchFailure(config, fetchKey);
			if (lockedFailure) {
				fetchFailures.set(fetchKey, lockedFailure);
				await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
					url: sanitizeUrlForModel(fetchUrl),
					code: lockedFailure.code,
				});
				return errorToolResult(lockedFailure.code, lockedFailure.message, lockedFailure.extra);
			}
			let text: string;
			let title = titleFromParams(params);
			let providerKind: string = config.provider.kind;
			let extractionLossy: boolean | undefined;
			if (config.provider.kind === "pi-web-access") {
				// Built-in path: fetch through safeFetchWorkflowWebText instead of a provider tool.
				const safeFetch = await safeFetchWorkflowWebText(
					fetchUrl,
					security,
					signal,
				);
				if (!safeFetch.ok) {
					await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
						url: sanitizeUrlForModel(safeFetch.url),
						reason: safeFetch.reason,
					});
					return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
						code: "blocked_url",
						message:
							"URL was blocked by workflow web-source security policy before content fetch.",
						extra: { reason: safeFetch.reason, url: sanitizeUrlForModel(safeFetch.url) },
						reason: safeFetch.reason,
					});
				}
				text = safeFetch.text;
				// A caller-supplied title takes precedence over the fetched one.
				title = title ?? safeFetch.title;
				extractionLossy = safeFetch.extractionLossy;
				providerKind = "pi-web-access-safe-fetch";
			} else {
				const providerTool = providerTools.get("fetch_content");
				if (!providerTool?.execute) {
					const missing = missingProviderStatus(providerTools, "fetch");
					await recordWorkflowWebSourceEvent(config, "missing_provider", {
						tool: "workflow_web_fetch_source",
						code: missing.code,
					});
					return errorToolResult(missing.code, missing.message);
				}
				// Custom provider fetches are refused outright unless allowPrivateHosts is enabled.
				if (!security.allowPrivateHosts) {
					await recordWorkflowWebSourceEvent(config, "blocked_provider_fetch", {
						url: sanitizeUrlForModel(fetchUrl),
						reason: "untrusted_provider_fetch",
					});
					return errorToolResult(
						"untrusted_provider_fetch",
						"Custom provider fetch_content is disabled unless securityPolicy.allowPrivateHosts is true; use the default safe fetch provider or a trusted provider configuration.",
						{ url: sanitizeUrlForModel(fetchUrl) },
					);
				}
				const providerHostCheck = await validateResolvedHost(fetchUrl, security);
				if (!providerHostCheck.ok) {
					await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
						url: sanitizeUrlForModel(providerHostCheck.url),
						reason: providerHostCheck.reason,
					});
					return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
						code: "blocked_url",
						message: "URL was blocked by workflow web-source security policy before provider fetch.",
						extra: {
							reason: providerHostCheck.reason,
							url: sanitizeUrlForModel(providerHostCheck.url),
						},
						reason: providerHostCheck.reason,
					});
				}
				// The canonical fetch URL overrides any `url` in the caller's params.
				const result = await providerTool.execute(
					toolCallId,
					{ ...(isRecord(params) ? params : {}), url: fetchUrl },
					signal,
					onUpdate,
					ctx,
				);
				// URLs reported in the provider's result are validated as well, after the fetch.
				const providerUrlCheck = await validateProviderResultUrls(result, security);
				if (!providerUrlCheck.ok) {
					await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
						url: sanitizeUrlForModel(providerUrlCheck.url),
						reason: providerUrlCheck.reason,
					});
					return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
						code: "blocked_url",
						message: "Provider result URL was blocked by workflow web-source security policy.",
						extra: {
							reason: providerUrlCheck.reason,
							url: sanitizeUrlForModel(providerUrlCheck.url),
						},
						reason: providerUrlCheck.reason,
					});
				}
				text = extractTextFromToolResult(result);
				title = title ?? extractTitleFromToolResult(result);
			}
			// Whitespace-only text counts as an empty source.
			if (!text.trim()) {
				await recordWorkflowWebSourceEvent(config, "fetch_empty", {
					url: sanitizeUrlForModel(fetchUrl),
				});
				return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
					code: "empty_source",
					message: "Provider returned no extractable text for this URL.",
					extra: { url: sanitizeUrlForModel(fetchUrl) },
					reason: "empty_source",
				});
			}
			const source = createWorkflowWebSource({
				config,
				url: fetchUrl,
				text,
				title,
				provider: providerKind,
				extractionLossy,
			});
			await writeWorkflowWebSource(config, source);
			sourceCache.set(source.sourceRef, source);
			const card = buildWorkflowWebSourceCard({ source, policy, budget });
			await recordWorkflowWebSourceEvent(config, "fetch_write", {
				sourceRef: source.sourceRef,
				url: source.redactedUrl,
				textChars: source.textChars,
				visibleChars: budget.used,
			});
			return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
		}).catch(async (error: unknown) => {
			// Any error from lock acquisition or the locked section becomes an error tool
			// result; only the "fetch_lock_timeout" message gets its own code.
			const message = error instanceof Error ? error.message : "workflow_web_fetch_failed";
			const code = message === "fetch_lock_timeout" ? "fetch_lock_timeout" : "workflow_web_fetch_failed";
			await recordWorkflowWebSourceEvent(config, "fetch_failed", {
				url: sanitizeUrlForModel(fetchUrl),
				code,
			});
			return errorToolResult(code, "Workflow web-source fetch failed before a source could be cached.", {
				url: sanitizeUrlForModel(fetchUrl),
			});
		});
		fetchInFlight.set(fetchKey, fetchPromise);
		try {
			return await fetchPromise;
		} finally {
			// Always clear the in-flight entry, whether the fetch succeeded or failed.
			fetchInFlight.delete(fetchKey);
		}
	}

	pi.registerTool({
		name: "workflow_web_source_read",
		description:
			"Read one or more narrow exact/fuzzy/term-matched snippets from a cached workflow web source by sourceRef.",
		parameters: Type.Object({
			sourceRef: Type.String({ description: "Opaque sourceRef returned by workflow_web_fetch_source." }),
			query: Type.Optional(Type.String({ description: "Exact or fuzzy text to locate in the cached source." })),
			queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple exact/fuzzy texts to locate in one cached source. Prefer this over repeated calls when reading several snippets from the same sourceRef." })),
			exact: Type.Optional(Type.String({ description: "Exact text to locate in the cached source." })),
			exactTexts: Type.Optional(Type.Array(Type.String(), { description: "Multiple exact texts to locate in one cached source." })),
			claim: Type.Optional(Type.String({ description: "Claim to locate when the exact quote is not known. Use with terms for deterministic quote harvesting." })),
			terms: Type.Optional(Type.Array(Type.String(), { description: "Important terms that should co-occur in the returned source window." })),
			reads: Type.Optional(Type.Array(Type.Object({
				query: Type.Optional(Type.String({ description: "Exact or fuzzy text to locate." })),
				exact: Type.Optional(Type.String({ description: "Exact text to locate." })),
				exactText: Type.Optional(Type.String({ description: "Exact text to locate." })),
				text: Type.Optional(Type.String({ description: "Text to locate." })),
				claim: Type.Optional(Type.String({ description: "Claim to locate when exact quote is unknown." })),
				terms: Type.Optional(Type.Array(Type.String(), { description: "Important terms for deterministic quote harvesting." })),
				maxChars: Type.Optional(Type.Number({ description: "Maximum visible snippet characters for this read." })),
			}), { description: "Mixed batch reads for one sourceRef; each item can use query or claim+terms." })),
			maxChars: Type.Optional(Type.Number({ description: "Maximum visible snippet characters per query." })),
		}),
		execute: async (_toolCallId, params) => {
			// Accepts the snake_case `source_ref` spelling as a fallback.
			const sourceRef = stringParam(params, "sourceRef") ?? stringParam(params, "source_ref");
			const requests = sourceReadRequestsFromParams(params);
			if (!sourceRef || requests.length === 0) {
				return errorToolResult(
					"invalid_params",
					"workflow_web_source_read requires sourceRef and query/exactText, claim/terms, queries/exactTexts, or reads parameters.",
				);
			}
			const source = await readCachedWorkflowWebSource(sourceRef);
			if (!source) {
				await recordWorkflowWebSourceEvent(config, "source_read_missing", { sourceRef });
				return errorToolResult("source_not_found", "No cached workflow web source exists for sourceRef.", {
					sourceRef,
				});
			}
			// Caller-supplied maxChars (top-level or per-request) can only lower the
			// policy ceiling, never raise it.
			const maxChars = positiveIntParam(params, "maxChars") ?? policy.sourceReadMaxChars;
			const perQueryMaxChars = Math.min(maxChars, policy.sourceReadMaxChars);
			const reads = readWorkflowWebSourceSnippets({
				source,
				requests: requests.map((request) => ({
					...request,
					maxChars: Math.min(request.maxChars ?? perQueryMaxChars, policy.sourceReadMaxChars),
				})),
				maxChars: perQueryMaxChars,
				budget,
			});
			const results = reads.map((read, index) => {
				// NOTE(review): assumes readWorkflowWebSourceSnippets returns one read per request, in order — confirm.
				const request = requests[index]!;
				const status = sourceReadResponseStatus(read);
				return {
					index,
					...(request.query ? { query: request.query } : {}),
					...(request.claim ? { claim: request.claim } : {}),
					...(request.terms?.length ? { terms: request.terms } : {}),
					status,
					matchType: read.matchType,
					matchedTerms: read.matchedTerms,
					missingTerms: read.missingTerms,
					coverageRatio: read.coverageRatio,
					candidateOnly: read.candidateOnly,
					// The quote is withheld when the budget is exhausted.
					quote: status === "budget_exhausted" ? undefined : read.quote,
					startOffset: read.startOffset,
					endOffset: read.endOffset,
					visibleChars: read.visibleChars,
				};
			});
			const responseStatus = aggregateSourceReadStatus(results.map((result) => result.status));
			const visibleChars = results.reduce((total, result) => total + result.visibleChars, 0);
			await recordWorkflowWebSourceEvent(config, "source_read", {
				sourceRef,
				status: responseStatus,
				resultCount: results.length,
				visibleChars,
			});
			// A single non-batch request gets a flat response; everything else gets a `results` array.
			if (requests.length === 1 && !sourceReadBatchRequested(params)) {
				const result = results[0]!;
				return toolResultFromJson({
					status: result.status,
					tool: "workflow_web_source_read",
					sourceRef,
					url: source.redactedUrl,
					...(result.query ? { query: result.query } : {}),
					...(result.claim ? { claim: result.claim } : {}),
					...(result.terms?.length ? { terms: result.terms } : {}),
					matchType: result.matchType,
					matchedTerms: result.matchedTerms,
					missingTerms: result.missingTerms,
					coverageRatio: result.coverageRatio,
					candidateOnly: result.candidateOnly,
					quote: result.status === "budget_exhausted" ? undefined : result.quote,
					startOffset: result.startOffset,
					endOffset: result.endOffset,
					budget: budgetSnapshot(result.status === "budget_exhausted"),
					next:
						result.status === "budget_exhausted"
							? "Visible web-source budget is exhausted for this task; cite the sourceRef as an evidence gap or use a smaller query in a fresh task."
							: undefined,
				});
			}
			return toolResultFromJson({
				status: responseStatus,
				tool: "workflow_web_source_read",
				sourceRef,
				url: source.redactedUrl,
				results,
				budget: budgetSnapshot(results.some((result) => result.status === "budget_exhausted")),
				next:
					responseStatus === "budget_exhausted"
						? "Visible web-source budget is exhausted for this task; cite missing quotes as evidence gaps or use smaller query batches in a fresh task."
						: undefined,
			});
		},
	});

	/** Returns the source for `sourceRef` from the in-memory cache, falling back to `readWorkflowWebSource` and caching a hit. */
	async function readCachedWorkflowWebSource(sourceRef: string): Promise<WorkflowWebSource | undefined> {
		const cached = sourceCache.get(sourceRef);
		if (cached) return cached;
		const source = await readWorkflowWebSource(config, sourceRef);
		if (source) sourceCache.set(sourceRef, source);
		return source;
	}

	/**
	 * Clips `text` to the smaller of `maxChars` and the remaining budget, charges
	 * the clipped length to the shared budget, and reports whether text was cut.
	 */
	function consumeText(text: string, maxChars: number) {
		const remainingBefore = Math.max(0, budget.limit - budget.used);
		const allowed = Math.max(0, Math.min(maxChars, remainingBefore));
		const visible = text.slice(0, allowed);
		budget.used += visible.length;
		return { text: visible, budget: budgetSnapshot(text.length > allowed) };
	}

	/** Snapshot of the shared budget; `truncated` is whatever the caller passes (default false). */
	function budgetSnapshot(truncated = false) {
		return {
			limit: budget.limit,
			used: budget.used,
			remaining: Math.max(0, budget.limit - budget.used),
			truncated,
		};
	}
}
614
+
615
/**
 * Produces the TypeScript source of a generated extension module.
 *
 * The module imports `registerWorkflowWebSourceExtension` from `importPath`,
 * optionally imports the provider extension's default export, and default-exports
 * a function that registers the tools with `config` embedded as a JSON literal.
 *
 * @param options Wrapper inputs without `wrapperPath`.
 * @returns The module source text, ending with a newline.
 */
export function buildWorkflowWebSourceExtensionWrapper(
	options: Omit<WorkflowWebSourceExtensionWrapperOptions, "wrapperPath">,
): string {
	// Either a real default import or a placeholder binding, so the generated
	// call site can reference `providerExtension` unconditionally.
	let providerLine = "const providerExtension = undefined;";
	if (options.providerExtensionPath) {
		const providerSpecifier = JSON.stringify(
			extensionImportSpecifier(options.providerExtensionPath),
		);
		providerLine = `import providerExtension from ${providerSpecifier};`;
	}

	const registerSpecifier = JSON.stringify(extensionImportSpecifier(options.importPath));
	// Indent continuation lines of the pretty-printed config by one extra tab.
	const configLiteral = JSON.stringify(options.config, null, "\t").replace(/\n/g, "\n\t");

	const sourceLines: string[] = [];
	sourceLines.push(`import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";`);
	sourceLines.push(providerLine);
	sourceLines.push(`import { registerWorkflowWebSourceExtension } from ${registerSpecifier};`);
	sourceLines.push("");
	sourceLines.push("export default function workflowWebSourceGeneratedExtension(pi: ExtensionAPI): void {");
	sourceLines.push(` registerWorkflowWebSourceExtension(pi as any, ${configLiteral}, providerExtension as any);`);
	sourceLines.push("}");
	sourceLines.push("");
	return sourceLines.join("\n");
}
632
+
633
/**
 * Generates the wrapper module and writes it to `options.wrapperPath`,
 * creating parent directories as needed.
 *
 * @param options Wrapper inputs including the destination path.
 * @returns The resolved absolute path of the written file.
 */
export async function writeWorkflowWebSourceExtensionWrapper(
	options: WorkflowWebSourceExtensionWrapperOptions,
): Promise<string> {
	const target = resolve(options.wrapperPath);
	await mkdir(dirname(target), { recursive: true });

	const { importPath, providerExtensionPath, config } = options;
	const moduleSource = buildWorkflowWebSourceExtensionWrapper({
		importPath,
		providerExtensionPath,
		config,
	});
	await writeFile(target, moduleSource, "utf8");
	return target;
}
646
+
647
/** Resolves after roughly `ms` milliseconds (timer-based delay). */
async function sleep(ms: number): Promise<void> {
	await new Promise((wake) => {
		setTimeout(wake, ms);
	});
}
650
+
651
/**
 * Builds the error code and message for a missing provider tool.
 *
 * @param providerTools Tools captured from the provider extension.
 * @param capability The capability that was requested.
 * @returns `no_web_provider` when no tools were captured at all, otherwise
 *   `missing_web_capability` naming the requested capability.
 */
function missingProviderStatus(
	providerTools: CapturedProviderTools,
	capability: "search" | "fetch",
): { code: "no_web_provider" | "missing_web_capability"; message: string } {
	const hasAnyProviderTool = providerTools.size !== 0;
	if (hasAnyProviderTool) {
		const message = `The configured workflow web provider does not expose ${capability} capability. Configure a provider with that capability or report the evidence gap.`;
		return { code: "missing_web_capability", message };
	}
	const message =
		"No workflow web provider is configured. Configure a web provider extension or use a workflow without web tools.";
	return { code: "no_web_provider", message };
}
667
+
668
/**
 * Records a fetch failure according to its reason and returns the matching
 * error tool result.
 *
 * Reasons accepted by `shouldCacheFetchFailure` are stored in `cache` and
 * written durably; otherwise, reasons accepted by
 * `shouldCacheFetchFailureInMemory` are stored in `cache` only. Other reasons
 * are not recorded.
 *
 * @param config Cache configuration used for the durable write.
 * @param cache In-memory failure cache to update.
 * @param key Cache key for the URL.
 * @param failure Failure details to record and return.
 * @returns The error tool result built from `failure`.
 */
async function cachedFetchFailureResult(
	config: WorkflowWebSourceCacheConfig,
	cache: Map<string, FetchFailure>,
	key: string,
	failure: { code: string; message: string; extra: Record<string, unknown>; reason: string },
): Promise<ReturnType<typeof toolResultFromJson>> {
	const { code, message, extra, reason } = failure;
	const entry = { code, message, extra, reason, createdAt: new Date().toISOString() };

	const durable = shouldCacheFetchFailure(reason);
	// The in-memory predicate is only consulted when the durable one declines.
	if (durable || shouldCacheFetchFailureInMemory(reason)) {
		cache.set(key, entry);
		if (durable) {
			await writeDurableFetchFailure(config, key, entry);
		}
	}
	return errorToolResult(code, message, extra);
}
689
+
690
/** A lock directory whose mtime is older than this (ms) is treated as stale and removed. */
const FETCH_LOCK_STALE_MS = 60 * 1_000;
/** Longest time (ms) a caller keeps retrying to acquire a fetch lock before giving up. */
const FETCH_LOCK_WAIT_MS = 75 * 1_000;
692
+
693
/**
 * Runs `fn` while holding the fetch lock for `key`, releasing the lock
 * afterwards whether `fn` resolves or rejects.
 *
 * @param config - Cache configuration that locates the lock directory.
 * @param key - Cache key identifying the lock.
 * @param signal - Optional abort signal forwarded to lock acquisition.
 * @param fn - Work to perform while the lock is held.
 * @returns Whatever `fn` resolves to.
 */
async function withWorkflowWebFetchLock<T>(
	config: WorkflowWebSourceCacheConfig,
	key: string,
	signal: AbortSignal | undefined,
	fn: () => Promise<T>,
): Promise<T> {
	const releaseLock = await acquireWorkflowWebFetchLock(config, key, signal);
	try {
		const outcome = await fn();
		return outcome;
	} finally {
		await releaseLock();
	}
}
706
+
707
/**
 * Acquires a directory-based lock for `key` and returns a function that
 * releases it.
 *
 * The lock is the directory returned by `fetchLockPath`; creating it with a
 * non-recursive `mkdir` fails with `EEXIST` while another holder has it. On
 * contention the function removes the lock if it is stale, then retries every
 * 100 ms until `FETCH_LOCK_WAIT_MS` has elapsed.
 *
 * @param config - Cache configuration that locates the lock directory.
 * @param key - Cache key identifying the lock.
 * @param signal - Optional abort signal, checked before every attempt.
 * @returns An async function that removes the lock directory.
 * @throws Error("aborted") when `signal` is aborted before an attempt.
 * @throws Error("fetch_lock_timeout") when the lock stays contended too long.
 * @throws Any non-`EEXIST` filesystem error from creating the lock.
 */
async function acquireWorkflowWebFetchLock(
	config: WorkflowWebSourceCacheConfig,
	key: string,
	signal?: AbortSignal,
): Promise<() => Promise<void>> {
	const lockDir = fetchLockPath(config, key);
	await mkdir(dirname(lockDir), { recursive: true });
	const release = async (): Promise<void> => {
		await rm(lockDir, { recursive: true, force: true });
	};
	const started = Date.now();
	for (;;) {
		if (signal?.aborted) throw new Error("aborted");

		let created = false;
		try {
			await mkdir(lockDir);
			created = true;
		} catch (error) {
			// Only "already exists" means contention; anything else is a real failure.
			if (!isFileExistsError(error)) throw error;
		}

		if (created) {
			try {
				await writeFile(
					resolve(lockDir, "owner.json"),
					`${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`,
					"utf8",
				);
			} catch (error) {
				// We own the directory but could not finish setting it up. Remove it so
				// the key is not blocked until the lock goes stale, then surface the
				// original error.
				await release().catch(() => undefined);
				throw error;
			}
			return release;
		}

		await removeStaleFetchLock(lockDir);
		if (Date.now() - started > FETCH_LOCK_WAIT_MS) {
			throw new Error("fetch_lock_timeout");
		}
		await sleep(100);
	}
}
737
+
738
/**
 * Deletes the lock directory when its mtime is older than
 * `FETCH_LOCK_STALE_MS`. Any filesystem error is ignored; the caller simply
 * retries acquisition.
 *
 * @param lockDir - Path of the lock directory to inspect.
 */
async function removeStaleFetchLock(lockDir: string): Promise<void> {
	try {
		const { mtimeMs } = await stat(lockDir);
		const ageMs = Date.now() - mtimeMs;
		if (ageMs > FETCH_LOCK_STALE_MS) {
			await rm(lockDir, { recursive: true, force: true });
		}
	} catch {
		// The lock vanished or cannot be read: nothing to clean up here.
	}
}
748
+
749
/**
 * Loads the on-disk negative-cache entry for `key`.
 *
 * @param config - Cache configuration that locates the entry.
 * @param key - Cache key of the URL.
 * @returns The normalized failure, or `undefined` when the file is missing,
 *   unreadable, not valid JSON, or does not normalize to a failure.
 */
async function readDurableFetchFailure(
	config: WorkflowWebSourceCacheConfig,
	key: string,
): Promise<FetchFailure | undefined> {
	try {
		const raw = await readFile(fetchFailurePath(config, key), "utf8");
		const decoded: unknown = JSON.parse(raw);
		return normalizeFetchFailure(decoded);
	} catch {
		return undefined;
	}
}
760
+
761
/**
 * Persists a fetch failure as pretty-printed JSON (tagged with a schema id)
 * in the on-disk negative cache, creating the directory when needed.
 *
 * @param config - Cache configuration that locates the entry.
 * @param key - Cache key of the URL.
 * @param failure - Failure record to store.
 */
async function writeDurableFetchFailure(
	config: WorkflowWebSourceCacheConfig,
	key: string,
	failure: FetchFailure,
): Promise<void> {
	const entryFile = fetchFailurePath(config, key);
	await mkdir(dirname(entryFile), { recursive: true });

	const document = { schema: "workflow-web-source-fetch-failure-v1", ...failure };
	await writeFile(entryFile, `${JSON.stringify(document, null, 2)}\n`, "utf8");
}
773
+
774
/**
 * Validates an untrusted value (for example parsed JSON) as a fetch failure.
 *
 * `code` and `message` must be strings. `extra` falls back to an empty object
 * when it is not a record, and `reason` / `createdAt` are kept only when they
 * are strings.
 *
 * @param value - Candidate value.
 * @returns The normalized failure, or `undefined` when the shape is invalid.
 */
function normalizeFetchFailure(value: unknown): FetchFailure | undefined {
	if (!isRecord(value)) return undefined;

	const { code, message, extra, reason, createdAt } = value;
	if (typeof code !== "string" || typeof message !== "string") return undefined;

	const reasonPart = typeof reason === "string" ? { reason } : {};
	const createdAtPart = typeof createdAt === "string" ? { createdAt } : {};
	return {
		code,
		message,
		extra: isRecord(extra) ? extra : {},
		...reasonPart,
		...createdAtPart,
	};
}
786
+
787
/** Absolute path of the lock directory for `key` under `<cacheDir>/fetch-locks`. */
function fetchLockPath(config: WorkflowWebSourceCacheConfig, key: string): string {
	const lockName = fetchCacheFileKey(key);
	return resolve(config.cacheDir, "fetch-locks", lockName);
}
790
+
791
/** Absolute path of the negative-cache JSON file for `key` under `<cacheDir>/fetch-negative-cache`. */
function fetchFailurePath(config: WorkflowWebSourceCacheConfig, key: string): string {
	const fileName = `${fetchCacheFileKey(key)}.json`;
	return resolve(config.cacheDir, "fetch-negative-cache", fileName);
}
794
+
795
/**
 * Returns a file-system-safe name for a cache key.
 *
 * Keys of the exact form `urlkey_` + 32 lowercase hex characters are used
 * verbatim; every other key maps to the shared placeholder `urlkey_invalid`,
 * so arbitrary strings never become path segments.
 */
function fetchCacheFileKey(key: string): string {
	const wellFormedKey = /^urlkey_[a-f0-9]{32}$/;
	if (wellFormedKey.test(key)) {
		return key;
	}
	return "urlkey_invalid";
}
798
+
799
/** True when `error` is an object whose `code` property is `"EEXIST"`. */
function isFileExistsError(error: unknown): boolean {
	if (!isRecord(error)) {
		return false;
	}
	return error.code === "EEXIST";
}
802
+
803
/**
 * Decides whether a failure reason is cached durably (in memory and on disk).
 *
 * @param reason - Failure reason code.
 * @returns `true` for the fixed set of reasons listed below, else `false`.
 */
function shouldCacheFetchFailure(reason: string): boolean {
	switch (reason) {
		case "invalid_url":
		case "unsafe_scheme":
		case "private_host_blocked":
		case "non_public_ip_blocked":
		case "http_404":
		case "http_410":
		case "unsupported_content_type":
			return true;
		default:
			return false;
	}
}
814
+
815
/**
 * Decides whether a failure reason is cached in memory only: empty sources,
 * DNS resolution failures, and any reason mentioning `ENOTFOUND`.
 */
function shouldCacheFetchFailureInMemory(reason: string): boolean {
	if (reason === "empty_source") return true;
	if (reason === "dns_resolution_failed") return true;
	return reason.includes("ENOTFOUND");
}
818
+
819
/** Timeout (ms) applied to each outgoing request via `req.setTimeout`. */
const WORKFLOW_WEB_FETCH_TIMEOUT_MS = 30 * 1_000;
/** Maximum number of decoded characters kept from one response body. */
const WORKFLOW_WEB_FETCH_MAX_CHARS = 1_000 * 1_000;
821
+
822
/**
 * Fetches `url` as text, following redirects manually so every hop is
 * re-validated against the security policy before it is requested.
 *
 * At most six requests are made. Each hop is checked with
 * `validateWorkflowWebUrl`, fetched with `safeFetchOnce`, and then either
 * followed (3xx), rejected (non-2xx), or passed to
 * `extractWorkflowWebResponseText`.
 *
 * @param url - URL to fetch.
 * @param security - Security policy applied to every hop.
 * @param signal - Optional abort signal forwarded to each request.
 * @returns On success the final URL, the extracted text and optional title,
 *   with `extractionLossy` set when extraction was lossy or the body was
 *   truncated. On failure a reason code and the URL that failed. Failure
 *   reasons produced here are `redirect_without_location`,
 *   `invalid_redirect_location`, `http_<status>` and `too_many_redirects`;
 *   validation and request failures are passed through unchanged.
 */
async function safeFetchWorkflowWebText(
	url: string,
	security: WorkflowWebSecurityPolicy,
	signal?: AbortSignal,
): Promise<
	| { ok: true; url: string; text: string; title?: string; extractionLossy?: boolean }
	| { ok: false; reason: string; url: string }
> {
	let current = url;
	for (let redirectCount = 0; redirectCount < 6; redirectCount += 1) {
		const checked = validateWorkflowWebUrl(current, security);
		if (!checked.ok) return { ok: false, reason: checked.reason, url: current };
		const response = await safeFetchOnce(checked.normalizedUrl, security, signal);
		if (!response.ok) return response;
		if (response.status >= 300 && response.status < 400) {
			if (!response.location)
				return { ok: false, reason: "redirect_without_location", url: checked.normalizedUrl };
			try {
				// Location may be relative, so resolve it against the URL just fetched.
				current = new URL(response.location, checked.normalizedUrl).href;
			} catch {
				// A malformed Location header is server-controlled input; report it as a
				// normal failure instead of letting the URL parser's TypeError escape.
				return { ok: false, reason: "invalid_redirect_location", url: checked.normalizedUrl };
			}
			continue;
		}
		if (response.status < 200 || response.status >= 300) {
			return { ok: false, reason: `http_${response.status}`, url: checked.normalizedUrl };
		}
		const extracted = extractWorkflowWebResponseText(response.text, response.contentType);
		return {
			ok: true,
			url: checked.normalizedUrl,
			text: extracted.text,
			title: extracted.title,
			extractionLossy: extracted.lossy || response.truncated,
		};
	}
	return { ok: false, reason: "too_many_redirects", url: current };
}
856
+
857
/**
 * Performs a single GET request (no redirect following) and resolves with the
 * status, selected headers and decoded body text. The returned promise never
 * rejects for request failures; they are reported as `{ ok: false, reason }`.
 *
 * Behaviour worth knowing:
 * - DNS resolution goes through `lookupPublicAddress`, so the socket connects
 *   to the address that was vetted against the security policy.
 * - A 2xx response whose content type is not textual is rejected with
 *   `unsupported_content_type` and the request is torn down without
 *   downloading the body.
 * - The body is capped at `WORKFLOW_WEB_FETCH_MAX_CHARS`; when the cap is hit
 *   the request is destroyed and the partial text is returned with
 *   `truncated: true`.
 * - An already-aborted `signal` short-circuits before any network activity,
 *   and the abort listener is removed once the promise settles.
 * - A response that closes before `end` settles as a failure
 *   (`response_incomplete`, or the timeout/abort reason that caused it)
 *   instead of leaving the promise pending.
 *
 * @param url - Already validated absolute http(s) URL.
 * @param security - Security policy forwarded to the DNS lookup.
 * @param signal - Optional abort signal.
 * @throws TypeError synchronously when `url` cannot be parsed.
 */
function safeFetchOnce(
	url: string,
	security: WorkflowWebSecurityPolicy,
	signal?: AbortSignal,
): Promise<
	| { ok: true; status: number; location?: string; text: string; contentType?: string; truncated?: boolean }
	| { ok: false; reason: string; url: string }
> {
	const parsed = new URL(url);
	const request = parsed.protocol === "https:" ? httpsRequest : httpRequest;
	return new Promise((resolveResult) => {
		let settled = false;
		// Reason we destroyed the request ourselves (timeout / abort), if any.
		let teardownReason: string | undefined;
		let detachAbortListener = (): void => undefined;
		const settle = (
			result:
				| { ok: true; status: number; location?: string; text: string; contentType?: string; truncated?: boolean }
				| { ok: false; reason: string; url: string },
		) => {
			if (settled) return;
			settled = true;
			detachAbortListener();
			resolveResult(result);
		};
		// An "abort" event never fires for a signal that is already aborted, so
		// check up front rather than starting a request nobody wants.
		if (signal?.aborted) {
			settle({ ok: false, reason: "aborted", url });
			return;
		}
		const req = request(
			parsed,
			{
				method: "GET",
				headers: {
					accept: "text/plain,text/html,application/json,application/xml;q=0.9,*/*;q=0.1",
					"user-agent": "pi-workflow-web-source/1",
				},
				lookup(hostname, options, callback) {
					lookupPublicAddress(hostname, security)
						.then((address) => {
							// Node asks for either a list (`all: true`) or a single address.
							if (isLookupAllOptions(options)) {
								callback(null, [{ address: address.address, family: address.family }]);
								return;
							}
							callback(null, address.address, address.family);
						})
						.catch((error: unknown) => {
							const reason = error instanceof Error ? error.message : "dns_resolution_failed";
							callback(new Error(reason), "", 4);
						});
				},
			},
			(res) => {
				res.setEncoding("utf8");
				let text = "";
				let truncated = false;
				const contentType = Array.isArray(res.headers["content-type"])
					? res.headers["content-type"][0]
					: res.headers["content-type"];
				const status = res.statusCode ?? 0;
				if (status >= 200 && status < 300 && contentType && !isWorkflowWebTextContentType(contentType)) {
					settle({ ok: false, reason: "unsupported_content_type", url });
					// Drop the connection instead of draining a body we will never use
					// (it may be arbitrarily large).
					req.destroy();
					return;
				}
				res.on("data", (chunk: string) => {
					if (settled) return;
					if (text.length + chunk.length > WORKFLOW_WEB_FETCH_MAX_CHARS) {
						text += chunk.slice(0, Math.max(0, WORKFLOW_WEB_FETCH_MAX_CHARS - text.length));
						truncated = true;
						req.destroy(new Error("workflow_fetch_truncated"));
						return;
					}
					text += chunk;
				});
				res.on("end", () => {
					const location = Array.isArray(res.headers.location)
						? res.headers.location[0]
						: res.headers.location;
					settle({
						ok: true,
						status,
						...(location ? { location } : {}),
						...(contentType ? { contentType } : {}),
						...(truncated ? { truncated } : {}),
						text,
					});
				});
				res.on("error", () => {
					// Outcome is decided in the "close" handler, which always follows.
				});
				res.on("close", () => {
					if (truncated) {
						settle({
							ok: true,
							status,
							...(contentType ? { contentType } : {}),
							truncated,
							text,
						});
						return;
					}
					// No-op after a normal "end". Otherwise the body was cut short (peer
					// reset, timeout, abort) and the promise must still settle.
					settle({ ok: false, reason: teardownReason ?? "response_incomplete", url });
				});
			},
		);
		req.setTimeout(WORKFLOW_WEB_FETCH_TIMEOUT_MS, () => {
			teardownReason = "fetch_timeout";
			req.destroy(new Error("fetch_timeout"));
		});
		req.on("error", (error: Error) => {
			// Our own truncation teardown is reported from the response "close" handler.
			if (error.message === "workflow_fetch_truncated") return;
			settle({ ok: false, reason: error.message || "url_fetch_failed", url });
		});
		if (signal) {
			const onAbort = (): void => {
				teardownReason = "aborted";
				req.destroy(new Error("aborted"));
			};
			signal.addEventListener("abort", onAbort, { once: true });
			detachAbortListener = () => {
				signal.removeEventListener("abort", onAbort);
			};
		}
		req.end();
	});
}
968
+
969
/**
 * Resolves `hostname` and returns the first address the policy permits.
 *
 * With `security.allowPrivateHosts` the first resolved address is returned as
 * is; otherwise the first address for which `privateIpReason` reports no
 * problem is returned.
 *
 * @throws Error("private_host_blocked") when addresses resolved but none is allowed.
 * @throws Error("dns_resolution_failed") when the lookup returned no addresses.
 * @throws Whatever the DNS lookup itself rejects with.
 */
async function lookupPublicAddress(
	hostname: string,
	security: WorkflowWebSecurityPolicy,
): Promise<{ address: string; family: number }> {
	const resolved = await lookup(hostname, { all: true, verbatim: true });
	const permitted = resolved.find(
		(candidate) => security.allowPrivateHosts || !privateIpReason(candidate.address),
	);
	if (permitted) {
		return permitted;
	}
	const failure = resolved.length > 0 ? "private_host_blocked" : "dns_resolution_failed";
	throw new Error(failure);
}
982
+
983
/**
 * Checks that the host of `url` resolves only to addresses the policy allows.
 *
 * Skipped entirely (always ok) when `security.allowPrivateHosts` is set.
 * Otherwise every resolved address is passed through `privateIpReason`, and
 * the first reported reason fails the check.
 *
 * @returns `{ ok: true }`, or a failure with reason `invalid_url`,
 *   `dns_resolution_failed`, or the reason returned by `privateIpReason`.
 */
async function validateResolvedHost(
	url: string,
	security: WorkflowWebSecurityPolicy,
): Promise<{ ok: true } | { ok: false; reason: string; url: string }> {
	if (security.allowPrivateHosts) return { ok: true };

	let hostname: string;
	try {
		hostname = new URL(url).hostname;
	} catch {
		return { ok: false, reason: "invalid_url", url };
	}

	try {
		const resolved = await lookup(hostname, { all: true, verbatim: true });
		for (const entry of resolved) {
			const blockedBecause = privateIpReason(entry.address);
			if (blockedBecause) {
				return { ok: false, reason: blockedBecause, url };
			}
		}
		return { ok: true };
	} catch {
		return { ok: false, reason: "dns_resolution_failed", url };
	}
}
1005
+
1006
/** True when `options` is an object with `all` set to exactly `true`. */
function isLookupAllOptions(options: unknown): boolean {
	if (!isRecord(options)) {
		return false;
	}
	return options.all === true;
}
1009
+
1010
/**
 * Expands a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles `::` compression, a trailing dotted-quad (`::ffff:1.2.3.4`) and a
 * zone suffix (`%eth0`). Returns `undefined` when the text cannot be expanded.
 */
function ipv6Groups(address: string): number[] | undefined {
	const zoneAt = address.indexOf("%");
	let text = zoneAt === -1 ? address : address.slice(0, zoneAt);

	// Rewrite a trailing dotted quad as two hexadecimal groups.
	const lastColon = text.lastIndexOf(":");
	const tail = text.slice(lastColon + 1);
	if (tail.includes(".")) {
		const octets = tail.split(".").map((octet) => Number(octet));
		if (octets.length !== 4 || octets.some((octet) => !Number.isInteger(octet) || octet < 0 || octet > 255)) {
			return undefined;
		}
		const [o0, o1, o2, o3] = octets as [number, number, number, number];
		text = `${text.slice(0, lastColon + 1)}${((o0 << 8) | o1).toString(16)}:${((o2 << 8) | o3).toString(16)}`;
	}

	const halves = text.split("::");
	if (halves.length > 2) return undefined;
	const parseGroups = (part: string): number[] =>
		part === ""
			? []
			: part.split(":").map((group) => (/^[0-9a-f]{1,4}$/.test(group) ? Number.parseInt(group, 16) : Number.NaN));
	const head = parseGroups(halves[0] ?? "");
	const rest = halves.length === 2 ? parseGroups(halves[1] ?? "") : [];
	const omitted = 8 - head.length - rest.length;
	if (halves.length === 2 ? omitted < 1 : omitted !== 0) return undefined;

	const groups = [...head, ...new Array<number>(halves.length === 2 ? omitted : 0).fill(0), ...rest];
	return groups.some((group) => Number.isNaN(group)) ? undefined : groups;
}

/**
 * Classifies an IP address literal as non-public.
 *
 * @param address - IPv4 or IPv6 literal; surrounding `[` `]` are tolerated.
 *   Strings that are not IP literals (for example host names) are not judged.
 * @returns `"private_host_blocked"` when the address is loopback, private,
 *   link-local, multicast, documentation, or otherwise outside public unicast
 *   space; `undefined` when it is public or not an IP literal.
 *
 * IPv6 is judged on the parsed groups, not on string prefixes, so every
 * textual spelling of an address gets the same verdict:
 * - IPv4-mapped (`::ffff:0:0/96`) and NAT64 well-known-prefix
 *   (`64:ff9b::/96`) addresses are judged by the IPv4 address they embed.
 * - Everything outside global unicast `2000::/3` is blocked. That covers
 *   `::`, `::1`, unique-local `fc00::/7`, the whole link-local `fe80::/10`
 *   range and multicast `ff00::/8`.
 * - The documentation prefix `2001:db8::/32` is blocked exactly (neighbouring
 *   prefixes such as `2001:db80::` are not).
 */
function privateIpReason(address: string): string | undefined {
	const blocked = "private_host_blocked";
	const lower = address.toLowerCase().replace(/^\[|\]$/g, "");
	const family = isIP(lower);

	if (family === 4) {
		const parts = lower.split(".").map((part) => Number(part));
		if (parts.length !== 4 || parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)) return blocked;
		const [a, b, c] = parts as [number, number, number, number];
		// 0/8 "this network", 10/8 private, 127/8 loopback, 224/4 multicast and
		// 240/4 reserved (which includes the 255.255.255.255 broadcast address).
		if (a === 0 || a === 10 || a === 127 || a >= 224) return blocked;
		if (a === 100 && b >= 64 && b <= 127) return blocked; // 100.64/10 shared address space
		if (a === 169 && b === 254) return blocked; // link-local
		if (a === 172 && b >= 16 && b <= 31) return blocked; // 172.16/12 private
		if (a === 192 && b === 168) return blocked; // 192.168/16 private
		if (a === 192 && b === 0 && (c === 0 || c === 2)) return blocked; // 192.0.0/24, 192.0.2/24
		if (a === 198 && (b === 18 || b === 19)) return blocked; // 198.18/15 benchmarking
		if (a === 198 && b === 51 && c === 100) return blocked; // documentation
		if (a === 203 && b === 0 && c === 113) return blocked; // documentation
		return undefined;
	}

	if (family === 6) {
		const groups = ipv6Groups(lower);
		// Fail closed if a literal Node accepts cannot be expanded here.
		if (!groups) return blocked;
		const [g0, g1, g2, g3, g4, g5, g6, g7] = groups as [
			number,
			number,
			number,
			number,
			number,
			number,
			number,
			number,
		];
		const embeddedIpv4 = `${g6 >> 8}.${g6 & 255}.${g7 >> 8}.${g7 & 255}`;
		const firstFiveZero = g0 === 0 && g1 === 0 && g2 === 0 && g3 === 0 && g4 === 0;
		if (firstFiveZero && g5 === 0xffff) return privateIpReason(embeddedIpv4);
		if (g0 === 0x64 && g1 === 0xff9b && g2 === 0 && g3 === 0 && g4 === 0 && g5 === 0) {
			return privateIpReason(embeddedIpv4);
		}
		if ((g0 & 0xe000) !== 0x2000) return blocked;
		if (g0 === 0x2001 && g1 === 0x0db8) return blocked;
		return undefined;
	}

	return undefined;
}
1043
+
1044
/**
 * Vets the URLs and resolved IPs a provider reported in its result.
 *
 * Every URL from `providerResultUrls` must pass `validateWorkflowWebUrl` and
 * `validateResolvedHost`. Unless private hosts are allowed, every address
 * from `providerResolvedIps` must also pass `privateIpReason`.
 *
 * @returns `{ ok: true }`, or the first failure with its reason and the
 *   offending URL (or IP address) in `url`.
 */
async function validateProviderResultUrls(
	result: unknown,
	security: WorkflowWebSecurityPolicy,
): Promise<{ ok: true } | { ok: false; reason: string; url: string }> {
	for (const candidate of providerResultUrls(result)) {
		const syntaxCheck = validateWorkflowWebUrl(candidate, security);
		if (!syntaxCheck.ok) {
			return { ok: false, reason: syntaxCheck.reason, url: candidate };
		}
		const hostCheck = await validateResolvedHost(syntaxCheck.normalizedUrl, security);
		if (!hostCheck.ok) {
			return hostCheck;
		}
	}

	if (security.allowPrivateHosts) {
		return { ok: true };
	}
	for (const ip of providerResolvedIps(result)) {
		const blockedBecause = privateIpReason(ip);
		if (blockedBecause) {
			return { ok: false, reason: blockedBecause, url: ip };
		}
	}
	return { ok: true };
}
1062
+
1063
/**
 * Collects the distinct URL strings found in `result.details`.
 *
 * Looks at the `finalUrl`, `resolvedUrl`, `effectiveUrl` and `url` fields of
 * `details`, then at each entry of `details.urls`: string entries are taken
 * directly, object entries are searched for the same four fields. Order of
 * first appearance is preserved.
 */
function providerResultUrls(result: unknown): string[] {
	if (!isRecord(result)) return [];
	const details = result.details;
	if (!isRecord(details)) return [];

	const urlFields = ["finalUrl", "resolvedUrl", "effectiveUrl", "url"];
	const found = new Set<string>();
	const collectFrom = (record: Record<string, unknown>): void => {
		for (const field of urlFields) {
			const candidate = record[field];
			if (typeof candidate === "string") found.add(candidate);
		}
	};

	collectFrom(details);
	const listed = details.urls;
	if (Array.isArray(listed)) {
		for (const entry of listed) {
			if (typeof entry === "string") {
				found.add(entry);
			} else if (isRecord(entry)) {
				collectFrom(entry);
			}
		}
	}
	return [...found];
}
1086
+
1087
/**
 * Collects the distinct IP strings found in `result.details`: the
 * `resolvedIp`, `ip` and `address` fields plus every string entry of
 * `details.resolvedIps`, in order of first appearance.
 */
function providerResolvedIps(result: unknown): string[] {
	if (!isRecord(result)) return [];
	const details = result.details;
	if (!isRecord(details)) return [];

	const found = new Set<string>();
	for (const field of ["resolvedIp", "ip", "address"]) {
		const candidate = details[field];
		if (typeof candidate === "string") found.add(candidate);
	}

	const listed = details.resolvedIps;
	if (Array.isArray(listed)) {
		for (const entry of listed) {
			if (typeof entry === "string") found.add(entry);
		}
	}
	return [...found];
}
1104
+
1105
/**
 * Wraps `pi` in a proxy that intercepts what a provider extension does to it.
 *
 * - `registerTool`: tools whose name is in `PROVIDER_TOOL_NAMES` are recorded
 *   in `providerTools`; they are forwarded to the real `pi` only when
 *   `exposeLegacyTools` is set. All other tools are forwarded unchanged.
 * - `appendEntry` / `sendMessage`: replaced by functions that do nothing.
 * - Everything else is read from `pi`, with functions bound to `pi`.
 *
 * @param pi - The real host object.
 * @param providerTools - Map that receives captured provider tools by name.
 * @param exposeLegacyTools - Whether captured tools are also registered on `pi`.
 */
function providerCapturePi(
	pi: PiLike,
	providerTools: CapturedProviderTools,
	exposeLegacyTools: boolean,
): PiLike {
	const handler: ProxyHandler<PiLike> = {
		get(target, property, receiver) {
			switch (property) {
				case "registerTool":
					return (tool: ToolSpec) => {
						const toolName = tool.name;
						if (toolName && PROVIDER_TOOL_NAMES.has(toolName)) {
							providerTools.set(toolName, tool);
							if (!exposeLegacyTools) return;
						}
						target.registerTool(tool);
					};
				case "appendEntry":
				case "sendMessage":
					return () => undefined;
				default: {
					const member = Reflect.get(target, property, receiver);
					if (typeof member === "function") {
						return member.bind(target);
					}
					return member;
				}
			}
		},
	};
	return new Proxy(pi, handler) as PiLike;
}
1129
+
1130
/** One source to fetch, as extracted from tool parameters. */
interface WorkflowWebFetchSourceRequest {
	/** URL of the source. */
	url: string;
	/** Optional title supplied alongside the URL. */
	title?: string;
}
1134
+
1135
/** True when `params` is an object carrying a `urls` or `sources` array (even an empty one). */
function fetchSourceBatchRequested(params: unknown): boolean {
	if (!isRecord(params)) {
		return false;
	}
	return Array.isArray(params.urls) || Array.isArray(params.sources);
}
1141
+
1142
/**
 * Produces a canonical spelling of `url` for comparison purposes.
 *
 * Steps: lowercase the host name, drop the fragment unless
 * `shouldKeepWorkflowWebFragment` says to keep it, strip one trailing slash
 * from non-root paths, and sort query parameters by name (order among equal
 * names is kept). Input that does not parse as a URL is returned trimmed.
 */
function canonicalWorkflowWebFetchUrl(url: string): string {
	let target: URL;
	try {
		target = new URL(url);
	} catch {
		return url.trim();
	}

	target.hostname = target.hostname.toLowerCase();
	if (!shouldKeepWorkflowWebFragment(target.hash)) {
		target.hash = "";
	}

	const path = target.pathname;
	if (path.length > 1 && path.endsWith("/")) {
		target.pathname = path.slice(0, -1);
	}

	const pairs = Array.from(target.searchParams.entries());
	pairs.sort((first, second) => first[0].localeCompare(second[0]));
	target.search = "";
	pairs.forEach(([name, value]) => {
		target.searchParams.append(name, value);
	});

	return target.href;
}
1161
+
1162
/**
 * Decides whether a URL fragment must be kept when canonicalizing.
 *
 * A fragment is kept when, after removing one leading `#`, it starts with
 * `/` or `!`, or contains `?`. An empty fragment is never kept.
 */
function shouldKeepWorkflowWebFragment(hash: string): boolean {
	if (hash === "") {
		return false;
	}
	const fragment = hash.replace(/^#/, "");
	const startsLikeRoute = ["/", "!"].some((prefix) => fragment.startsWith(prefix));
	return startsLikeRoute || fragment.includes("?");
}
1167
+
1168
/**
 * Extracts fetch requests from loosely typed tool parameters.
 *
 * Sources are gathered in this order: `params.sources` (objects with `url`
 * and optional `title`), `params.urls` (strings, titled by the entry at the
 * same index of `params.titles`), then the single `params.url` /
 * `params.title` pair. URLs and titles are trimmed; entries without a
 * non-empty string URL are skipped and blank titles are dropped. The list is
 * de-duplicated and capped at 20 requests.
 */
function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceRequest[] {
	if (!isRecord(params)) return [];

	const collected: WorkflowWebFetchSourceRequest[] = [];
	const addRequest = (rawUrl: unknown, rawTitle: unknown): void => {
		if (typeof rawUrl !== "string") return;
		const url = rawUrl.trim();
		if (!url) return;
		const title = typeof rawTitle === "string" ? rawTitle.trim() : "";
		collected.push(title ? { url, title } : { url });
	};

	const titles = Array.isArray(params.titles) ? params.titles : [];

	if (Array.isArray(params.sources)) {
		for (const source of params.sources) {
			if (isRecord(source)) addRequest(source.url, source.title);
		}
	}
	if (Array.isArray(params.urls)) {
		for (const [position, url] of params.urls.entries()) {
			addRequest(url, titles[position]);
		}
	}
	addRequest(params.url, params.title);

	return dedupeFetchSourceRequests(collected).slice(0, 20);
}
1203
+
1204
/**
 * Drops requests whose URL maps to a cache key (`sourceUrlCacheKey`) already
 * seen earlier in the list; the first occurrence wins and order is kept.
 */
function dedupeFetchSourceRequests(requests: WorkflowWebFetchSourceRequest[]): WorkflowWebFetchSourceRequest[] {
	const seenKeys = new Set<string>();
	return requests.filter((candidate) => {
		const cacheKey = sourceUrlCacheKey(candidate.url);
		if (seenKeys.has(cacheKey)) {
			return false;
		}
		seenKeys.add(cacheKey);
		return true;
	});
}
1215
+
1216
/**
 * Parses the first textual content item of a tool result as a JSON object.
 *
 * @returns The parsed object, or `{}` when there is no text item, the text is
 *   not valid JSON, or the JSON value is not a plain object.
 */
function payloadFromToolResult(result: ToolResult): Record<string, unknown> {
	const textItem = result.content?.find((item) => typeof item.text === "string");
	const raw = textItem?.text;
	if (typeof raw !== "string") return {};

	let decoded: unknown;
	try {
		decoded = JSON.parse(raw);
	} catch {
		return {};
	}
	return isRecord(decoded) ? decoded : {};
}
1226
+
1227
/**
 * Picks a single URL from tool parameters: `params.url` when it is a string,
 * otherwise the first string entry of `params.urls`. The value is returned
 * untrimmed.
 */
function urlFromParams(params: unknown): string | undefined {
	if (!isRecord(params)) return undefined;

	const single = params.url;
	if (typeof single === "string") return single;

	const many = params.urls;
	if (!Array.isArray(many)) return undefined;
	for (const candidate of many) {
		if (typeof candidate === "string") return candidate;
	}
	return undefined;
}
1235
+
1236
/** Reads the `title` parameter via `stringParam`. */
function titleFromParams(params: unknown): string | undefined {
	const title = stringParam(params, "title");
	return title;
}
1239
+
1240
/**
 * Extracts source-read requests from loosely typed tool parameters.
 *
 * Requests are gathered in this order: entries of `params.reads`, one
 * query-only request per string in `queries`, `exactTexts` and `texts`, and
 * finally a request built from the top-level `query` (or its aliases
 * `exactText`, `exact`, `text`), `claim` and `terms`. The list is
 * de-duplicated and capped at 20 requests.
 */
function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadRequest[] {
	const collected: WorkflowWebSourceReadRequest[] = [];

	const reads = isRecord(params) ? params.reads : undefined;
	if (Array.isArray(reads)) {
		for (const entry of reads) {
			const parsed = sourceReadRequestFromRecord(entry);
			if (parsed) collected.push(parsed);
		}
	}

	for (const listKey of ["queries", "exactTexts", "texts"]) {
		for (const query of stringArrayParam(params, listKey)) {
			collected.push({ query });
		}
	}

	// The first alias that yields a value wins; later aliases are not consulted.
	let query: string | undefined;
	for (const alias of ["query", "exactText", "exact", "text"]) {
		query = stringParam(params, alias);
		if (query !== undefined) break;
	}
	const claim = stringParam(params, "claim");
	const terms = stringArrayParam(params, "terms");
	if (query || claim || terms.length > 0) {
		collected.push({ query, claim, terms });
	}

	return dedupeSourceReadRequests(collected).slice(0, 20);
}
1261
+
1262
/**
 * Builds one source-read request from an entry of `params.reads`.
 *
 * `query` accepts the aliases `exactText`, `exact` and `text` (first one with
 * a value wins). `maxChars` is kept only when it is a positive integer.
 *
 * @returns The request, or `undefined` when `value` is not an object or
 *   carries neither a query, a claim, nor any terms.
 */
function sourceReadRequestFromRecord(value: unknown): WorkflowWebSourceReadRequest | undefined {
	if (!isRecord(value)) return undefined;

	let query: string | undefined;
	for (const alias of ["query", "exactText", "exact", "text"]) {
		query = stringParam(value, alias);
		if (query !== undefined) break;
	}
	const claim = stringParam(value, "claim");
	const terms = stringArrayParam(value, "terms");
	const maxChars = positiveIntParam(value, "maxChars");

	const hasContent = Boolean(query) || Boolean(claim) || terms.length > 0;
	if (!hasContent) return undefined;
	return { query, claim, terms, maxChars };
}
1275
+
1276
/**
 * Removes duplicate read requests, keeping the first occurrence and the
 * original order. Two requests are duplicates when their query and claim
 * match case-insensitively, their terms match case-insensitively regardless
 * of order, and their `maxChars` are equal.
 */
function dedupeSourceReadRequests(requests: WorkflowWebSourceReadRequest[]): WorkflowWebSourceReadRequest[] {
	const seenFingerprints = new Set<string>();
	return requests.filter((candidate) => {
		const loweredTerms = candidate.terms?.map((term) => term.toLowerCase());
		const fingerprint = JSON.stringify({
			query: candidate.query?.toLowerCase(),
			claim: candidate.claim?.toLowerCase(),
			terms: loweredTerms?.sort(),
			maxChars: candidate.maxChars,
		});
		if (seenFingerprints.has(fingerprint)) {
			return false;
		}
		seenFingerprints.add(fingerprint);
		return true;
	});
}
1292
+
1293
/**
 * True when the parameters ask for more than a single read: a non-empty
 * `reads` array, or at least one usable string in `queries`, `exactTexts` or
 * `texts`.
 */
function sourceReadBatchRequested(params: unknown): boolean {
	if (isRecord(params) && Array.isArray(params.reads) && params.reads.length > 0) {
		return true;
	}
	return ["queries", "exactTexts", "texts"].some((listKey) => stringArrayParam(params, listKey).length > 0);
}
1301
+
1302
/** Status reported to the tool caller for a single source read. */
type SourceReadToolStatus =
	| "ok"
	| "candidate"
	| "budget_exhausted"
	| "not_found";
1303
+
1304
/**
 * Maps a read result to the status reported to the tool caller.
 *
 * - not matched → `not_found`
 * - matched but without a quote → `budget_exhausted`
 * - matched with a quote, flagged `candidateOnly` → `candidate`
 * - matched with a quote otherwise → `ok`
 */
function sourceReadResponseStatus(
	read: WorkflowWebSourceReadResult,
): SourceReadToolStatus {
	if (read.status !== "matched") {
		return "not_found";
	}
	if (!read.quote) {
		return "budget_exhausted";
	}
	return read.candidateOnly ? "candidate" : "ok";
}
1312
+
1313
/**
 * Summarizes the statuses of a batch of reads: when every read has the same
 * status that status is returned, otherwise `partial`. An empty list yields
 * `ok`, because the all-`ok` check is made first and holds vacuously.
 */
function aggregateSourceReadStatus(
	statuses: SourceReadToolStatus[],
): "ok" | "candidate" | "partial" | "budget_exhausted" | "not_found" {
	const checkOrder = ["ok", "candidate", "not_found", "budget_exhausted"] as const;
	for (const expected of checkOrder) {
		const unanimous = statuses.every((status) => status === expected);
		if (unanimous) {
			return expected;
		}
	}
	return "partial";
}
1322
+
1323
/**
 * Reads `params[key]` as a list of strings: non-string entries are skipped,
 * the rest are trimmed, and entries that are empty after trimming are
 * dropped. Anything that is not an array yields `[]`.
 */
function stringArrayParam(params: unknown, key: string): string[] {
	if (!isRecord(params)) return [];
	const raw = params[key];
	if (!Array.isArray(raw)) return [];

	const cleaned: string[] = [];
	for (const entry of raw) {
		if (typeof entry !== "string") continue;
		const trimmed = entry.trim();
		if (trimmed) cleaned.push(trimmed);
	}
	return cleaned;
}
1332
+
1333
/**
 * Reads `params[key]` as a trimmed, non-empty string; returns `undefined` for
 * anything else (including whitespace-only strings).
 */
function stringParam(params: unknown, key: string): string | undefined {
	if (!isRecord(params)) return undefined;
	const raw = params[key];
	if (typeof raw !== "string") return undefined;

	const trimmed = raw.trim();
	return trimmed ? trimmed : undefined;
}
1338
+
1339
/** Reads `params[key]` as an integer greater than zero; returns `undefined` for anything else. */
function positiveIntParam(params: unknown, key: string): number | undefined {
	if (!isRecord(params)) return undefined;
	const raw = params[key];
	if (typeof raw !== "number") return undefined;
	if (!Number.isInteger(raw) || raw <= 0) return undefined;
	return raw;
}
1346
+
1347
/**
 * True when a Content-Type value denotes text: any `text/*` type, JSON / XML /
 * XHTML / JSON-LD application types, or any type with a `+json` / `+xml`
 * suffix. Matching is case-insensitive and ignores surrounding whitespace.
 */
function isWorkflowWebTextContentType(contentType: string): boolean {
	const textualMediaType = /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i;
	const candidate = contentType.trim();
	return textualMediaType.test(candidate);
}
1350
+
1351
/**
 * Turns a response body into plain text.
 *
 * The body is treated as HTML when the content type mentions `html` or the
 * text contains an `<html>`, `<body>` or `<title>` tag. Non-HTML bodies are
 * returned unchanged, with a title taken from `titleFromPlainText`. HTML
 * bodies have script, style, noscript and svg elements removed, remaining
 * tags stripped, entities decoded and whitespace collapsed; the title comes
 * from the `<title>` element (at most 200 characters) and the result is
 * flagged `lossy`.
 */
function extractWorkflowWebResponseText(
	text: string,
	contentType?: string,
): { text: string; title?: string; lossy?: boolean } {
	const declaredHtml = /html/i.test(contentType ?? "");
	const looksHtml = declaredHtml || /<html[\s>]|<body[\s>]|<title[\s>]/i.test(text);
	if (!looksHtml) {
		return { text, title: titleFromPlainText(text) };
	}

	const rawTitle = text.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]?.trim() ?? "";
	const title = decodeHtmlEntities(rawTitle).slice(0, 200) || undefined;

	// Order matters: element bodies that are not content go first, then bare tags.
	const strippers = [
		/<script\b[\s\S]*?<\/script>/gi,
		/<style\b[\s\S]*?<\/style>/gi,
		/<noscript\b[\s\S]*?<\/noscript>/gi,
		/<svg\b[\s\S]*?<\/svg>/gi,
		/<[^>]+>/g,
	];
	const withoutMarkup = strippers.reduce((remaining, pattern) => remaining.replace(pattern, " "), text);

	return {
		text: decodeHtmlEntities(withoutMarkup).replace(/\s+/g, " ").trim(),
		title,
		lossy: true,
	};
}
1372
+
1373
/**
 * Returns the text of the first Markdown level-1 heading (`# Title`) found in
 * `text`, trimmed and limited to 200 characters, or `undefined` when there is
 * none.
 */
function titleFromPlainText(text: string): string | undefined {
	const heading = /^#\s+(.+)$/m.exec(text);
	const title = heading?.[1]?.trim();
	if (!title) {
		return undefined;
	}
	return title.slice(0, 200);
}
1377
+
1378
/**
 * Decodes the HTML character references this module understands: the named
 * entities `nbsp`, `amp`, `lt`, `gt`, `quot`, `apos` (case-insensitive) and
 * decimal / hexadecimal numeric references.
 *
 * All references are decoded in a single pass over the input, so text
 * produced by one replacement is never decoded again: `&amp;lt;` yields the
 * literal text `&lt;`, not `<`.
 *
 * @param value - Text that may contain character references.
 * @returns The decoded text. `&nbsp;` becomes a regular space, numeric
 *   references outside the valid code point range are removed, and
 *   unrecognized references are left untouched.
 */
function decodeHtmlEntities(value: string): string {
	return value.replace(
		/&(?:#(\d+)|#x([0-9a-f]+)|(nbsp|amp|lt|gt|quot|apos));/gi,
		(_match: string, decimal?: string, hex?: string, named?: string): string => {
			if (named !== undefined) {
				switch (named.toLowerCase()) {
					case "nbsp":
						return " ";
					case "amp":
						return "&";
					case "lt":
						return "<";
					case "gt":
						return ">";
					case "quot":
						return '"';
					default:
						// Only "apos" remains in the alternation.
						return "'";
				}
			}
			const codePoint = decimal !== undefined ? Number(decimal) : Number.parseInt(hex ?? "", 16);
			return isValidCodePoint(codePoint) ? String.fromCodePoint(codePoint) : "";
		},
	);
}
1395
+
1396
/** True when `value` is an integer in the Unicode code point range 0..0x10FFFF. */
function isValidCodePoint(value: number): boolean {
	if (!Number.isInteger(value)) {
		return false;
	}
	return value >= 0 && value <= 0x10ffff;
}
1399
+
1400
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
	if (value === null || typeof value !== "object") {
		return false;
	}
	return !Array.isArray(value);
}
1403
+
1404
/**
 * Converts an import path into a module specifier: absolute file-system paths
 * become `file://` URLs, everything else is returned unchanged.
 */
function extensionImportSpecifier(importPath: string): string {
	if (!isAbsolute(importPath)) {
		return importPath;
	}
	const fileUrl = pathToFileURL(resolve(importPath));
	return fileUrl.href;
}
1408
+
1409
/**
 * Locates the `workflow-web-source-extension` module that sits next to
 * `modulePath`, reusing `modulePath`'s file extension.
 *
 * @param modulePath - Path of a module in the same directory.
 * @returns Absolute path of the sibling extension module.
 */
export function workflowWebSourceModuleImportPath(modulePath: string): string {
	const directory = dirname(modulePath);
	const extension = extname(modulePath);
	return resolve(directory, `workflow-web-source-extension${extension}`);
}