@agwab/pi-workflow 0.2.1 → 0.4.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (119)
  1. package/README.md +3 -1
  2. package/dist/artifact-graph-runtime.d.ts +1 -1
  3. package/dist/artifact-graph-runtime.js +10 -5
  4. package/dist/artifact-graph-schema.js +127 -5
  5. package/dist/compiler.js +52 -19
  6. package/dist/dynamic-generated-task-runtime.js +3 -1
  7. package/dist/dynamic-profiles.d.ts +1 -1
  8. package/dist/engine-run-graph.d.ts +3 -0
  9. package/dist/engine-run-graph.js +194 -4
  10. package/dist/engine.d.ts +5 -0
  11. package/dist/engine.js +389 -41
  12. package/dist/extension.d.ts +2 -1
  13. package/dist/extension.js +30 -8
  14. package/dist/index.d.ts +11 -3
  15. package/dist/index.js +6 -1
  16. package/dist/prompt-json.d.ts +7 -0
  17. package/dist/prompt-json.js +13 -0
  18. package/dist/roles.d.ts +1 -1
  19. package/dist/roles.js +5 -8
  20. package/dist/store.d.ts +20 -1
  21. package/dist/store.js +139 -35
  22. package/dist/strings.d.ts +11 -0
  23. package/dist/strings.js +24 -0
  24. package/dist/subagent-backend.js +710 -40
  25. package/dist/types.d.ts +107 -1
  26. package/dist/verification-ontology.d.ts +31 -0
  27. package/dist/verification-ontology.js +66 -0
  28. package/dist/workflow-artifact-tool.js +5 -6
  29. package/dist/workflow-artifacts.d.ts +7 -0
  30. package/dist/workflow-artifacts.js +55 -4
  31. package/dist/workflow-fetch-cache-extension.d.ts +1 -0
  32. package/dist/workflow-fetch-cache-extension.js +57 -9
  33. package/dist/workflow-metrics.d.ts +113 -0
  34. package/dist/workflow-metrics.js +272 -0
  35. package/dist/workflow-output-artifacts.js +5 -3
  36. package/dist/workflow-partial-output.d.ts +45 -0
  37. package/dist/workflow-partial-output.js +205 -0
  38. package/dist/workflow-progress-health.js +42 -10
  39. package/dist/workflow-runtime.js +10 -1
  40. package/dist/workflow-view.js +3 -1
  41. package/dist/workflow-web-source-extension.js +194 -52
  42. package/dist/workflow-web-source.d.ts +2 -1
  43. package/dist/workflow-web-source.js +109 -30
  44. package/docs/usage.md +76 -29
  45. package/node_modules/@agwab/pi-subagent/README.md +3 -3
  46. package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
  47. package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
  48. package/node_modules/@agwab/pi-subagent/package.json +2 -2
  49. package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
  50. package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
  51. package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
  52. package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
  53. package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
  54. package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
  55. package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
  56. package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
  57. package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
  58. package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
  59. package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
  60. package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
  61. package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
  62. package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
  63. package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
  64. package/package.json +2 -2
  65. package/skills/workflow-guide/SKILL.md +1 -0
  66. package/src/artifact-graph-runtime.ts +19 -13
  67. package/src/artifact-graph-schema.ts +143 -3
  68. package/src/cli.mjs +52 -0
  69. package/src/compiler.ts +63 -18
  70. package/src/dynamic-generated-task-runtime.ts +3 -1
  71. package/src/dynamic-profiles.ts +1 -1
  72. package/src/engine-run-graph.ts +246 -4
  73. package/src/engine.ts +545 -38
  74. package/src/extension.ts +36 -6
  75. package/src/index.ts +52 -1
  76. package/src/prompt-json.ts +13 -0
  77. package/src/roles.ts +6 -9
  78. package/src/store.ts +194 -42
  79. package/src/strings.ts +38 -0
  80. package/src/subagent-backend.ts +921 -62
  81. package/src/types.ts +116 -2
  82. package/src/verification-ontology.ts +88 -0
  83. package/src/workflow-artifact-tool.ts +5 -7
  84. package/src/workflow-artifacts.ts +83 -3
  85. package/src/workflow-fetch-cache-extension.ts +78 -13
  86. package/src/workflow-metrics.ts +478 -0
  87. package/src/workflow-output-artifacts.ts +5 -3
  88. package/src/workflow-partial-output.ts +299 -0
  89. package/src/workflow-progress-health.ts +47 -15
  90. package/src/workflow-runtime.ts +18 -2
  91. package/src/workflow-view.ts +2 -1
  92. package/src/workflow-web-source-extension.ts +654 -232
  93. package/src/workflow-web-source.ts +153 -39
  94. package/workflows/README.md +7 -25
  95. package/workflows/deep-research/batched-verification.spec.json +253 -0
  96. package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
  97. package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
  98. package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
  99. package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
  100. package/workflows/deep-research/helpers/render-executive.mjs +40 -26
  101. package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
  102. package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
  103. package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
  104. package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
  105. package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
  106. package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
  107. package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
  108. package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
  109. package/workflows/deep-research/spec.json +32 -12
  110. package/workflows/impact-review/spec.json +3 -3
  111. package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
  112. package/dist/dynamic-loader.d.ts +0 -25
  113. package/dist/dynamic-loader.js +0 -13
  114. package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
  115. package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
  116. package/src/dynamic-loader.ts +0 -49
  117. package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
  118. package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
  119. package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
@@ -11,7 +11,6 @@ import {
11
11
  buildWorkflowWebSourceCard,
12
12
  createWorkflowWebSource,
13
13
  createWorkflowWebVisibleBudget,
14
- DEFAULT_WORKFLOW_WEB_SECURITY_POLICY,
15
14
  errorToolResult,
16
15
  extractSearchCandidates,
17
16
  extractTextFromToolResult,
@@ -43,7 +42,8 @@ export interface WorkflowWebProviderLaunchConfig {
43
42
  extensionPath?: string;
44
43
  }
45
44
 
46
- export interface WorkflowWebSourceLaunchConfig extends WorkflowWebSourceCacheConfig {
45
+ export interface WorkflowWebSourceLaunchConfig
46
+ extends WorkflowWebSourceCacheConfig {
47
47
  schema: typeof WORKFLOW_WEB_SOURCE_LAUNCH_CONFIG_SCHEMA;
48
48
  workflowName?: string;
49
49
  stageId?: string;
@@ -111,14 +111,21 @@ export function registerWorkflowWebSourceExtension(
111
111
  ): void {
112
112
  const policy = normalizeWorkflowWebSourcePolicy(config.webSourcePolicy);
113
113
  const security = normalizeWorkflowWebSecurityPolicy(config.securityPolicy);
114
- const budget = createWorkflowWebVisibleBudget(policy.perTaskVisibleCharBudget);
114
+ const budget = createWorkflowWebVisibleBudget(
115
+ policy.perTaskVisibleCharBudget,
116
+ );
115
117
  const providerTools: CapturedProviderTools = new Map();
116
118
  const sourceCache: Map<string, WorkflowWebSource> = new Map();
117
- const fetchInFlight: Map<string, Promise<ReturnType<typeof toolResultFromJson>>> = new Map();
119
+ const fetchInFlight: Map<
120
+ string,
121
+ Promise<ReturnType<typeof toolResultFromJson>>
122
+ > = new Map();
118
123
  const fetchFailures: Map<string, FetchFailure> = new Map();
119
124
 
120
125
  if (providerExtension) {
121
- providerExtension(providerCapturePi(pi, providerTools, Boolean(config.exposeLegacyTools)));
126
+ providerExtension(
127
+ providerCapturePi(pi, providerTools, Boolean(config.exposeLegacyTools)),
128
+ );
122
129
  }
123
130
 
124
131
  pi.registerTool({
@@ -126,9 +133,15 @@ export function registerWorkflowWebSourceExtension(
126
133
  description:
127
134
  "Search the web through the workflow web-source provider and return compact candidate cards only.",
128
135
  parameters: Type.Object({
129
- query: Type.Optional(Type.String({ description: "Single search query." })),
130
- queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple search queries." })),
131
- numResults: Type.Optional(Type.Number({ description: "Results per query." })),
136
+ query: Type.Optional(
137
+ Type.String({ description: "Single search query." }),
138
+ ),
139
+ queries: Type.Optional(
140
+ Type.Array(Type.String(), { description: "Multiple search queries." }),
141
+ ),
142
+ numResults: Type.Optional(
143
+ Type.Number({ description: "Results per query." }),
144
+ ),
132
145
  }),
133
146
  execute: async (toolCallId, params, signal, onUpdate, ctx) => {
134
147
  const providerTool = providerTools.get("web_search");
@@ -150,14 +163,19 @@ export function registerWorkflowWebSourceExtension(
150
163
  onUpdate,
151
164
  ctx,
152
165
  );
153
- const candidates = extractSearchCandidates(result, policy).map((candidate) => {
154
- const consumed = consumeText(candidate.snippet, policy.searchSnippetChars);
155
- return {
156
- ...candidate,
157
- snippet: consumed.text,
158
- budget: consumed.budget,
159
- };
160
- });
166
+ const candidates = extractSearchCandidates(result, policy).map(
167
+ (candidate) => {
168
+ const consumed = consumeText(
169
+ candidate.snippet,
170
+ policy.searchSnippetChars,
171
+ );
172
+ return {
173
+ ...candidate,
174
+ snippet: consumed.text,
175
+ budget: consumed.budget,
176
+ };
177
+ },
178
+ );
161
179
  await recordWorkflowWebSourceEvent(config, "search", {
162
180
  candidateCount: candidates.length,
163
181
  visibleChars: budget.used,
@@ -177,14 +195,44 @@ export function registerWorkflowWebSourceExtension(
177
195
  description:
178
196
  "Fetch one or more URLs into the workflow web-source cache and return compact source cards with sourceRefs.",
179
197
  parameters: Type.Object({
180
- url: Type.Optional(Type.String({ description: "Single URL to fetch into the workflow web-source cache." })),
181
- urls: Type.Optional(Type.Array(Type.String(), { description: "Multiple URLs to fetch in one tool call. Prefer this over repeated fetch calls when caching several promising sources." })),
182
- sources: Type.Optional(Type.Array(Type.Object({
183
- url: Type.String({ description: "URL to fetch into the workflow web-source cache." }),
184
- title: Type.Optional(Type.String({ description: "Optional source title override." })),
185
- }), { description: "Multiple URL/title objects to fetch in one tool call." })),
186
- title: Type.Optional(Type.String({ description: "Optional source title override for single-url fetches." })),
187
- titles: Type.Optional(Type.Array(Type.String(), { description: "Optional title overrides paired by index with urls." })),
198
+ url: Type.Optional(
199
+ Type.String({
200
+ description:
201
+ "Single URL to fetch into the workflow web-source cache.",
202
+ }),
203
+ ),
204
+ urls: Type.Optional(
205
+ Type.Array(Type.String(), {
206
+ description:
207
+ "Multiple URLs to fetch in one tool call. Prefer this over repeated fetch calls when caching several promising sources.",
208
+ }),
209
+ ),
210
+ sources: Type.Optional(
211
+ Type.Array(
212
+ Type.Object({
213
+ url: Type.String({
214
+ description: "URL to fetch into the workflow web-source cache.",
215
+ }),
216
+ title: Type.Optional(
217
+ Type.String({ description: "Optional source title override." }),
218
+ ),
219
+ }),
220
+ {
221
+ description:
222
+ "Multiple URL/title objects to fetch in one tool call.",
223
+ },
224
+ ),
225
+ ),
226
+ title: Type.Optional(
227
+ Type.String({
228
+ description: "Optional source title override for single-url fetches.",
229
+ }),
230
+ ),
231
+ titles: Type.Optional(
232
+ Type.Array(Type.String(), {
233
+ description: "Optional title overrides paired by index with urls.",
234
+ }),
235
+ ),
188
236
  }),
189
237
  execute: async (toolCallId, params, signal, onUpdate, ctx) => {
190
238
  const batchRequested = fetchSourceBatchRequested(params);
@@ -212,10 +260,15 @@ export function registerWorkflowWebSourceExtension(
212
260
  results.push({
213
261
  index,
214
262
  url: sanitizeUrlForModel(request.url),
215
- status: typeof payload.status === "string" ? payload.status : "unknown",
263
+ status:
264
+ typeof payload.status === "string" ? payload.status : "unknown",
216
265
  ...(typeof payload.code === "string" ? { code: payload.code } : {}),
217
- ...(typeof payload.message === "string" ? { message: payload.message } : {}),
218
- ...(typeof card?.sourceRef === "string" ? { sourceRef: card.sourceRef } : {}),
266
+ ...(typeof payload.message === "string"
267
+ ? { message: payload.message }
268
+ : {}),
269
+ ...(typeof card?.sourceRef === "string"
270
+ ? { sourceRef: card.sourceRef }
271
+ : {}),
219
272
  ...(card ? { cardIndex: cards.length - 1 } : {}),
220
273
  });
221
274
  }
@@ -239,7 +292,13 @@ export function registerWorkflowWebSourceExtension(
239
292
  next: "Use returned sourceRefs with workflow_web_source_read; batch snippets with reads:[...] or queries:[...] when possible.",
240
293
  });
241
294
  }
242
- return await fetchWorkflowWebSourceOnce(toolCallId, params, signal, onUpdate, ctx);
295
+ return await fetchWorkflowWebSourceOnce(
296
+ toolCallId,
297
+ params,
298
+ signal,
299
+ onUpdate,
300
+ ctx,
301
+ );
243
302
  },
244
303
  });
245
304
 
@@ -250,85 +309,132 @@ export function registerWorkflowWebSourceExtension(
250
309
  onUpdate?: unknown,
251
310
  ctx?: unknown,
252
311
  ): Promise<ToolResult> {
253
- const url = urlFromParams(params);
254
- if (!url) {
255
- return errorToolResult(
256
- "invalid_params",
257
- "workflow_web_fetch_source requires a url string parameter.",
258
- );
259
- }
260
- const checked = validateWorkflowWebUrl(url, security);
261
- if (!checked.ok) {
262
- await recordWorkflowWebSourceEvent(config, "blocked_url", {
263
- url: sanitizeUrlForModel(url),
264
- reason: checked.reason,
265
- });
266
- return errorToolResult("blocked_url", "URL blocked by workflow web-source security policy.", {
312
+ const url = urlFromParams(params);
313
+ if (!url) {
314
+ return errorToolResult(
315
+ "invalid_params",
316
+ "workflow_web_fetch_source requires a url string parameter.",
317
+ );
318
+ }
319
+ const checked = validateWorkflowWebUrl(url, security);
320
+ if (!checked.ok) {
321
+ await recordWorkflowWebSourceEvent(config, "blocked_url", {
322
+ url: sanitizeUrlForModel(url),
323
+ reason: checked.reason,
324
+ });
325
+ return errorToolResult(
326
+ "blocked_url",
327
+ "URL blocked by workflow web-source security policy.",
328
+ {
267
329
  reason: checked.reason,
268
330
  url: sanitizeUrlForModel(url),
269
- });
270
- }
271
- const fetchUrl = canonicalWorkflowWebFetchUrl(checked.normalizedUrl);
272
- const existing = await findWorkflowWebSourceByUrl(config, fetchUrl);
273
- if (existing) {
274
- sourceCache.set(existing.sourceRef, existing);
275
- const card = buildWorkflowWebSourceCard({
276
- source: existing,
277
- policy,
278
- budget,
279
- duplicate: true,
280
- });
281
- await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
282
- sourceRef: existing.sourceRef,
283
- url: existing.redactedUrl,
284
- visibleChars: budget.used,
285
- });
286
- return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
287
- }
288
- const fetchKey = sourceUrlCacheKey(fetchUrl);
289
- const cachedFailure = fetchFailures.get(fetchKey) ?? await readDurableFetchFailure(config, fetchKey);
290
- if (cachedFailure) {
291
- fetchFailures.set(fetchKey, cachedFailure);
292
- await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
293
- url: sanitizeUrlForModel(fetchUrl),
294
- code: cachedFailure.code,
295
- });
296
- return errorToolResult(cachedFailure.code, cachedFailure.message, cachedFailure.extra);
297
- }
298
- const inFlight = fetchInFlight.get(fetchKey);
299
- if (inFlight) {
300
- const result = await inFlight;
301
- const source = await findWorkflowWebSourceByUrl(config, fetchUrl);
302
- if (!source) return result;
303
- sourceCache.set(source.sourceRef, source);
304
- const card = buildWorkflowWebSourceCard({ source, policy, budget, duplicate: true });
305
- await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
306
- sourceRef: source.sourceRef,
307
- url: source.redactedUrl,
308
- visibleChars: budget.used,
309
- });
310
- return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
311
- }
312
- const fetchPromise = withWorkflowWebFetchLock(config, fetchKey, signal, async () => {
313
- const lockedExisting = await findWorkflowWebSourceByUrl(config, fetchUrl);
331
+ },
332
+ );
333
+ }
334
+ const fetchUrl = canonicalWorkflowWebFetchUrl(checked.normalizedUrl);
335
+ const existing = await findWorkflowWebSourceByUrl(config, fetchUrl);
336
+ if (existing) {
337
+ sourceCache.set(existing.sourceRef, existing);
338
+ const card = buildWorkflowWebSourceCard({
339
+ source: existing,
340
+ policy,
341
+ budget,
342
+ duplicate: true,
343
+ });
344
+ await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
345
+ sourceRef: existing.sourceRef,
346
+ url: existing.redactedUrl,
347
+ visibleChars: budget.used,
348
+ });
349
+ return toolResultFromJson({
350
+ status: "ok",
351
+ tool: "workflow_web_fetch_source",
352
+ card,
353
+ });
354
+ }
355
+ const fetchKey = sourceUrlCacheKey(fetchUrl);
356
+ const cachedFailure =
357
+ fetchFailures.get(fetchKey) ??
358
+ (await readDurableFetchFailure(config, fetchKey));
359
+ if (cachedFailure) {
360
+ fetchFailures.set(fetchKey, cachedFailure);
361
+ await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
362
+ url: sanitizeUrlForModel(fetchUrl),
363
+ code: cachedFailure.code,
364
+ });
365
+ return errorToolResult(
366
+ cachedFailure.code,
367
+ cachedFailure.message,
368
+ cachedFailure.extra,
369
+ );
370
+ }
371
+ const inFlight = fetchInFlight.get(fetchKey);
372
+ if (inFlight) {
373
+ const result = await inFlight;
374
+ const source = await findWorkflowWebSourceByUrl(config, fetchUrl);
375
+ if (!source) return result;
376
+ sourceCache.set(source.sourceRef, source);
377
+ const card = buildWorkflowWebSourceCard({
378
+ source,
379
+ policy,
380
+ budget,
381
+ duplicate: true,
382
+ });
383
+ await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
384
+ sourceRef: source.sourceRef,
385
+ url: source.redactedUrl,
386
+ visibleChars: budget.used,
387
+ });
388
+ return toolResultFromJson({
389
+ status: "ok",
390
+ tool: "workflow_web_fetch_source",
391
+ card,
392
+ });
393
+ }
394
+ const fetchPromise = withWorkflowWebFetchLock(
395
+ config,
396
+ fetchKey,
397
+ signal,
398
+ async () => {
399
+ const lockedExisting = await findWorkflowWebSourceByUrl(
400
+ config,
401
+ fetchUrl,
402
+ );
314
403
  if (lockedExisting) {
315
404
  sourceCache.set(lockedExisting.sourceRef, lockedExisting);
316
- const card = buildWorkflowWebSourceCard({ source: lockedExisting, policy, budget, duplicate: true });
405
+ const card = buildWorkflowWebSourceCard({
406
+ source: lockedExisting,
407
+ policy,
408
+ budget,
409
+ duplicate: true,
410
+ });
317
411
  await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
318
412
  sourceRef: lockedExisting.sourceRef,
319
413
  url: lockedExisting.redactedUrl,
320
414
  visibleChars: budget.used,
321
415
  });
322
- return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
416
+ return toolResultFromJson({
417
+ status: "ok",
418
+ tool: "workflow_web_fetch_source",
419
+ card,
420
+ });
323
421
  }
324
422
  const lockedFailure = await readDurableFetchFailure(config, fetchKey);
325
423
  if (lockedFailure) {
326
424
  fetchFailures.set(fetchKey, lockedFailure);
327
- await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
328
- url: sanitizeUrlForModel(fetchUrl),
329
- code: lockedFailure.code,
330
- });
331
- return errorToolResult(lockedFailure.code, lockedFailure.message, lockedFailure.extra);
425
+ await recordWorkflowWebSourceEvent(
426
+ config,
427
+ "fetch_negative_cache_hit",
428
+ {
429
+ url: sanitizeUrlForModel(fetchUrl),
430
+ code: lockedFailure.code,
431
+ },
432
+ );
433
+ return errorToolResult(
434
+ lockedFailure.code,
435
+ lockedFailure.message,
436
+ lockedFailure.extra,
437
+ );
332
438
  }
333
439
  let text: string;
334
440
  let title = titleFromParams(params);
@@ -345,13 +451,21 @@ export function registerWorkflowWebSourceExtension(
345
451
  url: sanitizeUrlForModel(safeFetch.url),
346
452
  reason: safeFetch.reason,
347
453
  });
348
- return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
349
- code: "blocked_url",
350
- message:
351
- "URL was blocked by workflow web-source security policy before content fetch.",
352
- extra: { reason: safeFetch.reason, url: sanitizeUrlForModel(safeFetch.url) },
353
- reason: safeFetch.reason,
354
- });
454
+ return await cachedFetchFailureResult(
455
+ config,
456
+ fetchFailures,
457
+ fetchKey,
458
+ {
459
+ code: "blocked_url",
460
+ message:
461
+ "URL was blocked by workflow web-source security policy before content fetch.",
462
+ extra: {
463
+ reason: safeFetch.reason,
464
+ url: sanitizeUrlForModel(safeFetch.url),
465
+ },
466
+ reason: safeFetch.reason,
467
+ },
468
+ );
355
469
  }
356
470
  text = safeFetch.text;
357
471
  title = title ?? safeFetch.title;
@@ -368,31 +482,44 @@ export function registerWorkflowWebSourceExtension(
368
482
  return errorToolResult(missing.code, missing.message);
369
483
  }
370
484
  if (!security.allowPrivateHosts) {
371
- await recordWorkflowWebSourceEvent(config, "blocked_provider_fetch", {
372
- url: sanitizeUrlForModel(fetchUrl),
373
- reason: "untrusted_provider_fetch",
374
- });
485
+ await recordWorkflowWebSourceEvent(
486
+ config,
487
+ "blocked_provider_fetch",
488
+ {
489
+ url: sanitizeUrlForModel(fetchUrl),
490
+ reason: "untrusted_provider_fetch",
491
+ },
492
+ );
375
493
  return errorToolResult(
376
494
  "untrusted_provider_fetch",
377
495
  "Custom provider fetch_content is disabled unless securityPolicy.allowPrivateHosts is true; use the default safe fetch provider or a trusted provider configuration.",
378
496
  { url: sanitizeUrlForModel(fetchUrl) },
379
497
  );
380
498
  }
381
- const providerHostCheck = await validateResolvedHost(fetchUrl, security);
499
+ const providerHostCheck = await validateResolvedHost(
500
+ fetchUrl,
501
+ security,
502
+ );
382
503
  if (!providerHostCheck.ok) {
383
504
  await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
384
505
  url: sanitizeUrlForModel(providerHostCheck.url),
385
506
  reason: providerHostCheck.reason,
386
507
  });
387
- return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
388
- code: "blocked_url",
389
- message: "URL was blocked by workflow web-source security policy before provider fetch.",
390
- extra: {
508
+ return await cachedFetchFailureResult(
509
+ config,
510
+ fetchFailures,
511
+ fetchKey,
512
+ {
513
+ code: "blocked_url",
514
+ message:
515
+ "URL was blocked by workflow web-source security policy before provider fetch.",
516
+ extra: {
517
+ reason: providerHostCheck.reason,
518
+ url: sanitizeUrlForModel(providerHostCheck.url),
519
+ },
391
520
  reason: providerHostCheck.reason,
392
- url: sanitizeUrlForModel(providerHostCheck.url),
393
521
  },
394
- reason: providerHostCheck.reason,
395
- });
522
+ );
396
523
  }
397
524
  const result = await providerTool.execute(
398
525
  toolCallId,
@@ -401,21 +528,30 @@ export function registerWorkflowWebSourceExtension(
401
528
  onUpdate,
402
529
  ctx,
403
530
  );
404
- const providerUrlCheck = await validateProviderResultUrls(result, security);
531
+ const providerUrlCheck = await validateProviderResultUrls(
532
+ result,
533
+ security,
534
+ );
405
535
  if (!providerUrlCheck.ok) {
406
536
  await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
407
537
  url: sanitizeUrlForModel(providerUrlCheck.url),
408
538
  reason: providerUrlCheck.reason,
409
539
  });
410
- return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
411
- code: "blocked_url",
412
- message: "Provider result URL was blocked by workflow web-source security policy.",
413
- extra: {
540
+ return await cachedFetchFailureResult(
541
+ config,
542
+ fetchFailures,
543
+ fetchKey,
544
+ {
545
+ code: "blocked_url",
546
+ message:
547
+ "Provider result URL was blocked by workflow web-source security policy.",
548
+ extra: {
549
+ reason: providerUrlCheck.reason,
550
+ url: sanitizeUrlForModel(providerUrlCheck.url),
551
+ },
414
552
  reason: providerUrlCheck.reason,
415
- url: sanitizeUrlForModel(providerUrlCheck.url),
416
553
  },
417
- reason: providerUrlCheck.reason,
418
- });
554
+ );
419
555
  }
420
556
  text = extractTextFromToolResult(result);
421
557
  title = title ?? extractTitleFromToolResult(result);
@@ -424,12 +560,17 @@ export function registerWorkflowWebSourceExtension(
424
560
  await recordWorkflowWebSourceEvent(config, "fetch_empty", {
425
561
  url: sanitizeUrlForModel(fetchUrl),
426
562
  });
427
- return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
428
- code: "empty_source",
429
- message: "Provider returned no extractable text for this URL.",
430
- extra: { url: sanitizeUrlForModel(fetchUrl) },
431
- reason: "empty_source",
432
- });
563
+ return await cachedFetchFailureResult(
564
+ config,
565
+ fetchFailures,
566
+ fetchKey,
567
+ {
568
+ code: "empty_source",
569
+ message: "Provider returned no extractable text for this URL.",
570
+ extra: { url: sanitizeUrlForModel(fetchUrl) },
571
+ reason: "empty_source",
572
+ },
573
+ );
433
574
  }
434
575
  const source = createWorkflowWebSource({
435
576
  config,
@@ -448,24 +589,37 @@ export function registerWorkflowWebSourceExtension(
448
589
  textChars: source.textChars,
449
590
  visibleChars: budget.used,
450
591
  });
451
- return toolResultFromJson({ status: "ok", tool: "workflow_web_fetch_source", card });
452
- }).catch(async (error: unknown) => {
453
- const message = error instanceof Error ? error.message : "workflow_web_fetch_failed";
454
- const code = message === "fetch_lock_timeout" ? "fetch_lock_timeout" : "workflow_web_fetch_failed";
455
- await recordWorkflowWebSourceEvent(config, "fetch_failed", {
456
- url: sanitizeUrlForModel(fetchUrl),
457
- code,
458
- });
459
- return errorToolResult(code, "Workflow web-source fetch failed before a source could be cached.", {
460
- url: sanitizeUrlForModel(fetchUrl),
592
+ return toolResultFromJson({
593
+ status: "ok",
594
+ tool: "workflow_web_fetch_source",
595
+ card,
461
596
  });
597
+ },
598
+ ).catch(async (error: unknown) => {
599
+ const message =
600
+ error instanceof Error ? error.message : "workflow_web_fetch_failed";
601
+ const code =
602
+ message === "fetch_lock_timeout"
603
+ ? "fetch_lock_timeout"
604
+ : "workflow_web_fetch_failed";
605
+ await recordWorkflowWebSourceEvent(config, "fetch_failed", {
606
+ url: sanitizeUrlForModel(fetchUrl),
607
+ code,
462
608
  });
463
- fetchInFlight.set(fetchKey, fetchPromise);
464
- try {
465
- return await fetchPromise;
466
- } finally {
467
- fetchInFlight.delete(fetchKey);
468
- }
609
+ return errorToolResult(
610
+ code,
611
+ "Workflow web-source fetch failed before a source could be cached.",
612
+ {
613
+ url: sanitizeUrlForModel(fetchUrl),
614
+ },
615
+ );
616
+ });
617
+ fetchInFlight.set(fetchKey, fetchPromise);
618
+ try {
619
+ return await fetchPromise;
620
+ } finally {
621
+ fetchInFlight.delete(fetchKey);
622
+ }
469
623
  }
470
624
 
471
625
  pi.registerTool({
@@ -473,26 +627,90 @@ export function registerWorkflowWebSourceExtension(
473
627
  description:
474
628
  "Read one or more narrow exact/fuzzy/term-matched snippets from a cached workflow web source by sourceRef.",
475
629
  parameters: Type.Object({
476
- sourceRef: Type.String({ description: "Opaque sourceRef returned by workflow_web_fetch_source." }),
477
- query: Type.Optional(Type.String({ description: "Exact or fuzzy text to locate in the cached source." })),
478
- queries: Type.Optional(Type.Array(Type.String(), { description: "Multiple exact/fuzzy texts to locate in one cached source. Prefer this over repeated calls when reading several snippets from the same sourceRef." })),
479
- exact: Type.Optional(Type.String({ description: "Exact text to locate in the cached source." })),
480
- exactTexts: Type.Optional(Type.Array(Type.String(), { description: "Multiple exact texts to locate in one cached source." })),
481
- claim: Type.Optional(Type.String({ description: "Claim to locate when the exact quote is not known. Use with terms for deterministic quote harvesting." })),
482
- terms: Type.Optional(Type.Array(Type.String(), { description: "Important terms that should co-occur in the returned source window." })),
483
- reads: Type.Optional(Type.Array(Type.Object({
484
- query: Type.Optional(Type.String({ description: "Exact or fuzzy text to locate." })),
485
- exact: Type.Optional(Type.String({ description: "Exact text to locate." })),
486
- exactText: Type.Optional(Type.String({ description: "Exact text to locate." })),
487
- text: Type.Optional(Type.String({ description: "Text to locate." })),
488
- claim: Type.Optional(Type.String({ description: "Claim to locate when exact quote is unknown." })),
489
- terms: Type.Optional(Type.Array(Type.String(), { description: "Important terms for deterministic quote harvesting." })),
490
- maxChars: Type.Optional(Type.Number({ description: "Maximum visible snippet characters for this read." })),
491
- }), { description: "Mixed batch reads for one sourceRef; each item can use query or claim+terms." })),
492
- maxChars: Type.Optional(Type.Number({ description: "Maximum visible snippet characters per query." })),
630
+ sourceRef: Type.String({
631
+ description: "Opaque sourceRef returned by workflow_web_fetch_source.",
632
+ }),
633
+ query: Type.Optional(
634
+ Type.String({
635
+ description: "Exact or fuzzy text to locate in the cached source.",
636
+ }),
637
+ ),
638
+ queries: Type.Optional(
639
+ Type.Array(Type.String(), {
640
+ description:
641
+ "Multiple exact/fuzzy texts to locate in one cached source. Prefer this over repeated calls when reading several snippets from the same sourceRef.",
642
+ }),
643
+ ),
644
+ exact: Type.Optional(
645
+ Type.String({
646
+ description: "Exact text to locate in the cached source.",
647
+ }),
648
+ ),
649
+ exactTexts: Type.Optional(
650
+ Type.Array(Type.String(), {
651
+ description: "Multiple exact texts to locate in one cached source.",
652
+ }),
653
+ ),
654
+ claim: Type.Optional(
655
+ Type.String({
656
+ description:
657
+ "Claim to locate when the exact quote is not known. Use with terms for deterministic quote harvesting.",
658
+ }),
659
+ ),
660
+ terms: Type.Optional(
661
+ Type.Array(Type.String(), {
662
+ description:
663
+ "Important terms that should co-occur in the returned source window.",
664
+ }),
665
+ ),
666
+ reads: Type.Optional(
667
+ Type.Array(
668
+ Type.Object({
669
+ query: Type.Optional(
670
+ Type.String({ description: "Exact or fuzzy text to locate." }),
671
+ ),
672
+ exact: Type.Optional(
673
+ Type.String({ description: "Exact text to locate." }),
674
+ ),
675
+ exactText: Type.Optional(
676
+ Type.String({ description: "Exact text to locate." }),
677
+ ),
678
+ text: Type.Optional(
679
+ Type.String({ description: "Text to locate." }),
680
+ ),
681
+ claim: Type.Optional(
682
+ Type.String({
683
+ description: "Claim to locate when exact quote is unknown.",
684
+ }),
685
+ ),
686
+ terms: Type.Optional(
687
+ Type.Array(Type.String(), {
688
+ description:
689
+ "Important terms for deterministic quote harvesting.",
690
+ }),
691
+ ),
692
+ maxChars: Type.Optional(
693
+ Type.Number({
694
+ description:
695
+ "Maximum visible snippet characters for this read.",
696
+ }),
697
+ ),
698
+ }),
699
+ {
700
+ description:
701
+ "Mixed batch reads for one sourceRef; each item can use query or claim+terms.",
702
+ },
703
+ ),
704
+ ),
705
+ maxChars: Type.Optional(
706
+ Type.Number({
707
+ description: "Maximum visible snippet characters per query.",
708
+ }),
709
+ ),
493
710
  }),
494
711
  execute: async (_toolCallId, params) => {
495
- const sourceRef = stringParam(params, "sourceRef") ?? stringParam(params, "source_ref");
712
+ const sourceRef =
713
+ stringParam(params, "sourceRef") ?? stringParam(params, "source_ref");
496
714
  const requests = sourceReadRequestsFromParams(params);
497
715
  if (!sourceRef || requests.length === 0) {
498
716
  return errorToolResult(
@@ -502,18 +720,28 @@ export function registerWorkflowWebSourceExtension(
502
720
  }
503
721
  const source = await readCachedWorkflowWebSource(sourceRef);
504
722
  if (!source) {
505
- await recordWorkflowWebSourceEvent(config, "source_read_missing", { sourceRef });
506
- return errorToolResult("source_not_found", "No cached workflow web source exists for sourceRef.", {
723
+ await recordWorkflowWebSourceEvent(config, "source_read_missing", {
507
724
  sourceRef,
508
725
  });
726
+ return errorToolResult(
727
+ "source_not_found",
728
+ "No cached workflow web source exists for sourceRef.",
729
+ {
730
+ sourceRef,
731
+ },
732
+ );
509
733
  }
510
- const maxChars = positiveIntParam(params, "maxChars") ?? policy.sourceReadMaxChars;
734
+ const maxChars =
735
+ positiveIntParam(params, "maxChars") ?? policy.sourceReadMaxChars;
511
736
  const perQueryMaxChars = Math.min(maxChars, policy.sourceReadMaxChars);
512
737
  const reads = readWorkflowWebSourceSnippets({
513
738
  source,
514
739
  requests: requests.map((request) => ({
515
740
  ...request,
516
- maxChars: Math.min(request.maxChars ?? perQueryMaxChars, policy.sourceReadMaxChars),
741
+ maxChars: Math.min(
742
+ request.maxChars ?? perQueryMaxChars,
743
+ policy.sourceReadMaxChars,
744
+ ),
517
745
  })),
518
746
  maxChars: perQueryMaxChars,
519
747
  budget,
@@ -532,14 +760,20 @@ export function registerWorkflowWebSourceExtension(
532
760
  missingTerms: read.missingTerms,
533
761
  coverageRatio: read.coverageRatio,
534
762
  candidateOnly: read.candidateOnly,
763
+ truncated: read.truncated,
535
764
  quote: status === "budget_exhausted" ? undefined : read.quote,
536
765
  startOffset: read.startOffset,
537
766
  endOffset: read.endOffset,
538
767
  visibleChars: read.visibleChars,
539
768
  };
540
769
  });
541
- const responseStatus = aggregateSourceReadStatus(results.map((result) => result.status));
542
- const visibleChars = results.reduce((total, result) => total + result.visibleChars, 0);
770
+ const responseStatus = aggregateSourceReadStatus(
771
+ results.map((result) => result.status),
772
+ );
773
+ const visibleChars = results.reduce(
774
+ (total, result) => total + result.visibleChars,
775
+ 0,
776
+ );
543
777
  await recordWorkflowWebSourceEvent(config, "source_read", {
544
778
  sourceRef,
545
779
  status: responseStatus,
@@ -561,32 +795,48 @@ export function registerWorkflowWebSourceExtension(
561
795
  missingTerms: result.missingTerms,
562
796
  coverageRatio: result.coverageRatio,
563
797
  candidateOnly: result.candidateOnly,
564
- quote: result.status === "budget_exhausted" ? undefined : result.quote,
798
+ truncated: result.truncated,
799
+ quote:
800
+ result.status === "budget_exhausted" ? undefined : result.quote,
565
801
  startOffset: result.startOffset,
566
802
  endOffset: result.endOffset,
567
- budget: budgetSnapshot(result.status === "budget_exhausted"),
803
+ budget: budgetSnapshot(
804
+ result.status === "budget_exhausted" ||
805
+ result.status === "truncated",
806
+ ),
568
807
  next:
569
808
  result.status === "budget_exhausted"
570
809
  ? "Visible web-source budget is exhausted for this task; cite the sourceRef as an evidence gap or use a smaller query in a fresh task."
571
- : undefined,
810
+ : result.status === "truncated"
811
+ ? "The matched web-source snippet was truncated by the visible budget or maxChars; use a smaller exact query or a fresh task if the full quote is required."
812
+ : undefined,
572
813
  });
573
814
  }
815
+ const hasBudgetExhaustedRead = results.some(
816
+ (result) => result.status === "budget_exhausted",
817
+ );
818
+ const hasTruncatedRead = results.some(
819
+ (result) => result.status === "truncated",
820
+ );
574
821
  return toolResultFromJson({
575
822
  status: responseStatus,
576
823
  tool: "workflow_web_source_read",
577
824
  sourceRef,
578
825
  url: source.redactedUrl,
579
826
  results,
580
- budget: budgetSnapshot(results.some((result) => result.status === "budget_exhausted")),
581
- next:
582
- responseStatus === "budget_exhausted"
583
- ? "Visible web-source budget is exhausted for this task; cite missing quotes as evidence gaps or use smaller query batches in a fresh task."
827
+ budget: budgetSnapshot(hasBudgetExhaustedRead || hasTruncatedRead),
828
+ next: hasBudgetExhaustedRead
829
+ ? "Visible web-source budget is exhausted for this task; cite missing quotes as evidence gaps or use smaller query batches in a fresh task."
830
+ : hasTruncatedRead
831
+ ? "One or more matched web-source snippets were truncated by the visible budget or maxChars; use smaller exact queries or a fresh task if full quotes are required."
584
832
  : undefined,
585
833
  });
586
834
  },
587
835
  });
588
836
 
589
- async function readCachedWorkflowWebSource(sourceRef: string): Promise<WorkflowWebSource | undefined> {
837
+ async function readCachedWorkflowWebSource(
838
+ sourceRef: string,
839
+ ): Promise<WorkflowWebSource | undefined> {
590
840
  const cached = sourceCache.get(sourceRef);
591
841
  if (cached) return cached;
592
842
  const source = await readWorkflowWebSource(config, sourceRef);
@@ -669,7 +919,12 @@ async function cachedFetchFailureResult(
669
919
  config: WorkflowWebSourceCacheConfig,
670
920
  cache: Map<string, FetchFailure>,
671
921
  key: string,
672
- failure: { code: string; message: string; extra: Record<string, unknown>; reason: string },
922
+ failure: {
923
+ code: string;
924
+ message: string;
925
+ extra: Record<string, unknown>;
926
+ reason: string;
927
+ },
673
928
  ): Promise<ReturnType<typeof toolResultFromJson>> {
674
929
  const cached = {
675
930
  code: failure.code,
@@ -687,8 +942,8 @@ async function cachedFetchFailureResult(
687
942
  return errorToolResult(failure.code, failure.message, failure.extra);
688
943
  }
689
944
 
690
- const FETCH_LOCK_STALE_MS = 60_000;
691
- const FETCH_LOCK_WAIT_MS = 75_000;
945
+ const FETCH_LOCK_STALE_MS = 4 * 60_000;
946
+ const FETCH_LOCK_WAIT_MS = 5 * 60_000;
692
947
 
693
948
  async function withWorkflowWebFetchLock<T>(
694
949
  config: WorkflowWebSourceCacheConfig,
@@ -715,14 +970,15 @@ async function acquireWorkflowWebFetchLock(
715
970
  for (;;) {
716
971
  if (signal?.aborted) throw new Error("aborted");
717
972
  try {
973
+ const ownerId = `${process.pid}:${Date.now()}:${Math.random().toString(36).slice(2)}`;
718
974
  await mkdir(lockDir);
719
975
  await writeFile(
720
976
  resolve(lockDir, "owner.json"),
721
- `${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`,
977
+ `${JSON.stringify({ ownerId, pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`,
722
978
  "utf8",
723
979
  );
724
980
  return async () => {
725
- await rm(lockDir, { recursive: true, force: true });
981
+ await releaseWorkflowWebFetchLock(lockDir, ownerId);
726
982
  };
727
983
  } catch (error) {
728
984
  if (!isFileExistsError(error)) throw error;
@@ -735,6 +991,19 @@ async function acquireWorkflowWebFetchLock(
735
991
  }
736
992
  }
737
993
 
994
+ async function releaseWorkflowWebFetchLock(
995
+ lockDir: string,
996
+ ownerId: string,
997
+ ): Promise<void> {
998
+ try {
999
+ const current = await readFetchLockOwner(lockDir);
1000
+ if (current?.ownerId !== ownerId) return;
1001
+ await rm(lockDir, { recursive: true, force: true });
1002
+ } catch {
1003
+ // Missing or unreadable lock will be retried by the caller.
1004
+ }
1005
+ }
1006
+
738
1007
  async function removeStaleFetchLock(lockDir: string): Promise<void> {
739
1008
  try {
740
1009
  const current = await stat(lockDir);
@@ -746,12 +1015,29 @@ async function removeStaleFetchLock(lockDir: string): Promise<void> {
746
1015
  }
747
1016
  }
748
1017
 
1018
+ async function readFetchLockOwner(
1019
+ lockDir: string,
1020
+ ): Promise<{ ownerId?: string } | undefined> {
1021
+ try {
1022
+ const parsed = JSON.parse(
1023
+ await readFile(resolve(lockDir, "owner.json"), "utf8"),
1024
+ ) as unknown;
1025
+ return isRecord(parsed) && typeof parsed.ownerId === "string"
1026
+ ? { ownerId: parsed.ownerId }
1027
+ : undefined;
1028
+ } catch {
1029
+ return undefined;
1030
+ }
1031
+ }
1032
+
749
1033
  async function readDurableFetchFailure(
750
1034
  config: WorkflowWebSourceCacheConfig,
751
1035
  key: string,
752
1036
  ): Promise<FetchFailure | undefined> {
753
1037
  try {
754
- const parsed = JSON.parse(await readFile(fetchFailurePath(config, key), "utf8")) as unknown;
1038
+ const parsed = JSON.parse(
1039
+ await readFile(fetchFailurePath(config, key), "utf8"),
1040
+ ) as unknown;
755
1041
  return normalizeFetchFailure(parsed);
756
1042
  } catch {
757
1043
  return undefined;
@@ -773,23 +1059,36 @@ async function writeDurableFetchFailure(
773
1059
 
774
1060
  function normalizeFetchFailure(value: unknown): FetchFailure | undefined {
775
1061
  if (!isRecord(value)) return undefined;
776
- if (typeof value.code !== "string" || typeof value.message !== "string") return undefined;
1062
+ if (typeof value.code !== "string" || typeof value.message !== "string")
1063
+ return undefined;
777
1064
  const extra = isRecord(value.extra) ? value.extra : {};
778
1065
  return {
779
1066
  code: value.code,
780
1067
  message: value.message,
781
1068
  extra,
782
1069
  ...(typeof value.reason === "string" ? { reason: value.reason } : {}),
783
- ...(typeof value.createdAt === "string" ? { createdAt: value.createdAt } : {}),
1070
+ ...(typeof value.createdAt === "string"
1071
+ ? { createdAt: value.createdAt }
1072
+ : {}),
784
1073
  };
785
1074
  }
786
1075
 
787
- function fetchLockPath(config: WorkflowWebSourceCacheConfig, key: string): string {
1076
+ function fetchLockPath(
1077
+ config: WorkflowWebSourceCacheConfig,
1078
+ key: string,
1079
+ ): string {
788
1080
  return resolve(config.cacheDir, "fetch-locks", fetchCacheFileKey(key));
789
1081
  }
790
1082
 
791
- function fetchFailurePath(config: WorkflowWebSourceCacheConfig, key: string): string {
792
- return resolve(config.cacheDir, "fetch-negative-cache", `${fetchCacheFileKey(key)}.json`);
1083
+ function fetchFailurePath(
1084
+ config: WorkflowWebSourceCacheConfig,
1085
+ key: string,
1086
+ ): string {
1087
+ return resolve(
1088
+ config.cacheDir,
1089
+ "fetch-negative-cache",
1090
+ `${fetchCacheFileKey(key)}.json`,
1091
+ );
793
1092
  }
794
1093
 
795
1094
  function fetchCacheFileKey(key: string): string {
@@ -813,7 +1112,11 @@ function shouldCacheFetchFailure(reason: string): boolean {
813
1112
  }
814
1113
 
815
1114
  function shouldCacheFetchFailureInMemory(reason: string): boolean {
816
- return reason === "empty_source" || reason === "dns_resolution_failed" || reason.includes("ENOTFOUND");
1115
+ return (
1116
+ reason === "empty_source" ||
1117
+ reason === "dns_resolution_failed" ||
1118
+ reason.includes("ENOTFOUND")
1119
+ );
817
1120
  }
818
1121
 
819
1122
  const WORKFLOW_WEB_FETCH_TIMEOUT_MS = 30_000;
@@ -824,25 +1127,46 @@ async function safeFetchWorkflowWebText(
824
1127
  security: WorkflowWebSecurityPolicy,
825
1128
  signal?: AbortSignal,
826
1129
  ): Promise<
827
- | { ok: true; url: string; text: string; title?: string; extractionLossy?: boolean }
1130
+ | {
1131
+ ok: true;
1132
+ url: string;
1133
+ text: string;
1134
+ title?: string;
1135
+ extractionLossy?: boolean;
1136
+ }
828
1137
  | { ok: false; reason: string; url: string }
829
1138
  > {
830
1139
  let current = url;
831
1140
  for (let redirectCount = 0; redirectCount < 6; redirectCount += 1) {
832
1141
  const checked = validateWorkflowWebUrl(current, security);
833
1142
  if (!checked.ok) return { ok: false, reason: checked.reason, url: current };
834
- const response = await safeFetchOnce(checked.normalizedUrl, security, signal);
1143
+ const response = await safeFetchOnce(
1144
+ checked.normalizedUrl,
1145
+ security,
1146
+ signal,
1147
+ );
835
1148
  if (!response.ok) return response;
836
1149
  if (response.status >= 300 && response.status < 400) {
837
1150
  if (!response.location)
838
- return { ok: false, reason: "redirect_without_location", url: checked.normalizedUrl };
1151
+ return {
1152
+ ok: false,
1153
+ reason: "redirect_without_location",
1154
+ url: checked.normalizedUrl,
1155
+ };
839
1156
  current = new URL(response.location, checked.normalizedUrl).href;
840
1157
  continue;
841
1158
  }
842
1159
  if (response.status < 200 || response.status >= 300) {
843
- return { ok: false, reason: `http_${response.status}`, url: checked.normalizedUrl };
1160
+ return {
1161
+ ok: false,
1162
+ reason: `http_${response.status}`,
1163
+ url: checked.normalizedUrl,
1164
+ };
844
1165
  }
845
- const extracted = extractWorkflowWebResponseText(response.text, response.contentType);
1166
+ const extracted = extractWorkflowWebResponseText(
1167
+ response.text,
1168
+ response.contentType,
1169
+ );
846
1170
  return {
847
1171
  ok: true,
848
1172
  url: checked.normalizedUrl,
@@ -859,7 +1183,14 @@ function safeFetchOnce(
859
1183
  security: WorkflowWebSecurityPolicy,
860
1184
  signal?: AbortSignal,
861
1185
  ): Promise<
862
- | { ok: true; status: number; location?: string; text: string; contentType?: string; truncated?: boolean }
1186
+ | {
1187
+ ok: true;
1188
+ status: number;
1189
+ location?: string;
1190
+ text: string;
1191
+ contentType?: string;
1192
+ truncated?: boolean;
1193
+ }
863
1194
  | { ok: false; reason: string; url: string }
864
1195
  > {
865
1196
  const parsed = new URL(url);
@@ -868,7 +1199,14 @@ function safeFetchOnce(
868
1199
  let settled = false;
869
1200
  const settle = (
870
1201
  result:
871
- | { ok: true; status: number; location?: string; text: string; contentType?: string; truncated?: boolean }
1202
+ | {
1203
+ ok: true;
1204
+ status: number;
1205
+ location?: string;
1206
+ text: string;
1207
+ contentType?: string;
1208
+ truncated?: boolean;
1209
+ }
872
1210
  | { ok: false; reason: string; url: string },
873
1211
  ) => {
874
1212
  if (settled) return;
@@ -880,20 +1218,26 @@ function safeFetchOnce(
880
1218
  {
881
1219
  method: "GET",
882
1220
  headers: {
883
- accept: "text/plain,text/html,application/json,application/xml;q=0.9,*/*;q=0.1",
1221
+ accept:
1222
+ "text/plain,text/html,application/json,application/xml;q=0.9,*/*;q=0.1",
884
1223
  "user-agent": "pi-workflow-web-source/1",
885
1224
  },
886
1225
  lookup(hostname, options, callback) {
887
1226
  lookupPublicAddress(hostname, security)
888
1227
  .then((address) => {
889
1228
  if (isLookupAllOptions(options)) {
890
- callback(null, [{ address: address.address, family: address.family }]);
1229
+ callback(null, [
1230
+ { address: address.address, family: address.family },
1231
+ ]);
891
1232
  return;
892
1233
  }
893
1234
  callback(null, address.address, address.family);
894
1235
  })
895
1236
  .catch((error: unknown) => {
896
- const reason = error instanceof Error ? error.message : "dns_resolution_failed";
1237
+ const reason =
1238
+ error instanceof Error
1239
+ ? error.message
1240
+ : "dns_resolution_failed";
897
1241
  callback(new Error(reason), "", 4);
898
1242
  });
899
1243
  },
@@ -906,7 +1250,12 @@ function safeFetchOnce(
906
1250
  ? res.headers["content-type"][0]
907
1251
  : res.headers["content-type"];
908
1252
  const status = res.statusCode ?? 0;
909
- if (status >= 200 && status < 300 && contentType && !isWorkflowWebTextContentType(contentType)) {
1253
+ if (
1254
+ status >= 200 &&
1255
+ status < 300 &&
1256
+ contentType &&
1257
+ !isWorkflowWebTextContentType(contentType)
1258
+ ) {
910
1259
  res.resume();
911
1260
  settle({ ok: false, reason: "unsupported_content_type", url });
912
1261
  return;
@@ -914,7 +1263,10 @@ function safeFetchOnce(
914
1263
  res.on("data", (chunk: string) => {
915
1264
  if (settled) return;
916
1265
  if (text.length + chunk.length > WORKFLOW_WEB_FETCH_MAX_CHARS) {
917
- text += chunk.slice(0, Math.max(0, WORKFLOW_WEB_FETCH_MAX_CHARS - text.length));
1266
+ text += chunk.slice(
1267
+ 0,
1268
+ Math.max(0, WORKFLOW_WEB_FETCH_MAX_CHARS - text.length),
1269
+ );
918
1270
  truncated = true;
919
1271
  req.destroy(new Error("workflow_fetch_truncated"));
920
1272
  return;
@@ -977,7 +1329,9 @@ async function lookupPublicAddress(
977
1329
  : privateIpReason(address.address);
978
1330
  if (!reason) return address;
979
1331
  }
980
- throw new Error(addresses.length > 0 ? "private_host_blocked" : "dns_resolution_failed");
1332
+ throw new Error(
1333
+ addresses.length > 0 ? "private_host_blocked" : "dns_resolution_failed",
1334
+ );
981
1335
  }
982
1336
 
983
1337
  async function validateResolvedHost(
@@ -992,7 +1346,10 @@ async function validateResolvedHost(
992
1346
  return { ok: false, reason: "invalid_url", url };
993
1347
  }
994
1348
  try {
995
- const addresses = await lookup(parsed.hostname, { all: true, verbatim: true });
1349
+ const addresses = await lookup(parsed.hostname, {
1350
+ all: true,
1351
+ verbatim: true,
1352
+ });
996
1353
  for (const address of addresses) {
997
1354
  const reason = privateIpReason(address.address);
998
1355
  if (reason) return { ok: false, reason, url };
@@ -1015,27 +1372,38 @@ function privateIpReason(address: string): string | undefined {
1015
1372
  if (hexMapped) {
1016
1373
  const high = Number.parseInt(hexMapped[1]!, 16);
1017
1374
  const low = Number.parseInt(hexMapped[2]!, 16);
1018
- return privateIpReason(`${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`);
1375
+ return privateIpReason(
1376
+ `${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`,
1377
+ );
1019
1378
  }
1020
1379
  if (isIP(lower) === 4) {
1021
1380
  const parts = lower.split(".").map((part) => Number(part));
1022
- if (parts.length !== 4 || parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)) return "private_host_blocked";
1381
+ if (
1382
+ parts.length !== 4 ||
1383
+ parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)
1384
+ )
1385
+ return "private_host_blocked";
1023
1386
  const [a, b, c, d] = parts as [number, number, number, number];
1024
- if (a === 0 || a === 10 || a === 127 || a >= 224) return "private_host_blocked";
1387
+ if (a === 0 || a === 10 || a === 127 || a >= 224)
1388
+ return "private_host_blocked";
1025
1389
  if (a === 100 && b >= 64 && b <= 127) return "private_host_blocked";
1026
1390
  if (a === 169 && b === 254) return "private_host_blocked";
1027
1391
  if (a === 172 && b >= 16 && b <= 31) return "private_host_blocked";
1028
1392
  if (a === 192 && b === 168) return "private_host_blocked";
1029
- if (a === 192 && b === 0 && (c === 0 || c === 2)) return "private_host_blocked";
1393
+ if (a === 192 && b === 0 && (c === 0 || c === 2))
1394
+ return "private_host_blocked";
1030
1395
  if (a === 198 && (b === 18 || b === 19)) return "private_host_blocked";
1031
1396
  if (a === 198 && b === 51 && c === 100) return "private_host_blocked";
1032
1397
  if (a === 203 && b === 0 && c === 113) return "private_host_blocked";
1033
- if (a === 255 && b === 255 && c === 255 && d === 255) return "private_host_blocked";
1398
+ if (a === 255 && b === 255 && c === 255 && d === 255)
1399
+ return "private_host_blocked";
1034
1400
  }
1035
1401
  if (isIP(lower) === 6) {
1036
1402
  if (lower === "::" || lower === "::1") return "private_host_blocked";
1037
- if (lower.startsWith("fc") || lower.startsWith("fd")) return "private_host_blocked";
1038
- if (lower.startsWith("fe80") || lower.startsWith("ff")) return "private_host_blocked";
1403
+ if (lower.startsWith("fc") || lower.startsWith("fd"))
1404
+ return "private_host_blocked";
1405
+ if (lower.startsWith("fe80") || lower.startsWith("ff"))
1406
+ return "private_host_blocked";
1039
1407
  if (lower.startsWith("2001:db8")) return "private_host_blocked";
1040
1408
  }
1041
1409
  return undefined;
@@ -1048,7 +1416,10 @@ async function validateProviderResultUrls(
1048
1416
  for (const url of providerResultUrls(result)) {
1049
1417
  const checked = validateWorkflowWebUrl(url, security);
1050
1418
  if (!checked.ok) return { ok: false, reason: checked.reason, url };
1051
- const resolved = await validateResolvedHost(checked.normalizedUrl, security);
1419
+ const resolved = await validateResolvedHost(
1420
+ checked.normalizedUrl,
1421
+ security,
1422
+ );
1052
1423
  if (!resolved.ok) return resolved;
1053
1424
  }
1054
1425
  if (!security.allowPrivateHosts) {
@@ -1151,11 +1522,12 @@ function canonicalWorkflowWebFetchUrl(url: string): string {
1151
1522
  if (parsed.pathname.length > 1 && parsed.pathname.endsWith("/")) {
1152
1523
  parsed.pathname = parsed.pathname.slice(0, -1);
1153
1524
  }
1154
- const sortedParams = [...parsed.searchParams.entries()].sort(([left], [right]) =>
1155
- left.localeCompare(right),
1525
+ const sortedParams = [...parsed.searchParams.entries()].sort(
1526
+ ([left], [right]) => left.localeCompare(right),
1156
1527
  );
1157
1528
  parsed.search = "";
1158
- for (const [key, value] of sortedParams) parsed.searchParams.append(key, value);
1529
+ for (const [key, value] of sortedParams)
1530
+ parsed.searchParams.append(key, value);
1159
1531
  return parsed.href;
1160
1532
  }
1161
1533
 
@@ -1165,13 +1537,20 @@ function shouldKeepWorkflowWebFragment(hash: string): boolean {
1165
1537
  return raw.startsWith("/") || raw.startsWith("!") || raw.includes("?");
1166
1538
  }
1167
1539
 
1168
- function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceRequest[] {
1540
+ function fetchSourceRequestsFromParams(
1541
+ params: unknown,
1542
+ ): WorkflowWebFetchSourceRequest[] {
1169
1543
  if (!isRecord(params)) return [];
1170
1544
  const requests: WorkflowWebFetchSourceRequest[] = [];
1171
1545
  const titles = Array.isArray(params.titles) ? params.titles : [];
1172
1546
  if (Array.isArray(params.sources)) {
1173
1547
  for (const source of params.sources) {
1174
- if (!isRecord(source) || typeof source.url !== "string" || !source.url.trim()) continue;
1548
+ if (
1549
+ !isRecord(source) ||
1550
+ typeof source.url !== "string" ||
1551
+ !source.url.trim()
1552
+ )
1553
+ continue;
1175
1554
  requests.push({
1176
1555
  url: source.url.trim(),
1177
1556
  ...(typeof source.title === "string" && source.title.trim()
@@ -1186,7 +1565,9 @@ function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceR
1186
1565
  const title = titles[index];
1187
1566
  requests.push({
1188
1567
  url: url.trim(),
1189
- ...(typeof title === "string" && title.trim() ? { title: title.trim() } : {}),
1568
+ ...(typeof title === "string" && title.trim()
1569
+ ? { title: title.trim() }
1570
+ : {}),
1190
1571
  });
1191
1572
  }
1192
1573
  }
@@ -1201,7 +1582,9 @@ function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceR
1201
1582
  return dedupeFetchSourceRequests(requests).slice(0, 20);
1202
1583
  }
1203
1584
 
1204
- function dedupeFetchSourceRequests(requests: WorkflowWebFetchSourceRequest[]): WorkflowWebFetchSourceRequest[] {
1585
+ function dedupeFetchSourceRequests(
1586
+ requests: WorkflowWebFetchSourceRequest[],
1587
+ ): WorkflowWebFetchSourceRequest[] {
1205
1588
  const deduped: WorkflowWebFetchSourceRequest[] = [];
1206
1589
  const seen = new Set<string>();
1207
1590
  for (const request of requests) {
@@ -1214,7 +1597,9 @@ function dedupeFetchSourceRequests(requests: WorkflowWebFetchSourceRequest[]): W
1214
1597
  }
1215
1598
 
1216
1599
  function payloadFromToolResult(result: ToolResult): Record<string, unknown> {
1217
- const text = result.content?.find((item) => typeof item.text === "string")?.text;
1600
+ const text = result.content?.find(
1601
+ (item) => typeof item.text === "string",
1602
+ )?.text;
1218
1603
  if (typeof text !== "string") return {};
1219
1604
  try {
1220
1605
  const payload = JSON.parse(text);
@@ -1237,7 +1622,9 @@ function titleFromParams(params: unknown): string | undefined {
1237
1622
  return stringParam(params, "title");
1238
1623
  }
1239
1624
 
1240
- function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadRequest[] {
1625
+ function sourceReadRequestsFromParams(
1626
+ params: unknown,
1627
+ ): WorkflowWebSourceReadRequest[] {
1241
1628
  const requests: WorkflowWebSourceReadRequest[] = [];
1242
1629
  if (isRecord(params) && Array.isArray(params.reads)) {
1243
1630
  for (const item of params.reads) {
@@ -1245,9 +1632,12 @@ function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadReq
1245
1632
  if (request) requests.push(request);
1246
1633
  }
1247
1634
  }
1248
- for (const query of stringArrayParam(params, "queries")) requests.push({ query });
1249
- for (const query of stringArrayParam(params, "exactTexts")) requests.push({ query });
1250
- for (const query of stringArrayParam(params, "texts")) requests.push({ query });
1635
+ for (const query of stringArrayParam(params, "queries"))
1636
+ requests.push({ query });
1637
+ for (const query of stringArrayParam(params, "exactTexts"))
1638
+ requests.push({ query });
1639
+ for (const query of stringArrayParam(params, "texts"))
1640
+ requests.push({ query });
1251
1641
  const query =
1252
1642
  stringParam(params, "query") ??
1253
1643
  stringParam(params, "exactText") ??
@@ -1255,11 +1645,14 @@ function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadReq
1255
1645
  stringParam(params, "text");
1256
1646
  const claim = stringParam(params, "claim");
1257
1647
  const terms = stringArrayParam(params, "terms");
1258
- if (query || claim || terms.length > 0) requests.push({ query, claim, terms });
1648
+ if (query || claim || terms.length > 0)
1649
+ requests.push({ query, claim, terms });
1259
1650
  return dedupeSourceReadRequests(requests).slice(0, 20);
1260
1651
  }
1261
1652
 
1262
- function sourceReadRequestFromRecord(value: unknown): WorkflowWebSourceReadRequest | undefined {
1653
+ function sourceReadRequestFromRecord(
1654
+ value: unknown,
1655
+ ): WorkflowWebSourceReadRequest | undefined {
1263
1656
  if (!isRecord(value)) return undefined;
1264
1657
  const query =
1265
1658
  stringParam(value, "query") ??
@@ -1273,7 +1666,9 @@ function sourceReadRequestFromRecord(value: unknown): WorkflowWebSourceReadReque
1273
1666
  return { query, claim, terms, maxChars };
1274
1667
  }
1275
1668
 
1276
- function dedupeSourceReadRequests(requests: WorkflowWebSourceReadRequest[]): WorkflowWebSourceReadRequest[] {
1669
+ function dedupeSourceReadRequests(
1670
+ requests: WorkflowWebSourceReadRequest[],
1671
+ ): WorkflowWebSourceReadRequest[] {
1277
1672
  const deduped: WorkflowWebSourceReadRequest[] = [];
1278
1673
  const seen = new Set<string>();
1279
1674
  for (const request of requests) {
@@ -1292,18 +1687,27 @@ function dedupeSourceReadRequests(requests: WorkflowWebSourceReadRequest[]): Wor
1292
1687
 
1293
1688
  function sourceReadBatchRequested(params: unknown): boolean {
1294
1689
  return (
1295
- (isRecord(params) && Array.isArray(params.reads) && params.reads.length > 0) ||
1690
+ (isRecord(params) &&
1691
+ Array.isArray(params.reads) &&
1692
+ params.reads.length > 0) ||
1296
1693
  stringArrayParam(params, "queries").length > 0 ||
1297
1694
  stringArrayParam(params, "exactTexts").length > 0 ||
1298
1695
  stringArrayParam(params, "texts").length > 0
1299
1696
  );
1300
1697
  }
1301
1698
 
1302
- type SourceReadToolStatus = "ok" | "candidate" | "budget_exhausted" | "not_found";
1699
+ type SourceReadToolStatus =
1700
+ | "ok"
1701
+ | "candidate"
1702
+ | "truncated"
1703
+ | "budget_exhausted"
1704
+ | "not_found";
1303
1705
 
1304
1706
  function sourceReadResponseStatus(
1305
1707
  read: WorkflowWebSourceReadResult,
1306
1708
  ): SourceReadToolStatus {
1709
+ if (read.status === "truncated" && !read.quote) return "budget_exhausted";
1710
+ if (read.status === "truncated") return "truncated";
1307
1711
  if (read.status === "matched" && !read.quote) return "budget_exhausted";
1308
1712
  if (read.status === "matched" && read.candidateOnly) return "candidate";
1309
1713
  if (read.status === "matched") return "ok";
@@ -1312,11 +1716,19 @@ function sourceReadResponseStatus(
1312
1716
 
1313
1717
  function aggregateSourceReadStatus(
1314
1718
  statuses: SourceReadToolStatus[],
1315
- ): "ok" | "candidate" | "partial" | "budget_exhausted" | "not_found" {
1719
+ ):
1720
+ | "ok"
1721
+ | "candidate"
1722
+ | "partial"
1723
+ | "truncated"
1724
+ | "budget_exhausted"
1725
+ | "not_found" {
1316
1726
  if (statuses.every((status) => status === "ok")) return "ok";
1317
1727
  if (statuses.every((status) => status === "candidate")) return "candidate";
1728
+ if (statuses.every((status) => status === "truncated")) return "truncated";
1318
1729
  if (statuses.every((status) => status === "not_found")) return "not_found";
1319
- if (statuses.every((status) => status === "budget_exhausted")) return "budget_exhausted";
1730
+ if (statuses.every((status) => status === "budget_exhausted"))
1731
+ return "budget_exhausted";
1320
1732
  return "partial";
1321
1733
  }
1322
1734
 
@@ -1345,18 +1757,25 @@ function positiveIntParam(params: unknown, key: string): number | undefined {
1345
1757
  }
1346
1758
 
1347
1759
  function isWorkflowWebTextContentType(contentType: string): boolean {
1348
- return /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i.test(contentType.trim());
1760
+ return /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i.test(
1761
+ contentType.trim(),
1762
+ );
1349
1763
  }
1350
1764
 
1351
1765
  function extractWorkflowWebResponseText(
1352
1766
  text: string,
1353
1767
  contentType?: string,
1354
1768
  ): { text: string; title?: string; lossy?: boolean } {
1355
- const looksHtml = /html/i.test(contentType ?? "") || /<html[\s>]|<body[\s>]|<title[\s>]/i.test(text);
1769
+ const looksHtml =
1770
+ /html/i.test(contentType ?? "") ||
1771
+ /<html[\s>]|<body[\s>]|<title[\s>]/i.test(text);
1356
1772
  if (!looksHtml) {
1357
1773
  return { text, title: titleFromPlainText(text) };
1358
1774
  }
1359
- const title = decodeHtmlEntities(text.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]?.trim() ?? "").slice(0, 200) || undefined;
1775
+ const title =
1776
+ decodeHtmlEntities(
1777
+ text.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]?.trim() ?? "",
1778
+ ).slice(0, 200) || undefined;
1360
1779
  const body = text
1361
1780
  .replace(/<script\b[\s\S]*?<\/script>/gi, " ")
1362
1781
  .replace(/<style\b[\s\S]*?<\/style>/gi, " ")
@@ -1407,5 +1826,8 @@ function extensionImportSpecifier(importPath: string): string {
1407
1826
  }
1408
1827
 
1409
1828
  export function workflowWebSourceModuleImportPath(modulePath: string): string {
1410
- return resolve(dirname(modulePath), `workflow-web-source-extension${extname(modulePath)}`);
1829
+ return resolve(
1830
+ dirname(modulePath),
1831
+ `workflow-web-source-extension${extname(modulePath)}`,
1832
+ );
1411
1833
  }