@agwab/pi-workflow 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-graph-runtime.d.ts +1 -1
- package/dist/artifact-graph-runtime.js +10 -5
- package/dist/artifact-graph-schema.js +127 -5
- package/dist/compiler.js +52 -19
- package/dist/dynamic-generated-task-runtime.js +3 -1
- package/dist/dynamic-profiles.d.ts +1 -1
- package/dist/engine-run-graph.d.ts +3 -0
- package/dist/engine-run-graph.js +194 -4
- package/dist/engine.d.ts +5 -0
- package/dist/engine.js +389 -41
- package/dist/extension.d.ts +2 -1
- package/dist/extension.js +30 -8
- package/dist/index.d.ts +11 -3
- package/dist/index.js +6 -1
- package/dist/prompt-json.d.ts +7 -0
- package/dist/prompt-json.js +13 -0
- package/dist/roles.d.ts +1 -1
- package/dist/roles.js +5 -8
- package/dist/store.d.ts +20 -1
- package/dist/store.js +139 -35
- package/dist/strings.d.ts +11 -0
- package/dist/strings.js +24 -0
- package/dist/subagent-backend.js +710 -40
- package/dist/types.d.ts +107 -1
- package/dist/verification-ontology.d.ts +31 -0
- package/dist/verification-ontology.js +66 -0
- package/dist/workflow-artifact-tool.js +5 -6
- package/dist/workflow-artifacts.d.ts +7 -0
- package/dist/workflow-artifacts.js +55 -4
- package/dist/workflow-fetch-cache-extension.d.ts +1 -0
- package/dist/workflow-fetch-cache-extension.js +57 -9
- package/dist/workflow-metrics.d.ts +113 -0
- package/dist/workflow-metrics.js +272 -0
- package/dist/workflow-output-artifacts.js +5 -3
- package/dist/workflow-partial-output.d.ts +45 -0
- package/dist/workflow-partial-output.js +205 -0
- package/dist/workflow-progress-health.js +42 -10
- package/dist/workflow-runtime.js +10 -1
- package/dist/workflow-view.js +3 -1
- package/dist/workflow-web-source-extension.js +194 -52
- package/dist/workflow-web-source.d.ts +2 -1
- package/dist/workflow-web-source.js +109 -30
- package/docs/usage.md +76 -29
- package/node_modules/@agwab/pi-subagent/README.md +3 -3
- package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
- package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
- package/node_modules/@agwab/pi-subagent/package.json +2 -2
- package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
- package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
- package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
- package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
- package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
- package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
- package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
- package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
- package/package.json +2 -2
- package/skills/workflow-guide/SKILL.md +1 -0
- package/src/artifact-graph-runtime.ts +19 -13
- package/src/artifact-graph-schema.ts +143 -3
- package/src/cli.mjs +52 -0
- package/src/compiler.ts +63 -18
- package/src/dynamic-generated-task-runtime.ts +3 -1
- package/src/dynamic-profiles.ts +1 -1
- package/src/engine-run-graph.ts +246 -4
- package/src/engine.ts +545 -38
- package/src/extension.ts +36 -6
- package/src/index.ts +52 -1
- package/src/prompt-json.ts +13 -0
- package/src/roles.ts +6 -9
- package/src/store.ts +194 -42
- package/src/strings.ts +38 -0
- package/src/subagent-backend.ts +921 -62
- package/src/types.ts +116 -2
- package/src/verification-ontology.ts +88 -0
- package/src/workflow-artifact-tool.ts +5 -7
- package/src/workflow-artifacts.ts +83 -3
- package/src/workflow-fetch-cache-extension.ts +78 -13
- package/src/workflow-metrics.ts +478 -0
- package/src/workflow-output-artifacts.ts +5 -3
- package/src/workflow-partial-output.ts +299 -0
- package/src/workflow-progress-health.ts +47 -15
- package/src/workflow-runtime.ts +18 -2
- package/src/workflow-view.ts +2 -1
- package/src/workflow-web-source-extension.ts +654 -232
- package/src/workflow-web-source.ts +153 -39
- package/workflows/README.md +7 -25
- package/workflows/deep-research/batched-verification.spec.json +253 -0
- package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
- package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
- package/workflows/deep-research/helpers/render-executive.mjs +40 -26
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
- package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
- package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
- package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
- package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
- package/workflows/deep-research/spec.json +32 -12
- package/workflows/impact-review/spec.json +3 -3
- package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
- package/dist/dynamic-loader.d.ts +0 -25
- package/dist/dynamic-loader.js +0 -13
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
- package/src/dynamic-loader.ts +0 -49
- package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
|
@@ -11,7 +11,6 @@ import {
|
|
|
11
11
|
buildWorkflowWebSourceCard,
|
|
12
12
|
createWorkflowWebSource,
|
|
13
13
|
createWorkflowWebVisibleBudget,
|
|
14
|
-
DEFAULT_WORKFLOW_WEB_SECURITY_POLICY,
|
|
15
14
|
errorToolResult,
|
|
16
15
|
extractSearchCandidates,
|
|
17
16
|
extractTextFromToolResult,
|
|
@@ -43,7 +42,8 @@ export interface WorkflowWebProviderLaunchConfig {
|
|
|
43
42
|
extensionPath?: string;
|
|
44
43
|
}
|
|
45
44
|
|
|
46
|
-
export interface WorkflowWebSourceLaunchConfig
|
|
45
|
+
export interface WorkflowWebSourceLaunchConfig
|
|
46
|
+
extends WorkflowWebSourceCacheConfig {
|
|
47
47
|
schema: typeof WORKFLOW_WEB_SOURCE_LAUNCH_CONFIG_SCHEMA;
|
|
48
48
|
workflowName?: string;
|
|
49
49
|
stageId?: string;
|
|
@@ -111,14 +111,21 @@ export function registerWorkflowWebSourceExtension(
|
|
|
111
111
|
): void {
|
|
112
112
|
const policy = normalizeWorkflowWebSourcePolicy(config.webSourcePolicy);
|
|
113
113
|
const security = normalizeWorkflowWebSecurityPolicy(config.securityPolicy);
|
|
114
|
-
const budget = createWorkflowWebVisibleBudget(
|
|
114
|
+
const budget = createWorkflowWebVisibleBudget(
|
|
115
|
+
policy.perTaskVisibleCharBudget,
|
|
116
|
+
);
|
|
115
117
|
const providerTools: CapturedProviderTools = new Map();
|
|
116
118
|
const sourceCache: Map<string, WorkflowWebSource> = new Map();
|
|
117
|
-
const fetchInFlight: Map<
|
|
119
|
+
const fetchInFlight: Map<
|
|
120
|
+
string,
|
|
121
|
+
Promise<ReturnType<typeof toolResultFromJson>>
|
|
122
|
+
> = new Map();
|
|
118
123
|
const fetchFailures: Map<string, FetchFailure> = new Map();
|
|
119
124
|
|
|
120
125
|
if (providerExtension) {
|
|
121
|
-
providerExtension(
|
|
126
|
+
providerExtension(
|
|
127
|
+
providerCapturePi(pi, providerTools, Boolean(config.exposeLegacyTools)),
|
|
128
|
+
);
|
|
122
129
|
}
|
|
123
130
|
|
|
124
131
|
pi.registerTool({
|
|
@@ -126,9 +133,15 @@ export function registerWorkflowWebSourceExtension(
|
|
|
126
133
|
description:
|
|
127
134
|
"Search the web through the workflow web-source provider and return compact candidate cards only.",
|
|
128
135
|
parameters: Type.Object({
|
|
129
|
-
query: Type.Optional(
|
|
130
|
-
|
|
131
|
-
|
|
136
|
+
query: Type.Optional(
|
|
137
|
+
Type.String({ description: "Single search query." }),
|
|
138
|
+
),
|
|
139
|
+
queries: Type.Optional(
|
|
140
|
+
Type.Array(Type.String(), { description: "Multiple search queries." }),
|
|
141
|
+
),
|
|
142
|
+
numResults: Type.Optional(
|
|
143
|
+
Type.Number({ description: "Results per query." }),
|
|
144
|
+
),
|
|
132
145
|
}),
|
|
133
146
|
execute: async (toolCallId, params, signal, onUpdate, ctx) => {
|
|
134
147
|
const providerTool = providerTools.get("web_search");
|
|
@@ -150,14 +163,19 @@ export function registerWorkflowWebSourceExtension(
|
|
|
150
163
|
onUpdate,
|
|
151
164
|
ctx,
|
|
152
165
|
);
|
|
153
|
-
const candidates = extractSearchCandidates(result, policy).map(
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
166
|
+
const candidates = extractSearchCandidates(result, policy).map(
|
|
167
|
+
(candidate) => {
|
|
168
|
+
const consumed = consumeText(
|
|
169
|
+
candidate.snippet,
|
|
170
|
+
policy.searchSnippetChars,
|
|
171
|
+
);
|
|
172
|
+
return {
|
|
173
|
+
...candidate,
|
|
174
|
+
snippet: consumed.text,
|
|
175
|
+
budget: consumed.budget,
|
|
176
|
+
};
|
|
177
|
+
},
|
|
178
|
+
);
|
|
161
179
|
await recordWorkflowWebSourceEvent(config, "search", {
|
|
162
180
|
candidateCount: candidates.length,
|
|
163
181
|
visibleChars: budget.used,
|
|
@@ -177,14 +195,44 @@ export function registerWorkflowWebSourceExtension(
|
|
|
177
195
|
description:
|
|
178
196
|
"Fetch one or more URLs into the workflow web-source cache and return compact source cards with sourceRefs.",
|
|
179
197
|
parameters: Type.Object({
|
|
180
|
-
url: Type.Optional(
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
198
|
+
url: Type.Optional(
|
|
199
|
+
Type.String({
|
|
200
|
+
description:
|
|
201
|
+
"Single URL to fetch into the workflow web-source cache.",
|
|
202
|
+
}),
|
|
203
|
+
),
|
|
204
|
+
urls: Type.Optional(
|
|
205
|
+
Type.Array(Type.String(), {
|
|
206
|
+
description:
|
|
207
|
+
"Multiple URLs to fetch in one tool call. Prefer this over repeated fetch calls when caching several promising sources.",
|
|
208
|
+
}),
|
|
209
|
+
),
|
|
210
|
+
sources: Type.Optional(
|
|
211
|
+
Type.Array(
|
|
212
|
+
Type.Object({
|
|
213
|
+
url: Type.String({
|
|
214
|
+
description: "URL to fetch into the workflow web-source cache.",
|
|
215
|
+
}),
|
|
216
|
+
title: Type.Optional(
|
|
217
|
+
Type.String({ description: "Optional source title override." }),
|
|
218
|
+
),
|
|
219
|
+
}),
|
|
220
|
+
{
|
|
221
|
+
description:
|
|
222
|
+
"Multiple URL/title objects to fetch in one tool call.",
|
|
223
|
+
},
|
|
224
|
+
),
|
|
225
|
+
),
|
|
226
|
+
title: Type.Optional(
|
|
227
|
+
Type.String({
|
|
228
|
+
description: "Optional source title override for single-url fetches.",
|
|
229
|
+
}),
|
|
230
|
+
),
|
|
231
|
+
titles: Type.Optional(
|
|
232
|
+
Type.Array(Type.String(), {
|
|
233
|
+
description: "Optional title overrides paired by index with urls.",
|
|
234
|
+
}),
|
|
235
|
+
),
|
|
188
236
|
}),
|
|
189
237
|
execute: async (toolCallId, params, signal, onUpdate, ctx) => {
|
|
190
238
|
const batchRequested = fetchSourceBatchRequested(params);
|
|
@@ -212,10 +260,15 @@ export function registerWorkflowWebSourceExtension(
|
|
|
212
260
|
results.push({
|
|
213
261
|
index,
|
|
214
262
|
url: sanitizeUrlForModel(request.url),
|
|
215
|
-
status:
|
|
263
|
+
status:
|
|
264
|
+
typeof payload.status === "string" ? payload.status : "unknown",
|
|
216
265
|
...(typeof payload.code === "string" ? { code: payload.code } : {}),
|
|
217
|
-
...(typeof payload.message === "string"
|
|
218
|
-
|
|
266
|
+
...(typeof payload.message === "string"
|
|
267
|
+
? { message: payload.message }
|
|
268
|
+
: {}),
|
|
269
|
+
...(typeof card?.sourceRef === "string"
|
|
270
|
+
? { sourceRef: card.sourceRef }
|
|
271
|
+
: {}),
|
|
219
272
|
...(card ? { cardIndex: cards.length - 1 } : {}),
|
|
220
273
|
});
|
|
221
274
|
}
|
|
@@ -239,7 +292,13 @@ export function registerWorkflowWebSourceExtension(
|
|
|
239
292
|
next: "Use returned sourceRefs with workflow_web_source_read; batch snippets with reads:[...] or queries:[...] when possible.",
|
|
240
293
|
});
|
|
241
294
|
}
|
|
242
|
-
return await fetchWorkflowWebSourceOnce(
|
|
295
|
+
return await fetchWorkflowWebSourceOnce(
|
|
296
|
+
toolCallId,
|
|
297
|
+
params,
|
|
298
|
+
signal,
|
|
299
|
+
onUpdate,
|
|
300
|
+
ctx,
|
|
301
|
+
);
|
|
243
302
|
},
|
|
244
303
|
});
|
|
245
304
|
|
|
@@ -250,85 +309,132 @@ export function registerWorkflowWebSourceExtension(
|
|
|
250
309
|
onUpdate?: unknown,
|
|
251
310
|
ctx?: unknown,
|
|
252
311
|
): Promise<ToolResult> {
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
312
|
+
const url = urlFromParams(params);
|
|
313
|
+
if (!url) {
|
|
314
|
+
return errorToolResult(
|
|
315
|
+
"invalid_params",
|
|
316
|
+
"workflow_web_fetch_source requires a url string parameter.",
|
|
317
|
+
);
|
|
318
|
+
}
|
|
319
|
+
const checked = validateWorkflowWebUrl(url, security);
|
|
320
|
+
if (!checked.ok) {
|
|
321
|
+
await recordWorkflowWebSourceEvent(config, "blocked_url", {
|
|
322
|
+
url: sanitizeUrlForModel(url),
|
|
323
|
+
reason: checked.reason,
|
|
324
|
+
});
|
|
325
|
+
return errorToolResult(
|
|
326
|
+
"blocked_url",
|
|
327
|
+
"URL blocked by workflow web-source security policy.",
|
|
328
|
+
{
|
|
267
329
|
reason: checked.reason,
|
|
268
330
|
url: sanitizeUrlForModel(url),
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
const
|
|
313
|
-
|
|
331
|
+
},
|
|
332
|
+
);
|
|
333
|
+
}
|
|
334
|
+
const fetchUrl = canonicalWorkflowWebFetchUrl(checked.normalizedUrl);
|
|
335
|
+
const existing = await findWorkflowWebSourceByUrl(config, fetchUrl);
|
|
336
|
+
if (existing) {
|
|
337
|
+
sourceCache.set(existing.sourceRef, existing);
|
|
338
|
+
const card = buildWorkflowWebSourceCard({
|
|
339
|
+
source: existing,
|
|
340
|
+
policy,
|
|
341
|
+
budget,
|
|
342
|
+
duplicate: true,
|
|
343
|
+
});
|
|
344
|
+
await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
|
|
345
|
+
sourceRef: existing.sourceRef,
|
|
346
|
+
url: existing.redactedUrl,
|
|
347
|
+
visibleChars: budget.used,
|
|
348
|
+
});
|
|
349
|
+
return toolResultFromJson({
|
|
350
|
+
status: "ok",
|
|
351
|
+
tool: "workflow_web_fetch_source",
|
|
352
|
+
card,
|
|
353
|
+
});
|
|
354
|
+
}
|
|
355
|
+
const fetchKey = sourceUrlCacheKey(fetchUrl);
|
|
356
|
+
const cachedFailure =
|
|
357
|
+
fetchFailures.get(fetchKey) ??
|
|
358
|
+
(await readDurableFetchFailure(config, fetchKey));
|
|
359
|
+
if (cachedFailure) {
|
|
360
|
+
fetchFailures.set(fetchKey, cachedFailure);
|
|
361
|
+
await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
|
|
362
|
+
url: sanitizeUrlForModel(fetchUrl),
|
|
363
|
+
code: cachedFailure.code,
|
|
364
|
+
});
|
|
365
|
+
return errorToolResult(
|
|
366
|
+
cachedFailure.code,
|
|
367
|
+
cachedFailure.message,
|
|
368
|
+
cachedFailure.extra,
|
|
369
|
+
);
|
|
370
|
+
}
|
|
371
|
+
const inFlight = fetchInFlight.get(fetchKey);
|
|
372
|
+
if (inFlight) {
|
|
373
|
+
const result = await inFlight;
|
|
374
|
+
const source = await findWorkflowWebSourceByUrl(config, fetchUrl);
|
|
375
|
+
if (!source) return result;
|
|
376
|
+
sourceCache.set(source.sourceRef, source);
|
|
377
|
+
const card = buildWorkflowWebSourceCard({
|
|
378
|
+
source,
|
|
379
|
+
policy,
|
|
380
|
+
budget,
|
|
381
|
+
duplicate: true,
|
|
382
|
+
});
|
|
383
|
+
await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
|
|
384
|
+
sourceRef: source.sourceRef,
|
|
385
|
+
url: source.redactedUrl,
|
|
386
|
+
visibleChars: budget.used,
|
|
387
|
+
});
|
|
388
|
+
return toolResultFromJson({
|
|
389
|
+
status: "ok",
|
|
390
|
+
tool: "workflow_web_fetch_source",
|
|
391
|
+
card,
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
const fetchPromise = withWorkflowWebFetchLock(
|
|
395
|
+
config,
|
|
396
|
+
fetchKey,
|
|
397
|
+
signal,
|
|
398
|
+
async () => {
|
|
399
|
+
const lockedExisting = await findWorkflowWebSourceByUrl(
|
|
400
|
+
config,
|
|
401
|
+
fetchUrl,
|
|
402
|
+
);
|
|
314
403
|
if (lockedExisting) {
|
|
315
404
|
sourceCache.set(lockedExisting.sourceRef, lockedExisting);
|
|
316
|
-
const card = buildWorkflowWebSourceCard({
|
|
405
|
+
const card = buildWorkflowWebSourceCard({
|
|
406
|
+
source: lockedExisting,
|
|
407
|
+
policy,
|
|
408
|
+
budget,
|
|
409
|
+
duplicate: true,
|
|
410
|
+
});
|
|
317
411
|
await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
|
|
318
412
|
sourceRef: lockedExisting.sourceRef,
|
|
319
413
|
url: lockedExisting.redactedUrl,
|
|
320
414
|
visibleChars: budget.used,
|
|
321
415
|
});
|
|
322
|
-
return toolResultFromJson({
|
|
416
|
+
return toolResultFromJson({
|
|
417
|
+
status: "ok",
|
|
418
|
+
tool: "workflow_web_fetch_source",
|
|
419
|
+
card,
|
|
420
|
+
});
|
|
323
421
|
}
|
|
324
422
|
const lockedFailure = await readDurableFetchFailure(config, fetchKey);
|
|
325
423
|
if (lockedFailure) {
|
|
326
424
|
fetchFailures.set(fetchKey, lockedFailure);
|
|
327
|
-
await recordWorkflowWebSourceEvent(
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
425
|
+
await recordWorkflowWebSourceEvent(
|
|
426
|
+
config,
|
|
427
|
+
"fetch_negative_cache_hit",
|
|
428
|
+
{
|
|
429
|
+
url: sanitizeUrlForModel(fetchUrl),
|
|
430
|
+
code: lockedFailure.code,
|
|
431
|
+
},
|
|
432
|
+
);
|
|
433
|
+
return errorToolResult(
|
|
434
|
+
lockedFailure.code,
|
|
435
|
+
lockedFailure.message,
|
|
436
|
+
lockedFailure.extra,
|
|
437
|
+
);
|
|
332
438
|
}
|
|
333
439
|
let text: string;
|
|
334
440
|
let title = titleFromParams(params);
|
|
@@ -345,13 +451,21 @@ export function registerWorkflowWebSourceExtension(
|
|
|
345
451
|
url: sanitizeUrlForModel(safeFetch.url),
|
|
346
452
|
reason: safeFetch.reason,
|
|
347
453
|
});
|
|
348
|
-
return await cachedFetchFailureResult(
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
454
|
+
return await cachedFetchFailureResult(
|
|
455
|
+
config,
|
|
456
|
+
fetchFailures,
|
|
457
|
+
fetchKey,
|
|
458
|
+
{
|
|
459
|
+
code: "blocked_url",
|
|
460
|
+
message:
|
|
461
|
+
"URL was blocked by workflow web-source security policy before content fetch.",
|
|
462
|
+
extra: {
|
|
463
|
+
reason: safeFetch.reason,
|
|
464
|
+
url: sanitizeUrlForModel(safeFetch.url),
|
|
465
|
+
},
|
|
466
|
+
reason: safeFetch.reason,
|
|
467
|
+
},
|
|
468
|
+
);
|
|
355
469
|
}
|
|
356
470
|
text = safeFetch.text;
|
|
357
471
|
title = title ?? safeFetch.title;
|
|
@@ -368,31 +482,44 @@ export function registerWorkflowWebSourceExtension(
|
|
|
368
482
|
return errorToolResult(missing.code, missing.message);
|
|
369
483
|
}
|
|
370
484
|
if (!security.allowPrivateHosts) {
|
|
371
|
-
await recordWorkflowWebSourceEvent(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
485
|
+
await recordWorkflowWebSourceEvent(
|
|
486
|
+
config,
|
|
487
|
+
"blocked_provider_fetch",
|
|
488
|
+
{
|
|
489
|
+
url: sanitizeUrlForModel(fetchUrl),
|
|
490
|
+
reason: "untrusted_provider_fetch",
|
|
491
|
+
},
|
|
492
|
+
);
|
|
375
493
|
return errorToolResult(
|
|
376
494
|
"untrusted_provider_fetch",
|
|
377
495
|
"Custom provider fetch_content is disabled unless securityPolicy.allowPrivateHosts is true; use the default safe fetch provider or a trusted provider configuration.",
|
|
378
496
|
{ url: sanitizeUrlForModel(fetchUrl) },
|
|
379
497
|
);
|
|
380
498
|
}
|
|
381
|
-
const providerHostCheck = await validateResolvedHost(
|
|
499
|
+
const providerHostCheck = await validateResolvedHost(
|
|
500
|
+
fetchUrl,
|
|
501
|
+
security,
|
|
502
|
+
);
|
|
382
503
|
if (!providerHostCheck.ok) {
|
|
383
504
|
await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
|
|
384
505
|
url: sanitizeUrlForModel(providerHostCheck.url),
|
|
385
506
|
reason: providerHostCheck.reason,
|
|
386
507
|
});
|
|
387
|
-
return await cachedFetchFailureResult(
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
508
|
+
return await cachedFetchFailureResult(
|
|
509
|
+
config,
|
|
510
|
+
fetchFailures,
|
|
511
|
+
fetchKey,
|
|
512
|
+
{
|
|
513
|
+
code: "blocked_url",
|
|
514
|
+
message:
|
|
515
|
+
"URL was blocked by workflow web-source security policy before provider fetch.",
|
|
516
|
+
extra: {
|
|
517
|
+
reason: providerHostCheck.reason,
|
|
518
|
+
url: sanitizeUrlForModel(providerHostCheck.url),
|
|
519
|
+
},
|
|
391
520
|
reason: providerHostCheck.reason,
|
|
392
|
-
url: sanitizeUrlForModel(providerHostCheck.url),
|
|
393
521
|
},
|
|
394
|
-
|
|
395
|
-
});
|
|
522
|
+
);
|
|
396
523
|
}
|
|
397
524
|
const result = await providerTool.execute(
|
|
398
525
|
toolCallId,
|
|
@@ -401,21 +528,30 @@ export function registerWorkflowWebSourceExtension(
|
|
|
401
528
|
onUpdate,
|
|
402
529
|
ctx,
|
|
403
530
|
);
|
|
404
|
-
const providerUrlCheck = await validateProviderResultUrls(
|
|
531
|
+
const providerUrlCheck = await validateProviderResultUrls(
|
|
532
|
+
result,
|
|
533
|
+
security,
|
|
534
|
+
);
|
|
405
535
|
if (!providerUrlCheck.ok) {
|
|
406
536
|
await recordWorkflowWebSourceEvent(config, "blocked_provider_url", {
|
|
407
537
|
url: sanitizeUrlForModel(providerUrlCheck.url),
|
|
408
538
|
reason: providerUrlCheck.reason,
|
|
409
539
|
});
|
|
410
|
-
return await cachedFetchFailureResult(
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
540
|
+
return await cachedFetchFailureResult(
|
|
541
|
+
config,
|
|
542
|
+
fetchFailures,
|
|
543
|
+
fetchKey,
|
|
544
|
+
{
|
|
545
|
+
code: "blocked_url",
|
|
546
|
+
message:
|
|
547
|
+
"Provider result URL was blocked by workflow web-source security policy.",
|
|
548
|
+
extra: {
|
|
549
|
+
reason: providerUrlCheck.reason,
|
|
550
|
+
url: sanitizeUrlForModel(providerUrlCheck.url),
|
|
551
|
+
},
|
|
414
552
|
reason: providerUrlCheck.reason,
|
|
415
|
-
url: sanitizeUrlForModel(providerUrlCheck.url),
|
|
416
553
|
},
|
|
417
|
-
|
|
418
|
-
});
|
|
554
|
+
);
|
|
419
555
|
}
|
|
420
556
|
text = extractTextFromToolResult(result);
|
|
421
557
|
title = title ?? extractTitleFromToolResult(result);
|
|
@@ -424,12 +560,17 @@ export function registerWorkflowWebSourceExtension(
|
|
|
424
560
|
await recordWorkflowWebSourceEvent(config, "fetch_empty", {
|
|
425
561
|
url: sanitizeUrlForModel(fetchUrl),
|
|
426
562
|
});
|
|
427
|
-
return await cachedFetchFailureResult(
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
563
|
+
return await cachedFetchFailureResult(
|
|
564
|
+
config,
|
|
565
|
+
fetchFailures,
|
|
566
|
+
fetchKey,
|
|
567
|
+
{
|
|
568
|
+
code: "empty_source",
|
|
569
|
+
message: "Provider returned no extractable text for this URL.",
|
|
570
|
+
extra: { url: sanitizeUrlForModel(fetchUrl) },
|
|
571
|
+
reason: "empty_source",
|
|
572
|
+
},
|
|
573
|
+
);
|
|
433
574
|
}
|
|
434
575
|
const source = createWorkflowWebSource({
|
|
435
576
|
config,
|
|
@@ -448,24 +589,37 @@ export function registerWorkflowWebSourceExtension(
|
|
|
448
589
|
textChars: source.textChars,
|
|
449
590
|
visibleChars: budget.used,
|
|
450
591
|
});
|
|
451
|
-
return toolResultFromJson({
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
await recordWorkflowWebSourceEvent(config, "fetch_failed", {
|
|
456
|
-
url: sanitizeUrlForModel(fetchUrl),
|
|
457
|
-
code,
|
|
458
|
-
});
|
|
459
|
-
return errorToolResult(code, "Workflow web-source fetch failed before a source could be cached.", {
|
|
460
|
-
url: sanitizeUrlForModel(fetchUrl),
|
|
592
|
+
return toolResultFromJson({
|
|
593
|
+
status: "ok",
|
|
594
|
+
tool: "workflow_web_fetch_source",
|
|
595
|
+
card,
|
|
461
596
|
});
|
|
597
|
+
},
|
|
598
|
+
).catch(async (error: unknown) => {
|
|
599
|
+
const message =
|
|
600
|
+
error instanceof Error ? error.message : "workflow_web_fetch_failed";
|
|
601
|
+
const code =
|
|
602
|
+
message === "fetch_lock_timeout"
|
|
603
|
+
? "fetch_lock_timeout"
|
|
604
|
+
: "workflow_web_fetch_failed";
|
|
605
|
+
await recordWorkflowWebSourceEvent(config, "fetch_failed", {
|
|
606
|
+
url: sanitizeUrlForModel(fetchUrl),
|
|
607
|
+
code,
|
|
462
608
|
});
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
609
|
+
return errorToolResult(
|
|
610
|
+
code,
|
|
611
|
+
"Workflow web-source fetch failed before a source could be cached.",
|
|
612
|
+
{
|
|
613
|
+
url: sanitizeUrlForModel(fetchUrl),
|
|
614
|
+
},
|
|
615
|
+
);
|
|
616
|
+
});
|
|
617
|
+
fetchInFlight.set(fetchKey, fetchPromise);
|
|
618
|
+
try {
|
|
619
|
+
return await fetchPromise;
|
|
620
|
+
} finally {
|
|
621
|
+
fetchInFlight.delete(fetchKey);
|
|
622
|
+
}
|
|
469
623
|
}
|
|
470
624
|
|
|
471
625
|
pi.registerTool({
|
|
@@ -473,26 +627,90 @@ export function registerWorkflowWebSourceExtension(
|
|
|
473
627
|
description:
|
|
474
628
|
"Read one or more narrow exact/fuzzy/term-matched snippets from a cached workflow web source by sourceRef.",
|
|
475
629
|
parameters: Type.Object({
|
|
476
|
-
sourceRef: Type.String({
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
630
|
+
sourceRef: Type.String({
|
|
631
|
+
description: "Opaque sourceRef returned by workflow_web_fetch_source.",
|
|
632
|
+
}),
|
|
633
|
+
query: Type.Optional(
|
|
634
|
+
Type.String({
|
|
635
|
+
description: "Exact or fuzzy text to locate in the cached source.",
|
|
636
|
+
}),
|
|
637
|
+
),
|
|
638
|
+
queries: Type.Optional(
|
|
639
|
+
Type.Array(Type.String(), {
|
|
640
|
+
description:
|
|
641
|
+
"Multiple exact/fuzzy texts to locate in one cached source. Prefer this over repeated calls when reading several snippets from the same sourceRef.",
|
|
642
|
+
}),
|
|
643
|
+
),
|
|
644
|
+
exact: Type.Optional(
|
|
645
|
+
Type.String({
|
|
646
|
+
description: "Exact text to locate in the cached source.",
|
|
647
|
+
}),
|
|
648
|
+
),
|
|
649
|
+
exactTexts: Type.Optional(
|
|
650
|
+
Type.Array(Type.String(), {
|
|
651
|
+
description: "Multiple exact texts to locate in one cached source.",
|
|
652
|
+
}),
|
|
653
|
+
),
|
|
654
|
+
claim: Type.Optional(
|
|
655
|
+
Type.String({
|
|
656
|
+
description:
|
|
657
|
+
"Claim to locate when the exact quote is not known. Use with terms for deterministic quote harvesting.",
|
|
658
|
+
}),
|
|
659
|
+
),
|
|
660
|
+
terms: Type.Optional(
|
|
661
|
+
Type.Array(Type.String(), {
|
|
662
|
+
description:
|
|
663
|
+
"Important terms that should co-occur in the returned source window.",
|
|
664
|
+
}),
|
|
665
|
+
),
|
|
666
|
+
reads: Type.Optional(
|
|
667
|
+
Type.Array(
|
|
668
|
+
Type.Object({
|
|
669
|
+
query: Type.Optional(
|
|
670
|
+
Type.String({ description: "Exact or fuzzy text to locate." }),
|
|
671
|
+
),
|
|
672
|
+
exact: Type.Optional(
|
|
673
|
+
Type.String({ description: "Exact text to locate." }),
|
|
674
|
+
),
|
|
675
|
+
exactText: Type.Optional(
|
|
676
|
+
Type.String({ description: "Exact text to locate." }),
|
|
677
|
+
),
|
|
678
|
+
text: Type.Optional(
|
|
679
|
+
Type.String({ description: "Text to locate." }),
|
|
680
|
+
),
|
|
681
|
+
claim: Type.Optional(
|
|
682
|
+
Type.String({
|
|
683
|
+
description: "Claim to locate when exact quote is unknown.",
|
|
684
|
+
}),
|
|
685
|
+
),
|
|
686
|
+
terms: Type.Optional(
|
|
687
|
+
Type.Array(Type.String(), {
|
|
688
|
+
description:
|
|
689
|
+
"Important terms for deterministic quote harvesting.",
|
|
690
|
+
}),
|
|
691
|
+
),
|
|
692
|
+
maxChars: Type.Optional(
|
|
693
|
+
Type.Number({
|
|
694
|
+
description:
|
|
695
|
+
"Maximum visible snippet characters for this read.",
|
|
696
|
+
}),
|
|
697
|
+
),
|
|
698
|
+
}),
|
|
699
|
+
{
|
|
700
|
+
description:
|
|
701
|
+
"Mixed batch reads for one sourceRef; each item can use query or claim+terms.",
|
|
702
|
+
},
|
|
703
|
+
),
|
|
704
|
+
),
|
|
705
|
+
maxChars: Type.Optional(
|
|
706
|
+
Type.Number({
|
|
707
|
+
description: "Maximum visible snippet characters per query.",
|
|
708
|
+
}),
|
|
709
|
+
),
|
|
493
710
|
}),
|
|
494
711
|
execute: async (_toolCallId, params) => {
|
|
495
|
-
const sourceRef =
|
|
712
|
+
const sourceRef =
|
|
713
|
+
stringParam(params, "sourceRef") ?? stringParam(params, "source_ref");
|
|
496
714
|
const requests = sourceReadRequestsFromParams(params);
|
|
497
715
|
if (!sourceRef || requests.length === 0) {
|
|
498
716
|
return errorToolResult(
|
|
@@ -502,18 +720,28 @@ export function registerWorkflowWebSourceExtension(
|
|
|
502
720
|
}
|
|
503
721
|
const source = await readCachedWorkflowWebSource(sourceRef);
|
|
504
722
|
if (!source) {
|
|
505
|
-
await recordWorkflowWebSourceEvent(config, "source_read_missing", {
|
|
506
|
-
return errorToolResult("source_not_found", "No cached workflow web source exists for sourceRef.", {
|
|
723
|
+
await recordWorkflowWebSourceEvent(config, "source_read_missing", {
|
|
507
724
|
sourceRef,
|
|
508
725
|
});
|
|
726
|
+
return errorToolResult(
|
|
727
|
+
"source_not_found",
|
|
728
|
+
"No cached workflow web source exists for sourceRef.",
|
|
729
|
+
{
|
|
730
|
+
sourceRef,
|
|
731
|
+
},
|
|
732
|
+
);
|
|
509
733
|
}
|
|
510
|
-
const maxChars =
|
|
734
|
+
const maxChars =
|
|
735
|
+
positiveIntParam(params, "maxChars") ?? policy.sourceReadMaxChars;
|
|
511
736
|
const perQueryMaxChars = Math.min(maxChars, policy.sourceReadMaxChars);
|
|
512
737
|
const reads = readWorkflowWebSourceSnippets({
|
|
513
738
|
source,
|
|
514
739
|
requests: requests.map((request) => ({
|
|
515
740
|
...request,
|
|
516
|
-
maxChars: Math.min(
|
|
741
|
+
maxChars: Math.min(
|
|
742
|
+
request.maxChars ?? perQueryMaxChars,
|
|
743
|
+
policy.sourceReadMaxChars,
|
|
744
|
+
),
|
|
517
745
|
})),
|
|
518
746
|
maxChars: perQueryMaxChars,
|
|
519
747
|
budget,
|
|
@@ -532,14 +760,20 @@ export function registerWorkflowWebSourceExtension(
|
|
|
532
760
|
missingTerms: read.missingTerms,
|
|
533
761
|
coverageRatio: read.coverageRatio,
|
|
534
762
|
candidateOnly: read.candidateOnly,
|
|
763
|
+
truncated: read.truncated,
|
|
535
764
|
quote: status === "budget_exhausted" ? undefined : read.quote,
|
|
536
765
|
startOffset: read.startOffset,
|
|
537
766
|
endOffset: read.endOffset,
|
|
538
767
|
visibleChars: read.visibleChars,
|
|
539
768
|
};
|
|
540
769
|
});
|
|
541
|
-
const responseStatus = aggregateSourceReadStatus(
|
|
542
|
-
|
|
770
|
+
const responseStatus = aggregateSourceReadStatus(
|
|
771
|
+
results.map((result) => result.status),
|
|
772
|
+
);
|
|
773
|
+
const visibleChars = results.reduce(
|
|
774
|
+
(total, result) => total + result.visibleChars,
|
|
775
|
+
0,
|
|
776
|
+
);
|
|
543
777
|
await recordWorkflowWebSourceEvent(config, "source_read", {
|
|
544
778
|
sourceRef,
|
|
545
779
|
status: responseStatus,
|
|
@@ -561,32 +795,48 @@ export function registerWorkflowWebSourceExtension(
|
|
|
561
795
|
missingTerms: result.missingTerms,
|
|
562
796
|
coverageRatio: result.coverageRatio,
|
|
563
797
|
candidateOnly: result.candidateOnly,
|
|
564
|
-
|
|
798
|
+
truncated: result.truncated,
|
|
799
|
+
quote:
|
|
800
|
+
result.status === "budget_exhausted" ? undefined : result.quote,
|
|
565
801
|
startOffset: result.startOffset,
|
|
566
802
|
endOffset: result.endOffset,
|
|
567
|
-
budget: budgetSnapshot(
|
|
803
|
+
budget: budgetSnapshot(
|
|
804
|
+
result.status === "budget_exhausted" ||
|
|
805
|
+
result.status === "truncated",
|
|
806
|
+
),
|
|
568
807
|
next:
|
|
569
808
|
result.status === "budget_exhausted"
|
|
570
809
|
? "Visible web-source budget is exhausted for this task; cite the sourceRef as an evidence gap or use a smaller query in a fresh task."
|
|
571
|
-
:
|
|
810
|
+
: result.status === "truncated"
|
|
811
|
+
? "The matched web-source snippet was truncated by the visible budget or maxChars; use a smaller exact query or a fresh task if the full quote is required."
|
|
812
|
+
: undefined,
|
|
572
813
|
});
|
|
573
814
|
}
|
|
815
|
+
const hasBudgetExhaustedRead = results.some(
|
|
816
|
+
(result) => result.status === "budget_exhausted",
|
|
817
|
+
);
|
|
818
|
+
const hasTruncatedRead = results.some(
|
|
819
|
+
(result) => result.status === "truncated",
|
|
820
|
+
);
|
|
574
821
|
return toolResultFromJson({
|
|
575
822
|
status: responseStatus,
|
|
576
823
|
tool: "workflow_web_source_read",
|
|
577
824
|
sourceRef,
|
|
578
825
|
url: source.redactedUrl,
|
|
579
826
|
results,
|
|
580
|
-
budget: budgetSnapshot(
|
|
581
|
-
next:
|
|
582
|
-
|
|
583
|
-
|
|
827
|
+
budget: budgetSnapshot(hasBudgetExhaustedRead || hasTruncatedRead),
|
|
828
|
+
next: hasBudgetExhaustedRead
|
|
829
|
+
? "Visible web-source budget is exhausted for this task; cite missing quotes as evidence gaps or use smaller query batches in a fresh task."
|
|
830
|
+
: hasTruncatedRead
|
|
831
|
+
? "One or more matched web-source snippets were truncated by the visible budget or maxChars; use smaller exact queries or a fresh task if full quotes are required."
|
|
584
832
|
: undefined,
|
|
585
833
|
});
|
|
586
834
|
},
|
|
587
835
|
});
|
|
588
836
|
|
|
589
|
-
async function readCachedWorkflowWebSource(
|
|
837
|
+
async function readCachedWorkflowWebSource(
|
|
838
|
+
sourceRef: string,
|
|
839
|
+
): Promise<WorkflowWebSource | undefined> {
|
|
590
840
|
const cached = sourceCache.get(sourceRef);
|
|
591
841
|
if (cached) return cached;
|
|
592
842
|
const source = await readWorkflowWebSource(config, sourceRef);
|
|
@@ -669,7 +919,12 @@ async function cachedFetchFailureResult(
|
|
|
669
919
|
config: WorkflowWebSourceCacheConfig,
|
|
670
920
|
cache: Map<string, FetchFailure>,
|
|
671
921
|
key: string,
|
|
672
|
-
failure: {
|
|
922
|
+
failure: {
|
|
923
|
+
code: string;
|
|
924
|
+
message: string;
|
|
925
|
+
extra: Record<string, unknown>;
|
|
926
|
+
reason: string;
|
|
927
|
+
},
|
|
673
928
|
): Promise<ReturnType<typeof toolResultFromJson>> {
|
|
674
929
|
const cached = {
|
|
675
930
|
code: failure.code,
|
|
@@ -687,8 +942,8 @@ async function cachedFetchFailureResult(
|
|
|
687
942
|
return errorToolResult(failure.code, failure.message, failure.extra);
|
|
688
943
|
}
|
|
689
944
|
|
|
690
|
-
const FETCH_LOCK_STALE_MS = 60_000;
|
|
691
|
-
const FETCH_LOCK_WAIT_MS =
|
|
945
|
+
const FETCH_LOCK_STALE_MS = 4 * 60_000;
|
|
946
|
+
const FETCH_LOCK_WAIT_MS = 5 * 60_000;
|
|
692
947
|
|
|
693
948
|
async function withWorkflowWebFetchLock<T>(
|
|
694
949
|
config: WorkflowWebSourceCacheConfig,
|
|
@@ -715,14 +970,15 @@ async function acquireWorkflowWebFetchLock(
|
|
|
715
970
|
for (;;) {
|
|
716
971
|
if (signal?.aborted) throw new Error("aborted");
|
|
717
972
|
try {
|
|
973
|
+
const ownerId = `${process.pid}:${Date.now()}:${Math.random().toString(36).slice(2)}`;
|
|
718
974
|
await mkdir(lockDir);
|
|
719
975
|
await writeFile(
|
|
720
976
|
resolve(lockDir, "owner.json"),
|
|
721
|
-
`${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`,
|
|
977
|
+
`${JSON.stringify({ ownerId, pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`,
|
|
722
978
|
"utf8",
|
|
723
979
|
);
|
|
724
980
|
return async () => {
|
|
725
|
-
await
|
|
981
|
+
await releaseWorkflowWebFetchLock(lockDir, ownerId);
|
|
726
982
|
};
|
|
727
983
|
} catch (error) {
|
|
728
984
|
if (!isFileExistsError(error)) throw error;
|
|
@@ -735,6 +991,19 @@ async function acquireWorkflowWebFetchLock(
|
|
|
735
991
|
}
|
|
736
992
|
}
|
|
737
993
|
|
|
994
|
+
async function releaseWorkflowWebFetchLock(
|
|
995
|
+
lockDir: string,
|
|
996
|
+
ownerId: string,
|
|
997
|
+
): Promise<void> {
|
|
998
|
+
try {
|
|
999
|
+
const current = await readFetchLockOwner(lockDir);
|
|
1000
|
+
if (current?.ownerId !== ownerId) return;
|
|
1001
|
+
await rm(lockDir, { recursive: true, force: true });
|
|
1002
|
+
} catch {
|
|
1003
|
+
// Missing or unreadable lock will be retried by the caller.
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
|
|
738
1007
|
async function removeStaleFetchLock(lockDir: string): Promise<void> {
|
|
739
1008
|
try {
|
|
740
1009
|
const current = await stat(lockDir);
|
|
@@ -746,12 +1015,29 @@ async function removeStaleFetchLock(lockDir: string): Promise<void> {
|
|
|
746
1015
|
}
|
|
747
1016
|
}
|
|
748
1017
|
|
|
1018
|
+
async function readFetchLockOwner(
|
|
1019
|
+
lockDir: string,
|
|
1020
|
+
): Promise<{ ownerId?: string } | undefined> {
|
|
1021
|
+
try {
|
|
1022
|
+
const parsed = JSON.parse(
|
|
1023
|
+
await readFile(resolve(lockDir, "owner.json"), "utf8"),
|
|
1024
|
+
) as unknown;
|
|
1025
|
+
return isRecord(parsed) && typeof parsed.ownerId === "string"
|
|
1026
|
+
? { ownerId: parsed.ownerId }
|
|
1027
|
+
: undefined;
|
|
1028
|
+
} catch {
|
|
1029
|
+
return undefined;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
|
|
749
1033
|
async function readDurableFetchFailure(
|
|
750
1034
|
config: WorkflowWebSourceCacheConfig,
|
|
751
1035
|
key: string,
|
|
752
1036
|
): Promise<FetchFailure | undefined> {
|
|
753
1037
|
try {
|
|
754
|
-
const parsed = JSON.parse(
|
|
1038
|
+
const parsed = JSON.parse(
|
|
1039
|
+
await readFile(fetchFailurePath(config, key), "utf8"),
|
|
1040
|
+
) as unknown;
|
|
755
1041
|
return normalizeFetchFailure(parsed);
|
|
756
1042
|
} catch {
|
|
757
1043
|
return undefined;
|
|
@@ -773,23 +1059,36 @@ async function writeDurableFetchFailure(
|
|
|
773
1059
|
|
|
774
1060
|
function normalizeFetchFailure(value: unknown): FetchFailure | undefined {
|
|
775
1061
|
if (!isRecord(value)) return undefined;
|
|
776
|
-
if (typeof value.code !== "string" || typeof value.message !== "string")
|
|
1062
|
+
if (typeof value.code !== "string" || typeof value.message !== "string")
|
|
1063
|
+
return undefined;
|
|
777
1064
|
const extra = isRecord(value.extra) ? value.extra : {};
|
|
778
1065
|
return {
|
|
779
1066
|
code: value.code,
|
|
780
1067
|
message: value.message,
|
|
781
1068
|
extra,
|
|
782
1069
|
...(typeof value.reason === "string" ? { reason: value.reason } : {}),
|
|
783
|
-
...(typeof value.createdAt === "string"
|
|
1070
|
+
...(typeof value.createdAt === "string"
|
|
1071
|
+
? { createdAt: value.createdAt }
|
|
1072
|
+
: {}),
|
|
784
1073
|
};
|
|
785
1074
|
}
|
|
786
1075
|
|
|
787
|
-
function fetchLockPath(
|
|
1076
|
+
function fetchLockPath(
|
|
1077
|
+
config: WorkflowWebSourceCacheConfig,
|
|
1078
|
+
key: string,
|
|
1079
|
+
): string {
|
|
788
1080
|
return resolve(config.cacheDir, "fetch-locks", fetchCacheFileKey(key));
|
|
789
1081
|
}
|
|
790
1082
|
|
|
791
|
-
function fetchFailurePath(
|
|
792
|
-
|
|
1083
|
+
function fetchFailurePath(
|
|
1084
|
+
config: WorkflowWebSourceCacheConfig,
|
|
1085
|
+
key: string,
|
|
1086
|
+
): string {
|
|
1087
|
+
return resolve(
|
|
1088
|
+
config.cacheDir,
|
|
1089
|
+
"fetch-negative-cache",
|
|
1090
|
+
`${fetchCacheFileKey(key)}.json`,
|
|
1091
|
+
);
|
|
793
1092
|
}
|
|
794
1093
|
|
|
795
1094
|
function fetchCacheFileKey(key: string): string {
|
|
@@ -813,7 +1112,11 @@ function shouldCacheFetchFailure(reason: string): boolean {
|
|
|
813
1112
|
}
|
|
814
1113
|
|
|
815
1114
|
function shouldCacheFetchFailureInMemory(reason: string): boolean {
|
|
816
|
-
return
|
|
1115
|
+
return (
|
|
1116
|
+
reason === "empty_source" ||
|
|
1117
|
+
reason === "dns_resolution_failed" ||
|
|
1118
|
+
reason.includes("ENOTFOUND")
|
|
1119
|
+
);
|
|
817
1120
|
}
|
|
818
1121
|
|
|
819
1122
|
const WORKFLOW_WEB_FETCH_TIMEOUT_MS = 30_000;
|
|
@@ -824,25 +1127,46 @@ async function safeFetchWorkflowWebText(
|
|
|
824
1127
|
security: WorkflowWebSecurityPolicy,
|
|
825
1128
|
signal?: AbortSignal,
|
|
826
1129
|
): Promise<
|
|
827
|
-
| {
|
|
1130
|
+
| {
|
|
1131
|
+
ok: true;
|
|
1132
|
+
url: string;
|
|
1133
|
+
text: string;
|
|
1134
|
+
title?: string;
|
|
1135
|
+
extractionLossy?: boolean;
|
|
1136
|
+
}
|
|
828
1137
|
| { ok: false; reason: string; url: string }
|
|
829
1138
|
> {
|
|
830
1139
|
let current = url;
|
|
831
1140
|
for (let redirectCount = 0; redirectCount < 6; redirectCount += 1) {
|
|
832
1141
|
const checked = validateWorkflowWebUrl(current, security);
|
|
833
1142
|
if (!checked.ok) return { ok: false, reason: checked.reason, url: current };
|
|
834
|
-
const response = await safeFetchOnce(
|
|
1143
|
+
const response = await safeFetchOnce(
|
|
1144
|
+
checked.normalizedUrl,
|
|
1145
|
+
security,
|
|
1146
|
+
signal,
|
|
1147
|
+
);
|
|
835
1148
|
if (!response.ok) return response;
|
|
836
1149
|
if (response.status >= 300 && response.status < 400) {
|
|
837
1150
|
if (!response.location)
|
|
838
|
-
return {
|
|
1151
|
+
return {
|
|
1152
|
+
ok: false,
|
|
1153
|
+
reason: "redirect_without_location",
|
|
1154
|
+
url: checked.normalizedUrl,
|
|
1155
|
+
};
|
|
839
1156
|
current = new URL(response.location, checked.normalizedUrl).href;
|
|
840
1157
|
continue;
|
|
841
1158
|
}
|
|
842
1159
|
if (response.status < 200 || response.status >= 300) {
|
|
843
|
-
return {
|
|
1160
|
+
return {
|
|
1161
|
+
ok: false,
|
|
1162
|
+
reason: `http_${response.status}`,
|
|
1163
|
+
url: checked.normalizedUrl,
|
|
1164
|
+
};
|
|
844
1165
|
}
|
|
845
|
-
const extracted = extractWorkflowWebResponseText(
|
|
1166
|
+
const extracted = extractWorkflowWebResponseText(
|
|
1167
|
+
response.text,
|
|
1168
|
+
response.contentType,
|
|
1169
|
+
);
|
|
846
1170
|
return {
|
|
847
1171
|
ok: true,
|
|
848
1172
|
url: checked.normalizedUrl,
|
|
@@ -859,7 +1183,14 @@ function safeFetchOnce(
|
|
|
859
1183
|
security: WorkflowWebSecurityPolicy,
|
|
860
1184
|
signal?: AbortSignal,
|
|
861
1185
|
): Promise<
|
|
862
|
-
| {
|
|
1186
|
+
| {
|
|
1187
|
+
ok: true;
|
|
1188
|
+
status: number;
|
|
1189
|
+
location?: string;
|
|
1190
|
+
text: string;
|
|
1191
|
+
contentType?: string;
|
|
1192
|
+
truncated?: boolean;
|
|
1193
|
+
}
|
|
863
1194
|
| { ok: false; reason: string; url: string }
|
|
864
1195
|
> {
|
|
865
1196
|
const parsed = new URL(url);
|
|
@@ -868,7 +1199,14 @@ function safeFetchOnce(
|
|
|
868
1199
|
let settled = false;
|
|
869
1200
|
const settle = (
|
|
870
1201
|
result:
|
|
871
|
-
| {
|
|
1202
|
+
| {
|
|
1203
|
+
ok: true;
|
|
1204
|
+
status: number;
|
|
1205
|
+
location?: string;
|
|
1206
|
+
text: string;
|
|
1207
|
+
contentType?: string;
|
|
1208
|
+
truncated?: boolean;
|
|
1209
|
+
}
|
|
872
1210
|
| { ok: false; reason: string; url: string },
|
|
873
1211
|
) => {
|
|
874
1212
|
if (settled) return;
|
|
@@ -880,20 +1218,26 @@ function safeFetchOnce(
|
|
|
880
1218
|
{
|
|
881
1219
|
method: "GET",
|
|
882
1220
|
headers: {
|
|
883
|
-
accept:
|
|
1221
|
+
accept:
|
|
1222
|
+
"text/plain,text/html,application/json,application/xml;q=0.9,*/*;q=0.1",
|
|
884
1223
|
"user-agent": "pi-workflow-web-source/1",
|
|
885
1224
|
},
|
|
886
1225
|
lookup(hostname, options, callback) {
|
|
887
1226
|
lookupPublicAddress(hostname, security)
|
|
888
1227
|
.then((address) => {
|
|
889
1228
|
if (isLookupAllOptions(options)) {
|
|
890
|
-
callback(null, [
|
|
1229
|
+
callback(null, [
|
|
1230
|
+
{ address: address.address, family: address.family },
|
|
1231
|
+
]);
|
|
891
1232
|
return;
|
|
892
1233
|
}
|
|
893
1234
|
callback(null, address.address, address.family);
|
|
894
1235
|
})
|
|
895
1236
|
.catch((error: unknown) => {
|
|
896
|
-
const reason =
|
|
1237
|
+
const reason =
|
|
1238
|
+
error instanceof Error
|
|
1239
|
+
? error.message
|
|
1240
|
+
: "dns_resolution_failed";
|
|
897
1241
|
callback(new Error(reason), "", 4);
|
|
898
1242
|
});
|
|
899
1243
|
},
|
|
@@ -906,7 +1250,12 @@ function safeFetchOnce(
|
|
|
906
1250
|
? res.headers["content-type"][0]
|
|
907
1251
|
: res.headers["content-type"];
|
|
908
1252
|
const status = res.statusCode ?? 0;
|
|
909
|
-
if (
|
|
1253
|
+
if (
|
|
1254
|
+
status >= 200 &&
|
|
1255
|
+
status < 300 &&
|
|
1256
|
+
contentType &&
|
|
1257
|
+
!isWorkflowWebTextContentType(contentType)
|
|
1258
|
+
) {
|
|
910
1259
|
res.resume();
|
|
911
1260
|
settle({ ok: false, reason: "unsupported_content_type", url });
|
|
912
1261
|
return;
|
|
@@ -914,7 +1263,10 @@ function safeFetchOnce(
|
|
|
914
1263
|
res.on("data", (chunk: string) => {
|
|
915
1264
|
if (settled) return;
|
|
916
1265
|
if (text.length + chunk.length > WORKFLOW_WEB_FETCH_MAX_CHARS) {
|
|
917
|
-
text += chunk.slice(
|
|
1266
|
+
text += chunk.slice(
|
|
1267
|
+
0,
|
|
1268
|
+
Math.max(0, WORKFLOW_WEB_FETCH_MAX_CHARS - text.length),
|
|
1269
|
+
);
|
|
918
1270
|
truncated = true;
|
|
919
1271
|
req.destroy(new Error("workflow_fetch_truncated"));
|
|
920
1272
|
return;
|
|
@@ -977,7 +1329,9 @@ async function lookupPublicAddress(
|
|
|
977
1329
|
: privateIpReason(address.address);
|
|
978
1330
|
if (!reason) return address;
|
|
979
1331
|
}
|
|
980
|
-
throw new Error(
|
|
1332
|
+
throw new Error(
|
|
1333
|
+
addresses.length > 0 ? "private_host_blocked" : "dns_resolution_failed",
|
|
1334
|
+
);
|
|
981
1335
|
}
|
|
982
1336
|
|
|
983
1337
|
async function validateResolvedHost(
|
|
@@ -992,7 +1346,10 @@ async function validateResolvedHost(
|
|
|
992
1346
|
return { ok: false, reason: "invalid_url", url };
|
|
993
1347
|
}
|
|
994
1348
|
try {
|
|
995
|
-
const addresses = await lookup(parsed.hostname, {
|
|
1349
|
+
const addresses = await lookup(parsed.hostname, {
|
|
1350
|
+
all: true,
|
|
1351
|
+
verbatim: true,
|
|
1352
|
+
});
|
|
996
1353
|
for (const address of addresses) {
|
|
997
1354
|
const reason = privateIpReason(address.address);
|
|
998
1355
|
if (reason) return { ok: false, reason, url };
|
|
@@ -1015,27 +1372,38 @@ function privateIpReason(address: string): string | undefined {
|
|
|
1015
1372
|
if (hexMapped) {
|
|
1016
1373
|
const high = Number.parseInt(hexMapped[1]!, 16);
|
|
1017
1374
|
const low = Number.parseInt(hexMapped[2]!, 16);
|
|
1018
|
-
return privateIpReason(
|
|
1375
|
+
return privateIpReason(
|
|
1376
|
+
`${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`,
|
|
1377
|
+
);
|
|
1019
1378
|
}
|
|
1020
1379
|
if (isIP(lower) === 4) {
|
|
1021
1380
|
const parts = lower.split(".").map((part) => Number(part));
|
|
1022
|
-
if (
|
|
1381
|
+
if (
|
|
1382
|
+
parts.length !== 4 ||
|
|
1383
|
+
parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)
|
|
1384
|
+
)
|
|
1385
|
+
return "private_host_blocked";
|
|
1023
1386
|
const [a, b, c, d] = parts as [number, number, number, number];
|
|
1024
|
-
if (a === 0 || a === 10 || a === 127 || a >= 224)
|
|
1387
|
+
if (a === 0 || a === 10 || a === 127 || a >= 224)
|
|
1388
|
+
return "private_host_blocked";
|
|
1025
1389
|
if (a === 100 && b >= 64 && b <= 127) return "private_host_blocked";
|
|
1026
1390
|
if (a === 169 && b === 254) return "private_host_blocked";
|
|
1027
1391
|
if (a === 172 && b >= 16 && b <= 31) return "private_host_blocked";
|
|
1028
1392
|
if (a === 192 && b === 168) return "private_host_blocked";
|
|
1029
|
-
if (a === 192 && b === 0 && (c === 0 || c === 2))
|
|
1393
|
+
if (a === 192 && b === 0 && (c === 0 || c === 2))
|
|
1394
|
+
return "private_host_blocked";
|
|
1030
1395
|
if (a === 198 && (b === 18 || b === 19)) return "private_host_blocked";
|
|
1031
1396
|
if (a === 198 && b === 51 && c === 100) return "private_host_blocked";
|
|
1032
1397
|
if (a === 203 && b === 0 && c === 113) return "private_host_blocked";
|
|
1033
|
-
if (a === 255 && b === 255 && c === 255 && d === 255)
|
|
1398
|
+
if (a === 255 && b === 255 && c === 255 && d === 255)
|
|
1399
|
+
return "private_host_blocked";
|
|
1034
1400
|
}
|
|
1035
1401
|
if (isIP(lower) === 6) {
|
|
1036
1402
|
if (lower === "::" || lower === "::1") return "private_host_blocked";
|
|
1037
|
-
if (lower.startsWith("fc") || lower.startsWith("fd"))
|
|
1038
|
-
|
|
1403
|
+
if (lower.startsWith("fc") || lower.startsWith("fd"))
|
|
1404
|
+
return "private_host_blocked";
|
|
1405
|
+
if (lower.startsWith("fe80") || lower.startsWith("ff"))
|
|
1406
|
+
return "private_host_blocked";
|
|
1039
1407
|
if (lower.startsWith("2001:db8")) return "private_host_blocked";
|
|
1040
1408
|
}
|
|
1041
1409
|
return undefined;
|
|
@@ -1048,7 +1416,10 @@ async function validateProviderResultUrls(
|
|
|
1048
1416
|
for (const url of providerResultUrls(result)) {
|
|
1049
1417
|
const checked = validateWorkflowWebUrl(url, security);
|
|
1050
1418
|
if (!checked.ok) return { ok: false, reason: checked.reason, url };
|
|
1051
|
-
const resolved = await validateResolvedHost(
|
|
1419
|
+
const resolved = await validateResolvedHost(
|
|
1420
|
+
checked.normalizedUrl,
|
|
1421
|
+
security,
|
|
1422
|
+
);
|
|
1052
1423
|
if (!resolved.ok) return resolved;
|
|
1053
1424
|
}
|
|
1054
1425
|
if (!security.allowPrivateHosts) {
|
|
@@ -1151,11 +1522,12 @@ function canonicalWorkflowWebFetchUrl(url: string): string {
|
|
|
1151
1522
|
if (parsed.pathname.length > 1 && parsed.pathname.endsWith("/")) {
|
|
1152
1523
|
parsed.pathname = parsed.pathname.slice(0, -1);
|
|
1153
1524
|
}
|
|
1154
|
-
const sortedParams = [...parsed.searchParams.entries()].sort(
|
|
1155
|
-
left.localeCompare(right),
|
|
1525
|
+
const sortedParams = [...parsed.searchParams.entries()].sort(
|
|
1526
|
+
([left], [right]) => left.localeCompare(right),
|
|
1156
1527
|
);
|
|
1157
1528
|
parsed.search = "";
|
|
1158
|
-
for (const [key, value] of sortedParams)
|
|
1529
|
+
for (const [key, value] of sortedParams)
|
|
1530
|
+
parsed.searchParams.append(key, value);
|
|
1159
1531
|
return parsed.href;
|
|
1160
1532
|
}
|
|
1161
1533
|
|
|
@@ -1165,13 +1537,20 @@ function shouldKeepWorkflowWebFragment(hash: string): boolean {
|
|
|
1165
1537
|
return raw.startsWith("/") || raw.startsWith("!") || raw.includes("?");
|
|
1166
1538
|
}
|
|
1167
1539
|
|
|
1168
|
-
function fetchSourceRequestsFromParams(
|
|
1540
|
+
function fetchSourceRequestsFromParams(
|
|
1541
|
+
params: unknown,
|
|
1542
|
+
): WorkflowWebFetchSourceRequest[] {
|
|
1169
1543
|
if (!isRecord(params)) return [];
|
|
1170
1544
|
const requests: WorkflowWebFetchSourceRequest[] = [];
|
|
1171
1545
|
const titles = Array.isArray(params.titles) ? params.titles : [];
|
|
1172
1546
|
if (Array.isArray(params.sources)) {
|
|
1173
1547
|
for (const source of params.sources) {
|
|
1174
|
-
if (
|
|
1548
|
+
if (
|
|
1549
|
+
!isRecord(source) ||
|
|
1550
|
+
typeof source.url !== "string" ||
|
|
1551
|
+
!source.url.trim()
|
|
1552
|
+
)
|
|
1553
|
+
continue;
|
|
1175
1554
|
requests.push({
|
|
1176
1555
|
url: source.url.trim(),
|
|
1177
1556
|
...(typeof source.title === "string" && source.title.trim()
|
|
@@ -1186,7 +1565,9 @@ function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceR
|
|
|
1186
1565
|
const title = titles[index];
|
|
1187
1566
|
requests.push({
|
|
1188
1567
|
url: url.trim(),
|
|
1189
|
-
...(typeof title === "string" && title.trim()
|
|
1568
|
+
...(typeof title === "string" && title.trim()
|
|
1569
|
+
? { title: title.trim() }
|
|
1570
|
+
: {}),
|
|
1190
1571
|
});
|
|
1191
1572
|
}
|
|
1192
1573
|
}
|
|
@@ -1201,7 +1582,9 @@ function fetchSourceRequestsFromParams(params: unknown): WorkflowWebFetchSourceR
|
|
|
1201
1582
|
return dedupeFetchSourceRequests(requests).slice(0, 20);
|
|
1202
1583
|
}
|
|
1203
1584
|
|
|
1204
|
-
function dedupeFetchSourceRequests(
|
|
1585
|
+
function dedupeFetchSourceRequests(
|
|
1586
|
+
requests: WorkflowWebFetchSourceRequest[],
|
|
1587
|
+
): WorkflowWebFetchSourceRequest[] {
|
|
1205
1588
|
const deduped: WorkflowWebFetchSourceRequest[] = [];
|
|
1206
1589
|
const seen = new Set<string>();
|
|
1207
1590
|
for (const request of requests) {
|
|
@@ -1214,7 +1597,9 @@ function dedupeFetchSourceRequests(requests: WorkflowWebFetchSourceRequest[]): W
|
|
|
1214
1597
|
}
|
|
1215
1598
|
|
|
1216
1599
|
function payloadFromToolResult(result: ToolResult): Record<string, unknown> {
|
|
1217
|
-
const text = result.content?.find(
|
|
1600
|
+
const text = result.content?.find(
|
|
1601
|
+
(item) => typeof item.text === "string",
|
|
1602
|
+
)?.text;
|
|
1218
1603
|
if (typeof text !== "string") return {};
|
|
1219
1604
|
try {
|
|
1220
1605
|
const payload = JSON.parse(text);
|
|
@@ -1237,7 +1622,9 @@ function titleFromParams(params: unknown): string | undefined {
|
|
|
1237
1622
|
return stringParam(params, "title");
|
|
1238
1623
|
}
|
|
1239
1624
|
|
|
1240
|
-
function sourceReadRequestsFromParams(
|
|
1625
|
+
function sourceReadRequestsFromParams(
|
|
1626
|
+
params: unknown,
|
|
1627
|
+
): WorkflowWebSourceReadRequest[] {
|
|
1241
1628
|
const requests: WorkflowWebSourceReadRequest[] = [];
|
|
1242
1629
|
if (isRecord(params) && Array.isArray(params.reads)) {
|
|
1243
1630
|
for (const item of params.reads) {
|
|
@@ -1245,9 +1632,12 @@ function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadReq
|
|
|
1245
1632
|
if (request) requests.push(request);
|
|
1246
1633
|
}
|
|
1247
1634
|
}
|
|
1248
|
-
for (const query of stringArrayParam(params, "queries"))
|
|
1249
|
-
|
|
1250
|
-
for (const query of stringArrayParam(params, "
|
|
1635
|
+
for (const query of stringArrayParam(params, "queries"))
|
|
1636
|
+
requests.push({ query });
|
|
1637
|
+
for (const query of stringArrayParam(params, "exactTexts"))
|
|
1638
|
+
requests.push({ query });
|
|
1639
|
+
for (const query of stringArrayParam(params, "texts"))
|
|
1640
|
+
requests.push({ query });
|
|
1251
1641
|
const query =
|
|
1252
1642
|
stringParam(params, "query") ??
|
|
1253
1643
|
stringParam(params, "exactText") ??
|
|
@@ -1255,11 +1645,14 @@ function sourceReadRequestsFromParams(params: unknown): WorkflowWebSourceReadReq
|
|
|
1255
1645
|
stringParam(params, "text");
|
|
1256
1646
|
const claim = stringParam(params, "claim");
|
|
1257
1647
|
const terms = stringArrayParam(params, "terms");
|
|
1258
|
-
if (query || claim || terms.length > 0)
|
|
1648
|
+
if (query || claim || terms.length > 0)
|
|
1649
|
+
requests.push({ query, claim, terms });
|
|
1259
1650
|
return dedupeSourceReadRequests(requests).slice(0, 20);
|
|
1260
1651
|
}
|
|
1261
1652
|
|
|
1262
|
-
function sourceReadRequestFromRecord(
|
|
1653
|
+
function sourceReadRequestFromRecord(
|
|
1654
|
+
value: unknown,
|
|
1655
|
+
): WorkflowWebSourceReadRequest | undefined {
|
|
1263
1656
|
if (!isRecord(value)) return undefined;
|
|
1264
1657
|
const query =
|
|
1265
1658
|
stringParam(value, "query") ??
|
|
@@ -1273,7 +1666,9 @@ function sourceReadRequestFromRecord(value: unknown): WorkflowWebSourceReadReque
|
|
|
1273
1666
|
return { query, claim, terms, maxChars };
|
|
1274
1667
|
}
|
|
1275
1668
|
|
|
1276
|
-
function dedupeSourceReadRequests(
|
|
1669
|
+
function dedupeSourceReadRequests(
|
|
1670
|
+
requests: WorkflowWebSourceReadRequest[],
|
|
1671
|
+
): WorkflowWebSourceReadRequest[] {
|
|
1277
1672
|
const deduped: WorkflowWebSourceReadRequest[] = [];
|
|
1278
1673
|
const seen = new Set<string>();
|
|
1279
1674
|
for (const request of requests) {
|
|
@@ -1292,18 +1687,27 @@ function dedupeSourceReadRequests(requests: WorkflowWebSourceReadRequest[]): Wor
|
|
|
1292
1687
|
|
|
1293
1688
|
function sourceReadBatchRequested(params: unknown): boolean {
|
|
1294
1689
|
return (
|
|
1295
|
-
(isRecord(params) &&
|
|
1690
|
+
(isRecord(params) &&
|
|
1691
|
+
Array.isArray(params.reads) &&
|
|
1692
|
+
params.reads.length > 0) ||
|
|
1296
1693
|
stringArrayParam(params, "queries").length > 0 ||
|
|
1297
1694
|
stringArrayParam(params, "exactTexts").length > 0 ||
|
|
1298
1695
|
stringArrayParam(params, "texts").length > 0
|
|
1299
1696
|
);
|
|
1300
1697
|
}
|
|
1301
1698
|
|
|
1302
|
-
type SourceReadToolStatus =
|
|
1699
|
+
type SourceReadToolStatus =
|
|
1700
|
+
| "ok"
|
|
1701
|
+
| "candidate"
|
|
1702
|
+
| "truncated"
|
|
1703
|
+
| "budget_exhausted"
|
|
1704
|
+
| "not_found";
|
|
1303
1705
|
|
|
1304
1706
|
function sourceReadResponseStatus(
|
|
1305
1707
|
read: WorkflowWebSourceReadResult,
|
|
1306
1708
|
): SourceReadToolStatus {
|
|
1709
|
+
if (read.status === "truncated" && !read.quote) return "budget_exhausted";
|
|
1710
|
+
if (read.status === "truncated") return "truncated";
|
|
1307
1711
|
if (read.status === "matched" && !read.quote) return "budget_exhausted";
|
|
1308
1712
|
if (read.status === "matched" && read.candidateOnly) return "candidate";
|
|
1309
1713
|
if (read.status === "matched") return "ok";
|
|
@@ -1312,11 +1716,19 @@ function sourceReadResponseStatus(
|
|
|
1312
1716
|
|
|
1313
1717
|
function aggregateSourceReadStatus(
|
|
1314
1718
|
statuses: SourceReadToolStatus[],
|
|
1315
|
-
):
|
|
1719
|
+
):
|
|
1720
|
+
| "ok"
|
|
1721
|
+
| "candidate"
|
|
1722
|
+
| "partial"
|
|
1723
|
+
| "truncated"
|
|
1724
|
+
| "budget_exhausted"
|
|
1725
|
+
| "not_found" {
|
|
1316
1726
|
if (statuses.every((status) => status === "ok")) return "ok";
|
|
1317
1727
|
if (statuses.every((status) => status === "candidate")) return "candidate";
|
|
1728
|
+
if (statuses.every((status) => status === "truncated")) return "truncated";
|
|
1318
1729
|
if (statuses.every((status) => status === "not_found")) return "not_found";
|
|
1319
|
-
if (statuses.every((status) => status === "budget_exhausted"))
|
|
1730
|
+
if (statuses.every((status) => status === "budget_exhausted"))
|
|
1731
|
+
return "budget_exhausted";
|
|
1320
1732
|
return "partial";
|
|
1321
1733
|
}
|
|
1322
1734
|
|
|
@@ -1345,18 +1757,25 @@ function positiveIntParam(params: unknown, key: string): number | undefined {
|
|
|
1345
1757
|
}
|
|
1346
1758
|
|
|
1347
1759
|
function isWorkflowWebTextContentType(contentType: string): boolean {
|
|
1348
|
-
return /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i.test(
|
|
1760
|
+
return /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i.test(
|
|
1761
|
+
contentType.trim(),
|
|
1762
|
+
);
|
|
1349
1763
|
}
|
|
1350
1764
|
|
|
1351
1765
|
function extractWorkflowWebResponseText(
|
|
1352
1766
|
text: string,
|
|
1353
1767
|
contentType?: string,
|
|
1354
1768
|
): { text: string; title?: string; lossy?: boolean } {
|
|
1355
|
-
const looksHtml =
|
|
1769
|
+
const looksHtml =
|
|
1770
|
+
/html/i.test(contentType ?? "") ||
|
|
1771
|
+
/<html[\s>]|<body[\s>]|<title[\s>]/i.test(text);
|
|
1356
1772
|
if (!looksHtml) {
|
|
1357
1773
|
return { text, title: titleFromPlainText(text) };
|
|
1358
1774
|
}
|
|
1359
|
-
const title =
|
|
1775
|
+
const title =
|
|
1776
|
+
decodeHtmlEntities(
|
|
1777
|
+
text.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]?.trim() ?? "",
|
|
1778
|
+
).slice(0, 200) || undefined;
|
|
1360
1779
|
const body = text
|
|
1361
1780
|
.replace(/<script\b[\s\S]*?<\/script>/gi, " ")
|
|
1362
1781
|
.replace(/<style\b[\s\S]*?<\/style>/gi, " ")
|
|
@@ -1407,5 +1826,8 @@ function extensionImportSpecifier(importPath: string): string {
|
|
|
1407
1826
|
}
|
|
1408
1827
|
|
|
1409
1828
|
export function workflowWebSourceModuleImportPath(modulePath: string): string {
|
|
1410
|
-
return resolve(
|
|
1829
|
+
return resolve(
|
|
1830
|
+
dirname(modulePath),
|
|
1831
|
+
`workflow-web-source-extension${extname(modulePath)}`,
|
|
1832
|
+
);
|
|
1411
1833
|
}
|