@agwab/pi-workflow 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-graph-runtime.d.ts +1 -1
- package/dist/artifact-graph-runtime.js +10 -5
- package/dist/artifact-graph-schema.js +127 -5
- package/dist/compiler.js +52 -19
- package/dist/dynamic-generated-task-runtime.js +3 -1
- package/dist/dynamic-profiles.d.ts +1 -1
- package/dist/engine-run-graph.d.ts +3 -0
- package/dist/engine-run-graph.js +194 -4
- package/dist/engine.d.ts +5 -0
- package/dist/engine.js +389 -41
- package/dist/extension.d.ts +2 -1
- package/dist/extension.js +30 -8
- package/dist/index.d.ts +11 -3
- package/dist/index.js +6 -1
- package/dist/prompt-json.d.ts +7 -0
- package/dist/prompt-json.js +13 -0
- package/dist/roles.d.ts +1 -1
- package/dist/roles.js +5 -8
- package/dist/store.d.ts +20 -1
- package/dist/store.js +139 -35
- package/dist/strings.d.ts +11 -0
- package/dist/strings.js +24 -0
- package/dist/subagent-backend.js +710 -40
- package/dist/types.d.ts +107 -1
- package/dist/verification-ontology.d.ts +31 -0
- package/dist/verification-ontology.js +66 -0
- package/dist/workflow-artifact-tool.js +5 -6
- package/dist/workflow-artifacts.d.ts +7 -0
- package/dist/workflow-artifacts.js +55 -4
- package/dist/workflow-fetch-cache-extension.d.ts +1 -0
- package/dist/workflow-fetch-cache-extension.js +57 -9
- package/dist/workflow-metrics.d.ts +113 -0
- package/dist/workflow-metrics.js +272 -0
- package/dist/workflow-output-artifacts.js +5 -3
- package/dist/workflow-partial-output.d.ts +45 -0
- package/dist/workflow-partial-output.js +205 -0
- package/dist/workflow-progress-health.js +42 -10
- package/dist/workflow-runtime.js +10 -1
- package/dist/workflow-view.js +3 -1
- package/dist/workflow-web-source-extension.js +194 -52
- package/dist/workflow-web-source.d.ts +2 -1
- package/dist/workflow-web-source.js +109 -30
- package/docs/usage.md +76 -29
- package/node_modules/@agwab/pi-subagent/README.md +3 -3
- package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
- package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
- package/node_modules/@agwab/pi-subagent/package.json +2 -2
- package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
- package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
- package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
- package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
- package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
- package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
- package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
- package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
- package/package.json +2 -2
- package/skills/workflow-guide/SKILL.md +1 -0
- package/src/artifact-graph-runtime.ts +19 -13
- package/src/artifact-graph-schema.ts +143 -3
- package/src/cli.mjs +52 -0
- package/src/compiler.ts +63 -18
- package/src/dynamic-generated-task-runtime.ts +3 -1
- package/src/dynamic-profiles.ts +1 -1
- package/src/engine-run-graph.ts +246 -4
- package/src/engine.ts +545 -38
- package/src/extension.ts +36 -6
- package/src/index.ts +52 -1
- package/src/prompt-json.ts +13 -0
- package/src/roles.ts +6 -9
- package/src/store.ts +194 -42
- package/src/strings.ts +38 -0
- package/src/subagent-backend.ts +921 -62
- package/src/types.ts +116 -2
- package/src/verification-ontology.ts +88 -0
- package/src/workflow-artifact-tool.ts +5 -7
- package/src/workflow-artifacts.ts +83 -3
- package/src/workflow-fetch-cache-extension.ts +78 -13
- package/src/workflow-metrics.ts +478 -0
- package/src/workflow-output-artifacts.ts +5 -3
- package/src/workflow-partial-output.ts +299 -0
- package/src/workflow-progress-health.ts +47 -15
- package/src/workflow-runtime.ts +18 -2
- package/src/workflow-view.ts +2 -1
- package/src/workflow-web-source-extension.ts +654 -232
- package/src/workflow-web-source.ts +153 -39
- package/workflows/README.md +7 -25
- package/workflows/deep-research/batched-verification.spec.json +253 -0
- package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
- package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
- package/workflows/deep-research/helpers/render-executive.mjs +40 -26
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
- package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
- package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
- package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
- package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
- package/workflows/deep-research/spec.json +32 -12
- package/workflows/impact-review/spec.json +3 -3
- package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
- package/dist/dynamic-loader.d.ts +0 -25
- package/dist/dynamic-loader.js +0 -13
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
- package/src/dynamic-loader.ts +0 -49
- package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
package/dist/workflow-view.js
CHANGED
|
@@ -953,12 +953,14 @@ function statusForSummary(summary) {
|
|
|
953
953
|
return "running";
|
|
954
954
|
if (summary.blocked > 0)
|
|
955
955
|
return "blocked";
|
|
956
|
-
if (summary.failed > 0
|
|
956
|
+
if (summary.failed > 0)
|
|
957
957
|
return "failed";
|
|
958
958
|
if (summary.pending > 0)
|
|
959
959
|
return "pending";
|
|
960
960
|
if (summary.total > 0 && summary.completed === summary.total)
|
|
961
961
|
return "completed";
|
|
962
|
+
if (summary.interrupted > 0)
|
|
963
|
+
return "interrupted";
|
|
962
964
|
return "interrupted";
|
|
963
965
|
}
|
|
964
966
|
function taskElapsed(task) {
|
|
package/dist/workflow-web-source-extension.js
CHANGED
|
@@ -72,14 +72,26 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
72
72
|
name: "workflow_web_fetch_source",
|
|
73
73
|
description: "Fetch one or more URLs into the workflow web-source cache and return compact source cards with sourceRefs.",
|
|
74
74
|
parameters: Type.Object({
|
|
75
|
-
url: Type.Optional(Type.String({
|
|
76
|
-
|
|
75
|
+
url: Type.Optional(Type.String({
|
|
76
|
+
description: "Single URL to fetch into the workflow web-source cache.",
|
|
77
|
+
})),
|
|
78
|
+
urls: Type.Optional(Type.Array(Type.String(), {
|
|
79
|
+
description: "Multiple URLs to fetch in one tool call. Prefer this over repeated fetch calls when caching several promising sources.",
|
|
80
|
+
})),
|
|
77
81
|
sources: Type.Optional(Type.Array(Type.Object({
|
|
78
|
-
url: Type.String({
|
|
82
|
+
url: Type.String({
|
|
83
|
+
description: "URL to fetch into the workflow web-source cache.",
|
|
84
|
+
}),
|
|
79
85
|
title: Type.Optional(Type.String({ description: "Optional source title override." })),
|
|
80
|
-
}), {
|
|
81
|
-
|
|
82
|
-
|
|
86
|
+
}), {
|
|
87
|
+
description: "Multiple URL/title objects to fetch in one tool call.",
|
|
88
|
+
})),
|
|
89
|
+
title: Type.Optional(Type.String({
|
|
90
|
+
description: "Optional source title override for single-url fetches.",
|
|
91
|
+
})),
|
|
92
|
+
titles: Type.Optional(Type.Array(Type.String(), {
|
|
93
|
+
description: "Optional title overrides paired by index with urls.",
|
|
94
|
+
})),
|
|
83
95
|
}),
|
|
84
96
|
execute: async (toolCallId, params, signal, onUpdate, ctx) => {
|
|
85
97
|
const batchRequested = fetchSourceBatchRequested(params);
|
|
@@ -101,8 +113,12 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
101
113
|
url: sanitizeUrlForModel(request.url),
|
|
102
114
|
status: typeof payload.status === "string" ? payload.status : "unknown",
|
|
103
115
|
...(typeof payload.code === "string" ? { code: payload.code } : {}),
|
|
104
|
-
...(typeof payload.message === "string"
|
|
105
|
-
|
|
116
|
+
...(typeof payload.message === "string"
|
|
117
|
+
? { message: payload.message }
|
|
118
|
+
: {}),
|
|
119
|
+
...(typeof card?.sourceRef === "string"
|
|
120
|
+
? { sourceRef: card.sourceRef }
|
|
121
|
+
: {}),
|
|
106
122
|
...(card ? { cardIndex: cards.length - 1 } : {}),
|
|
107
123
|
});
|
|
108
124
|
}
|
|
@@ -159,10 +175,15 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
159
175
|
url: existing.redactedUrl,
|
|
160
176
|
visibleChars: budget.used,
|
|
161
177
|
});
|
|
162
|
-
return toolResultFromJson({
|
|
178
|
+
return toolResultFromJson({
|
|
179
|
+
status: "ok",
|
|
180
|
+
tool: "workflow_web_fetch_source",
|
|
181
|
+
card,
|
|
182
|
+
});
|
|
163
183
|
}
|
|
164
184
|
const fetchKey = sourceUrlCacheKey(fetchUrl);
|
|
165
|
-
const cachedFailure = fetchFailures.get(fetchKey) ??
|
|
185
|
+
const cachedFailure = fetchFailures.get(fetchKey) ??
|
|
186
|
+
(await readDurableFetchFailure(config, fetchKey));
|
|
166
187
|
if (cachedFailure) {
|
|
167
188
|
fetchFailures.set(fetchKey, cachedFailure);
|
|
168
189
|
await recordWorkflowWebSourceEvent(config, "fetch_negative_cache_hit", {
|
|
@@ -178,25 +199,43 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
178
199
|
if (!source)
|
|
179
200
|
return result;
|
|
180
201
|
sourceCache.set(source.sourceRef, source);
|
|
181
|
-
const card = buildWorkflowWebSourceCard({
|
|
202
|
+
const card = buildWorkflowWebSourceCard({
|
|
203
|
+
source,
|
|
204
|
+
policy,
|
|
205
|
+
budget,
|
|
206
|
+
duplicate: true,
|
|
207
|
+
});
|
|
182
208
|
await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
|
|
183
209
|
sourceRef: source.sourceRef,
|
|
184
210
|
url: source.redactedUrl,
|
|
185
211
|
visibleChars: budget.used,
|
|
186
212
|
});
|
|
187
|
-
return toolResultFromJson({
|
|
213
|
+
return toolResultFromJson({
|
|
214
|
+
status: "ok",
|
|
215
|
+
tool: "workflow_web_fetch_source",
|
|
216
|
+
card,
|
|
217
|
+
});
|
|
188
218
|
}
|
|
189
219
|
const fetchPromise = withWorkflowWebFetchLock(config, fetchKey, signal, async () => {
|
|
190
220
|
const lockedExisting = await findWorkflowWebSourceByUrl(config, fetchUrl);
|
|
191
221
|
if (lockedExisting) {
|
|
192
222
|
sourceCache.set(lockedExisting.sourceRef, lockedExisting);
|
|
193
|
-
const card = buildWorkflowWebSourceCard({
|
|
223
|
+
const card = buildWorkflowWebSourceCard({
|
|
224
|
+
source: lockedExisting,
|
|
225
|
+
policy,
|
|
226
|
+
budget,
|
|
227
|
+
duplicate: true,
|
|
228
|
+
});
|
|
194
229
|
await recordWorkflowWebSourceEvent(config, "fetch_duplicate", {
|
|
195
230
|
sourceRef: lockedExisting.sourceRef,
|
|
196
231
|
url: lockedExisting.redactedUrl,
|
|
197
232
|
visibleChars: budget.used,
|
|
198
233
|
});
|
|
199
|
-
return toolResultFromJson({
|
|
234
|
+
return toolResultFromJson({
|
|
235
|
+
status: "ok",
|
|
236
|
+
tool: "workflow_web_fetch_source",
|
|
237
|
+
card,
|
|
238
|
+
});
|
|
200
239
|
}
|
|
201
240
|
const lockedFailure = await readDurableFetchFailure(config, fetchKey);
|
|
202
241
|
if (lockedFailure) {
|
|
@@ -221,7 +260,10 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
221
260
|
return await cachedFetchFailureResult(config, fetchFailures, fetchKey, {
|
|
222
261
|
code: "blocked_url",
|
|
223
262
|
message: "URL was blocked by workflow web-source security policy before content fetch.",
|
|
224
|
-
extra: {
|
|
263
|
+
extra: {
|
|
264
|
+
reason: safeFetch.reason,
|
|
265
|
+
url: sanitizeUrlForModel(safeFetch.url),
|
|
266
|
+
},
|
|
225
267
|
reason: safeFetch.reason,
|
|
226
268
|
});
|
|
227
269
|
}
|
|
@@ -311,10 +353,16 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
311
353
|
textChars: source.textChars,
|
|
312
354
|
visibleChars: budget.used,
|
|
313
355
|
});
|
|
314
|
-
return toolResultFromJson({
|
|
356
|
+
return toolResultFromJson({
|
|
357
|
+
status: "ok",
|
|
358
|
+
tool: "workflow_web_fetch_source",
|
|
359
|
+
card,
|
|
360
|
+
});
|
|
315
361
|
}).catch(async (error) => {
|
|
316
362
|
const message = error instanceof Error ? error.message : "workflow_web_fetch_failed";
|
|
317
|
-
const code = message === "fetch_lock_timeout"
|
|
363
|
+
const code = message === "fetch_lock_timeout"
|
|
364
|
+
? "fetch_lock_timeout"
|
|
365
|
+
: "workflow_web_fetch_failed";
|
|
318
366
|
await recordWorkflowWebSourceEvent(config, "fetch_failed", {
|
|
319
367
|
url: sanitizeUrlForModel(fetchUrl),
|
|
320
368
|
code,
|
|
@@ -335,23 +383,47 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
335
383
|
name: "workflow_web_source_read",
|
|
336
384
|
description: "Read one or more narrow exact/fuzzy/term-matched snippets from a cached workflow web source by sourceRef.",
|
|
337
385
|
parameters: Type.Object({
|
|
338
|
-
sourceRef: Type.String({
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
386
|
+
sourceRef: Type.String({
|
|
387
|
+
description: "Opaque sourceRef returned by workflow_web_fetch_source.",
|
|
388
|
+
}),
|
|
389
|
+
query: Type.Optional(Type.String({
|
|
390
|
+
description: "Exact or fuzzy text to locate in the cached source.",
|
|
391
|
+
})),
|
|
392
|
+
queries: Type.Optional(Type.Array(Type.String(), {
|
|
393
|
+
description: "Multiple exact/fuzzy texts to locate in one cached source. Prefer this over repeated calls when reading several snippets from the same sourceRef.",
|
|
394
|
+
})),
|
|
395
|
+
exact: Type.Optional(Type.String({
|
|
396
|
+
description: "Exact text to locate in the cached source.",
|
|
397
|
+
})),
|
|
398
|
+
exactTexts: Type.Optional(Type.Array(Type.String(), {
|
|
399
|
+
description: "Multiple exact texts to locate in one cached source.",
|
|
400
|
+
})),
|
|
401
|
+
claim: Type.Optional(Type.String({
|
|
402
|
+
description: "Claim to locate when the exact quote is not known. Use with terms for deterministic quote harvesting.",
|
|
403
|
+
})),
|
|
404
|
+
terms: Type.Optional(Type.Array(Type.String(), {
|
|
405
|
+
description: "Important terms that should co-occur in the returned source window.",
|
|
406
|
+
})),
|
|
345
407
|
reads: Type.Optional(Type.Array(Type.Object({
|
|
346
408
|
query: Type.Optional(Type.String({ description: "Exact or fuzzy text to locate." })),
|
|
347
409
|
exact: Type.Optional(Type.String({ description: "Exact text to locate." })),
|
|
348
410
|
exactText: Type.Optional(Type.String({ description: "Exact text to locate." })),
|
|
349
411
|
text: Type.Optional(Type.String({ description: "Text to locate." })),
|
|
350
|
-
claim: Type.Optional(Type.String({
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
412
|
+
claim: Type.Optional(Type.String({
|
|
413
|
+
description: "Claim to locate when exact quote is unknown.",
|
|
414
|
+
})),
|
|
415
|
+
terms: Type.Optional(Type.Array(Type.String(), {
|
|
416
|
+
description: "Important terms for deterministic quote harvesting.",
|
|
417
|
+
})),
|
|
418
|
+
maxChars: Type.Optional(Type.Number({
|
|
419
|
+
description: "Maximum visible snippet characters for this read.",
|
|
420
|
+
})),
|
|
421
|
+
}), {
|
|
422
|
+
description: "Mixed batch reads for one sourceRef; each item can use query or claim+terms.",
|
|
423
|
+
})),
|
|
424
|
+
maxChars: Type.Optional(Type.Number({
|
|
425
|
+
description: "Maximum visible snippet characters per query.",
|
|
426
|
+
})),
|
|
355
427
|
}),
|
|
356
428
|
execute: async (_toolCallId, params) => {
|
|
357
429
|
const sourceRef = stringParam(params, "sourceRef") ?? stringParam(params, "source_ref");
|
|
@@ -361,7 +433,9 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
361
433
|
}
|
|
362
434
|
const source = await readCachedWorkflowWebSource(sourceRef);
|
|
363
435
|
if (!source) {
|
|
364
|
-
await recordWorkflowWebSourceEvent(config, "source_read_missing", {
|
|
436
|
+
await recordWorkflowWebSourceEvent(config, "source_read_missing", {
|
|
437
|
+
sourceRef,
|
|
438
|
+
});
|
|
365
439
|
return errorToolResult("source_not_found", "No cached workflow web source exists for sourceRef.", {
|
|
366
440
|
sourceRef,
|
|
367
441
|
});
|
|
@@ -391,6 +465,7 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
391
465
|
missingTerms: read.missingTerms,
|
|
392
466
|
coverageRatio: read.coverageRatio,
|
|
393
467
|
candidateOnly: read.candidateOnly,
|
|
468
|
+
truncated: read.truncated,
|
|
394
469
|
quote: status === "budget_exhausted" ? undefined : read.quote,
|
|
395
470
|
startOffset: read.startOffset,
|
|
396
471
|
endOffset: read.endOffset,
|
|
@@ -420,25 +495,33 @@ export function registerWorkflowWebSourceExtension(pi, config, providerExtension
|
|
|
420
495
|
missingTerms: result.missingTerms,
|
|
421
496
|
coverageRatio: result.coverageRatio,
|
|
422
497
|
candidateOnly: result.candidateOnly,
|
|
498
|
+
truncated: result.truncated,
|
|
423
499
|
quote: result.status === "budget_exhausted" ? undefined : result.quote,
|
|
424
500
|
startOffset: result.startOffset,
|
|
425
501
|
endOffset: result.endOffset,
|
|
426
|
-
budget: budgetSnapshot(result.status === "budget_exhausted"
|
|
502
|
+
budget: budgetSnapshot(result.status === "budget_exhausted" ||
|
|
503
|
+
result.status === "truncated"),
|
|
427
504
|
next: result.status === "budget_exhausted"
|
|
428
505
|
? "Visible web-source budget is exhausted for this task; cite the sourceRef as an evidence gap or use a smaller query in a fresh task."
|
|
429
|
-
:
|
|
506
|
+
: result.status === "truncated"
|
|
507
|
+
? "The matched web-source snippet was truncated by the visible budget or maxChars; use a smaller exact query or a fresh task if the full quote is required."
|
|
508
|
+
: undefined,
|
|
430
509
|
});
|
|
431
510
|
}
|
|
511
|
+
const hasBudgetExhaustedRead = results.some((result) => result.status === "budget_exhausted");
|
|
512
|
+
const hasTruncatedRead = results.some((result) => result.status === "truncated");
|
|
432
513
|
return toolResultFromJson({
|
|
433
514
|
status: responseStatus,
|
|
434
515
|
tool: "workflow_web_source_read",
|
|
435
516
|
sourceRef,
|
|
436
517
|
url: source.redactedUrl,
|
|
437
518
|
results,
|
|
438
|
-
budget: budgetSnapshot(
|
|
439
|
-
next:
|
|
519
|
+
budget: budgetSnapshot(hasBudgetExhaustedRead || hasTruncatedRead),
|
|
520
|
+
next: hasBudgetExhaustedRead
|
|
440
521
|
? "Visible web-source budget is exhausted for this task; cite missing quotes as evidence gaps or use smaller query batches in a fresh task."
|
|
441
|
-
:
|
|
522
|
+
: hasTruncatedRead
|
|
523
|
+
? "One or more matched web-source snippets were truncated by the visible budget or maxChars; use smaller exact queries or a fresh task if full quotes are required."
|
|
524
|
+
: undefined,
|
|
442
525
|
});
|
|
443
526
|
},
|
|
444
527
|
});
|
|
@@ -525,8 +608,8 @@ async function cachedFetchFailureResult(config, cache, key, failure) {
|
|
|
525
608
|
}
|
|
526
609
|
return errorToolResult(failure.code, failure.message, failure.extra);
|
|
527
610
|
}
|
|
528
|
-
const FETCH_LOCK_STALE_MS = 60_000;
|
|
529
|
-
const FETCH_LOCK_WAIT_MS =
|
|
611
|
+
const FETCH_LOCK_STALE_MS = 4 * 60_000;
|
|
612
|
+
const FETCH_LOCK_WAIT_MS = 5 * 60_000;
|
|
530
613
|
async function withWorkflowWebFetchLock(config, key, signal, fn) {
|
|
531
614
|
const release = await acquireWorkflowWebFetchLock(config, key, signal);
|
|
532
615
|
try {
|
|
@@ -544,10 +627,11 @@ async function acquireWorkflowWebFetchLock(config, key, signal) {
|
|
|
544
627
|
if (signal?.aborted)
|
|
545
628
|
throw new Error("aborted");
|
|
546
629
|
try {
|
|
630
|
+
const ownerId = `${process.pid}:${Date.now()}:${Math.random().toString(36).slice(2)}`;
|
|
547
631
|
await mkdir(lockDir);
|
|
548
|
-
await writeFile(resolve(lockDir, "owner.json"), `${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`, "utf8");
|
|
632
|
+
await writeFile(resolve(lockDir, "owner.json"), `${JSON.stringify({ ownerId, pid: process.pid, createdAt: new Date().toISOString(), key }, null, 2)}\n`, "utf8");
|
|
549
633
|
return async () => {
|
|
550
|
-
await
|
|
634
|
+
await releaseWorkflowWebFetchLock(lockDir, ownerId);
|
|
551
635
|
};
|
|
552
636
|
}
|
|
553
637
|
catch (error) {
|
|
@@ -561,6 +645,17 @@ async function acquireWorkflowWebFetchLock(config, key, signal) {
|
|
|
561
645
|
}
|
|
562
646
|
}
|
|
563
647
|
}
|
|
648
|
+
async function releaseWorkflowWebFetchLock(lockDir, ownerId) {
|
|
649
|
+
try {
|
|
650
|
+
const current = await readFetchLockOwner(lockDir);
|
|
651
|
+
if (current?.ownerId !== ownerId)
|
|
652
|
+
return;
|
|
653
|
+
await rm(lockDir, { recursive: true, force: true });
|
|
654
|
+
}
|
|
655
|
+
catch {
|
|
656
|
+
// Missing or unreadable lock will be retried by the caller.
|
|
657
|
+
}
|
|
658
|
+
}
|
|
564
659
|
async function removeStaleFetchLock(lockDir) {
|
|
565
660
|
try {
|
|
566
661
|
const current = await stat(lockDir);
|
|
@@ -572,6 +667,17 @@ async function removeStaleFetchLock(lockDir) {
|
|
|
572
667
|
// Missing or unreadable lock will be retried by the caller.
|
|
573
668
|
}
|
|
574
669
|
}
|
|
670
|
+
async function readFetchLockOwner(lockDir) {
|
|
671
|
+
try {
|
|
672
|
+
const parsed = JSON.parse(await readFile(resolve(lockDir, "owner.json"), "utf8"));
|
|
673
|
+
return isRecord(parsed) && typeof parsed.ownerId === "string"
|
|
674
|
+
? { ownerId: parsed.ownerId }
|
|
675
|
+
: undefined;
|
|
676
|
+
}
|
|
677
|
+
catch {
|
|
678
|
+
return undefined;
|
|
679
|
+
}
|
|
680
|
+
}
|
|
575
681
|
async function readDurableFetchFailure(config, key) {
|
|
576
682
|
try {
|
|
577
683
|
const parsed = JSON.parse(await readFile(fetchFailurePath(config, key), "utf8"));
|
|
@@ -596,7 +702,9 @@ function normalizeFetchFailure(value) {
|
|
|
596
702
|
message: value.message,
|
|
597
703
|
extra,
|
|
598
704
|
...(typeof value.reason === "string" ? { reason: value.reason } : {}),
|
|
599
|
-
...(typeof value.createdAt === "string"
|
|
705
|
+
...(typeof value.createdAt === "string"
|
|
706
|
+
? { createdAt: value.createdAt }
|
|
707
|
+
: {}),
|
|
600
708
|
};
|
|
601
709
|
}
|
|
602
710
|
function fetchLockPath(config, key) {
|
|
@@ -621,7 +729,9 @@ function shouldCacheFetchFailure(reason) {
|
|
|
621
729
|
reason === "unsupported_content_type");
|
|
622
730
|
}
|
|
623
731
|
function shouldCacheFetchFailureInMemory(reason) {
|
|
624
|
-
return reason === "empty_source" ||
|
|
732
|
+
return (reason === "empty_source" ||
|
|
733
|
+
reason === "dns_resolution_failed" ||
|
|
734
|
+
reason.includes("ENOTFOUND"));
|
|
625
735
|
}
|
|
626
736
|
const WORKFLOW_WEB_FETCH_TIMEOUT_MS = 30_000;
|
|
627
737
|
const WORKFLOW_WEB_FETCH_MAX_CHARS = 1_000_000;
|
|
@@ -636,12 +746,20 @@ async function safeFetchWorkflowWebText(url, security, signal) {
|
|
|
636
746
|
return response;
|
|
637
747
|
if (response.status >= 300 && response.status < 400) {
|
|
638
748
|
if (!response.location)
|
|
639
|
-
return {
|
|
749
|
+
return {
|
|
750
|
+
ok: false,
|
|
751
|
+
reason: "redirect_without_location",
|
|
752
|
+
url: checked.normalizedUrl,
|
|
753
|
+
};
|
|
640
754
|
current = new URL(response.location, checked.normalizedUrl).href;
|
|
641
755
|
continue;
|
|
642
756
|
}
|
|
643
757
|
if (response.status < 200 || response.status >= 300) {
|
|
644
|
-
return {
|
|
758
|
+
return {
|
|
759
|
+
ok: false,
|
|
760
|
+
reason: `http_${response.status}`,
|
|
761
|
+
url: checked.normalizedUrl,
|
|
762
|
+
};
|
|
645
763
|
}
|
|
646
764
|
const extracted = extractWorkflowWebResponseText(response.text, response.contentType);
|
|
647
765
|
return {
|
|
@@ -675,13 +793,17 @@ function safeFetchOnce(url, security, signal) {
|
|
|
675
793
|
lookupPublicAddress(hostname, security)
|
|
676
794
|
.then((address) => {
|
|
677
795
|
if (isLookupAllOptions(options)) {
|
|
678
|
-
callback(null, [
|
|
796
|
+
callback(null, [
|
|
797
|
+
{ address: address.address, family: address.family },
|
|
798
|
+
]);
|
|
679
799
|
return;
|
|
680
800
|
}
|
|
681
801
|
callback(null, address.address, address.family);
|
|
682
802
|
})
|
|
683
803
|
.catch((error) => {
|
|
684
|
-
const reason = error instanceof Error
|
|
804
|
+
const reason = error instanceof Error
|
|
805
|
+
? error.message
|
|
806
|
+
: "dns_resolution_failed";
|
|
685
807
|
callback(new Error(reason), "", 4);
|
|
686
808
|
});
|
|
687
809
|
},
|
|
@@ -693,7 +815,10 @@ function safeFetchOnce(url, security, signal) {
|
|
|
693
815
|
? res.headers["content-type"][0]
|
|
694
816
|
: res.headers["content-type"];
|
|
695
817
|
const status = res.statusCode ?? 0;
|
|
696
|
-
if (status >= 200 &&
|
|
818
|
+
if (status >= 200 &&
|
|
819
|
+
status < 300 &&
|
|
820
|
+
contentType &&
|
|
821
|
+
!isWorkflowWebTextContentType(contentType)) {
|
|
697
822
|
res.resume();
|
|
698
823
|
settle({ ok: false, reason: "unsupported_content_type", url });
|
|
699
824
|
return;
|
|
@@ -772,7 +897,10 @@ async function validateResolvedHost(url, security) {
|
|
|
772
897
|
return { ok: false, reason: "invalid_url", url };
|
|
773
898
|
}
|
|
774
899
|
try {
|
|
775
|
-
const addresses = await lookup(parsed.hostname, {
|
|
900
|
+
const addresses = await lookup(parsed.hostname, {
|
|
901
|
+
all: true,
|
|
902
|
+
verbatim: true,
|
|
903
|
+
});
|
|
776
904
|
for (const address of addresses) {
|
|
777
905
|
const reason = privateIpReason(address.address);
|
|
778
906
|
if (reason)
|
|
@@ -800,7 +928,8 @@ function privateIpReason(address) {
|
|
|
800
928
|
}
|
|
801
929
|
if (isIP(lower) === 4) {
|
|
802
930
|
const parts = lower.split(".").map((part) => Number(part));
|
|
803
|
-
if (parts.length !== 4 ||
|
|
931
|
+
if (parts.length !== 4 ||
|
|
932
|
+
parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255))
|
|
804
933
|
return "private_host_blocked";
|
|
805
934
|
const [a, b, c, d] = parts;
|
|
806
935
|
if (a === 0 || a === 10 || a === 127 || a >= 224)
|
|
@@ -961,7 +1090,9 @@ function fetchSourceRequestsFromParams(params) {
|
|
|
961
1090
|
const titles = Array.isArray(params.titles) ? params.titles : [];
|
|
962
1091
|
if (Array.isArray(params.sources)) {
|
|
963
1092
|
for (const source of params.sources) {
|
|
964
|
-
if (!isRecord(source) ||
|
|
1093
|
+
if (!isRecord(source) ||
|
|
1094
|
+
typeof source.url !== "string" ||
|
|
1095
|
+
!source.url.trim())
|
|
965
1096
|
continue;
|
|
966
1097
|
requests.push({
|
|
967
1098
|
url: source.url.trim(),
|
|
@@ -978,7 +1109,9 @@ function fetchSourceRequestsFromParams(params) {
|
|
|
978
1109
|
const title = titles[index];
|
|
979
1110
|
requests.push({
|
|
980
1111
|
url: url.trim(),
|
|
981
|
-
...(typeof title === "string" && title.trim()
|
|
1112
|
+
...(typeof title === "string" && title.trim()
|
|
1113
|
+
? { title: title.trim() }
|
|
1114
|
+
: {}),
|
|
982
1115
|
});
|
|
983
1116
|
}
|
|
984
1117
|
}
|
|
@@ -1086,12 +1219,18 @@ function dedupeSourceReadRequests(requests) {
|
|
|
1086
1219
|
return deduped;
|
|
1087
1220
|
}
|
|
1088
1221
|
function sourceReadBatchRequested(params) {
|
|
1089
|
-
return ((isRecord(params) &&
|
|
1222
|
+
return ((isRecord(params) &&
|
|
1223
|
+
Array.isArray(params.reads) &&
|
|
1224
|
+
params.reads.length > 0) ||
|
|
1090
1225
|
stringArrayParam(params, "queries").length > 0 ||
|
|
1091
1226
|
stringArrayParam(params, "exactTexts").length > 0 ||
|
|
1092
1227
|
stringArrayParam(params, "texts").length > 0);
|
|
1093
1228
|
}
|
|
1094
1229
|
function sourceReadResponseStatus(read) {
|
|
1230
|
+
if (read.status === "truncated" && !read.quote)
|
|
1231
|
+
return "budget_exhausted";
|
|
1232
|
+
if (read.status === "truncated")
|
|
1233
|
+
return "truncated";
|
|
1095
1234
|
if (read.status === "matched" && !read.quote)
|
|
1096
1235
|
return "budget_exhausted";
|
|
1097
1236
|
if (read.status === "matched" && read.candidateOnly)
|
|
@@ -1105,6 +1244,8 @@ function aggregateSourceReadStatus(statuses) {
|
|
|
1105
1244
|
return "ok";
|
|
1106
1245
|
if (statuses.every((status) => status === "candidate"))
|
|
1107
1246
|
return "candidate";
|
|
1247
|
+
if (statuses.every((status) => status === "truncated"))
|
|
1248
|
+
return "truncated";
|
|
1108
1249
|
if (statuses.every((status) => status === "not_found"))
|
|
1109
1250
|
return "not_found";
|
|
1110
1251
|
if (statuses.every((status) => status === "budget_exhausted"))
|
|
@@ -1140,7 +1281,8 @@ function isWorkflowWebTextContentType(contentType) {
|
|
|
1140
1281
|
return /^(text\/|application\/(json|xml|xhtml\+xml|ld\+json)|[^;]+\+json\b|[^;]+\+xml\b)/i.test(contentType.trim());
|
|
1141
1282
|
}
|
|
1142
1283
|
function extractWorkflowWebResponseText(text, contentType) {
|
|
1143
|
-
const looksHtml = /html/i.test(contentType ?? "") ||
|
|
1284
|
+
const looksHtml = /html/i.test(contentType ?? "") ||
|
|
1285
|
+
/<html[\s>]|<body[\s>]|<title[\s>]/i.test(text);
|
|
1144
1286
|
if (!looksHtml) {
|
|
1145
1287
|
return { text, title: titleFromPlainText(text) };
|
|
1146
1288
|
}
|
|
package/dist/workflow-web-source.d.ts
CHANGED
|
@@ -67,7 +67,7 @@ export interface WorkflowWebSourceReadRequest {
|
|
|
67
67
|
maxChars?: number;
|
|
68
68
|
}
|
|
69
69
|
export interface WorkflowWebSourceReadResult {
|
|
70
|
-
status: "matched" | "not_found";
|
|
70
|
+
status: "matched" | "truncated" | "not_found";
|
|
71
71
|
matchType?: "exact" | "normalized" | "terms";
|
|
72
72
|
quote?: string;
|
|
73
73
|
startOffset?: number;
|
|
@@ -77,6 +77,7 @@ export interface WorkflowWebSourceReadResult {
|
|
|
77
77
|
missingTerms?: string[];
|
|
78
78
|
coverageRatio?: number;
|
|
79
79
|
candidateOnly?: boolean;
|
|
80
|
+
truncated?: boolean;
|
|
80
81
|
}
|
|
81
82
|
export interface WorkflowWebSourceCard {
|
|
82
83
|
sourceRef: string;
|