@geotechcli/core 0.4.24 → 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/sandbox.d.ts.map +1 -1
- package/dist/agents/sandbox.js +13 -8
- package/dist/agents/sandbox.js.map +1 -1
- package/dist/agents/swarm.d.ts.map +1 -1
- package/dist/agents/swarm.js +21 -4
- package/dist/agents/swarm.js.map +1 -1
- package/dist/ingest/geotech-document.d.ts +4 -0
- package/dist/ingest/geotech-document.d.ts.map +1 -1
- package/dist/ingest/geotech-document.js +109 -5
- package/dist/ingest/geotech-document.js.map +1 -1
- package/dist/ingest/geotech-extract.d.ts +3 -0
- package/dist/ingest/geotech-extract.d.ts.map +1 -1
- package/dist/ingest/geotech-extract.js +29 -1
- package/dist/ingest/geotech-extract.js.map +1 -1
- package/dist/ingest/index.d.ts +2 -1
- package/dist/ingest/index.d.ts.map +1 -1
- package/dist/ingest/index.js +2 -1
- package/dist/ingest/index.js.map +1 -1
- package/dist/ingest/job-store.d.ts +18 -3
- package/dist/ingest/job-store.d.ts.map +1 -1
- package/dist/ingest/job-store.js +180 -11
- package/dist/ingest/job-store.js.map +1 -1
- package/dist/ingest/job-worker.d.ts.map +1 -1
- package/dist/ingest/job-worker.js +534 -63
- package/dist/ingest/job-worker.js.map +1 -1
- package/dist/ingest/pdf.d.ts +1 -0
- package/dist/ingest/pdf.d.ts.map +1 -1
- package/dist/ingest/pdf.js +11 -0
- package/dist/ingest/pdf.js.map +1 -1
- package/dist/ingest/segmentation.d.ts +38 -0
- package/dist/ingest/segmentation.d.ts.map +1 -0
- package/dist/ingest/segmentation.js +145 -0
- package/dist/ingest/segmentation.js.map +1 -0
- package/dist/meta/metadata.json +1 -1
- package/dist/report/html.d.ts.map +1 -1
- package/dist/report/html.js +17 -34
- package/dist/report/html.js.map +1 -1
- package/dist/report/ingest-dossier.d.ts.map +1 -1
- package/dist/report/ingest-dossier.js +19 -0
- package/dist/report/ingest-dossier.js.map +1 -1
- package/dist/skills/index.d.ts.map +1 -1
- package/dist/skills/index.js +12 -4
- package/dist/skills/index.js.map +1 -1
- package/dist/vision/geotech-document.d.ts +1 -0
- package/dist/vision/geotech-document.d.ts.map +1 -1
- package/dist/vision/geotech-document.js +82 -14
- package/dist/vision/geotech-document.js.map +1 -1
- package/dist/vision/index.d.ts.map +1 -1
- package/dist/vision/index.js +47 -10
- package/dist/vision/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,16 +1,20 @@
|
|
|
1
|
-
import { basename } from 'node:path';
|
|
1
|
+
import { basename, join } from 'node:path';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
2
3
|
import { buildLLMConfig } from '../config/index.js';
|
|
3
|
-
import { readDocumentPdfPageInputs } from './document-inputs.js';
|
|
4
|
-
import { inspectPdfDocument } from './pdf.js';
|
|
4
|
+
import { countDocumentPdfPages, readDocumentPdfPageInputs } from './document-inputs.js';
|
|
5
|
+
import { inferPdfDocumentPageCountFallback, inspectPdfDocument, } from './pdf.js';
|
|
5
6
|
import { ingestBoreholeLogDocument, summarizeBoreholeIngestInspection, } from './geotech-extract.js';
|
|
6
7
|
import { buildPreflightLowYieldInsight, ingestGeotechDocument, inferPreflightLowYieldPageRole, } from './geotech-document.js';
|
|
7
8
|
import { extractGeotechDocumentFactsFromText, interpretGeotechDocumentPage, } from '../vision/geotech-document.js';
|
|
8
9
|
import { interpretBoreholeLogWithContext, transcribeDocumentImageText, } from '../vision/index.js';
|
|
9
10
|
import { recoverDocumentTextHint } from '../vision/ocr.js';
|
|
10
11
|
import { persistBoreholeIngestReview } from './review-store.js';
|
|
11
|
-
import { loadPersistedIngestJob, savePersistedIngestJob, } from './job-store.js';
|
|
12
|
+
import { buildPersistedIngestJobSegments, createPersistedIngestJob, loadPersistedIngestJob, resolvePersistedIngestJobExtractionConcurrency, savePersistedIngestJob, } from './job-store.js';
|
|
13
|
+
import { HOSTED_BETA_EFFECTIVE_PAGE_LIMIT, slicePdfInspectionToRange, writePdfPageSubset, } from './segmentation.js';
|
|
12
14
|
const SLOW_VISUAL_ERROR_PATTERNS = [
|
|
13
15
|
/timeout/i,
|
|
16
|
+
/\b524\b/i,
|
|
17
|
+
/upstream(?: request)? (?:timed out|timeout|failed)/i,
|
|
14
18
|
/provider is busy/i,
|
|
15
19
|
/returned no content/i,
|
|
16
20
|
/did not contain assistant text/i,
|
|
@@ -20,6 +24,15 @@ const SLOW_VISUAL_ERROR_PATTERNS = [
|
|
|
20
24
|
/\b503\b/i,
|
|
21
25
|
/\b504\b/i,
|
|
22
26
|
];
|
|
27
|
+
/**
 * Error-message patterns that mark a page failure as a transient upstream
 * problem. Consulted by isRetryableUpstreamPageError to decide whether a
 * page is worth one more attempt.
 */
const RETRYABLE_UPSTREAM_ERROR_PATTERNS = [
    // Generic timeouts.
    /timeout/i,
    // HTTP 524 status code appearing as a standalone number.
    /\b524\b/i,
    // "upstream timed out" / "upstream request failed" style messages.
    /upstream(?: request)? (?:timed out|timeout|failed)/i,
    // Provider-side load shedding.
    /provider is busy/i,
    /temporarily unavailable/i,
    // HTTP 503 / 504 status codes appearing as standalone numbers.
    /\b503\b/i,
    /\b504\b/i,
];
|
|
23
36
|
const FATAL_PROVIDER_STOP_PATTERNS = [
|
|
24
37
|
/daily limit reached/i,
|
|
25
38
|
/remaining today:\s*0/i,
|
|
@@ -34,6 +47,310 @@ function nowIso(now) {
|
|
|
34
47
|
/**
 * Returns the distinct non-blank strings from `values`, in first-seen order.
 * Non-string entries and strings that are empty after trimming are dropped;
 * kept strings are returned untrimmed.
 */
function uniqueStrings(values) {
    const isNonBlankString = (candidate) => typeof candidate === 'string' && candidate.trim().length > 0;
    const distinct = new Set(values.filter(isNonBlankString));
    return Array.from(distinct);
}
|
|
50
|
+
/**
 * Path to present for a job's source document: the recorded original file
 * path when there is one, otherwise the job's own source file path.
 */
function getJobSourceDisplayPath(job) {
    const { originalFilePath, filePath } = job.source;
    return originalFilePath ?? filePath;
}
|
|
53
|
+
/**
 * File name to present for a job's source document: the recorded original
 * file name when there is one, otherwise the base name of the display path.
 */
function getJobSourceDisplayName(job) {
    const recordedName = job.source.originalFileName;
    if (recordedName !== undefined && recordedName !== null) {
        return recordedName;
    }
    return basename(getJobSourceDisplayPath(job));
}
|
|
56
|
+
/**
 * Copies a segmentation summary so the caller can hand it out without
 * sharing the `pageRange` array or the individual segment objects.
 * Returns undefined when no summary is given. The copy is one level deep:
 * values nested inside a segment are still shared.
 */
function cloneSegmentationSummary(segmentation) {
    if (!segmentation) {
        return undefined;
    }
    const copy = { ...segmentation };
    copy.pageRange = [...segmentation.pageRange];
    // `segments` is optional; when absent the key is still set (to undefined).
    copy.segments = segmentation.segments?.map((segment) => ({ ...segment }));
    return copy;
}
|
|
66
|
+
/**
 * Builds the `source` section of an ingest result from a job.
 *
 * @param job    Persisted ingest job; its display path/name, total pages,
 *               page range and segmentation summary are copied over.
 * @param counts Object carrying `successfulPages` and `failedPages`.
 * @returns Source descriptor with `inputKind` fixed to 'pdf' and a cloned
 *          segmentation summary.
 */
function buildJobResultSource(job, counts) {
    const displayPath = getJobSourceDisplayPath(job);
    const displayName = getJobSourceDisplayName(job);
    const { totalPages, pageRange } = job.source;
    return {
        filePath: displayPath,
        fileName: displayName,
        inputKind: 'pdf',
        totalPages,
        successfulPages: counts.successfulPages,
        failedPages: counts.failedPages,
        pageRange,
        segmentation: cloneSegmentationSummary(job.segmentation),
    };
}
|
|
78
|
+
/**
 * Rewrites "Page N" / "page N" references in a message from a segment-local
 * page number to the original document page number.
 *
 * Matching is whole-word, so "Page 1" is rewritten while "Page 12" is left
 * alone. Returns undefined for an empty or missing message.
 */
function remapPageReferenceText(message, localPageNumber, originalPageNumber) {
    if (!message) {
        return undefined;
    }
    // Capitalised form first, then lower-case, each as its own pass.
    return ['Page', 'page'].reduce((text, label) => {
        const localReference = new RegExp(`\\b${label} ${localPageNumber}\\b`, 'g');
        return text.replace(localReference, `${label} ${originalPageNumber}`);
    }, message);
}
|
|
86
|
+
/**
 * Re-expresses a page insight produced inside a segment child job in terms
 * of the parent document: page number and total pages are overwritten, and
 * page references inside warnings are rewritten and de-duplicated.
 */
function remapChildGeotechInsightToOriginalPage(insight, localPageNumber, originalPageNumber, parentTotalPages) {
    const rewrittenWarnings = insight.warnings.map((warning) => remapPageReferenceText(warning, localPageNumber, originalPageNumber));
    return {
        ...insight,
        pageNumber: originalPageNumber,
        totalPages: parentTotalPages,
        // uniqueStrings also drops the undefined entries produced for empty warnings.
        warnings: uniqueStrings(rewrittenWarnings),
    };
}
|
|
94
|
+
/**
 * Returns a copy of the segmentation summary in which the segment with the
 * given `segmentIndex` has `patch` shallow-merged over it. Other segments
 * are reused as-is. When the summary is missing or has no `segments`, the
 * input is returned unchanged.
 */
function updateSegmentSummaryStatus(segmentation, segmentIndex, patch) {
    const currentSegments = segmentation?.segments;
    if (!currentSegments) {
        return segmentation;
    }
    const patchedSegments = currentSegments.map((segment) => {
        if (segment.segmentIndex !== segmentIndex) {
            return segment;
        }
        return { ...segment, ...patch };
    });
    return { ...segmentation, segments: patchedSegments };
}
|
|
108
|
+
/**
 * Root directory for ingest artifacts: the GEOTECHCLI_CONFIG_DIR environment
 * variable when it is set, otherwise `.geotechcli` under the user's home
 * directory.
 */
function getIngestJobArtifactsRoot() {
    const configuredRoot = process.env.GEOTECHCLI_CONFIG_DIR;
    if (configuredRoot !== undefined && configuredRoot !== null) {
        return configuredRoot;
    }
    return join(homedir(), '.geotechcli');
}
|
|
111
|
+
/**
 * Directory holding the per-segment PDF packets of a parent job:
 * `<artifacts root>/ingest-jobs/<parentJobId>/segments`.
 */
function getSegmentArtifactsDir(parentJobId) {
    const pathParts = [getIngestJobArtifactsRoot(), 'ingest-jobs', parentJobId, 'segments'];
    return join(...pathParts);
}
|
|
114
|
+
/**
 * Resolves the page range a job is scoped to, as `{ startPage, endPage }`.
 * Preference order: the source's own page range, then the segmentation
 * summary's page range, then the whole document (1..totalPages).
 */
function getJobScopedPageRange(job) {
    const wholeDocument = () => [1, job.source.totalPages];
    const resolvedRange = job.source.pageRange ?? job.segmentation?.pageRange ?? wholeDocument();
    const [startPage, endPage] = resolvedRange;
    return { startPage, endPage };
}
|
|
121
|
+
/**
 * Page count to report for the original document: the largest of the
 * source's total pages and the end pages of the source and segmentation
 * page ranges (missing ranges count as 0).
 */
function getJobOriginalTotalPages(job) {
    const sourceRangeEnd = job.source.pageRange?.[1] ?? 0;
    const segmentationRangeEnd = job.segmentation?.pageRange?.[1] ?? 0;
    return Math.max(job.source.totalPages, sourceRangeEnd, segmentationRangeEnd);
}
|
|
124
|
+
/**
 * Keeps only the page inputs whose `pageNumber` appears in
 * `selectedPageNumbers`, preserving the order of `pageInputs`.
 */
function filterPageInputsToSelectedPages(pageInputs, selectedPageNumbers) {
    const wantedPageNumbers = new Set(selectedPageNumbers);
    const isWanted = ({ pageNumber }) => wantedPageNumbers.has(pageNumber);
    return pageInputs.filter(isWanted);
}
|
|
128
|
+
/**
 * Tallies page checkpoints inside `range` (inclusive on both ends).
 *
 * @returns `{ completedPages, failedPages }` — counts of checkpoints whose
 *          status is 'completed' or 'failed'; other statuses are ignored.
 */
function countCheckpointStatuses(pages, range) {
    const tally = { completedPages: 0, failedPages: 0 };
    for (const { pageNumber, status } of pages) {
        const outsideRange = pageNumber < range.startPage || pageNumber > range.endPage;
        if (outsideRange) {
            continue;
        }
        switch (status) {
            case 'completed':
                tally.completedPages += 1;
                break;
            case 'failed':
                tally.failedPages += 1;
                break;
            default:
                break;
        }
    }
    return tally;
}
|
|
144
|
+
/**
 * True when no page checkpoint inside `range` (inclusive) is still
 * 'pending'. A range that matches no checkpoints counts as resolved.
 */
function isSegmentResolved(job, range) {
    const isPendingInRange = (page) => page.pageNumber >= range.startPage
        && page.pageNumber <= range.endPage
        && page.status === 'pending';
    return !job.checkpoints.pages.some(isPendingInRange);
}
|
|
149
|
+
/**
 * Normalizes a child checkpoint's error message and rewrites its page
 * references from the segment-local page number to the original one.
 * A missing message is treated as the empty string before normalization.
 */
function remapSegmentCheckpointError(message, localPageNumber, originalPageNumber) {
    const rawMessage = message ?? '';
    return remapPageReferenceText(normalizeCheckpointErrorMessage(rawMessage), localPageNumber, originalPageNumber);
}
|
|
153
|
+
/**
 * Folds the page checkpoints of a finished segment child job back into its
 * parent job and returns the updated parent (inputs are not mutated).
 *
 * Parent pages outside `range` are kept as-is. For each parent page inside
 * the range, the child checkpoint with the matching segment-local page
 * number (parent page - range.startPage + 1) is looked up:
 *   - no child checkpoint, or one still 'pending' → the parent page is
 *     marked 'failed' with a "did not complete within segment" error;
 *   - otherwise the child's status, timestamps, error, OCR fields and result
 *     are copied over, with page references remapped to parent numbering.
 *
 * @param parentJob Parent ingest job.
 * @param childJob  Segment child job whose checkpoints use local numbering.
 * @param range     `{ startPage, endPage }` of the segment in parent numbering.
 * @param now       Passed to nowIso to produce the merge timestamp.
 */
function mergeSegmentChildJobIntoParentJob(parentJob, childJob, range, now) {
    const mergedAt = nowIso(now);
    const parentTotalPages = getJobOriginalTotalPages(parentJob);
    const mergePage = (parentPage) => {
        const parentPageNumber = parentPage.pageNumber;
        if (parentPageNumber < range.startPage || parentPageNumber > range.endPage) {
            return parentPage;
        }
        const localPageNumber = parentPageNumber - range.startPage + 1;
        const childPage = childJob.checkpoints.pages.find((candidate) => candidate.pageNumber === localPageNumber);
        if (!childPage || childPage.status === 'pending') {
            return {
                ...parentPage,
                status: 'failed',
                updatedAt: mergedAt,
                error: `Page ${parentPageNumber} did not complete within segment ${range.startPage}-${range.endPage}.`,
                downgraded: false,
            };
        }
        // Only completed geotech-document results carry page numbers that need remapping.
        const needsResultRemap = parentJob.documentType === 'geotech-document'
            && childPage.status === 'completed'
            && childPage.result;
        let mergedResult = childPage.result;
        if (needsResultRemap) {
            mergedResult = remapChildGeotechInsightToOriginalPage(childPage.result, localPageNumber, parentPageNumber, parentTotalPages);
        }
        return {
            ...parentPage,
            status: childPage.status,
            attempts: Math.max(parentPage.attempts, childPage.attempts),
            updatedAt: mergedAt,
            completedAt: childPage.completedAt,
            error: remapSegmentCheckpointError(childPage.error, localPageNumber, parentPageNumber),
            downgraded: childPage.downgraded,
            ocrTextHint: childPage.ocrTextHint,
            ocrSource: childPage.ocrSource,
            ocrWarnings: childPage.ocrWarnings,
            result: mergedResult,
        };
    };
    return {
        ...parentJob,
        updatedAt: mergedAt,
        execution: {
            ...parentJob.execution,
            lastHeartbeatAt: mergedAt,
        },
        checkpoints: {
            pages: parentJob.checkpoints.pages.map(mergePage),
        },
    };
}
|
|
201
|
+
/**
 * Runs a segmented parent job: processes each segment as its own child
 * ingest job, merges the child checkpoints back into the parent, and then
 * finalizes the parent result.
 *
 * Steps, in order:
 *   1. Resolve the segment plan (persisted segments, or a freshly built plan
 *      that is then persisted on the parent with status 'queued').
 *   2. For each segment, unless cancelled or already completed: write the
 *      segment's page subset to a packet PDF, reuse or create the child job,
 *      mark the segment 'running', run the child worker, and merge its
 *      outcome into the parent.
 *   3. If cancellation was requested, mark the parent 'canceled' and return.
 *   4. Otherwise finalize the result, optionally persist a review when the
 *      request carries a projectId, and mark the parent 'completed'.
 *
 * @param jobId        Parent job id.
 * @param currentJob   Parent job as loaded by the caller.
 * @param mutateJob    Async updater taking a `(job) => nextJob` callback.
 * @param config       Config forwarded to finalizeJobResult.
 * @param dependencies Injected dependencies (`now`, `persistReview`, …), also
 *                     forwarded to the child worker and helpers.
 * @returns The parent job as reloaded after the final update.
 * @throws Error when the parent has no inspection metadata, or when a scoped
 *         inspection cannot be produced for a segment.
 */
async function runSegmentedParentGeotechJob(jobId, currentJob, mutateJob, config, dependencies) {
    let parentJob = currentJob;
    // Re-read the persisted parent; keep the in-memory copy if the load yields nothing.
    const refreshParentJob = () => {
        parentJob = loadPersistedIngestJob(jobId) ?? parentJob;
        return parentJob;
    };
    const effectivePageLimit = parentJob.segmentation?.effectivePageLimit
        ?? HOSTED_BETA_EFFECTIVE_PAGE_LIMIT;
    const scopedRange = getJobScopedPageRange(parentJob);
    const baseInspection = parentJob.inspection;
    if (!baseInspection) {
        throw new Error('Segmented parent ingest requires PDF inspection metadata.');
    }
    const hasPersistedSegments = Boolean(parentJob.segmentation?.segments?.length);
    let baseSegments;
    if (hasPersistedSegments) {
        baseSegments = parentJob.segmentation.segments;
    }
    else {
        const plannedSegments = buildPersistedIngestJobSegments(baseInspection, {
            pageRange: scopedRange,
            effectivePageLimit,
        });
        baseSegments = plannedSegments.map((segment, index, allSegments) => ({
            ...segment,
            segmentIndex: index + 1,
            segmentCount: allSegments.length,
            status: 'queued',
        }));
        // Persist the fresh plan so later runs can pick up where this one stopped.
        await mutateJob((persisted) => ({
            ...persisted,
            updatedAt: nowIso(dependencies.now),
            segmentation: {
                mode: 'segmented-parent',
                pageRange: [scopedRange.startPage, scopedRange.endPage],
                effectivePageLimit,
                segmentCount: baseSegments.length,
                segments: baseSegments,
            },
        }));
        refreshParentJob();
    }
    const segmentArtifactsDir = getSegmentArtifactsDir(jobId);
    const segmentQueue = parentJob.segmentation?.segments ?? baseSegments;
    for (const queuedSegment of segmentQueue) {
        if (isCancelled(jobId)) {
            break;
        }
        refreshParentJob();
        // Prefer the persisted view of this segment; it may carry a childJobId/status.
        const latestSegment = parentJob.segmentation?.segments?.find((entry) => entry.segmentIndex === queuedSegment.segmentIndex) ?? queuedSegment;
        const range = {
            startPage: latestSegment.startPage,
            endPage: latestSegment.endPage,
        };
        if (isSegmentResolved(parentJob, range) && latestSegment.status === 'completed') {
            continue;
        }
        const paddedSegmentIndex = String(latestSegment.segmentIndex).padStart(2, '0');
        const packetPath = join(segmentArtifactsDir, `segment-${paddedSegmentIndex}-pages-${range.startPage}-${range.endPage}.pdf`);
        await writePdfPageSubset(getJobSourceDisplayPath(parentJob), range, packetPath);
        const childInspection = slicePdfInspectionToRange(baseInspection, range, { rebasePageNumbers: true });
        if (!childInspection) {
            throw new Error(`Unable to create a scoped inspection for segment ${latestSegment.segmentIndex}.`);
        }
        const previousChild = latestSegment.childJobId ? loadPersistedIngestJob(latestSegment.childJobId) : null;
        // A missing, failed or canceled child is replaced by a brand-new child job.
        const needsFreshChild = !previousChild
            || previousChild.status === 'failed'
            || previousChild.status === 'canceled';
        const childJob = needsFreshChild
            ? createPersistedIngestJob({
                documentType: parentJob.documentType,
                filePath: packetPath,
                inspection: childInspection,
                config: parentJob.config,
                overrideBoreholeId: parentJob.request.overrideBoreholeId,
                originalFilePath: getJobSourceDisplayPath(parentJob),
                originalFileName: getJobSourceDisplayName(parentJob),
                segmentation: {
                    mode: 'segment-child',
                    pageRange: [range.startPage, range.endPage],
                    effectivePageLimit,
                    segmentCount: parentJob.segmentation?.segmentCount ?? baseSegments.length,
                    segmentIndex: latestSegment.segmentIndex,
                    parentJobId: parentJob.jobId,
                },
                now: dependencies.now,
            })
            : previousChild;
        const startedAt = Date.now();
        await mutateJob((persisted) => ({
            ...persisted,
            updatedAt: nowIso(dependencies.now),
            execution: {
                ...persisted.execution,
                lastHeartbeatAt: nowIso(dependencies.now),
            },
            segmentation: updateSegmentSummaryStatus(persisted.segmentation, latestSegment.segmentIndex, {
                childJobId: childJob.jobId,
                status: 'running',
            }),
        }));
        const completedChild = await runPersistedIngestJobWorker(childJob.jobId, dependencies);
        const durationMs = Date.now() - startedAt;
        await mutateJob((persisted) => {
            const mergedJob = mergeSegmentChildJobIntoParentJob(persisted, completedChild, range, dependencies.now);
            const counts = countCheckpointStatuses(mergedJob.checkpoints.pages, range);
            // Any child status other than completed/canceled is recorded as failed.
            let childStatus = 'failed';
            if (completedChild.status === 'completed') {
                childStatus = 'completed';
            }
            else if (completedChild.status === 'canceled') {
                childStatus = 'canceled';
            }
            return {
                ...mergedJob,
                segmentation: updateSegmentSummaryStatus(mergedJob.segmentation, latestSegment.segmentIndex, {
                    childJobId: completedChild.jobId,
                    status: childStatus,
                    completedPages: counts.completedPages,
                    failedPages: counts.failedPages,
                    durationMs,
                }),
            };
        });
    }
    refreshParentJob();
    if (parentJob.execution.cancelRequested) {
        await mutateJob((persisted) => ({
            ...persisted,
            status: 'canceled',
            updatedAt: nowIso(dependencies.now),
            canceledAt: nowIso(dependencies.now),
            execution: {
                ...persisted.execution,
                pid: undefined,
            },
        }));
        return refreshParentJob();
    }
    const preparedPageInputs = await preparePdfPageInputs(parentJob.source.filePath, null, parentJob.processing.pagePreprocessingConcurrency, dependencies);
    const checkpointPageNumbers = parentJob.checkpoints.pages.map((page) => page.pageNumber);
    const replayPageInputs = filterPageInputsToSelectedPages(preparedPageInputs, checkpointPageNumbers);
    const finalResult = await finalizeJobResult(parentJob, replayPageInputs, config, dependencies);
    let persistedReview = null;
    if (parentJob.request.projectId) {
        const persistReview = dependencies.persistReview ?? persistBoreholeIngestReview;
        persistedReview = persistReview(parentJob.request.projectId, finalResult, {
            title: parentJob.request.reviewTitle,
        });
    }
    await mutateJob((persisted) => ({
        ...persisted,
        status: 'completed',
        updatedAt: nowIso(dependencies.now),
        completedAt: nowIso(dependencies.now),
        execution: {
            ...persisted.execution,
            pid: undefined,
            lastHeartbeatAt: nowIso(dependencies.now),
        },
        result: {
            ingestResult: finalResult,
            persistedReview: persistedReview
                ? {
                    datasetName: persistedReview.datasetName,
                    reviewId: persistedReview.reviewId,
                    createdAt: persistedReview.createdAt,
                }
                : undefined,
        },
    }));
    return refreshParentJob();
}
|
|
37
354
|
/**
 * True when the ingest result's `documentType` is 'borehole-log'.
 */
function isBoreholeResult(result) {
    const { documentType } = result;
    return documentType === 'borehole-log';
}
|
|
@@ -155,6 +472,9 @@ function isSlowVisualPageError(message, classification, sourceKind) {
|
|
|
155
472
|
const looksVisual = sourceKind === 'raster-image' || classification === 'image-only' || classification === 'text-unreadable';
|
|
156
473
|
return looksVisual && SLOW_VISUAL_ERROR_PATTERNS.some((pattern) => pattern.test(message));
|
|
157
474
|
}
|
|
475
|
+
/**
 * True when `message` matches any of RETRYABLE_UPSTREAM_ERROR_PATTERNS,
 * i.e. the page failure looks like a transient upstream problem.
 */
function isRetryableUpstreamPageError(message) {
    for (const pattern of RETRYABLE_UPSTREAM_ERROR_PATTERNS) {
        if (pattern.test(message)) {
            return true;
        }
    }
    return false;
}
|
|
158
478
|
/**
 * True when `message` matches any of FATAL_PROVIDER_STOP_PATTERNS.
 */
function isFatalProviderStopError(message) {
    for (const pattern of FATAL_PROVIDER_STOP_PATTERNS) {
        if (pattern.test(message)) {
            return true;
        }
    }
    return false;
}
|
|
@@ -169,6 +489,13 @@ function normalizeCheckpointErrorMessage(message) {
|
|
|
169
489
|
}
|
|
170
490
|
return normalized;
|
|
171
491
|
}
|
|
492
|
+
/**
 * Sleeps before a page retry using capped exponential backoff.
 *
 * Delay is 100ms for attempt <= 1, doubling for each further attempt, and
 * capped at 1000ms. A missing or non-numeric attempt falls back to the
 * 100ms base delay instead of producing a NaN timer delay.
 *
 * The timer is intentionally NOT unref()'d: the caller awaits this promise,
 * and an unref'd timer does not keep the event loop alive, so the process
 * could exit in the middle of the backoff and the retry would never run.
 *
 * @param attempt Number of attempts already recorded for the page.
 * @returns Promise that resolves once the delay has elapsed.
 */
async function waitForCheckpointRetryBackoff(attempt) {
    let exponent = Math.max(0, attempt - 1);
    if (Number.isNaN(exponent)) {
        exponent = 0;
    }
    const delayMs = Math.min(1000, 100 * 2 ** exponent);
    await new Promise((resolvePromise) => {
        setTimeout(resolvePromise, delayMs);
    });
}
|
|
172
499
|
async function withWorkerPageTimeout(promise, timeoutMs, errorMessage) {
|
|
173
500
|
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
|
|
174
501
|
return promise;
|
|
@@ -249,6 +576,59 @@ function buildInspectionSummary(inspection) {
|
|
|
249
576
|
ocrRecoveredPageCount: 0,
|
|
250
577
|
};
|
|
251
578
|
}
|
|
579
|
+
/**
 * Counts the page checkpoints whose `ocrSource` is 'local-ocr' or
 * 'vision-ocr'.
 */
function countCheckpointOcrRecoveredPages(checkpoints) {
    return checkpoints.reduce((recovered, page) => {
        const viaOcr = page.ocrSource === 'local-ocr' || page.ocrSource === 'vision-ocr';
        return viaOcr ? recovered + 1 : recovered;
    }, 0);
}
|
|
582
|
+
/**
 * Raises `result.inspectionSummary.ocrRecoveredPageCount` to at least the
 * number of checkpoints that were recovered through OCR.
 *
 * The result is returned unchanged when no checkpoint was OCR-recovered or
 * when the result has no inspection summary; otherwise a copy with the
 * updated summary is returned (the input is not mutated).
 *
 * @param result      Ingest result, possibly carrying `inspectionSummary`.
 * @param checkpoints Page checkpoints of the job.
 */
function applyCheckpointOcrRecoveredSummary(result, checkpoints) {
    const recoveredPageCount = countCheckpointOcrRecoveredPages(checkpoints);
    if (recoveredPageCount === 0 || !result.inspectionSummary) {
        return result;
    }
    // Math.max(undefined, n) is NaN, so a summary without the field counts as 0.
    const reportedCount = result.inspectionSummary.ocrRecoveredPageCount ?? 0;
    return {
        ...result,
        inspectionSummary: {
            ...result.inspectionSummary,
            ocrRecoveredPageCount: Math.max(reportedCount, recoveredPageCount),
        },
    };
}
|
|
595
|
+
/**
 * Provides page checkpoints for a job that has no inspection to derive
 * them from.
 *
 * Existing checkpoints are returned untouched. Otherwise `pageCount`
 * pending placeholders are created, numbered consecutively from the start
 * of the source page range, else the segmentation page range, else page 1.
 *
 * @param job       Ingest job.
 * @param pageCount Number of placeholder checkpoints to create.
 * @param timestamp Value stored as each placeholder's `updatedAt`.
 */
function buildFallbackWorkerCheckpoints(job, pageCount, timestamp) {
    const existingCheckpoints = job.checkpoints.pages;
    if (existingCheckpoints.length > 0) {
        return existingCheckpoints;
    }
    const firstPageNumber = job.source.pageRange?.[0] ?? job.segmentation?.pageRange?.[0] ?? 1;
    const toPlaceholder = (_unused, offset) => ({
        pageNumber: firstPageNumber + offset,
        classification: null,
        sourceKind: 'pdf-page',
        weight: 1,
        status: 'pending',
        attempts: 0,
        updatedAt: timestamp,
    });
    return Array.from({ length: pageCount }, toPlaceholder);
}
|
|
610
|
+
/**
 * Best-effort page count for a job whose inspection yielded no pages.
 *
 * Returns the job's recorded `source.totalPages` when it is positive.
 * Otherwise counts the pages of the source file with
 * countDocumentPdfPages, falling back to
 * inferPdfDocumentPageCountFallback if that throws, and to 0 if both fail.
 * When the job has a source page range, the counted total is narrowed to
 * the number of pages of that range that exist in the file (never negative).
 */
async function inferWorkerPdfPageCount(job) {
    if (job.source.totalPages > 0) {
        return job.source.totalPages;
    }
    // Narrow a whole-file page count to the job's page range, when it has one.
    const scopeToPageRange = (fullPageCount) => {
        const pageRange = job.source.pageRange;
        if (!pageRange) {
            return fullPageCount;
        }
        const lastAvailablePage = Math.min(fullPageCount, pageRange[1]);
        return Math.max(0, lastAvailablePage - pageRange[0] + 1);
    };
    try {
        return scopeToPageRange(await countDocumentPdfPages(job.source.filePath));
    }
    catch {
        // Primary counter failed; try the fallback before giving up.
        try {
            return scopeToPageRange(inferPdfDocumentPageCountFallback(job.source.filePath));
        }
        catch {
            return 0;
        }
    }
}
|
|
252
632
|
function summarizeReviewReasons(findings) {
|
|
253
633
|
return uniqueStrings(findings
|
|
254
634
|
.filter((finding) => finding.severity !== 'advisory')
|
|
@@ -289,19 +669,15 @@ function buildSyntheticBoreholeResult(job, inspection, now) {
|
|
|
289
669
|
message: `${normalFailedPages.length} page(s) failed during ingest and should be reviewed.`,
|
|
290
670
|
});
|
|
291
671
|
}
|
|
292
|
-
return {
|
|
672
|
+
return applyCheckpointOcrRecoveredSummary({
|
|
293
673
|
kind: 'geotech-ingest-result',
|
|
294
674
|
schemaVersion: 1,
|
|
295
675
|
documentType: 'borehole-log',
|
|
296
676
|
generatedAt: nowIso(now),
|
|
297
|
-
source: {
|
|
298
|
-
filePath: job.source.filePath,
|
|
299
|
-
fileName: basename(job.source.filePath),
|
|
300
|
-
inputKind: 'pdf',
|
|
301
|
-
totalPages: job.source.totalPages,
|
|
677
|
+
source: buildJobResultSource(job, {
|
|
302
678
|
successfulPages: 0,
|
|
303
679
|
failedPages: pageFailures.length,
|
|
304
|
-
},
|
|
680
|
+
}),
|
|
305
681
|
inspection,
|
|
306
682
|
inspectionSummary: summarizeBoreholeIngestInspection(inspection),
|
|
307
683
|
boreholes: [],
|
|
@@ -323,7 +699,7 @@ function buildSyntheticBoreholeResult(job, inspection, now) {
|
|
|
323
699
|
reviewRequired: reviewFindings.some((finding) => finding.severity !== 'advisory'),
|
|
324
700
|
confidence: 0,
|
|
325
701
|
canAutoProceed: false,
|
|
326
|
-
};
|
|
702
|
+
}, job.checkpoints.pages);
|
|
327
703
|
}
|
|
328
704
|
function buildSyntheticGeotechDocumentResult(job, inspection, now) {
|
|
329
705
|
const pageFailures = job.checkpoints.pages
|
|
@@ -360,19 +736,15 @@ function buildSyntheticGeotechDocumentResult(job, inspection, now) {
|
|
|
360
736
|
message: `${normalFailedPages.length} page(s) failed during ingest and should be reviewed.`,
|
|
361
737
|
});
|
|
362
738
|
}
|
|
363
|
-
return {
|
|
739
|
+
return applyCheckpointOcrRecoveredSummary({
|
|
364
740
|
kind: 'geotech-ingest-result',
|
|
365
741
|
schemaVersion: 1,
|
|
366
742
|
documentType: 'geotech-document',
|
|
367
743
|
generatedAt: nowIso(now),
|
|
368
|
-
source: {
|
|
369
|
-
filePath: job.source.filePath,
|
|
370
|
-
fileName: basename(job.source.filePath),
|
|
371
|
-
inputKind: 'pdf',
|
|
372
|
-
totalPages: job.source.totalPages,
|
|
744
|
+
source: buildJobResultSource(job, {
|
|
373
745
|
successfulPages: 0,
|
|
374
746
|
failedPages: pageFailures.length,
|
|
375
|
-
},
|
|
747
|
+
}),
|
|
376
748
|
inspection,
|
|
377
749
|
inspectionSummary: buildInspectionSummary(inspection),
|
|
378
750
|
documentClass: null,
|
|
@@ -402,7 +774,7 @@ function buildSyntheticGeotechDocumentResult(job, inspection, now) {
|
|
|
402
774
|
confidence: 0,
|
|
403
775
|
reviewRequired: reviewFindings.some((finding) => finding.severity !== 'advisory'),
|
|
404
776
|
canAutoProceed: false,
|
|
405
|
-
};
|
|
777
|
+
}, job.checkpoints.pages);
|
|
406
778
|
}
|
|
407
779
|
function dedupeReviewFindings(reviewFindings) {
|
|
408
780
|
return [
|
|
@@ -531,6 +903,7 @@ async function processGeotechDocumentPage(job, pageInput, config, dependencies)
|
|
|
531
903
|
const lowYieldRole = inferPreflightLowYieldPageRole({
|
|
532
904
|
inspectionPage,
|
|
533
905
|
previousInspectionPage: job.inspection?.pages[pageInput.pageNumber - 2],
|
|
906
|
+
nextInspectionPage: job.inspection?.pages[pageInput.pageNumber],
|
|
534
907
|
pageNumber: pageInput.pageNumber,
|
|
535
908
|
totalPages: pageInput.totalPages,
|
|
536
909
|
sourceKind: pageInput.sourceKind,
|
|
@@ -662,11 +1035,7 @@ async function finalizeJobResult(job, pageInputs, config, dependencies) {
|
|
|
662
1035
|
try {
|
|
663
1036
|
const result = await ingestBoreholeLogDocument({
|
|
664
1037
|
config,
|
|
665
|
-
source: {
|
|
666
|
-
filePath: job.source.filePath,
|
|
667
|
-
fileName: basename(job.source.filePath),
|
|
668
|
-
inputKind: 'pdf',
|
|
669
|
-
},
|
|
1038
|
+
source: buildJobResultSource(job, { successfulPages: 0, failedPages: 0 }),
|
|
670
1039
|
overrideBoreholeId: job.request.overrideBoreholeId,
|
|
671
1040
|
inspection: job.inspection,
|
|
672
1041
|
pages: job.checkpoints.pages
|
|
@@ -695,7 +1064,7 @@ async function finalizeJobResult(job, pageInputs, config, dependencies) {
|
|
|
695
1064
|
},
|
|
696
1065
|
now: dependencies.now,
|
|
697
1066
|
});
|
|
698
|
-
return applyBoreholeFailureDowngrades(result, job.checkpoints.pages);
|
|
1067
|
+
return applyCheckpointOcrRecoveredSummary(applyBoreholeFailureDowngrades(result, job.checkpoints.pages), job.checkpoints.pages);
|
|
699
1068
|
}
|
|
700
1069
|
catch (error) {
|
|
701
1070
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -709,11 +1078,7 @@ async function finalizeJobResult(job, pageInputs, config, dependencies) {
|
|
|
709
1078
|
try {
|
|
710
1079
|
const result = await ingestGeotechDocument({
|
|
711
1080
|
config,
|
|
712
|
-
source: {
|
|
713
|
-
filePath: job.source.filePath,
|
|
714
|
-
fileName: basename(job.source.filePath),
|
|
715
|
-
inputKind: 'pdf',
|
|
716
|
-
},
|
|
1081
|
+
source: buildJobResultSource(job, { successfulPages: 0, failedPages: 0 }),
|
|
717
1082
|
inspection: job.inspection,
|
|
718
1083
|
pages: job.checkpoints.pages
|
|
719
1084
|
.map((checkpoint) => geotechPageInputMap.get(checkpoint.pageNumber))
|
|
@@ -752,7 +1117,7 @@ async function finalizeJobResult(job, pageInputs, config, dependencies) {
|
|
|
752
1117
|
},
|
|
753
1118
|
now: dependencies.now,
|
|
754
1119
|
});
|
|
755
|
-
return applyGeotechFailureDowngrades(result, job.checkpoints.pages);
|
|
1120
|
+
return applyCheckpointOcrRecoveredSummary(applyGeotechFailureDowngrades(result, job.checkpoints.pages), job.checkpoints.pages);
|
|
756
1121
|
}
|
|
757
1122
|
catch (error) {
|
|
758
1123
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -805,28 +1170,62 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
805
1170
|
? currentJob.inspection
|
|
806
1171
|
: inspect(currentJob.source.filePath);
|
|
807
1172
|
if (!currentJob.inspection || currentJob.inspection.totalPages === 0) {
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
1173
|
+
if (inspection.totalPages > 0) {
|
|
1174
|
+
await mutateJob((job) => ({
|
|
1175
|
+
...job,
|
|
1176
|
+
inspection,
|
|
1177
|
+
source: {
|
|
1178
|
+
...job.source,
|
|
1179
|
+
totalPages: inspection.totalPages,
|
|
1180
|
+
weightedPageCost: inspection.pages.reduce((sum, page) => sum + (page.classification === 'image-only' || page.classification === 'text-unreadable' ? 2 : 1), 0),
|
|
1181
|
+
},
|
|
1182
|
+
processing: {
|
|
1183
|
+
...job.processing,
|
|
1184
|
+
chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(job.config, inspection),
|
|
1185
|
+
},
|
|
1186
|
+
checkpoints: {
|
|
1187
|
+
pages: inspection.pages.map((page) => job.checkpoints.pages.find((existing) => existing.pageNumber === page.pageNumber) ?? ({
|
|
1188
|
+
pageNumber: page.pageNumber,
|
|
1189
|
+
classification: page.classification,
|
|
1190
|
+
sourceKind: mapPageSourceKind(page.classification),
|
|
1191
|
+
weight: page.classification === 'image-only' || page.classification === 'text-unreadable' ? 2 : 1,
|
|
1192
|
+
status: 'pending',
|
|
1193
|
+
attempts: 0,
|
|
1194
|
+
updatedAt: nowIso(dependencies.now),
|
|
1195
|
+
})),
|
|
1196
|
+
},
|
|
1197
|
+
}));
|
|
1198
|
+
}
|
|
1199
|
+
else {
|
|
1200
|
+
const inferredPageCount = await inferWorkerPdfPageCount(currentJob);
|
|
1201
|
+
if (inferredPageCount > 0) {
|
|
1202
|
+
await mutateJob((job) => {
|
|
1203
|
+
const timestamp = nowIso(dependencies.now);
|
|
1204
|
+
return {
|
|
1205
|
+
...job,
|
|
1206
|
+
inspection: null,
|
|
1207
|
+
source: {
|
|
1208
|
+
...job.source,
|
|
1209
|
+
totalPages: Math.max(job.source.totalPages, inferredPageCount),
|
|
1210
|
+
weightedPageCost: Math.max(job.source.weightedPageCost, inferredPageCount),
|
|
1211
|
+
},
|
|
1212
|
+
processing: {
|
|
1213
|
+
...job.processing,
|
|
1214
|
+
chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(job.config, null),
|
|
1215
|
+
},
|
|
1216
|
+
checkpoints: {
|
|
1217
|
+
pages: buildFallbackWorkerCheckpoints(job, inferredPageCount, timestamp),
|
|
1218
|
+
},
|
|
1219
|
+
};
|
|
1220
|
+
});
|
|
1221
|
+
}
|
|
1222
|
+
}
|
|
828
1223
|
}
|
|
1224
|
+
currentJob = loadPersistedIngestJob(jobId) ?? currentJob;
|
|
829
1225
|
const config = buildJobConfig(currentJob, dependencies);
|
|
1226
|
+
if (currentJob.documentType === 'geotech-document' && currentJob.segmentation?.mode === 'segmented-parent') {
|
|
1227
|
+
return await runSegmentedParentGeotechJob(jobId, currentJob, mutateJob, config, dependencies);
|
|
1228
|
+
}
|
|
830
1229
|
const pageInputs = await preparePdfPageInputs(currentJob.source.filePath, currentJob.inspection, currentJob.processing.pagePreprocessingConcurrency, dependencies);
|
|
831
1230
|
let processedNewPages = 0;
|
|
832
1231
|
if (currentJob.documentType === 'geotech-document') {
|
|
@@ -878,8 +1277,45 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
878
1277
|
: checkpoint),
|
|
879
1278
|
},
|
|
880
1279
|
}));
|
|
881
|
-
|
|
882
|
-
|
|
1280
|
+
let processed = null;
|
|
1281
|
+
let finalErrorMessage = '';
|
|
1282
|
+
for (let attemptIndex = 0; attemptIndex < 2; attemptIndex += 1) {
|
|
1283
|
+
try {
|
|
1284
|
+
processed = await processGeotechDocumentPage(currentJob, page, config, dependencies);
|
|
1285
|
+
break;
|
|
1286
|
+
}
|
|
1287
|
+
catch (error) {
|
|
1288
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1289
|
+
const normalizedMessage = normalizeCheckpointErrorMessage(message);
|
|
1290
|
+
if (attemptIndex === 0
|
|
1291
|
+
&& isRetryableUpstreamPageError(normalizedMessage)
|
|
1292
|
+
&& !isFatalProviderStopError(normalizedMessage)) {
|
|
1293
|
+
await waitForCheckpointRetryBackoff(findCheckpoint(currentJob, page.pageNumber).attempts);
|
|
1294
|
+
await mutateJob((job) => ({
|
|
1295
|
+
...job,
|
|
1296
|
+
updatedAt: nowIso(dependencies.now),
|
|
1297
|
+
execution: {
|
|
1298
|
+
...job.execution,
|
|
1299
|
+
lastHeartbeatAt: nowIso(dependencies.now),
|
|
1300
|
+
},
|
|
1301
|
+
checkpoints: {
|
|
1302
|
+
pages: job.checkpoints.pages.map((checkpoint) => checkpoint.pageNumber === page.pageNumber
|
|
1303
|
+
? {
|
|
1304
|
+
...checkpoint,
|
|
1305
|
+
attempts: checkpoint.attempts + 1,
|
|
1306
|
+
updatedAt: nowIso(dependencies.now),
|
|
1307
|
+
error: `retrying after upstream timeout: ${normalizedMessage}`,
|
|
1308
|
+
}
|
|
1309
|
+
: checkpoint),
|
|
1310
|
+
},
|
|
1311
|
+
}));
|
|
1312
|
+
continue;
|
|
1313
|
+
}
|
|
1314
|
+
finalErrorMessage = normalizedMessage;
|
|
1315
|
+
break;
|
|
1316
|
+
}
|
|
1317
|
+
}
|
|
1318
|
+
if (processed) {
|
|
883
1319
|
processedNewPages += 1;
|
|
884
1320
|
await mutateJob((job) => ({
|
|
885
1321
|
...job,
|
|
@@ -906,9 +1342,8 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
906
1342
|
},
|
|
907
1343
|
}));
|
|
908
1344
|
}
|
|
909
|
-
|
|
910
|
-
const
|
|
911
|
-
const normalizedMessage = normalizeCheckpointErrorMessage(message);
|
|
1345
|
+
else {
|
|
1346
|
+
const normalizedMessage = finalErrorMessage || `Page ${page.pageNumber} failed during async ingest.`;
|
|
912
1347
|
const checkpoint = findCheckpoint(currentJob, page.pageNumber);
|
|
913
1348
|
if (!fatalProviderStopMessage && isFatalProviderStopError(normalizedMessage)) {
|
|
914
1349
|
fatalProviderStopMessage = normalizedMessage;
|
|
@@ -993,8 +1428,45 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
993
1428
|
: pageCheckpoint),
|
|
994
1429
|
},
|
|
995
1430
|
}));
|
|
996
|
-
|
|
997
|
-
|
|
1431
|
+
let processed = null;
|
|
1432
|
+
let finalErrorMessage = '';
|
|
1433
|
+
for (let attemptIndex = 0; attemptIndex < 2; attemptIndex += 1) {
|
|
1434
|
+
try {
|
|
1435
|
+
processed = await processBoreholePage(currentJob, page, config, state, dependencies);
|
|
1436
|
+
break;
|
|
1437
|
+
}
|
|
1438
|
+
catch (error) {
|
|
1439
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1440
|
+
const normalizedMessage = normalizeCheckpointErrorMessage(message);
|
|
1441
|
+
if (attemptIndex === 0
|
|
1442
|
+
&& isRetryableUpstreamPageError(normalizedMessage)
|
|
1443
|
+
&& !isFatalProviderStopError(normalizedMessage)) {
|
|
1444
|
+
await waitForCheckpointRetryBackoff(findCheckpoint(currentJob, page.pageNumber).attempts);
|
|
1445
|
+
await mutateJob((job) => ({
|
|
1446
|
+
...job,
|
|
1447
|
+
updatedAt: nowIso(dependencies.now),
|
|
1448
|
+
execution: {
|
|
1449
|
+
...job.execution,
|
|
1450
|
+
lastHeartbeatAt: nowIso(dependencies.now),
|
|
1451
|
+
},
|
|
1452
|
+
checkpoints: {
|
|
1453
|
+
pages: job.checkpoints.pages.map((pageCheckpoint) => pageCheckpoint.pageNumber === page.pageNumber
|
|
1454
|
+
? {
|
|
1455
|
+
...pageCheckpoint,
|
|
1456
|
+
attempts: pageCheckpoint.attempts + 1,
|
|
1457
|
+
updatedAt: nowIso(dependencies.now),
|
|
1458
|
+
error: `retrying after upstream timeout: ${normalizedMessage}`,
|
|
1459
|
+
}
|
|
1460
|
+
: pageCheckpoint),
|
|
1461
|
+
},
|
|
1462
|
+
}));
|
|
1463
|
+
continue;
|
|
1464
|
+
}
|
|
1465
|
+
finalErrorMessage = normalizedMessage;
|
|
1466
|
+
break;
|
|
1467
|
+
}
|
|
1468
|
+
}
|
|
1469
|
+
if (processed) {
|
|
998
1470
|
processedNewPages += 1;
|
|
999
1471
|
state = processed.nextState;
|
|
1000
1472
|
await mutateJob((job) => ({
|
|
@@ -1022,9 +1494,8 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
1022
1494
|
},
|
|
1023
1495
|
}));
|
|
1024
1496
|
}
|
|
1025
|
-
|
|
1026
|
-
const
|
|
1027
|
-
const normalizedMessage = normalizeCheckpointErrorMessage(message);
|
|
1497
|
+
else {
|
|
1498
|
+
const normalizedMessage = finalErrorMessage || `Page ${page.pageNumber} failed during async ingest.`;
|
|
1028
1499
|
await mutateJob((job) => ({
|
|
1029
1500
|
...job,
|
|
1030
1501
|
updatedAt: nowIso(dependencies.now),
|
|
@@ -1058,7 +1529,7 @@ export async function runPersistedIngestJobWorker(jobId, dependencies = {}) {
|
|
|
1058
1529
|
...pageCheckpoint,
|
|
1059
1530
|
status: 'failed',
|
|
1060
1531
|
updatedAt: nowIso(dependencies.now),
|
|
1061
|
-
error: `skipped after upstream provider stop. ${
|
|
1532
|
+
error: `skipped after upstream provider stop. ${normalizedMessage}`,
|
|
1062
1533
|
downgraded: false,
|
|
1063
1534
|
}
|
|
1064
1535
|
: pageCheckpoint),
|