geotechcli 0.4.24 → 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/config.d.ts.map +1 -1
- package/dist/commands/config.js +17 -3
- package/dist/commands/config.js.map +1 -1
- package/dist/commands/ingest.d.ts.map +1 -1
- package/dist/commands/ingest.js +356 -31
- package/dist/commands/ingest.js.map +1 -1
- package/dist/ui/browser.d.ts +5 -0
- package/dist/ui/browser.d.ts.map +1 -0
- package/dist/ui/browser.js +40 -0
- package/dist/ui/browser.js.map +1 -0
- package/dist/ui/plot-viewer.d.ts.map +1 -1
- package/dist/ui/plot-viewer.js +4 -36
- package/dist/ui/plot-viewer.js.map +1 -1
- package/dist/util/flags.d.ts +1 -0
- package/dist/util/flags.d.ts.map +1 -1
- package/dist/util/flags.js +6 -3
- package/dist/util/flags.js.map +1 -1
- package/package.json +2 -2
package/dist/commands/ingest.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { basename, parse } from 'node:path';
|
|
1
|
+
import { basename, join, parse } from 'node:path';
|
|
2
2
|
import { writeFileSync } from 'node:fs';
|
|
3
3
|
import { Command } from 'commander';
|
|
4
4
|
import chalk from 'chalk';
|
|
5
|
-
import { approvePersistedBoreholeIngestReview, buildIngestDossier, buildLLMConfig, cancelPersistedIngestJob, computeWeightedPdfPageCost, createAndStartPersistedIngestJob, DEFAULT_LLM_VISION_MODEL, ingestBoreholeLogDocument, ingestGeotechDocument, inspectPdfDocument, listPersistedBoreholeIngestReviewApprovals, listPersistedBoreholeIngestReviews, loadLatestPersistedBoreholeIngestReviewApproval, loadLatestPersistedBoreholeIngestReview, loadPersistedIngestJob, loadPersistedIngestJobResult, loadPersistedBoreholeIngestReviewApproval, loadPersistedBoreholeIngestReview, persistBoreholeIngestReview, promotePersistedBoreholeIngestReview, resolvePersistedIngestJobExtractionConcurrency, renderIngestDossierAsHtml, resumePersistedIngestJob, shouldUseAsyncIngestJob, waitForPersistedIngestJob, } from '@geotechcli/core';
|
|
5
|
+
import { approvePersistedBoreholeIngestReview, buildPersistedIngestJobSegments, buildIngestDossier, buildLLMConfig, cancelPersistedIngestJob, computeWeightedPdfPageCost, createAndStartPersistedIngestJob, DEFAULT_LLM_VISION_MODEL, HOSTED_BETA_EFFECTIVE_PAGE_LIMIT, ingestBoreholeLogDocument, ingestGeotechDocument, inspectPdfDocument, listPersistedBoreholeIngestReviewApprovals, listPersistedBoreholeIngestReviews, loadLatestPersistedBoreholeIngestReviewApproval, loadLatestPersistedBoreholeIngestReview, loadPersistedIngestJob, loadPersistedIngestJobResult, loadPersistedBoreholeIngestReviewApproval, loadPersistedBoreholeIngestReview, persistBoreholeIngestReview, promotePersistedBoreholeIngestReview, resolvePersistedIngestJobExtractionConcurrency, renderIngestDossierAsHtml, resumePersistedIngestJob, shouldSegmentHostedBetaLongPdf, shouldUseAsyncIngestJob, slicePdfInspectionToRange, waitForPersistedIngestJob, writePdfPageSubset, } from '@geotechcli/core';
|
|
6
6
|
import { heading, keyValue, renderJSON, renderTable, success, error, info, warn } from '../ui/terminal.js';
|
|
7
7
|
import { addGlobalFlags, getGlobalFlags } from '../util/flags.js';
|
|
8
8
|
import { estimateHostedBetaVisionBodyBytes, formatByteSize, HOSTED_BETA_REQUEST_LIMIT_BYTES, HOSTED_BETA_REQUEST_SAFE_BYTES, countPdfPages, readVisionInput, readVisionPdfPageInputs, } from '../util/vision-output.js';
|
|
9
|
+
import { openFileInBrowser } from '../ui/browser.js';
|
|
9
10
|
function formatMaybe(value, suffix = '') {
|
|
10
11
|
if (value == null || value === '')
|
|
11
12
|
return 'Unavailable';
|
|
@@ -62,8 +63,14 @@ function writeHtmlDossier(result, options) {
|
|
|
62
63
|
});
|
|
63
64
|
const outputPath = options.outputPath ?? defaultDossierOutputPath(options.sourceLabel);
|
|
64
65
|
writeFileSync(outputPath, renderIngestDossierAsHtml(dossier));
|
|
65
|
-
|
|
66
|
-
|
|
66
|
+
const opened = options.open === false ? false : openFileInBrowser(outputPath);
|
|
67
|
+
success(opened
|
|
68
|
+
? `HTML ingest dossier opened in your browser: ${outputPath}`
|
|
69
|
+
: `HTML ingest dossier saved to ${outputPath}`);
|
|
70
|
+
return { outputPath, opened };
|
|
71
|
+
}
|
|
72
|
+
/**
 * Decide whether a written HTML dossier should also be opened in the browser.
 *
 * Opening is the default: only an explicit `openInteractivePlot === false`
 * on the flags object turns it off. Any other value (including a missing
 * property) keeps it on.
 *
 * @param {object} flags - Resolved global CLI flags.
 * @returns {boolean} `true` unless `flags.openInteractivePlot` is exactly `false`.
 */
function shouldOpenHtmlDossier(flags) {
    const explicitlyDisabled = flags.openInteractivePlot === false;
    return !explicitlyDisabled;
}
|
|
68
75
|
function startProgress(flags, text) {
|
|
69
76
|
if (flags.json || flags.quiet) {
|
|
@@ -179,6 +186,30 @@ function asOptionalPositiveInteger(value) {
|
|
|
179
186
|
/**
 * Normalize an arbitrary value to a trimmed, non-empty string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|undefined} The trimmed string, or `undefined` when the
 *   value is not a string or is empty/whitespace-only.
 */
function asOptionalTrimmedString(value) {
    if (typeof value !== 'string') {
        return undefined;
    }
    const trimmed = value.trim();
    if (trimmed === '') {
        return undefined;
    }
    return trimmed;
}
|
|
189
|
+
/**
 * Parse a `--page-range` option value of the form `start:end`.
 *
 * Whitespace around the whole value and around the colon is tolerated.
 * Non-string or blank input means "no range requested".
 *
 * @param {unknown} value - Raw option value.
 * @returns {{ startPage: number, endPage: number } | undefined} The parsed
 *   1-based inclusive range, or `undefined` when no range was supplied.
 * @throws {Error} When the text is not `start:end`, when `start` is below 1,
 *   or when `end` is below `start`.
 */
function parsePageRange(value) {
    const text = typeof value === 'string' ? value.trim() : '';
    if (text === '') {
        return undefined;
    }
    const parts = /^(\d+)\s*:\s*(\d+)$/.exec(text);
    if (parts === null) {
        throw new Error(`Invalid --page-range "${String(value)}". Use start:end, for example 61:102.`);
    }
    const [, startDigits, endDigits] = parts;
    const startPage = Number.parseInt(startDigits, 10);
    const endPage = Number.parseInt(endDigits, 10);
    // Digit runs too long to parse to a finite number fail the integer check.
    const bothIntegers = Number.isInteger(startPage) && Number.isInteger(endPage);
    const wellOrdered = startPage >= 1 && endPage >= startPage;
    if (!(bothIntegers && wellOrdered)) {
        throw new Error(`Invalid --page-range "${String(value)}". Use start:end with positive page numbers.`);
    }
    return { startPage, endPage };
}
|
|
205
|
+
/**
 * Render a page range as `start-end` for display.
 *
 * Accepts either a two-element array (`[start, end]`) or an object with
 * `startPage` / `endPage` properties.
 *
 * @param {Array|{ startPage: *, endPage: * }|null|undefined} range
 * @returns {string|undefined} The formatted range, or `undefined` when
 *   `range` is falsy.
 */
function formatPageRange(range) {
    if (!range) {
        return undefined;
    }
    const isTuple = Array.isArray(range);
    const first = isTuple ? range[0] : range.startPage;
    const last = isTuple ? range[1] : range.endPage;
    return `${first}-${last}`;
}
|
|
182
213
|
function getCommandOptionValue(commandLike, key) {
|
|
183
214
|
return typeof commandLike?.getOptionValue === 'function'
|
|
184
215
|
? commandLike.getOptionValue(key)
|
|
@@ -200,6 +231,9 @@ function getRawOptionValue(commandLike, key) {
|
|
|
200
231
|
if (typeof current !== 'string') {
|
|
201
232
|
continue;
|
|
202
233
|
}
|
|
234
|
+
if (key === 'open' && current === '--no-open') {
|
|
235
|
+
return false;
|
|
236
|
+
}
|
|
203
237
|
if (current === flag) {
|
|
204
238
|
const next = rawArgs[index + 1];
|
|
205
239
|
return typeof next === 'string' && !next.startsWith('--') ? next : true;
|
|
@@ -571,15 +605,22 @@ function createPersistedReviewApprovalLookupDryRun(projectId, reviewDatasetName,
|
|
|
571
605
|
};
|
|
572
606
|
}
|
|
573
607
|
function resolveCommandOptions(opts, commandLike, extraKeys = []) {
|
|
574
|
-
const
|
|
575
|
-
|
|
608
|
+
const commandSource = commandLike ?? opts;
|
|
609
|
+
const rawOpts = isRecord(opts) && typeof opts.optsWithGlobals !== 'function'
|
|
610
|
+
? opts
|
|
611
|
+
: {};
|
|
612
|
+
const resolvedOpts = typeof commandSource?.optsWithGlobals === 'function'
|
|
613
|
+
? commandSource.optsWithGlobals()
|
|
576
614
|
: undefined;
|
|
577
615
|
const resolved = {
|
|
578
|
-
...
|
|
616
|
+
...rawOpts,
|
|
579
617
|
...(isRecord(resolvedOpts) ? resolvedOpts : {}),
|
|
580
618
|
};
|
|
581
|
-
for (const key of ['json', 'quiet', 'dryRun', 'output', ...extraKeys]) {
|
|
582
|
-
|
|
619
|
+
for (const key of ['json', 'quiet', 'dryRun', 'output', 'open', 'noOpen', ...extraKeys]) {
|
|
620
|
+
if (resolved[key] !== undefined) {
|
|
621
|
+
continue;
|
|
622
|
+
}
|
|
623
|
+
const value = getRawOptionValue(commandSource, key) ?? getCommandOptionValue(rawOpts, key) ?? getCommandOptionValue(commandSource, key);
|
|
583
624
|
if (value !== undefined) {
|
|
584
625
|
resolved[key] = value;
|
|
585
626
|
}
|
|
@@ -838,6 +879,43 @@ function normalizeIngestJobRecord(value) {
|
|
|
838
879
|
: undefined,
|
|
839
880
|
}
|
|
840
881
|
: undefined;
|
|
882
|
+
const segmentation = isRecord(value.segmentation)
|
|
883
|
+
? {
|
|
884
|
+
mode: asOptionalTrimmedString(value.segmentation.mode) ?? 'single',
|
|
885
|
+
pageRange: Array.isArray(value.segmentation.pageRange)
|
|
886
|
+
&& value.segmentation.pageRange.length === 2
|
|
887
|
+
&& typeof value.segmentation.pageRange[0] === 'number'
|
|
888
|
+
&& typeof value.segmentation.pageRange[1] === 'number'
|
|
889
|
+
? [value.segmentation.pageRange[0], value.segmentation.pageRange[1]]
|
|
890
|
+
: undefined,
|
|
891
|
+
effectivePageLimit: asOptionalPositiveInteger(value.segmentation.effectivePageLimit),
|
|
892
|
+
segmentCount: asOptionalPositiveInteger(value.segmentation.segmentCount),
|
|
893
|
+
segments: Array.isArray(value.segmentation.segments)
|
|
894
|
+
? value.segmentation.segments.flatMap((segment) => {
|
|
895
|
+
if (!isRecord(segment)) {
|
|
896
|
+
return [];
|
|
897
|
+
}
|
|
898
|
+
const segmentIndex = asOptionalPositiveInteger(segment.segmentIndex);
|
|
899
|
+
const startPage = asOptionalPositiveInteger(segment.startPage);
|
|
900
|
+
const endPage = asOptionalPositiveInteger(segment.endPage);
|
|
901
|
+
if (segmentIndex == null || startPage == null || endPage == null) {
|
|
902
|
+
return [];
|
|
903
|
+
}
|
|
904
|
+
return [{
|
|
905
|
+
segmentIndex,
|
|
906
|
+
startPage,
|
|
907
|
+
endPage,
|
|
908
|
+
status: asOptionalTrimmedString(segment.status),
|
|
909
|
+
childJobId: asOptionalTrimmedString(segment.childJobId),
|
|
910
|
+
completedPages: asOptionalPositiveInteger(segment.completedPages),
|
|
911
|
+
failedPages: typeof segment.failedPages === 'number' && Number.isFinite(segment.failedPages)
|
|
912
|
+
? segment.failedPages
|
|
913
|
+
: undefined,
|
|
914
|
+
}];
|
|
915
|
+
})
|
|
916
|
+
: [],
|
|
917
|
+
}
|
|
918
|
+
: undefined;
|
|
841
919
|
return {
|
|
842
920
|
jobId,
|
|
843
921
|
documentType,
|
|
@@ -854,6 +932,12 @@ function normalizeIngestJobRecord(value) {
|
|
|
854
932
|
weightedPageCost: typeof source.weightedPageCost === 'number' && Number.isFinite(source.weightedPageCost)
|
|
855
933
|
? source.weightedPageCost
|
|
856
934
|
: 0,
|
|
935
|
+
pageRange: Array.isArray(source.pageRange)
|
|
936
|
+
&& source.pageRange.length === 2
|
|
937
|
+
&& typeof source.pageRange[0] === 'number'
|
|
938
|
+
&& typeof source.pageRange[1] === 'number'
|
|
939
|
+
? [source.pageRange[0], source.pageRange[1]]
|
|
940
|
+
: undefined,
|
|
857
941
|
},
|
|
858
942
|
processing: {
|
|
859
943
|
pagePreprocessingConcurrency: asOptionalPositiveInteger(processing.pagePreprocessingConcurrency) ?? 0,
|
|
@@ -870,6 +954,7 @@ function normalizeIngestJobRecord(value) {
|
|
|
870
954
|
lastError: asOptionalTrimmedString(execution.lastError),
|
|
871
955
|
cancelRequested: execution.cancelRequested === true,
|
|
872
956
|
},
|
|
957
|
+
segmentation,
|
|
873
958
|
pageCounts,
|
|
874
959
|
pages,
|
|
875
960
|
result,
|
|
@@ -883,6 +968,18 @@ function renderIngestJobRecord(job, options) {
|
|
|
883
968
|
keyValue('Source', job.source.fileName ?? job.source.filePath ?? 'Unknown');
|
|
884
969
|
keyValue('Pages', String(job.source.totalPages));
|
|
885
970
|
keyValue('Weighted page cost', String(job.source.weightedPageCost));
|
|
971
|
+
if (job.source.pageRange) {
|
|
972
|
+
keyValue('Selected range', formatPageRange(job.source.pageRange) ?? 'Unavailable');
|
|
973
|
+
}
|
|
974
|
+
if (job.segmentation && job.segmentation.mode !== 'single') {
|
|
975
|
+
keyValue('Segmentation mode', job.segmentation.mode);
|
|
976
|
+
if (job.segmentation.segmentCount) {
|
|
977
|
+
keyValue('Segments', String(job.segmentation.segmentCount));
|
|
978
|
+
}
|
|
979
|
+
if (job.segmentation.effectivePageLimit) {
|
|
980
|
+
keyValue('Effective-page window', String(job.segmentation.effectivePageLimit));
|
|
981
|
+
}
|
|
982
|
+
}
|
|
886
983
|
keyValue('Completed pages', String(job.pageCounts.completed));
|
|
887
984
|
keyValue('Failed pages', String(job.pageCounts.failed));
|
|
888
985
|
keyValue('Pending pages', String(job.pageCounts.pending));
|
|
@@ -922,6 +1019,17 @@ function renderIngestJobRecord(job, options) {
|
|
|
922
1019
|
if (job.result?.persistedReview?.datasetName) {
|
|
923
1020
|
keyValue('Stored review', job.result.persistedReview.datasetName);
|
|
924
1021
|
}
|
|
1022
|
+
if (job.segmentation?.segments.length) {
|
|
1023
|
+
console.log('');
|
|
1024
|
+
console.log(chalk.white(' Segments:'));
|
|
1025
|
+
renderTable(['Segment', 'Pages', 'Status', 'Completed', 'Failed'], job.segmentation.segments.map((segment) => [
|
|
1026
|
+
String(segment.segmentIndex),
|
|
1027
|
+
`${segment.startPage}-${segment.endPage}`,
|
|
1028
|
+
segment.status ?? 'queued',
|
|
1029
|
+
segment.completedPages != null ? String(segment.completedPages) : '-',
|
|
1030
|
+
segment.failedPages != null ? String(segment.failedPages) : '-',
|
|
1031
|
+
]));
|
|
1032
|
+
}
|
|
925
1033
|
const downgradedPages = job.pages.filter((page) => page.downgraded);
|
|
926
1034
|
const failedPages = job.pages.filter((page) => page.status === 'failed');
|
|
927
1035
|
if (failedPages.length > 0) {
|
|
@@ -983,6 +1091,81 @@ function renderIngestJobResult(job) {
|
|
|
983
1091
|
persistedReview,
|
|
984
1092
|
});
|
|
985
1093
|
}
|
|
1094
|
+
/**
 * Print a short key/value summary of a completed ingest result.
 *
 * Rows are emitted in a fixed order: document type, source, page counts,
 * type-specific extraction counts, confidence, review flags, and then the
 * optional stored-review and HTML-dossier rows. A blank line follows.
 *
 * @param {object} result - Ingest result; reads `documentType`, `source`,
 *   `confidence`, `reviewRequired`, `canAutoProceed`, and either
 *   `materials`/`parameters` or `boreholes` depending on the document type.
 * @param {object} [options]
 * @param {string} [options.title] - Heading text; defaults to
 *   'Geotechnical Ingest Result'.
 * @param {string} [options.sourceLabel] - Overrides the source shown.
 * @param {object} [options.persistedReview] - When set, its `datasetName` is shown.
 * @param {object} [options.htmlDossier] - When set, its `outputPath` and
 *   `opened` state are shown.
 */
function renderCompactIngestResultSummary(result, options = {}) {
    const yesNo = (flag) => (flag ? 'Yes' : 'No');
    const { title, sourceLabel, persistedReview, htmlDossier } = options;

    heading(title ?? 'Geotechnical Ingest Result');
    keyValue('Document type', result.documentType);
    keyValue('Source', sourceLabel ?? result.source.fileName ?? result.source.filePath ?? 'Unknown');
    keyValue('Pages processed', `${result.source.successfulPages}/${result.source.totalPages}`);

    // Any type other than 'geotech-document' is reported as a borehole result.
    const isGeotechDocument = result.documentType === 'geotech-document';
    if (!isGeotechDocument) {
        keyValue('Boreholes extracted', String(result.boreholes.length));
    }
    else {
        keyValue('Materials', String(result.materials.length));
        keyValue('Parameters', String(result.parameters.length));
    }

    keyValue('Confidence', `${result.confidence}%`);
    keyValue('Review required', yesNo(result.reviewRequired));
    keyValue('Auto proceed', yesNo(result.canAutoProceed));

    if (persistedReview) {
        keyValue('Stored review', persistedReview.datasetName);
    }
    if (htmlDossier) {
        keyValue('HTML dossier', htmlDossier.outputPath);
        keyValue('Opened', yesNo(htmlDossier.opened));
    }
    console.log('');
}
|
|
1118
|
+
/**
 * Build the one-line progress message for a normalized ingest job record.
 *
 * The total is the largest of the declared page count, the sum of the
 * completed/failed/pending counters, and 1, so the denominator is never zero.
 * Resolved pages (completed + failed) are capped at that total. Pending is
 * the larger of the reported pending count and the unresolved remainder.
 *
 * @param {object} job - Normalized job; reads `pageCounts`, `source.totalPages`,
 *   `status`, and optionally `segmentation.segments`.
 * @returns {string} Message of the form
 *   `Ingest progress: R/T pages resolved (...) - <status>`.
 */
function formatIngestJobProgress(job) {
    const { completed, failed, pending } = job.pageCounts;
    const settled = completed + failed;
    const totalPages = Math.max(job.source.totalPages, settled + pending, 1);
    const resolvedPages = Math.min(totalPages, settled);
    const pendingPages = Math.max(pending, totalPages - resolvedPages);

    const details = [`${completed} completed`];
    if (failed > 0) {
        details.push(`${failed} failed`);
    }
    details.push(`${pendingPages} pending`);

    const segments = job.segmentation?.segments ?? [];
    if (segments.length > 0) {
        // A segment counts as resolved once it has either completed or failed.
        let resolvedSegments = 0;
        for (const segment of segments) {
            if (segment.status === 'completed' || segment.status === 'failed') {
                resolvedSegments += 1;
            }
        }
        details.push(`${resolvedSegments}/${segments.length} segments resolved`);
    }

    return `Ingest progress: ${resolvedPages}/${totalPages} pages resolved (${details.join(', ')}) - ${job.status}`;
}
|
|
1130
|
+
/**
 * Tell whether a thrown value is the wait-timeout error for a given job.
 *
 * Detection is by message text: the value must be an `Error` whose message
 * contains the timeout phrase naming this exact job id.
 *
 * @param {unknown} err - The caught value.
 * @param {string} jobId - Job id expected inside the message.
 * @returns {boolean}
 */
function isWaitTimeoutError(err, jobId) {
    if (!(err instanceof Error)) {
        return false;
    }
    const timeoutMarker = `Timed out while waiting for persisted ingest job "${jobId}"`;
    return err.message.includes(timeoutMarker);
}
|
|
1134
|
+
/**
 * Wait for a persisted ingest job to finish, printing progress as it changes.
 *
 * With `flags.json` or `flags.quiet` set, this delegates directly to
 * `waitForPersistedIngestJob` and prints nothing. Otherwise it waits in
 * short windows (250 ms poll, 1000 ms timeout). Each timeout is treated as a
 * "not done yet" signal: the job record is reloaded and a progress line is
 * printed if it differs from the last one printed.
 *
 * @param {string} jobId - Persisted ingest job id.
 * @param {object} flags - Resolved global flags; reads `json` and `quiet`.
 * @returns {Promise<object>} The record returned by `waitForPersistedIngestJob`.
 * @throws Re-throws any error from the wait call that is not recognized as
 *   this job's wait timeout.
 */
async function waitForPersistedIngestJobWithLiveProgress(jobId, flags) {
    if (flags.json || flags.quiet) {
        return waitForPersistedIngestJob(jobId);
    }
    info(`Waiting for ingest job ${jobId} to finish...`);

    let lastPrinted = '';
    // Prints a progress line for the record unless it cannot be normalized
    // or the line is identical to the one printed last.
    const reportProgress = (record) => {
        const normalized = normalizeIngestJobRecord(record);
        if (!normalized) {
            return;
        }
        const line = formatIngestJobProgress(normalized);
        if (line === lastPrinted) {
            return;
        }
        info(line);
        lastPrinted = line;
    };

    for (;;) {
        let finished;
        try {
            finished = await waitForPersistedIngestJob(jobId, { pollMs: 250, timeoutMs: 1000 });
        }
        catch (err) {
            if (!isWaitTimeoutError(err, jobId)) {
                throw err;
            }
            // Timed out: show the latest persisted state and keep waiting.
            // The loader's return value is used directly, without await.
            reportProgress(loadPersistedIngestJob(jobId));
            continue;
        }
        reportProgress(finished);
        return finished;
    }
}
|
|
986
1169
|
export function registerIngestCommand(program) {
|
|
987
1170
|
const cmd = new Command('ingest')
|
|
988
1171
|
.description('Extract structured geotechnical data from image/PDF documents')
|
|
@@ -990,13 +1173,16 @@ export function registerIngestCommand(program) {
|
|
|
990
1173
|
.argument('<file>', 'Path to a geotechnical image or PDF document')
|
|
991
1174
|
.option('--type <type>', 'Document type to ingest', 'borehole-log')
|
|
992
1175
|
.option('--format <format>', 'Result presentation format: plain or html', 'plain')
|
|
1176
|
+
.option('--page-range <start:end>', 'Restrict PDF ingest to a contiguous page range, for example 61:102')
|
|
993
1177
|
.option('--borehole-id <id>', 'Override borehole ID for a single continuous borehole log')
|
|
994
1178
|
.option('--project <id>', 'Persist the ingest review into a stored project')
|
|
1179
|
+
.option('--background', 'Create a resumable ingest job and return immediately')
|
|
995
1180
|
.action(async (filePath, opts) => {
|
|
996
1181
|
const flags = getGlobalFlags(opts);
|
|
997
1182
|
const outputFormat = resolveIngestPresentationFormat(opts.format);
|
|
998
1183
|
assertIngestPresentationMode(flags, outputFormat);
|
|
999
1184
|
const wantsHtmlDossier = shouldRenderHtmlDossier(outputFormat, flags.output);
|
|
1185
|
+
const runJobInBackground = Boolean(opts.background) || flags.json || flags.quiet;
|
|
1000
1186
|
const documentType = String(opts.type ?? 'borehole-log').toLowerCase();
|
|
1001
1187
|
const supportedTypes = new Set(['borehole-log', 'geotech-document']);
|
|
1002
1188
|
if (!supportedTypes.has(documentType)) {
|
|
@@ -1006,6 +1192,13 @@ export function registerIngestCommand(program) {
|
|
|
1006
1192
|
try {
|
|
1007
1193
|
const file = readVisionInput(filePath);
|
|
1008
1194
|
describeVisionInput(file, flags);
|
|
1195
|
+
const config = buildLLMConfig();
|
|
1196
|
+
const selectedPageRange = file.kind === 'pdf'
|
|
1197
|
+
? parsePageRange(opts.pageRange)
|
|
1198
|
+
: undefined;
|
|
1199
|
+
if (selectedPageRange && file.kind !== 'pdf') {
|
|
1200
|
+
throw new Error('--page-range is only supported for PDF ingest.');
|
|
1201
|
+
}
|
|
1009
1202
|
let countedPdfPages = null;
|
|
1010
1203
|
if (file.kind === 'pdf') {
|
|
1011
1204
|
try {
|
|
@@ -1015,21 +1208,63 @@ export function registerIngestCommand(program) {
|
|
|
1015
1208
|
countedPdfPages = null;
|
|
1016
1209
|
}
|
|
1017
1210
|
}
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
const
|
|
1211
|
+
if (selectedPageRange && countedPdfPages != null && selectedPageRange.endPage > countedPdfPages) {
|
|
1212
|
+
throw new Error(`--page-range ${selectedPageRange.startPage}:${selectedPageRange.endPage} exceeds the PDF page count (${countedPdfPages}).`);
|
|
1213
|
+
}
|
|
1214
|
+
const shouldInspectPdf = file.kind === 'pdf'
|
|
1215
|
+
&& (selectedPageRange != null
|
|
1216
|
+
|| countedPdfPages == null
|
|
1217
|
+
|| countedPdfPages <= 5
|
|
1218
|
+
|| documentType === 'geotech-document');
|
|
1219
|
+
const inspection = file.kind === 'pdf' && shouldInspectPdf
|
|
1022
1220
|
? inspectPdfDocument(filePath)
|
|
1023
1221
|
: null;
|
|
1024
|
-
const
|
|
1025
|
-
|
|
1222
|
+
const fullInspection = inspection && inspection.totalPages > 0 ? inspection : null;
|
|
1223
|
+
if (file.kind === 'pdf' && countedPdfPages == null && !fullInspection && !selectedPageRange) {
|
|
1224
|
+
throw new Error('Could not determine the PDF page count. The file may be encrypted, damaged, or use an unsupported PDF structure.');
|
|
1225
|
+
}
|
|
1226
|
+
if (selectedPageRange && fullInspection && selectedPageRange.endPage > fullInspection.totalPages) {
|
|
1227
|
+
throw new Error(`--page-range ${selectedPageRange.startPage}:${selectedPageRange.endPage} exceeds the PDF page count (${fullInspection.totalPages}).`);
|
|
1228
|
+
}
|
|
1229
|
+
const effectiveInspection = fullInspection && selectedPageRange
|
|
1230
|
+
? slicePdfInspectionToRange(fullInspection, selectedPageRange)
|
|
1231
|
+
: fullInspection;
|
|
1232
|
+
const totalPages = effectiveInspection?.totalPages
|
|
1233
|
+
?? countedPdfPages
|
|
1234
|
+
?? (selectedPageRange ? (selectedPageRange.endPage - selectedPageRange.startPage + 1) : null)
|
|
1235
|
+
?? fullInspection?.totalPages
|
|
1026
1236
|
?? effectiveInspection?.totalPages
|
|
1027
1237
|
?? 1;
|
|
1028
1238
|
const weightedPageCost = file.kind === 'pdf'
|
|
1029
1239
|
? (effectiveInspection ? computeWeightedPdfPageCost(effectiveInspection) : totalPages)
|
|
1030
1240
|
: 1;
|
|
1241
|
+
const shouldCreateSegmentedParent = file.kind === 'pdf'
|
|
1242
|
+
&& shouldSegmentHostedBetaLongPdf(documentType, config, fullInspection, selectedPageRange);
|
|
1243
|
+
const segmentationSummary = shouldCreateSegmentedParent && fullInspection
|
|
1244
|
+
? {
|
|
1245
|
+
mode: 'segmented-parent',
|
|
1246
|
+
pageRange: [
|
|
1247
|
+
selectedPageRange?.startPage ?? 1,
|
|
1248
|
+
selectedPageRange?.endPage ?? fullInspection.totalPages,
|
|
1249
|
+
],
|
|
1250
|
+
effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
|
|
1251
|
+
segmentCount: buildPersistedIngestJobSegments(fullInspection, {
|
|
1252
|
+
pageRange: selectedPageRange,
|
|
1253
|
+
effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
|
|
1254
|
+
}).length,
|
|
1255
|
+
segments: buildPersistedIngestJobSegments(fullInspection, {
|
|
1256
|
+
pageRange: selectedPageRange,
|
|
1257
|
+
effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
|
|
1258
|
+
}).map((segment, index, segments) => ({
|
|
1259
|
+
...segment,
|
|
1260
|
+
segmentIndex: index + 1,
|
|
1261
|
+
segmentCount: segments.length,
|
|
1262
|
+
status: 'queued',
|
|
1263
|
+
})),
|
|
1264
|
+
}
|
|
1265
|
+
: undefined;
|
|
1031
1266
|
const shouldRunAsJob = file.kind === 'pdf'
|
|
1032
|
-
&& (
|
|
1267
|
+
&& (shouldCreateSegmentedParent || shouldUseAsyncIngestJob(effectiveInspection, totalPages));
|
|
1033
1268
|
if (flags.dryRun) {
|
|
1034
1269
|
if (shouldRunAsJob) {
|
|
1035
1270
|
const dryRun = {
|
|
@@ -1044,7 +1279,9 @@ export function registerIngestCommand(program) {
|
|
|
1044
1279
|
weightedPageCost,
|
|
1045
1280
|
wouldCreateBackgroundJob: true,
|
|
1046
1281
|
pagePreprocessingConcurrency: 2,
|
|
1047
|
-
chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(
|
|
1282
|
+
chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(config, effectiveInspection, segmentationSummary),
|
|
1283
|
+
pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
|
|
1284
|
+
segmentation: segmentationSummary,
|
|
1048
1285
|
pageClassifications: effectiveInspection?.pages.map((page) => ({
|
|
1049
1286
|
pageNumber: page.pageNumber,
|
|
1050
1287
|
classification: page.classification,
|
|
@@ -1062,6 +1299,12 @@ export function registerIngestCommand(program) {
|
|
|
1062
1299
|
keyValue('Pages', String(totalPages));
|
|
1063
1300
|
keyValue('Weighted page cost', String(weightedPageCost));
|
|
1064
1301
|
keyValue('Would create background job', 'Yes');
|
|
1302
|
+
if (selectedPageRange) {
|
|
1303
|
+
keyValue('Selected range', formatPageRange(selectedPageRange) ?? 'Unavailable');
|
|
1304
|
+
}
|
|
1305
|
+
if (segmentationSummary?.segments?.length) {
|
|
1306
|
+
keyValue('Segmentation', `Hosted-beta best-result window is ${HOSTED_BETA_EFFECTIVE_PAGE_LIMIT} effective pages; processing as ${segmentationSummary.segments.map((segment) => `${segment.startPage}-${segment.endPage}`).join(', ')}`);
|
|
1307
|
+
}
|
|
1065
1308
|
if (opts.project) {
|
|
1066
1309
|
keyValue('Project', String(opts.project));
|
|
1067
1310
|
}
|
|
@@ -1081,6 +1324,7 @@ export function registerIngestCommand(program) {
|
|
|
1081
1324
|
wouldUseHostedVision: true,
|
|
1082
1325
|
projectId: opts.project,
|
|
1083
1326
|
totalPages,
|
|
1327
|
+
pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
|
|
1084
1328
|
pageClassifications: effectiveInspection?.pages.map((page) => ({
|
|
1085
1329
|
pageNumber: page.pageNumber,
|
|
1086
1330
|
classification: page.classification,
|
|
@@ -1099,6 +1343,9 @@ export function registerIngestCommand(program) {
|
|
|
1099
1343
|
keyValue('Source', filePath);
|
|
1100
1344
|
keyValue('Input kind', file.kind === 'pdf' ? 'pdf' : 'image');
|
|
1101
1345
|
keyValue('Pages', String(dryRun.totalPages));
|
|
1346
|
+
if (selectedPageRange) {
|
|
1347
|
+
keyValue('Selected range', formatPageRange(selectedPageRange) ?? 'Unavailable');
|
|
1348
|
+
}
|
|
1102
1349
|
if (opts.project) {
|
|
1103
1350
|
keyValue('Project', String(opts.project));
|
|
1104
1351
|
}
|
|
@@ -1115,18 +1362,20 @@ export function registerIngestCommand(program) {
|
|
|
1115
1362
|
return;
|
|
1116
1363
|
}
|
|
1117
1364
|
if (shouldRunAsJob) {
|
|
1118
|
-
if (
|
|
1365
|
+
if (runJobInBackground && wantsHtmlDossier) {
|
|
1119
1366
|
throw new Error('HTML ingest dossiers are generated from completed results. Start the job first, then run geotech ingest wait <jobId> --format html --output <file>.');
|
|
1120
1367
|
}
|
|
1121
1368
|
spinner = startProgress(flags, 'Creating resumable ingest job...');
|
|
1122
|
-
const config = buildLLMConfig();
|
|
1123
1369
|
const job = createAndStartPersistedIngestJob({
|
|
1124
1370
|
documentType: documentType,
|
|
1125
1371
|
filePath,
|
|
1126
1372
|
inspection: effectiveInspection,
|
|
1373
|
+
totalPagesFallback: totalPages,
|
|
1127
1374
|
config,
|
|
1128
1375
|
projectId: opts.project,
|
|
1129
1376
|
overrideBoreholeId: opts.boreholeId,
|
|
1377
|
+
pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
|
|
1378
|
+
segmentation: segmentationSummary,
|
|
1130
1379
|
});
|
|
1131
1380
|
const normalizedJob = normalizeIngestJobRecord(job);
|
|
1132
1381
|
spinner?.succeed(`Ingest job started: ${job.jobId}`);
|
|
@@ -1134,20 +1383,65 @@ export function registerIngestCommand(program) {
|
|
|
1134
1383
|
renderJSON(job);
|
|
1135
1384
|
return;
|
|
1136
1385
|
}
|
|
1137
|
-
if (normalizedJob) {
|
|
1386
|
+
if (runJobInBackground && normalizedJob && !flags.quiet) {
|
|
1138
1387
|
renderIngestJobRecord(normalizedJob, {
|
|
1139
1388
|
title: 'Geotechnical Ingest Job Started',
|
|
1140
1389
|
includeCommands: true,
|
|
1141
1390
|
});
|
|
1142
1391
|
}
|
|
1143
|
-
if (flags.
|
|
1392
|
+
if (runJobInBackground && segmentationSummary?.segments?.length && !flags.quiet) {
|
|
1393
|
+
info(`Hosted-beta best-result window is ${HOSTED_BETA_EFFECTIVE_PAGE_LIMIT} effective pages; processing as linked segments ${segmentationSummary.segments.map((segment) => `${segment.startPage}-${segment.endPage}`).join(' and ')}.`);
|
|
1394
|
+
}
|
|
1395
|
+
if (runJobInBackground && flags.output) {
|
|
1144
1396
|
writeFileSync(flags.output, JSON.stringify(job, null, 2));
|
|
1145
|
-
|
|
1397
|
+
if (!flags.quiet) {
|
|
1398
|
+
success(`Job details saved to ${flags.output}`);
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
if (runJobInBackground) {
|
|
1402
|
+
return;
|
|
1403
|
+
}
|
|
1404
|
+
const waitedRecord = await waitForPersistedIngestJobWithLiveProgress(job.jobId, flags);
|
|
1405
|
+
const completedJob = normalizeIngestJobRecord(waitedRecord);
|
|
1406
|
+
if (!completedJob) {
|
|
1407
|
+
throw new Error(`Persisted ingest job "${job.jobId}" could not be normalized.`);
|
|
1408
|
+
}
|
|
1409
|
+
if (completedJob.status !== 'completed' || !completedJob.result?.ingestResult) {
|
|
1410
|
+
renderIngestJobRecord(completedJob, { title: 'Geotechnical Ingest Job Status', includeCommands: true });
|
|
1411
|
+
throw new Error(`Persisted ingest job "${job.jobId}" finished with status "${completedJob.status}".`);
|
|
1412
|
+
}
|
|
1413
|
+
const completedResult = completedJob.result.ingestResult;
|
|
1414
|
+
const persistedReview = completedJob.result.persistedReview
|
|
1415
|
+
? {
|
|
1416
|
+
projectId: completedJob.request.projectId ?? 'Unknown',
|
|
1417
|
+
datasetName: completedJob.result.persistedReview.datasetName,
|
|
1418
|
+
reviewId: completedJob.result.persistedReview.reviewId,
|
|
1419
|
+
createdAt: completedJob.result.persistedReview.createdAt,
|
|
1420
|
+
}
|
|
1421
|
+
: null;
|
|
1422
|
+
if (wantsHtmlDossier) {
|
|
1423
|
+
const htmlDossier = writeHtmlDossier(completedResult, {
|
|
1424
|
+
outputPath: flags.output,
|
|
1425
|
+
open: shouldOpenHtmlDossier(flags),
|
|
1426
|
+
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? completedJob.jobId,
|
|
1427
|
+
storedReview: persistedReview,
|
|
1428
|
+
});
|
|
1429
|
+
renderCompactIngestResultSummary(completedResult, {
|
|
1430
|
+
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? completedJob.jobId,
|
|
1431
|
+
persistedReview,
|
|
1432
|
+
htmlDossier,
|
|
1433
|
+
});
|
|
1434
|
+
}
|
|
1435
|
+
else {
|
|
1436
|
+
renderIngestJobResult(completedJob);
|
|
1437
|
+
if (flags.output) {
|
|
1438
|
+
writeFileSync(flags.output, JSON.stringify(completedJob.result, null, 2));
|
|
1439
|
+
success(`Results saved to ${flags.output}`);
|
|
1440
|
+
}
|
|
1146
1441
|
}
|
|
1147
1442
|
return;
|
|
1148
1443
|
}
|
|
1149
1444
|
spinner = startProgress(flags, 'Running geotechnical ingest...');
|
|
1150
|
-
const config = buildLLMConfig();
|
|
1151
1445
|
const requestDetails = {
|
|
1152
1446
|
prompt: documentType === 'borehole-log'
|
|
1153
1447
|
? 'Extract structured borehole log data.'
|
|
@@ -1158,7 +1452,16 @@ export function registerIngestCommand(program) {
|
|
|
1158
1452
|
};
|
|
1159
1453
|
const result = file.kind === 'pdf'
|
|
1160
1454
|
? await (async () => {
|
|
1161
|
-
const
|
|
1455
|
+
const scopedInspection = selectedPageRange && fullInspection
|
|
1456
|
+
? slicePdfInspectionToRange(fullInspection, selectedPageRange, { rebasePageNumbers: true })
|
|
1457
|
+
: effectiveInspection;
|
|
1458
|
+
const pdfInputPath = selectedPageRange
|
|
1459
|
+
? join(process.cwd(), 'tmp', 'cli-page-ranges', `${slugifyOutputStem(basename(filePath))}-pages-${selectedPageRange.startPage}-${selectedPageRange.endPage}.pdf`)
|
|
1460
|
+
: filePath;
|
|
1461
|
+
if (selectedPageRange) {
|
|
1462
|
+
await writePdfPageSubset(filePath, selectedPageRange, pdfInputPath);
|
|
1463
|
+
}
|
|
1464
|
+
const pageInputs = await readVisionPdfPageInputs(pdfInputPath, { inspection: scopedInspection });
|
|
1162
1465
|
for (const pageInput of pageInputs) {
|
|
1163
1466
|
maybeCheckHostedBetaVisionPayload(config, pageInput, requestDetails);
|
|
1164
1467
|
}
|
|
@@ -1169,9 +1472,10 @@ export function registerIngestCommand(program) {
|
|
|
1169
1472
|
filePath,
|
|
1170
1473
|
fileName: basename(filePath),
|
|
1171
1474
|
inputKind: 'pdf',
|
|
1475
|
+
pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
|
|
1172
1476
|
},
|
|
1173
1477
|
overrideBoreholeId: opts.boreholeId,
|
|
1174
|
-
inspection:
|
|
1478
|
+
inspection: scopedInspection,
|
|
1175
1479
|
pages: pageInputs,
|
|
1176
1480
|
})
|
|
1177
1481
|
: ingestGeotechDocument({
|
|
@@ -1180,8 +1484,9 @@ export function registerIngestCommand(program) {
|
|
|
1180
1484
|
filePath,
|
|
1181
1485
|
fileName: basename(filePath),
|
|
1182
1486
|
inputKind: 'pdf',
|
|
1487
|
+
pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
|
|
1183
1488
|
},
|
|
1184
|
-
inspection:
|
|
1489
|
+
inspection: scopedInspection,
|
|
1185
1490
|
pages: pageInputs,
|
|
1186
1491
|
});
|
|
1187
1492
|
})()
|
|
@@ -1247,6 +1552,7 @@ export function registerIngestCommand(program) {
|
|
|
1247
1552
|
if (wantsHtmlDossier) {
|
|
1248
1553
|
writeHtmlDossier(result, {
|
|
1249
1554
|
outputPath: flags.output,
|
|
1555
|
+
open: shouldOpenHtmlDossier(flags),
|
|
1250
1556
|
sourceLabel: result.source.fileName ?? result.source.filePath ?? filePath,
|
|
1251
1557
|
storedReview: persistedReviewDetails,
|
|
1252
1558
|
});
|
|
@@ -1333,6 +1639,7 @@ export function registerIngestCommand(program) {
|
|
|
1333
1639
|
const dossierDetails = buildPersistedReviewDossierDetails(record, resolvedProjectId);
|
|
1334
1640
|
writeHtmlDossier(record.result, {
|
|
1335
1641
|
outputPath: flags.output,
|
|
1642
|
+
open: shouldOpenHtmlDossier(flags),
|
|
1336
1643
|
sourceLabel: dossierDetails.sourceLabel,
|
|
1337
1644
|
storedReview: dossierDetails.storedReview,
|
|
1338
1645
|
approval: dossierDetails.approval,
|
|
@@ -1564,7 +1871,7 @@ export function registerIngestCommand(program) {
|
|
|
1564
1871
|
const outputFormat = resolveIngestPresentationFormat(resolvedOpts.format);
|
|
1565
1872
|
assertIngestPresentationMode(flags, outputFormat);
|
|
1566
1873
|
const wantsHtmlDossier = shouldRenderHtmlDossier(outputFormat, flags.output);
|
|
1567
|
-
const record = await
|
|
1874
|
+
const record = await waitForPersistedIngestJobWithLiveProgress(String(jobId), flags);
|
|
1568
1875
|
const normalized = normalizeIngestJobRecord(record);
|
|
1569
1876
|
if (!normalized) {
|
|
1570
1877
|
throw new Error(`Persisted ingest job "${jobId}" could not be normalized.`);
|
|
@@ -1590,18 +1897,27 @@ export function registerIngestCommand(program) {
|
|
|
1590
1897
|
createdAt: normalized.result.persistedReview.createdAt,
|
|
1591
1898
|
}
|
|
1592
1899
|
: null;
|
|
1593
|
-
renderIngestJobResult(normalized);
|
|
1594
1900
|
if (wantsHtmlDossier) {
|
|
1595
|
-
writeHtmlDossier(completedResult, {
|
|
1901
|
+
const htmlDossier = writeHtmlDossier(completedResult, {
|
|
1596
1902
|
outputPath: flags.output,
|
|
1903
|
+
open: shouldOpenHtmlDossier(flags),
|
|
1597
1904
|
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
|
|
1598
1905
|
storedReview: persistedReview,
|
|
1599
1906
|
});
|
|
1907
|
+
renderCompactIngestResultSummary(completedResult, {
|
|
1908
|
+
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
|
|
1909
|
+
persistedReview,
|
|
1910
|
+
htmlDossier,
|
|
1911
|
+
});
|
|
1600
1912
|
}
|
|
1601
1913
|
else if (flags.output) {
|
|
1914
|
+
renderIngestJobResult(normalized);
|
|
1602
1915
|
writeFileSync(flags.output, JSON.stringify(record.result, null, 2));
|
|
1603
1916
|
success(`Results saved to ${flags.output}`);
|
|
1604
1917
|
}
|
|
1918
|
+
else {
|
|
1919
|
+
renderIngestJobResult(normalized);
|
|
1920
|
+
}
|
|
1605
1921
|
});
|
|
1606
1922
|
const resumeCmd = new Command('resume')
|
|
1607
1923
|
.description('Resume a persisted geotechnical ingest job from completed checkpoints')
|
|
@@ -1661,18 +1977,27 @@ export function registerIngestCommand(program) {
|
|
|
1661
1977
|
createdAt: normalized.result.persistedReview.createdAt,
|
|
1662
1978
|
}
|
|
1663
1979
|
: null;
|
|
1664
|
-
renderIngestJobResult(normalized);
|
|
1665
1980
|
if (wantsHtmlDossier) {
|
|
1666
|
-
writeHtmlDossier(completedResult, {
|
|
1981
|
+
const htmlDossier = writeHtmlDossier(completedResult, {
|
|
1667
1982
|
outputPath: flags.output,
|
|
1983
|
+
open: shouldOpenHtmlDossier(flags),
|
|
1668
1984
|
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
|
|
1669
1985
|
storedReview: persistedReview,
|
|
1670
1986
|
});
|
|
1987
|
+
renderCompactIngestResultSummary(completedResult, {
|
|
1988
|
+
sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
|
|
1989
|
+
persistedReview,
|
|
1990
|
+
htmlDossier,
|
|
1991
|
+
});
|
|
1671
1992
|
}
|
|
1672
1993
|
else if (flags.output) {
|
|
1994
|
+
renderIngestJobResult(normalized);
|
|
1673
1995
|
writeFileSync(flags.output, JSON.stringify(result, null, 2));
|
|
1674
1996
|
success(`Results saved to ${flags.output}`);
|
|
1675
1997
|
}
|
|
1998
|
+
else {
|
|
1999
|
+
renderIngestJobResult(normalized);
|
|
2000
|
+
}
|
|
1676
2001
|
});
|
|
1677
2002
|
const cancelCmd = new Command('cancel')
|
|
1678
2003
|
.description('Cancel a persisted geotechnical ingest job')
|