geotechcli 0.4.24 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
- import { basename, parse } from 'node:path';
1
+ import { basename, join, parse } from 'node:path';
2
2
  import { writeFileSync } from 'node:fs';
3
3
  import { Command } from 'commander';
4
4
  import chalk from 'chalk';
5
- import { approvePersistedBoreholeIngestReview, buildIngestDossier, buildLLMConfig, cancelPersistedIngestJob, computeWeightedPdfPageCost, createAndStartPersistedIngestJob, DEFAULT_LLM_VISION_MODEL, ingestBoreholeLogDocument, ingestGeotechDocument, inspectPdfDocument, listPersistedBoreholeIngestReviewApprovals, listPersistedBoreholeIngestReviews, loadLatestPersistedBoreholeIngestReviewApproval, loadLatestPersistedBoreholeIngestReview, loadPersistedIngestJob, loadPersistedIngestJobResult, loadPersistedBoreholeIngestReviewApproval, loadPersistedBoreholeIngestReview, persistBoreholeIngestReview, promotePersistedBoreholeIngestReview, resolvePersistedIngestJobExtractionConcurrency, renderIngestDossierAsHtml, resumePersistedIngestJob, shouldUseAsyncIngestJob, waitForPersistedIngestJob, } from '@geotechcli/core';
5
+ import { approvePersistedBoreholeIngestReview, buildPersistedIngestJobSegments, buildIngestDossier, buildLLMConfig, cancelPersistedIngestJob, computeWeightedPdfPageCost, createAndStartPersistedIngestJob, DEFAULT_LLM_VISION_MODEL, HOSTED_BETA_EFFECTIVE_PAGE_LIMIT, ingestBoreholeLogDocument, ingestGeotechDocument, inspectPdfDocument, listPersistedBoreholeIngestReviewApprovals, listPersistedBoreholeIngestReviews, loadLatestPersistedBoreholeIngestReviewApproval, loadLatestPersistedBoreholeIngestReview, loadPersistedIngestJob, loadPersistedIngestJobResult, loadPersistedBoreholeIngestReviewApproval, loadPersistedBoreholeIngestReview, persistBoreholeIngestReview, promotePersistedBoreholeIngestReview, resolvePersistedIngestJobExtractionConcurrency, renderIngestDossierAsHtml, resumePersistedIngestJob, shouldSegmentHostedBetaLongPdf, shouldUseAsyncIngestJob, slicePdfInspectionToRange, waitForPersistedIngestJob, writePdfPageSubset, } from '@geotechcli/core';
6
6
  import { heading, keyValue, renderJSON, renderTable, success, error, info, warn } from '../ui/terminal.js';
7
7
  import { addGlobalFlags, getGlobalFlags } from '../util/flags.js';
8
8
  import { estimateHostedBetaVisionBodyBytes, formatByteSize, HOSTED_BETA_REQUEST_LIMIT_BYTES, HOSTED_BETA_REQUEST_SAFE_BYTES, countPdfPages, readVisionInput, readVisionPdfPageInputs, } from '../util/vision-output.js';
9
+ import { openFileInBrowser } from '../ui/browser.js';
9
10
  function formatMaybe(value, suffix = '') {
10
11
  if (value == null || value === '')
11
12
  return 'Unavailable';
@@ -62,8 +63,14 @@ function writeHtmlDossier(result, options) {
62
63
  });
63
64
  const outputPath = options.outputPath ?? defaultDossierOutputPath(options.sourceLabel);
64
65
  writeFileSync(outputPath, renderIngestDossierAsHtml(dossier));
65
- success(`HTML ingest dossier saved to ${outputPath}`);
66
- return outputPath;
66
+ const opened = options.open === false ? false : openFileInBrowser(outputPath);
67
+ success(opened
68
+ ? `HTML ingest dossier opened in your browser: ${outputPath}`
69
+ : `HTML ingest dossier saved to ${outputPath}`);
70
+ return { outputPath, opened };
71
+ }
72
/**
 * Decide whether a freshly written HTML dossier should be opened automatically.
 *
 * Opening is the default: only an explicit `false` on `flags.openInteractivePlot`
 * turns it off. A missing, `null`, or otherwise non-`false` value keeps it on.
 *
 * @param {object} flags - Resolved CLI flags.
 * @returns {boolean} `true` unless `flags.openInteractivePlot` is exactly `false`.
 */
function shouldOpenHtmlDossier(flags) {
    const optedOut = flags.openInteractivePlot === false;
    return !optedOut;
}
68
75
  function startProgress(flags, text) {
69
76
  if (flags.json || flags.quiet) {
@@ -179,6 +186,30 @@ function asOptionalPositiveInteger(value) {
179
186
  function asOptionalTrimmedString(value) {
180
187
  return typeof value === 'string' && value.trim() ? value.trim() : undefined;
181
188
  }
189
/**
 * Parse a `--page-range` option value written as `start:end`.
 *
 * Whitespace around the whole value and around the colon is tolerated.
 * The smallest accepted page number is 1 and `end` may equal `start`.
 *
 * @param {unknown} value - Raw option value. Non-strings and blank strings
 *   mean "no range requested".
 * @returns {{ startPage: number, endPage: number } | undefined} The parsed
 *   bounds, or `undefined` when no range was supplied.
 * @throws {Error} When the value is not of the form `start:end`, or when the
 *   bounds are not exactly representable positive integers with `start <= end`.
 */
function parsePageRange(value) {
    const normalized = typeof value === 'string' ? value.trim() : '';
    if (!normalized) {
        return undefined;
    }
    const match = normalized.match(/^(\d+)\s*:\s*(\d+)$/);
    if (!match) {
        throw new Error(`Invalid --page-range "${String(value)}". Use start:end, for example 61:102.`);
    }
    const startPage = Number.parseInt(match[1], 10);
    const endPage = Number.parseInt(match[2], 10);
    // Require *safe* integers: digit strings above 2^53 - 1 are silently rounded by
    // parseInt, so a reversed range such as "9007199254740993:9007199254740992"
    // would otherwise collapse to equal bounds and slip through the ordering check.
    const boundsAreExact = Number.isSafeInteger(startPage) && Number.isSafeInteger(endPage);
    if (!boundsAreExact || startPage < 1 || endPage < startPage) {
        throw new Error(`Invalid --page-range "${String(value)}". Use start:end with positive page numbers.`);
    }
    return { startPage, endPage };
}
205
/**
 * Render a page range as `start-end` for display.
 *
 * Accepts either a `[start, end]` pair or a `{ startPage, endPage }` object.
 *
 * @param {[number, number] | { startPage: number, endPage: number } | null | undefined} range
 *   The range to format.
 * @returns {string | undefined} `"start-end"`, or `undefined` when the range is
 *   missing or either bound is not a finite number, so callers can apply their
 *   own fallback text instead of printing `undefined-undefined`.
 */
function formatPageRange(range) {
    if (!range) {
        return undefined;
    }
    const [startPage, endPage] = Array.isArray(range)
        ? range
        : [range.startPage, range.endPage];
    // Malformed input (short array, missing fields, NaN) is treated as "no range"
    // rather than being interpolated into the label.
    if (!Number.isFinite(startPage) || !Number.isFinite(endPage)) {
        return undefined;
    }
    return `${startPage}-${endPage}`;
}
182
213
  function getCommandOptionValue(commandLike, key) {
183
214
  return typeof commandLike?.getOptionValue === 'function'
184
215
  ? commandLike.getOptionValue(key)
@@ -200,6 +231,9 @@ function getRawOptionValue(commandLike, key) {
200
231
  if (typeof current !== 'string') {
201
232
  continue;
202
233
  }
234
+ if (key === 'open' && current === '--no-open') {
235
+ return false;
236
+ }
203
237
  if (current === flag) {
204
238
  const next = rawArgs[index + 1];
205
239
  return typeof next === 'string' && !next.startsWith('--') ? next : true;
@@ -571,15 +605,22 @@ function createPersistedReviewApprovalLookupDryRun(projectId, reviewDatasetName,
571
605
  };
572
606
  }
573
607
  function resolveCommandOptions(opts, commandLike, extraKeys = []) {
574
- const resolvedOpts = typeof commandLike?.optsWithGlobals === 'function'
575
- ? commandLike.optsWithGlobals()
608
+ const commandSource = commandLike ?? opts;
609
+ const rawOpts = isRecord(opts) && typeof opts.optsWithGlobals !== 'function'
610
+ ? opts
611
+ : {};
612
+ const resolvedOpts = typeof commandSource?.optsWithGlobals === 'function'
613
+ ? commandSource.optsWithGlobals()
576
614
  : undefined;
577
615
  const resolved = {
578
- ...(isRecord(opts) ? opts : {}),
616
+ ...rawOpts,
579
617
  ...(isRecord(resolvedOpts) ? resolvedOpts : {}),
580
618
  };
581
- for (const key of ['json', 'quiet', 'dryRun', 'output', ...extraKeys]) {
582
- const value = getRawOptionValue(commandLike, key) ?? getCommandOptionValue(opts, key) ?? getCommandOptionValue(commandLike, key);
619
+ for (const key of ['json', 'quiet', 'dryRun', 'output', 'open', 'noOpen', ...extraKeys]) {
620
+ if (resolved[key] !== undefined) {
621
+ continue;
622
+ }
623
+ const value = getRawOptionValue(commandSource, key) ?? getCommandOptionValue(rawOpts, key) ?? getCommandOptionValue(commandSource, key);
583
624
  if (value !== undefined) {
584
625
  resolved[key] = value;
585
626
  }
@@ -838,6 +879,43 @@ function normalizeIngestJobRecord(value) {
838
879
  : undefined,
839
880
  }
840
881
  : undefined;
882
+ const segmentation = isRecord(value.segmentation)
883
+ ? {
884
+ mode: asOptionalTrimmedString(value.segmentation.mode) ?? 'single',
885
+ pageRange: Array.isArray(value.segmentation.pageRange)
886
+ && value.segmentation.pageRange.length === 2
887
+ && typeof value.segmentation.pageRange[0] === 'number'
888
+ && typeof value.segmentation.pageRange[1] === 'number'
889
+ ? [value.segmentation.pageRange[0], value.segmentation.pageRange[1]]
890
+ : undefined,
891
+ effectivePageLimit: asOptionalPositiveInteger(value.segmentation.effectivePageLimit),
892
+ segmentCount: asOptionalPositiveInteger(value.segmentation.segmentCount),
893
+ segments: Array.isArray(value.segmentation.segments)
894
+ ? value.segmentation.segments.flatMap((segment) => {
895
+ if (!isRecord(segment)) {
896
+ return [];
897
+ }
898
+ const segmentIndex = asOptionalPositiveInteger(segment.segmentIndex);
899
+ const startPage = asOptionalPositiveInteger(segment.startPage);
900
+ const endPage = asOptionalPositiveInteger(segment.endPage);
901
+ if (segmentIndex == null || startPage == null || endPage == null) {
902
+ return [];
903
+ }
904
+ return [{
905
+ segmentIndex,
906
+ startPage,
907
+ endPage,
908
+ status: asOptionalTrimmedString(segment.status),
909
+ childJobId: asOptionalTrimmedString(segment.childJobId),
910
+ completedPages: asOptionalPositiveInteger(segment.completedPages),
911
+ failedPages: typeof segment.failedPages === 'number' && Number.isFinite(segment.failedPages)
912
+ ? segment.failedPages
913
+ : undefined,
914
+ }];
915
+ })
916
+ : [],
917
+ }
918
+ : undefined;
841
919
  return {
842
920
  jobId,
843
921
  documentType,
@@ -854,6 +932,12 @@ function normalizeIngestJobRecord(value) {
854
932
  weightedPageCost: typeof source.weightedPageCost === 'number' && Number.isFinite(source.weightedPageCost)
855
933
  ? source.weightedPageCost
856
934
  : 0,
935
+ pageRange: Array.isArray(source.pageRange)
936
+ && source.pageRange.length === 2
937
+ && typeof source.pageRange[0] === 'number'
938
+ && typeof source.pageRange[1] === 'number'
939
+ ? [source.pageRange[0], source.pageRange[1]]
940
+ : undefined,
857
941
  },
858
942
  processing: {
859
943
  pagePreprocessingConcurrency: asOptionalPositiveInteger(processing.pagePreprocessingConcurrency) ?? 0,
@@ -870,6 +954,7 @@ function normalizeIngestJobRecord(value) {
870
954
  lastError: asOptionalTrimmedString(execution.lastError),
871
955
  cancelRequested: execution.cancelRequested === true,
872
956
  },
957
+ segmentation,
873
958
  pageCounts,
874
959
  pages,
875
960
  result,
@@ -883,6 +968,18 @@ function renderIngestJobRecord(job, options) {
883
968
  keyValue('Source', job.source.fileName ?? job.source.filePath ?? 'Unknown');
884
969
  keyValue('Pages', String(job.source.totalPages));
885
970
  keyValue('Weighted page cost', String(job.source.weightedPageCost));
971
+ if (job.source.pageRange) {
972
+ keyValue('Selected range', formatPageRange(job.source.pageRange) ?? 'Unavailable');
973
+ }
974
+ if (job.segmentation && job.segmentation.mode !== 'single') {
975
+ keyValue('Segmentation mode', job.segmentation.mode);
976
+ if (job.segmentation.segmentCount) {
977
+ keyValue('Segments', String(job.segmentation.segmentCount));
978
+ }
979
+ if (job.segmentation.effectivePageLimit) {
980
+ keyValue('Effective-page window', String(job.segmentation.effectivePageLimit));
981
+ }
982
+ }
886
983
  keyValue('Completed pages', String(job.pageCounts.completed));
887
984
  keyValue('Failed pages', String(job.pageCounts.failed));
888
985
  keyValue('Pending pages', String(job.pageCounts.pending));
@@ -922,6 +1019,17 @@ function renderIngestJobRecord(job, options) {
922
1019
  if (job.result?.persistedReview?.datasetName) {
923
1020
  keyValue('Stored review', job.result.persistedReview.datasetName);
924
1021
  }
1022
+ if (job.segmentation?.segments.length) {
1023
+ console.log('');
1024
+ console.log(chalk.white(' Segments:'));
1025
+ renderTable(['Segment', 'Pages', 'Status', 'Completed', 'Failed'], job.segmentation.segments.map((segment) => [
1026
+ String(segment.segmentIndex),
1027
+ `${segment.startPage}-${segment.endPage}`,
1028
+ segment.status ?? 'queued',
1029
+ segment.completedPages != null ? String(segment.completedPages) : '-',
1030
+ segment.failedPages != null ? String(segment.failedPages) : '-',
1031
+ ]));
1032
+ }
925
1033
  const downgradedPages = job.pages.filter((page) => page.downgraded);
926
1034
  const failedPages = job.pages.filter((page) => page.status === 'failed');
927
1035
  if (failedPages.length > 0) {
@@ -983,6 +1091,81 @@ function renderIngestJobResult(job) {
983
1091
  persistedReview,
984
1092
  });
985
1093
  }
1094
/**
 * Print a short key/value summary of a finished ingest result.
 *
 * Emits a heading, then document type, source, page counts, a per-type count
 * (materials and parameters for `geotech-document`, boreholes otherwise),
 * confidence and review flags. Stored-review and HTML-dossier rows are added
 * only when the matching option is supplied. Ends with a blank line.
 *
 * @param {object} result - Ingest result to summarise.
 * @param {object} [options]
 * @param {string} [options.title] - Heading text; defaults to 'Geotechnical Ingest Result'.
 * @param {string} [options.sourceLabel] - Overrides the source name taken from `result.source`.
 * @param {object} [options.persistedReview] - When set, its `datasetName` is shown.
 * @param {object} [options.htmlDossier] - When set, its `outputPath` and `opened` are shown.
 * @returns {void}
 */
function renderCompactIngestResultSummary(result, options = {}) {
    const yesNo = (flag) => (flag ? 'Yes' : 'No');
    const { title, sourceLabel, persistedReview, htmlDossier } = options;
    heading(title ?? 'Geotechnical Ingest Result');
    keyValue('Document type', result.documentType);
    keyValue('Source', sourceLabel ?? result.source.fileName ?? result.source.filePath ?? 'Unknown');
    keyValue('Pages processed', `${result.source.successfulPages}/${result.source.totalPages}`);
    if (result.documentType !== 'geotech-document') {
        keyValue('Boreholes extracted', String(result.boreholes.length));
    }
    else {
        keyValue('Materials', String(result.materials.length));
        keyValue('Parameters', String(result.parameters.length));
    }
    keyValue('Confidence', `${result.confidence}%`);
    keyValue('Review required', yesNo(result.reviewRequired));
    keyValue('Auto proceed', yesNo(result.canAutoProceed));
    if (persistedReview) {
        keyValue('Stored review', persistedReview.datasetName);
    }
    if (htmlDossier) {
        keyValue('HTML dossier', htmlDossier.outputPath);
        keyValue('Opened', yesNo(htmlDossier.opened));
    }
    console.log('');
}
1118
/**
 * Build the one-line progress message for an ingest job.
 *
 * The total is the largest of the source page count, the sum of the
 * completed/failed/pending counters, and 1. The pending figure is the larger
 * of the pending counter and whatever the total leaves unresolved. A segment
 * counts as resolved when its status is 'completed' or 'failed'.
 *
 * @param {object} job - Normalised job record; reads `source.totalPages`,
 *   `pageCounts`, `status`, and optionally `segmentation.segments`.
 * @returns {string} Human-readable progress line.
 */
function formatIngestJobProgress(job) {
    const { completed, failed, pending } = job.pageCounts;
    const accountedFor = completed + failed;
    const totalPages = Math.max(job.source.totalPages, accountedFor + pending, 1);
    const resolvedPages = Math.min(totalPages, accountedFor);
    const pendingPages = Math.max(pending, totalPages - resolvedPages);
    const parts = [`${completed} completed`];
    if (failed > 0) {
        parts.push(`${failed} failed`);
    }
    parts.push(`${pendingPages} pending`);
    const segments = job.segmentation?.segments ?? [];
    if (segments.length > 0) {
        const terminalStatuses = new Set(['completed', 'failed']);
        const resolvedSegments = segments.reduce((count, segment) => (terminalStatuses.has(segment.status) ? count + 1 : count), 0);
        parts.push(`${resolvedSegments}/${segments.length} segments resolved`);
    }
    return `Ingest progress: ${resolvedPages}/${totalPages} pages resolved (${parts.join(', ')}) - ${job.status}`;
}
1130
/**
 * Tell whether `err` is the wait-timeout error for a given ingest job.
 *
 * Matching is by message text: the error must be an `Error` whose message
 * contains `Timed out while waiting for persisted ingest job "<jobId>"`.
 *
 * @param {unknown} err - Caught value.
 * @param {string} jobId - Job identifier expected inside the message.
 * @returns {boolean} `true` only for a matching `Error` instance.
 */
function isWaitTimeoutError(err, jobId) {
    if (!(err instanceof Error)) {
        return false;
    }
    const timeoutMarker = `Timed out while waiting for persisted ingest job "${jobId}"`;
    return err.message.includes(timeoutMarker);
}
1134
/**
 * Wait for a persisted ingest job, printing progress lines while it runs.
 *
 * With `flags.json` or `flags.quiet` set, this defers directly to
 * `waitForPersistedIngestJob(jobId)` and prints nothing. Otherwise it waits in
 * repeated short calls (`pollMs: 250`, `timeoutMs: 1000`). Each time a call
 * ends with an error that `isWaitTimeoutError` recognises, the job record is
 * reloaded and a progress line is printed if the text has changed since the
 * last one. Any other error is rethrown unchanged.
 *
 * @param {string} jobId - Identifier of the persisted ingest job.
 * @param {object} flags - Resolved CLI flags; reads `json` and `quiet`.
 * @returns {Promise<object>} The record returned by `waitForPersistedIngestJob`.
 */
async function waitForPersistedIngestJobWithLiveProgress(jobId, flags) {
    const silent = flags.json || flags.quiet;
    if (silent) {
        return waitForPersistedIngestJob(jobId);
    }
    info(`Waiting for ingest job ${jobId} to finish...`);
    let lastProgress = '';
    // Print a progress line for `record` unless it cannot be normalised or the
    // text matches the line printed previously.
    const reportProgress = (record) => {
        const normalized = normalizeIngestJobRecord(record);
        if (!normalized) {
            return;
        }
        const progress = formatIngestJobProgress(normalized);
        if (progress === lastProgress) {
            return;
        }
        info(progress);
        lastProgress = progress;
    };
    for (;;) {
        try {
            const finishedRecord = await waitForPersistedIngestJob(jobId, { pollMs: 250, timeoutMs: 1000 });
            reportProgress(finishedRecord);
            return finishedRecord;
        }
        catch (err) {
            if (!isWaitTimeoutError(err, jobId)) {
                throw err;
            }
            // The short wait elapsed: refresh the record and keep waiting.
            reportProgress(loadPersistedIngestJob(jobId));
        }
    }
}
986
1169
  export function registerIngestCommand(program) {
987
1170
  const cmd = new Command('ingest')
988
1171
  .description('Extract structured geotechnical data from image/PDF documents')
@@ -990,13 +1173,16 @@ export function registerIngestCommand(program) {
990
1173
  .argument('<file>', 'Path to a geotechnical image or PDF document')
991
1174
  .option('--type <type>', 'Document type to ingest', 'borehole-log')
992
1175
  .option('--format <format>', 'Result presentation format: plain or html', 'plain')
1176
+ .option('--page-range <start:end>', 'Restrict PDF ingest to a contiguous page range, for example 61:102')
993
1177
  .option('--borehole-id <id>', 'Override borehole ID for a single continuous borehole log')
994
1178
  .option('--project <id>', 'Persist the ingest review into a stored project')
1179
+ .option('--background', 'Create a resumable ingest job and return immediately')
995
1180
  .action(async (filePath, opts) => {
996
1181
  const flags = getGlobalFlags(opts);
997
1182
  const outputFormat = resolveIngestPresentationFormat(opts.format);
998
1183
  assertIngestPresentationMode(flags, outputFormat);
999
1184
  const wantsHtmlDossier = shouldRenderHtmlDossier(outputFormat, flags.output);
1185
+ const runJobInBackground = Boolean(opts.background) || flags.json || flags.quiet;
1000
1186
  const documentType = String(opts.type ?? 'borehole-log').toLowerCase();
1001
1187
  const supportedTypes = new Set(['borehole-log', 'geotech-document']);
1002
1188
  if (!supportedTypes.has(documentType)) {
@@ -1006,6 +1192,13 @@ export function registerIngestCommand(program) {
1006
1192
  try {
1007
1193
  const file = readVisionInput(filePath);
1008
1194
  describeVisionInput(file, flags);
1195
+ const config = buildLLMConfig();
1196
+ const selectedPageRange = file.kind === 'pdf'
1197
+ ? parsePageRange(opts.pageRange)
1198
+ : undefined;
1199
+ if (selectedPageRange && file.kind !== 'pdf') {
1200
+ throw new Error('--page-range is only supported for PDF ingest.');
1201
+ }
1009
1202
  let countedPdfPages = null;
1010
1203
  if (file.kind === 'pdf') {
1011
1204
  try {
@@ -1015,21 +1208,63 @@ export function registerIngestCommand(program) {
1015
1208
  countedPdfPages = null;
1016
1209
  }
1017
1210
  }
1018
- const shouldShortCircuitAsyncInspection = file.kind === 'pdf'
1019
- && countedPdfPages != null
1020
- && countedPdfPages > 5;
1021
- const inspection = file.kind === 'pdf' && (!shouldShortCircuitAsyncInspection || countedPdfPages == null)
1211
+ if (selectedPageRange && countedPdfPages != null && selectedPageRange.endPage > countedPdfPages) {
1212
+ throw new Error(`--page-range ${selectedPageRange.startPage}:${selectedPageRange.endPage} exceeds the PDF page count (${countedPdfPages}).`);
1213
+ }
1214
+ const shouldInspectPdf = file.kind === 'pdf'
1215
+ && (selectedPageRange != null
1216
+ || countedPdfPages == null
1217
+ || countedPdfPages <= 5
1218
+ || documentType === 'geotech-document');
1219
+ const inspection = file.kind === 'pdf' && shouldInspectPdf
1022
1220
  ? inspectPdfDocument(filePath)
1023
1221
  : null;
1024
- const effectiveInspection = inspection && inspection.totalPages > 0 ? inspection : null;
1025
- const totalPages = countedPdfPages
1222
+ const fullInspection = inspection && inspection.totalPages > 0 ? inspection : null;
1223
+ if (file.kind === 'pdf' && countedPdfPages == null && !fullInspection && !selectedPageRange) {
1224
+ throw new Error('Could not determine the PDF page count. The file may be encrypted, damaged, or use an unsupported PDF structure.');
1225
+ }
1226
+ if (selectedPageRange && fullInspection && selectedPageRange.endPage > fullInspection.totalPages) {
1227
+ throw new Error(`--page-range ${selectedPageRange.startPage}:${selectedPageRange.endPage} exceeds the PDF page count (${fullInspection.totalPages}).`);
1228
+ }
1229
+ const effectiveInspection = fullInspection && selectedPageRange
1230
+ ? slicePdfInspectionToRange(fullInspection, selectedPageRange)
1231
+ : fullInspection;
1232
+ const totalPages = effectiveInspection?.totalPages
1233
+ ?? countedPdfPages
1234
+ ?? (selectedPageRange ? (selectedPageRange.endPage - selectedPageRange.startPage + 1) : null)
1235
+ ?? fullInspection?.totalPages
1026
1236
  ?? effectiveInspection?.totalPages
1027
1237
  ?? 1;
1028
1238
  const weightedPageCost = file.kind === 'pdf'
1029
1239
  ? (effectiveInspection ? computeWeightedPdfPageCost(effectiveInspection) : totalPages)
1030
1240
  : 1;
1241
+ const shouldCreateSegmentedParent = file.kind === 'pdf'
1242
+ && shouldSegmentHostedBetaLongPdf(documentType, config, fullInspection, selectedPageRange);
1243
+ const segmentationSummary = shouldCreateSegmentedParent && fullInspection
1244
+ ? {
1245
+ mode: 'segmented-parent',
1246
+ pageRange: [
1247
+ selectedPageRange?.startPage ?? 1,
1248
+ selectedPageRange?.endPage ?? fullInspection.totalPages,
1249
+ ],
1250
+ effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
1251
+ segmentCount: buildPersistedIngestJobSegments(fullInspection, {
1252
+ pageRange: selectedPageRange,
1253
+ effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
1254
+ }).length,
1255
+ segments: buildPersistedIngestJobSegments(fullInspection, {
1256
+ pageRange: selectedPageRange,
1257
+ effectivePageLimit: HOSTED_BETA_EFFECTIVE_PAGE_LIMIT,
1258
+ }).map((segment, index, segments) => ({
1259
+ ...segment,
1260
+ segmentIndex: index + 1,
1261
+ segmentCount: segments.length,
1262
+ status: 'queued',
1263
+ })),
1264
+ }
1265
+ : undefined;
1031
1266
  const shouldRunAsJob = file.kind === 'pdf'
1032
- && (shouldShortCircuitAsyncInspection || shouldUseAsyncIngestJob(effectiveInspection, totalPages));
1267
+ && (shouldCreateSegmentedParent || shouldUseAsyncIngestJob(effectiveInspection, totalPages));
1033
1268
  if (flags.dryRun) {
1034
1269
  if (shouldRunAsJob) {
1035
1270
  const dryRun = {
@@ -1044,7 +1279,9 @@ export function registerIngestCommand(program) {
1044
1279
  weightedPageCost,
1045
1280
  wouldCreateBackgroundJob: true,
1046
1281
  pagePreprocessingConcurrency: 2,
1047
- chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(buildLLMConfig()),
1282
+ chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(config, effectiveInspection, segmentationSummary),
1283
+ pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
1284
+ segmentation: segmentationSummary,
1048
1285
  pageClassifications: effectiveInspection?.pages.map((page) => ({
1049
1286
  pageNumber: page.pageNumber,
1050
1287
  classification: page.classification,
@@ -1062,6 +1299,12 @@ export function registerIngestCommand(program) {
1062
1299
  keyValue('Pages', String(totalPages));
1063
1300
  keyValue('Weighted page cost', String(weightedPageCost));
1064
1301
  keyValue('Would create background job', 'Yes');
1302
+ if (selectedPageRange) {
1303
+ keyValue('Selected range', formatPageRange(selectedPageRange) ?? 'Unavailable');
1304
+ }
1305
+ if (segmentationSummary?.segments?.length) {
1306
+ keyValue('Segmentation', `Hosted-beta best-result window is ${HOSTED_BETA_EFFECTIVE_PAGE_LIMIT} effective pages; processing as ${segmentationSummary.segments.map((segment) => `${segment.startPage}-${segment.endPage}`).join(', ')}`);
1307
+ }
1065
1308
  if (opts.project) {
1066
1309
  keyValue('Project', String(opts.project));
1067
1310
  }
@@ -1081,6 +1324,7 @@ export function registerIngestCommand(program) {
1081
1324
  wouldUseHostedVision: true,
1082
1325
  projectId: opts.project,
1083
1326
  totalPages,
1327
+ pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
1084
1328
  pageClassifications: effectiveInspection?.pages.map((page) => ({
1085
1329
  pageNumber: page.pageNumber,
1086
1330
  classification: page.classification,
@@ -1099,6 +1343,9 @@ export function registerIngestCommand(program) {
1099
1343
  keyValue('Source', filePath);
1100
1344
  keyValue('Input kind', file.kind === 'pdf' ? 'pdf' : 'image');
1101
1345
  keyValue('Pages', String(dryRun.totalPages));
1346
+ if (selectedPageRange) {
1347
+ keyValue('Selected range', formatPageRange(selectedPageRange) ?? 'Unavailable');
1348
+ }
1102
1349
  if (opts.project) {
1103
1350
  keyValue('Project', String(opts.project));
1104
1351
  }
@@ -1115,18 +1362,20 @@ export function registerIngestCommand(program) {
1115
1362
  return;
1116
1363
  }
1117
1364
  if (shouldRunAsJob) {
1118
- if (wantsHtmlDossier && flags.output) {
1365
+ if (runJobInBackground && wantsHtmlDossier) {
1119
1366
  throw new Error('HTML ingest dossiers are generated from completed results. Start the job first, then run geotech ingest wait <jobId> --format html --output <file>.');
1120
1367
  }
1121
1368
  spinner = startProgress(flags, 'Creating resumable ingest job...');
1122
- const config = buildLLMConfig();
1123
1369
  const job = createAndStartPersistedIngestJob({
1124
1370
  documentType: documentType,
1125
1371
  filePath,
1126
1372
  inspection: effectiveInspection,
1373
+ totalPagesFallback: totalPages,
1127
1374
  config,
1128
1375
  projectId: opts.project,
1129
1376
  overrideBoreholeId: opts.boreholeId,
1377
+ pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
1378
+ segmentation: segmentationSummary,
1130
1379
  });
1131
1380
  const normalizedJob = normalizeIngestJobRecord(job);
1132
1381
  spinner?.succeed(`Ingest job started: ${job.jobId}`);
@@ -1134,20 +1383,65 @@ export function registerIngestCommand(program) {
1134
1383
  renderJSON(job);
1135
1384
  return;
1136
1385
  }
1137
- if (normalizedJob) {
1386
+ if (runJobInBackground && normalizedJob && !flags.quiet) {
1138
1387
  renderIngestJobRecord(normalizedJob, {
1139
1388
  title: 'Geotechnical Ingest Job Started',
1140
1389
  includeCommands: true,
1141
1390
  });
1142
1391
  }
1143
- if (flags.output) {
1392
+ if (runJobInBackground && segmentationSummary?.segments?.length && !flags.quiet) {
1393
+ info(`Hosted-beta best-result window is ${HOSTED_BETA_EFFECTIVE_PAGE_LIMIT} effective pages; processing as linked segments ${segmentationSummary.segments.map((segment) => `${segment.startPage}-${segment.endPage}`).join(' and ')}.`);
1394
+ }
1395
+ if (runJobInBackground && flags.output) {
1144
1396
  writeFileSync(flags.output, JSON.stringify(job, null, 2));
1145
- success(`Job details saved to ${flags.output}`);
1397
+ if (!flags.quiet) {
1398
+ success(`Job details saved to ${flags.output}`);
1399
+ }
1400
+ }
1401
+ if (runJobInBackground) {
1402
+ return;
1403
+ }
1404
+ const waitedRecord = await waitForPersistedIngestJobWithLiveProgress(job.jobId, flags);
1405
+ const completedJob = normalizeIngestJobRecord(waitedRecord);
1406
+ if (!completedJob) {
1407
+ throw new Error(`Persisted ingest job "${job.jobId}" could not be normalized.`);
1408
+ }
1409
+ if (completedJob.status !== 'completed' || !completedJob.result?.ingestResult) {
1410
+ renderIngestJobRecord(completedJob, { title: 'Geotechnical Ingest Job Status', includeCommands: true });
1411
+ throw new Error(`Persisted ingest job "${job.jobId}" finished with status "${completedJob.status}".`);
1412
+ }
1413
+ const completedResult = completedJob.result.ingestResult;
1414
+ const persistedReview = completedJob.result.persistedReview
1415
+ ? {
1416
+ projectId: completedJob.request.projectId ?? 'Unknown',
1417
+ datasetName: completedJob.result.persistedReview.datasetName,
1418
+ reviewId: completedJob.result.persistedReview.reviewId,
1419
+ createdAt: completedJob.result.persistedReview.createdAt,
1420
+ }
1421
+ : null;
1422
+ if (wantsHtmlDossier) {
1423
+ const htmlDossier = writeHtmlDossier(completedResult, {
1424
+ outputPath: flags.output,
1425
+ open: shouldOpenHtmlDossier(flags),
1426
+ sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? completedJob.jobId,
1427
+ storedReview: persistedReview,
1428
+ });
1429
+ renderCompactIngestResultSummary(completedResult, {
1430
+ sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? completedJob.jobId,
1431
+ persistedReview,
1432
+ htmlDossier,
1433
+ });
1434
+ }
1435
+ else {
1436
+ renderIngestJobResult(completedJob);
1437
+ if (flags.output) {
1438
+ writeFileSync(flags.output, JSON.stringify(completedJob.result, null, 2));
1439
+ success(`Results saved to ${flags.output}`);
1440
+ }
1146
1441
  }
1147
1442
  return;
1148
1443
  }
1149
1444
  spinner = startProgress(flags, 'Running geotechnical ingest...');
1150
- const config = buildLLMConfig();
1151
1445
  const requestDetails = {
1152
1446
  prompt: documentType === 'borehole-log'
1153
1447
  ? 'Extract structured borehole log data.'
@@ -1158,7 +1452,16 @@ export function registerIngestCommand(program) {
1158
1452
  };
1159
1453
  const result = file.kind === 'pdf'
1160
1454
  ? await (async () => {
1161
- const pageInputs = await readVisionPdfPageInputs(filePath, { inspection: effectiveInspection });
1455
+ const scopedInspection = selectedPageRange && fullInspection
1456
+ ? slicePdfInspectionToRange(fullInspection, selectedPageRange, { rebasePageNumbers: true })
1457
+ : effectiveInspection;
1458
+ const pdfInputPath = selectedPageRange
1459
+ ? join(process.cwd(), 'tmp', 'cli-page-ranges', `${slugifyOutputStem(basename(filePath))}-pages-${selectedPageRange.startPage}-${selectedPageRange.endPage}.pdf`)
1460
+ : filePath;
1461
+ if (selectedPageRange) {
1462
+ await writePdfPageSubset(filePath, selectedPageRange, pdfInputPath);
1463
+ }
1464
+ const pageInputs = await readVisionPdfPageInputs(pdfInputPath, { inspection: scopedInspection });
1162
1465
  for (const pageInput of pageInputs) {
1163
1466
  maybeCheckHostedBetaVisionPayload(config, pageInput, requestDetails);
1164
1467
  }
@@ -1169,9 +1472,10 @@ export function registerIngestCommand(program) {
1169
1472
  filePath,
1170
1473
  fileName: basename(filePath),
1171
1474
  inputKind: 'pdf',
1475
+ pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
1172
1476
  },
1173
1477
  overrideBoreholeId: opts.boreholeId,
1174
- inspection: effectiveInspection,
1478
+ inspection: scopedInspection,
1175
1479
  pages: pageInputs,
1176
1480
  })
1177
1481
  : ingestGeotechDocument({
@@ -1180,8 +1484,9 @@ export function registerIngestCommand(program) {
1180
1484
  filePath,
1181
1485
  fileName: basename(filePath),
1182
1486
  inputKind: 'pdf',
1487
+ pageRange: selectedPageRange ? [selectedPageRange.startPage, selectedPageRange.endPage] : undefined,
1183
1488
  },
1184
- inspection: effectiveInspection,
1489
+ inspection: scopedInspection,
1185
1490
  pages: pageInputs,
1186
1491
  });
1187
1492
  })()
@@ -1247,6 +1552,7 @@ export function registerIngestCommand(program) {
1247
1552
  if (wantsHtmlDossier) {
1248
1553
  writeHtmlDossier(result, {
1249
1554
  outputPath: flags.output,
1555
+ open: shouldOpenHtmlDossier(flags),
1250
1556
  sourceLabel: result.source.fileName ?? result.source.filePath ?? filePath,
1251
1557
  storedReview: persistedReviewDetails,
1252
1558
  });
@@ -1333,6 +1639,7 @@ export function registerIngestCommand(program) {
1333
1639
  const dossierDetails = buildPersistedReviewDossierDetails(record, resolvedProjectId);
1334
1640
  writeHtmlDossier(record.result, {
1335
1641
  outputPath: flags.output,
1642
+ open: shouldOpenHtmlDossier(flags),
1336
1643
  sourceLabel: dossierDetails.sourceLabel,
1337
1644
  storedReview: dossierDetails.storedReview,
1338
1645
  approval: dossierDetails.approval,
@@ -1564,7 +1871,7 @@ export function registerIngestCommand(program) {
1564
1871
  const outputFormat = resolveIngestPresentationFormat(resolvedOpts.format);
1565
1872
  assertIngestPresentationMode(flags, outputFormat);
1566
1873
  const wantsHtmlDossier = shouldRenderHtmlDossier(outputFormat, flags.output);
1567
- const record = await waitForPersistedIngestJob(String(jobId));
1874
+ const record = await waitForPersistedIngestJobWithLiveProgress(String(jobId), flags);
1568
1875
  const normalized = normalizeIngestJobRecord(record);
1569
1876
  if (!normalized) {
1570
1877
  throw new Error(`Persisted ingest job "${jobId}" could not be normalized.`);
@@ -1590,18 +1897,27 @@ export function registerIngestCommand(program) {
1590
1897
  createdAt: normalized.result.persistedReview.createdAt,
1591
1898
  }
1592
1899
  : null;
1593
- renderIngestJobResult(normalized);
1594
1900
  if (wantsHtmlDossier) {
1595
- writeHtmlDossier(completedResult, {
1901
+ const htmlDossier = writeHtmlDossier(completedResult, {
1596
1902
  outputPath: flags.output,
1903
+ open: shouldOpenHtmlDossier(flags),
1597
1904
  sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
1598
1905
  storedReview: persistedReview,
1599
1906
  });
1907
+ renderCompactIngestResultSummary(completedResult, {
1908
+ sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
1909
+ persistedReview,
1910
+ htmlDossier,
1911
+ });
1600
1912
  }
1601
1913
  else if (flags.output) {
1914
+ renderIngestJobResult(normalized);
1602
1915
  writeFileSync(flags.output, JSON.stringify(record.result, null, 2));
1603
1916
  success(`Results saved to ${flags.output}`);
1604
1917
  }
1918
+ else {
1919
+ renderIngestJobResult(normalized);
1920
+ }
1605
1921
  });
1606
1922
  const resumeCmd = new Command('resume')
1607
1923
  .description('Resume a persisted geotechnical ingest job from completed checkpoints')
@@ -1661,18 +1977,27 @@ export function registerIngestCommand(program) {
1661
1977
  createdAt: normalized.result.persistedReview.createdAt,
1662
1978
  }
1663
1979
  : null;
1664
- renderIngestJobResult(normalized);
1665
1980
  if (wantsHtmlDossier) {
1666
- writeHtmlDossier(completedResult, {
1981
+ const htmlDossier = writeHtmlDossier(completedResult, {
1667
1982
  outputPath: flags.output,
1983
+ open: shouldOpenHtmlDossier(flags),
1668
1984
  sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
1669
1985
  storedReview: persistedReview,
1670
1986
  });
1987
+ renderCompactIngestResultSummary(completedResult, {
1988
+ sourceLabel: completedResult.source.fileName ?? completedResult.source.filePath ?? normalized.jobId,
1989
+ persistedReview,
1990
+ htmlDossier,
1991
+ });
1671
1992
  }
1672
1993
  else if (flags.output) {
1994
+ renderIngestJobResult(normalized);
1673
1995
  writeFileSync(flags.output, JSON.stringify(result, null, 2));
1674
1996
  success(`Results saved to ${flags.output}`);
1675
1997
  }
1998
+ else {
1999
+ renderIngestJobResult(normalized);
2000
+ }
1676
2001
  });
1677
2002
  const cancelCmd = new Command('cancel')
1678
2003
  .description('Cancel a persisted geotechnical ingest job')