@geotechcli/core 0.4.22 → 0.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/dist/agents/brain.d.ts.map +1 -1
  2. package/dist/agents/brain.js +2 -1
  3. package/dist/agents/brain.js.map +1 -1
  4. package/dist/agents/data-tools.js +759 -0
  5. package/dist/agents/data-tools.js.map +1 -1
  6. package/dist/agents/swarm.d.ts.map +1 -1
  7. package/dist/agents/swarm.js +22 -2
  8. package/dist/agents/swarm.js.map +1 -1
  9. package/dist/agents/tool-runtime.d.ts +7 -0
  10. package/dist/agents/tool-runtime.d.ts.map +1 -0
  11. package/dist/agents/tool-runtime.js +9 -0
  12. package/dist/agents/tool-runtime.js.map +1 -0
  13. package/dist/config/index.d.ts +4 -4
  14. package/dist/config/index.js +1 -1
  15. package/dist/config/index.js.map +1 -1
  16. package/dist/geo/coordinates.d.ts +40 -0
  17. package/dist/geo/coordinates.d.ts.map +1 -0
  18. package/dist/geo/coordinates.js +461 -0
  19. package/dist/geo/coordinates.js.map +1 -0
  20. package/dist/geo/index.d.ts +1 -0
  21. package/dist/geo/index.d.ts.map +1 -1
  22. package/dist/geo/index.js +1 -0
  23. package/dist/geo/index.js.map +1 -1
  24. package/dist/index.d.ts +3 -2
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +3 -2
  27. package/dist/index.js.map +1 -1
  28. package/dist/ingest/ags.d.ts +3 -0
  29. package/dist/ingest/ags.d.ts.map +1 -1
  30. package/dist/ingest/ags.js +98 -9
  31. package/dist/ingest/ags.js.map +1 -1
  32. package/dist/ingest/cpt.d.ts +4 -0
  33. package/dist/ingest/cpt.d.ts.map +1 -1
  34. package/dist/ingest/cpt.js +87 -25
  35. package/dist/ingest/cpt.js.map +1 -1
  36. package/dist/ingest/document-inputs.d.ts +37 -0
  37. package/dist/ingest/document-inputs.d.ts.map +1 -0
  38. package/dist/ingest/document-inputs.js +197 -0
  39. package/dist/ingest/document-inputs.js.map +1 -0
  40. package/dist/ingest/geotech-document.d.ts +118 -0
  41. package/dist/ingest/geotech-document.d.ts.map +1 -0
  42. package/dist/ingest/geotech-document.js +1006 -0
  43. package/dist/ingest/geotech-document.js.map +1 -0
  44. package/dist/ingest/geotech-extract.d.ts +86 -0
  45. package/dist/ingest/geotech-extract.d.ts.map +1 -0
  46. package/dist/ingest/geotech-extract.js +652 -0
  47. package/dist/ingest/geotech-extract.js.map +1 -0
  48. package/dist/ingest/geotech-schemas.d.ts +248 -0
  49. package/dist/ingest/geotech-schemas.d.ts.map +1 -0
  50. package/dist/ingest/geotech-schemas.js +150 -0
  51. package/dist/ingest/geotech-schemas.js.map +1 -0
  52. package/dist/ingest/index.d.ts +8 -0
  53. package/dist/ingest/index.d.ts.map +1 -1
  54. package/dist/ingest/index.js +8 -0
  55. package/dist/ingest/index.js.map +1 -1
  56. package/dist/ingest/ingest-job-child.d.ts +2 -0
  57. package/dist/ingest/ingest-job-child.d.ts.map +1 -0
  58. package/dist/ingest/ingest-job-child.js +45 -0
  59. package/dist/ingest/ingest-job-child.js.map +1 -0
  60. package/dist/ingest/job-store.d.ts +117 -0
  61. package/dist/ingest/job-store.d.ts.map +1 -0
  62. package/dist/ingest/job-store.js +541 -0
  63. package/dist/ingest/job-store.js.map +1 -0
  64. package/dist/ingest/job-worker.d.ts +24 -0
  65. package/dist/ingest/job-worker.d.ts.map +1 -0
  66. package/dist/ingest/job-worker.js +1129 -0
  67. package/dist/ingest/job-worker.js.map +1 -0
  68. package/dist/ingest/pdf.d.ts +102 -0
  69. package/dist/ingest/pdf.d.ts.map +1 -0
  70. package/dist/ingest/pdf.js +1544 -0
  71. package/dist/ingest/pdf.js.map +1 -0
  72. package/dist/ingest/review-store.d.ts +215 -0
  73. package/dist/ingest/review-store.d.ts.map +1 -0
  74. package/dist/ingest/review-store.js +1995 -0
  75. package/dist/ingest/review-store.js.map +1 -0
  76. package/dist/llm/capabilities.d.ts +8 -0
  77. package/dist/llm/capabilities.d.ts.map +1 -0
  78. package/dist/llm/capabilities.js +73 -0
  79. package/dist/llm/capabilities.js.map +1 -0
  80. package/dist/llm/index.d.ts +3 -2
  81. package/dist/llm/index.d.ts.map +1 -1
  82. package/dist/llm/index.js +2 -1
  83. package/dist/llm/index.js.map +1 -1
  84. package/dist/llm/providers/anthropic.d.ts +6 -0
  85. package/dist/llm/providers/anthropic.d.ts.map +1 -1
  86. package/dist/llm/providers/anthropic.js +10 -1
  87. package/dist/llm/providers/anthropic.js.map +1 -1
  88. package/dist/llm/providers/hosted-beta.d.ts +6 -0
  89. package/dist/llm/providers/hosted-beta.d.ts.map +1 -1
  90. package/dist/llm/providers/hosted-beta.js +40 -10
  91. package/dist/llm/providers/hosted-beta.js.map +1 -1
  92. package/dist/llm/providers/huggingface.d.ts +6 -0
  93. package/dist/llm/providers/huggingface.d.ts.map +1 -1
  94. package/dist/llm/providers/huggingface.js +21 -1
  95. package/dist/llm/providers/huggingface.js.map +1 -1
  96. package/dist/llm/providers/openai-compatible.d.ts +6 -0
  97. package/dist/llm/providers/openai-compatible.d.ts.map +1 -1
  98. package/dist/llm/providers/openai-compatible.js +21 -1
  99. package/dist/llm/providers/openai-compatible.js.map +1 -1
  100. package/dist/llm/providers/zhipu.d.ts +6 -0
  101. package/dist/llm/providers/zhipu.d.ts.map +1 -1
  102. package/dist/llm/providers/zhipu.js +15 -1
  103. package/dist/llm/providers/zhipu.js.map +1 -1
  104. package/dist/llm/router.d.ts +7 -0
  105. package/dist/llm/router.d.ts.map +1 -1
  106. package/dist/llm/router.js +33 -13
  107. package/dist/llm/router.js.map +1 -1
  108. package/dist/llm/types.d.ts +22 -4
  109. package/dist/llm/types.d.ts.map +1 -1
  110. package/dist/llm/types.js.map +1 -1
  111. package/dist/meta/metadata.json +1 -1
  112. package/dist/report/html.d.ts +3 -0
  113. package/dist/report/html.d.ts.map +1 -0
  114. package/dist/report/html.js +626 -0
  115. package/dist/report/html.js.map +1 -0
  116. package/dist/report/index.d.ts +2 -0
  117. package/dist/report/index.d.ts.map +1 -1
  118. package/dist/report/index.js +2 -0
  119. package/dist/report/index.js.map +1 -1
  120. package/dist/report/ingest-dossier.d.ts +81 -0
  121. package/dist/report/ingest-dossier.d.ts.map +1 -0
  122. package/dist/report/ingest-dossier.js +324 -0
  123. package/dist/report/ingest-dossier.js.map +1 -0
  124. package/dist/storage/index.d.ts +5 -0
  125. package/dist/storage/index.d.ts.map +1 -1
  126. package/dist/storage/index.js +12 -6
  127. package/dist/storage/index.js.map +1 -1
  128. package/dist/vision/geotech-document.d.ts +46 -0
  129. package/dist/vision/geotech-document.d.ts.map +1 -0
  130. package/dist/vision/geotech-document.js +576 -0
  131. package/dist/vision/geotech-document.js.map +1 -0
  132. package/dist/vision/index.d.ts +31 -0
  133. package/dist/vision/index.d.ts.map +1 -1
  134. package/dist/vision/index.js +659 -27
  135. package/dist/vision/index.js.map +1 -1
  136. package/dist/vision/ocr.d.ts +29 -0
  137. package/dist/vision/ocr.d.ts.map +1 -0
  138. package/dist/vision/ocr.js +287 -0
  139. package/dist/vision/ocr.js.map +1 -0
  140. package/dist/vision/preprocess.d.ts +26 -0
  141. package/dist/vision/preprocess.d.ts.map +1 -0
  142. package/dist/vision/preprocess.js +194 -0
  143. package/dist/vision/preprocess.js.map +1 -0
  144. package/package.json +5 -1
@@ -1,6 +1,22 @@
1
- import { generateVision, generateText } from '../llm/router.js';
1
+ import { generateDocumentVision, generateVision, generateText } from '../llm/router.js';
2
+ import { providerSupportsNativePdfDocuments } from '../llm/capabilities.js';
3
+ import { buildBoreholeLocation as buildStructuredBoreholeLocation } from '../geo/coordinates.js';
2
4
  import { classifyRMR89 } from '../geo/classification.js';
3
5
  import { clampConfidence, createParseSafety, deriveParseStatus, normalizeWarnings, parseJsonObject, readNumber, readString, } from './parse.js';
6
/**
 * Clamp a requested completion-token budget to the hosted-beta per-profile ceiling.
 *
 * Any provider other than `hosted-beta` gets `requestedMaxTokens` back untouched.
 * For `hosted-beta` the result is the smaller of the request and the profile's cap.
 *
 * @param {{ provider?: string }} config - LLM configuration; only `provider` is read.
 * @param {string} profile - One of the cap table keys below (e.g. 'structured-vision').
 * @param {number} requestedMaxTokens - Token budget the caller would like to use.
 * @returns {number} The budget to send to the provider.
 */
function getHostedBetaVisionMaxTokens(config, profile, requestedMaxTokens) {
    if (config.provider !== 'hosted-beta') {
        return requestedMaxTokens;
    }
    const capByProfile = {
        'structured-vision': 850,
        'fallback-vision': 950,
        'ocr-vision': 700,
        'structured-text': 700,
        'fallback-text': 850,
        'document-query': 900,
    };
    // Own-property lookup only: a plain `capByProfile[profile]` would resolve
    // inherited names such as 'constructor' and feed a non-number to Math.min.
    const hasCap = Object.prototype.hasOwnProperty.call(capByProfile, profile);
    if (!hasCap) {
        // Unknown profile: there is no ceiling to apply, so do not turn the
        // budget into NaN (Math.min(x, undefined) is NaN).
        return requestedMaxTokens;
    }
    const cap = capByProfile[profile];
    // A missing/NaN request would otherwise propagate NaN (or 0 for null) as
    // maxTokens; the profile ceiling is the safest usable budget in that case.
    if (requestedMaxTokens == null || Number.isNaN(requestedMaxTokens)) {
        return cap;
    }
    return Math.min(requestedMaxTokens, cap);
}
4
20
  // ---------------------------------------------------------------------------
5
21
  // Vision retry helper — handles upstream empty-content failures
6
22
  // ---------------------------------------------------------------------------
@@ -17,14 +33,19 @@ function isRecoverableVisionEmptyResponse(error) {
17
33
  * Second attempt: softer plain-text prompt at higher temperature, same image.
18
34
  * This handles hosted-beta vision models returning empty content on the first call.
19
35
  */
20
- async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, systemPrompt, maxTokens) {
36
+ async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, systemPrompt, maxTokens, retryOptions) {
21
37
  const start = Date.now();
38
+ const multimodalCall = mimeType === 'application/pdf'
39
+ ? generateDocumentVision
40
+ : generateVision;
41
+ const primaryMaxTokens = getHostedBetaVisionMaxTokens(config, 'structured-vision', maxTokens);
42
+ const fallbackMaxTokens = getHostedBetaVisionMaxTokens(config, 'fallback-vision', retryOptions?.fallbackMaxTokens ?? (maxTokens + 200));
22
43
  // Attempt 1: strict JSON prompt
23
44
  try {
24
- const r1 = await generateVision(strictPrompt, imageBase64, mimeType, config, {
45
+ const r1 = await multimodalCall(strictPrompt, imageBase64, mimeType, config, {
25
46
  systemPrompt,
26
47
  temperature: 0.1,
27
- maxTokens,
48
+ maxTokens: primaryMaxTokens,
28
49
  });
29
50
  if (r1.text && r1.text.trim().length > 10) {
30
51
  return { text: r1.text, latencyMs: r1.latencyMs, usedFallback: false };
@@ -37,10 +58,11 @@ async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, soft
37
58
  }
38
59
  // Attempt 2: softer plain text prompt
39
60
  try {
40
- const r2 = await generateVision(softPrompt, imageBase64, mimeType, config, {
41
- systemPrompt: systemPrompt + ' Be concise but thorough. You must provide values even if approximate.',
42
- temperature: 0.3,
43
- maxTokens: maxTokens + 200,
61
+ const r2 = await multimodalCall(softPrompt, imageBase64, mimeType, config, {
62
+ systemPrompt: retryOptions?.fallbackSystemPrompt
63
+ ?? `${systemPrompt} Be concise but thorough. You must provide values even if approximate.`,
64
+ temperature: retryOptions?.fallbackTemperature ?? 0.3,
65
+ maxTokens: fallbackMaxTokens,
44
66
  });
45
67
  if (r2.text && r2.text.trim().length > 0) {
46
68
  return {
@@ -639,12 +661,373 @@ Classify the soil and estimate engineering properties. Respond with ONLY a JSON
639
661
  rawLLMText: response.text,
640
662
  };
641
663
  }
642
- export async function interpretBoreholeLog(imageBase64, mimeType, config, boreholeId) {
643
- const strictPrompt = `Extract structured data from this borehole log image. Respond with ONLY a JSON object:
664
/**
 * Read `source[key]` as a trimmed, non-empty string.
 *
 * @param {object|null|undefined} source - Object to read from; may be nullish.
 * @param {string} key - Property name to read.
 * @returns {string|null} The trimmed string, or null when the property is
 *   missing, not a string, or blank after trimming.
 */
function readOptionalString(source, key) {
    const candidate = source?.[key];
    if (typeof candidate !== 'string') {
        return null;
    }
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : null;
}
668
/**
 * Read `source[key]` as a finite number.
 *
 * Accepts real numbers and numeric strings only. Everything else yields null,
 * including `null`, booleans, arrays and blank strings. This matters because
 * `Number(null)`, `Number('')` and `Number([])` are all 0, so a JSON field the
 * model explicitly set to `null` would otherwise be read back as a genuine 0.
 *
 * @param {object|null|undefined} source - Object to read from; may be nullish.
 * @param {string} key - Property name to read.
 * @returns {number|null} The finite numeric value, or null when absent or not numeric.
 */
function readOptionalNumber(source, key) {
    const value = source?.[key];
    if (typeof value === 'number') {
        return Number.isFinite(value) ? value : null;
    }
    if (typeof value === 'string') {
        const trimmed = value.trim();
        if (trimmed === '') {
            // Number('') is 0; an empty field is "no value", not zero.
            return null;
        }
        const parsed = Number(trimmed);
        return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
}
673
/**
 * Return the trimmed text of the first pattern that matches `rawText`.
 *
 * Patterns are tried in array order; the full match (group 0) of the first one
 * that produces a non-empty match is returned.
 *
 * @param {string} rawText - Text to search.
 * @param {RegExp[]} patterns - Candidate patterns, highest priority first.
 * @returns {string|null} The trimmed match, or null when nothing matched.
 */
function firstMatchingText(rawText, patterns) {
    for (const candidatePattern of patterns) {
        const wholeMatch = rawText.match(candidatePattern)?.[0];
        if (wholeMatch) {
            return wholeMatch.trim();
        }
    }
    return null;
}
682
/**
 * Build a structured borehole location from vision/LLM-extracted metadata.
 *
 * Coordinates are read from `source.coordinates` when that is an object,
 * otherwise from `source` itself. Returns null when there is no source, or
 * when none of latitude/longitude/easting/northing/raw coordinate text is present.
 *
 * @param {object|null|undefined} source - Parsed metadata object (or its coordinates object).
 * @param {number|null} groundElevation - Passed through as `groundLevel`.
 * @param {string} boreholeId - Identifier attached to the location.
 * @returns {object|null} Result of the structured location builder, or null.
 */
function buildVisionBoreholeLocation(source, groundElevation, boreholeId) {
    let coordinateSource = source;
    if (source && typeof source.coordinates === 'object' && source.coordinates !== null) {
        coordinateSource = source.coordinates;
    }
    if (!coordinateSource) {
        return null;
    }
    const [latitude, longitude, easting, northing] = ['latitude', 'longitude', 'easting', 'northing']
        .map((field) => readOptionalNumber(coordinateSource, field));
    const rawCoordinateText = readOptionalString(coordinateSource, 'rawText')
        ?? readOptionalString(coordinateSource, 'rawCoordinateText');
    const coordinateSystem = readOptionalString(coordinateSource, 'coordinateSystem');
    const hasNumericCoordinate = [latitude, longitude, easting, northing]
        .some((component) => component != null);
    if (!hasNumericCoordinate && !rawCoordinateText) {
        return null;
    }
    // Default confidence is 64 only when there is raw text, a latitude or an
    // easting to back it; otherwise it defaults to 0.
    const hasConfidenceEvidence = Boolean(rawCoordinateText) || latitude != null || easting != null;
    const metadataConfidence = clampConfidence(coordinateSource.confidence, hasConfidenceEvidence ? 64 : 0);
    const structuredLocation = buildStructuredBoreholeLocation({
        boreholeId,
        source: 'vision',
        crs: coordinateSystem ?? undefined,
        easting,
        northing,
        latitude,
        longitude,
        groundLevel: groundElevation,
        raw: {
            rawCoordinateText,
            coordinateSystem,
            metadataConfidence,
        },
    });
    return structuredLocation ?? null;
}
719
/**
 * Regex-based fallback that pulls borehole metadata out of free text when the
 * model did not return usable JSON.
 *
 * Extracts, when present: borehole ID, total depth, ground elevation, page
 * marker, coordinate text with its parsed easting/northing/latitude/longitude,
 * and a declared coordinate system.
 *
 * @param {string} rawText - Narrative or OCR text to scan.
 * @param {string|undefined|null} boreholeId - ID to use when none is found in the text.
 * @returns {{ value: object|null, baseStatus: 'partial'|'failed', warnings: string[] }}
 *   `value` is null for blank input. `baseStatus` is 'partial' only when at
 *   least one field besides `boreholeId` was extracted.
 */
function extractBoreholeMetadataFallback(rawText, boreholeId) {
    if (!rawText.trim()) {
        return { value: null, baseStatus: 'failed', warnings: [] };
    }
    const boreholeMatch = rawText.match(/\bBH[-_\s]?[A-Z0-9]+\b/i);
    const totalDepthMatch = rawText.match(/total depth[^0-9]*(\d+(?:\.\d+)?)\s*m/i)
        ?? rawText.match(/depth[^0-9]*(\d+(?:\.\d+)?)\s*m/i);
    // The label must be a whole token. Without the boundary and the negative
    // lookahead, "gl" and "mod" match inside ordinary log vocabulary such as
    // "glacial", "angle" or "moderately", and the next number in the text is
    // then reported as the ground elevation.
    const elevationMatch = rawText.match(/\b(?:ground level|ground elevation|gl|m\.?o\.?d\.?)(?![a-z])[^0-9+-]*([+-]?\d+(?:\.\d+)?)/i);
    const pageInfo = firstMatchingText(rawText, [
        /page\s+\d+\s*(?:of|\/)\s*\d+/i,
        /sheet\s+\d+\s*(?:of|\/)\s*\d+/i,
    ]);
    const coordinateText = firstMatchingText(rawText, [
        /\bE(?:asting)?[:=]?\s*\d+(?:\.\d+)?\s*[,\s;]+N(?:orthing)?[:=]?\s*\d+(?:\.\d+)?/i,
        /\bLat(?:itude)?[:=]?\s*[-+]?\d+(?:\.\d+)?\s*[,\s;]+Lon(?:gitude)?[:=]?\s*[-+]?\d+(?:\.\d+)?/i,
        /\bUTM\s+Zone\s+\d{1,2}[NS]?\s+E[:=]?\s*\d+(?:\.\d+)?\s+N[:=]?\s*\d+(?:\.\d+)?/i,
    ]);
    // Every component pattern is anchored with \b. Unanchored, the trailing
    // "e:" of "Latitude: 51.5" satisfies the easting pattern and the "n" of
    // "Lon 0.12" satisfies the northing pattern, so a lat/long pair was also
    // reported as a bogus easting/northing.
    const eastingMatch = coordinateText?.match(/\bE(?:asting)?[:=]?\s*(\d+(?:\.\d+)?)/i)
        ?? rawText.match(/\bE(?:asting)?[:=]?\s*(\d+(?:\.\d+)?)/i);
    const northingMatch = coordinateText?.match(/\bN(?:orthing)?[:=]?\s*(\d+(?:\.\d+)?)/i)
        ?? rawText.match(/\bN(?:orthing)?[:=]?\s*(\d+(?:\.\d+)?)/i);
    const latitudeMatch = coordinateText?.match(/\bLat(?:itude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i)
        ?? rawText.match(/\bLat(?:itude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i);
    const longitudeMatch = coordinateText?.match(/\bLon(?:gitude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i)
        ?? rawText.match(/\bLon(?:gitude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i);
    const coordinateSystem = firstMatchingText(rawText, [
        /\bUTM\s+Zone\s+\d{1,2}[NS]?\b/i,
        /\bWGS ?84\b/i,
        /\bBNG\b/i,
        /\bBritish National Grid\b/i,
        /\bMGA\s+Zone\s+\d{1,2}\b/i,
    ]);
    const value = {
        // IDs such as "BH 12" are normalised to "BH12".
        boreholeId: (boreholeMatch?.[0] ?? boreholeId)?.replace(/\s+/g, '') ?? boreholeId ?? 'BH-unknown',
    };
    if (totalDepthMatch?.[1]) {
        value.totalDepth = Number(totalDepthMatch[1]);
    }
    if (elevationMatch?.[1]) {
        value.groundElevation = Number(elevationMatch[1]);
    }
    if (pageInfo) {
        value.pageInfo = pageInfo;
    }
    if (coordinateText || eastingMatch?.[1] || northingMatch?.[1] || latitudeMatch?.[1] || longitudeMatch?.[1]) {
        value.coordinates = {
            easting: eastingMatch?.[1] ? Number(eastingMatch[1]) : null,
            northing: northingMatch?.[1] ? Number(northingMatch[1]) : null,
            latitude: latitudeMatch?.[1] ? Number(latitudeMatch[1]) : null,
            longitude: longitudeMatch?.[1] ? Number(longitudeMatch[1]) : null,
            coordinateSystem,
            rawText: coordinateText,
        };
    }
    // `boreholeId` is always set, so more than one key means something was found.
    const extractedSomething = Object.keys(value).length > 1;
    return {
        value,
        baseStatus: extractedSomething ? 'partial' : 'failed',
        warnings: extractedSomething
            ? ['Metadata pass returned narrative text; extracted partial borehole metadata fields.']
            : [],
    };
}
778
/**
 * Run a text-only generation with one relaxed retry.
 *
 * Attempt 1 uses JSON mode at temperature 0.1 and is accepted only when the
 * reply has more than 10 non-whitespace-trimmed characters. Attempt 2 drops
 * JSON mode, raises the temperature to 0.25, appends a best-effort instruction
 * to the system prompt and adds 250 tokens to the budget; any non-empty reply
 * is accepted. Errors are swallowed only when
 * `isRecoverableVisionEmptyResponse` recognises them; anything else is rethrown.
 *
 * @param {string} prompt - User prompt, sent unchanged on both attempts.
 * @param {object} config - LLM configuration passed through to the router.
 * @param {string} systemPrompt - Base system prompt.
 * @param {number} maxTokens - Token budget for the first attempt.
 * @returns {Promise<{ text: string, latencyMs: number, usedFallback: boolean }>}
 *   `text` is '' when both attempts came back empty.
 */
async function textWithRetry(prompt, config, systemPrompt, maxTokens) {
    const startedAt = Date.now();
    const strictBudget = getHostedBetaVisionMaxTokens(config, 'structured-text', maxTokens);
    const relaxedBudget = getHostedBetaVisionMaxTokens(config, 'fallback-text', maxTokens + 250);
    // Returns the reply when its trimmed text is longer than `minLength`,
    // otherwise null. Recoverable empty-response errors also yield null.
    const attempt = async (options, minLength) => {
        try {
            const reply = await generateText(prompt, config, options);
            return reply.text && reply.text.trim().length > minLength ? reply : null;
        }
        catch (error) {
            if (!isRecoverableVisionEmptyResponse(error)) {
                throw error;
            }
            return null;
        }
    };
    const strictReply = await attempt({
        systemPrompt,
        temperature: 0.1,
        jsonMode: true,
        maxTokens: strictBudget,
    }, 10);
    if (strictReply !== null) {
        return { text: strictReply.text, latencyMs: strictReply.latencyMs, usedFallback: false };
    }
    const relaxedReply = await attempt({
        systemPrompt: `${systemPrompt} Return best-effort structured information even if some fields are uncertain.`,
        temperature: 0.25,
        jsonMode: false,
        maxTokens: relaxedBudget,
    }, 0);
    // Fallback results report wall-clock time across both attempts.
    const elapsedMs = Date.now() - startedAt;
    return {
        text: relaxedReply !== null ? relaxedReply.text : '',
        latencyMs: elapsedMs,
        usedFallback: true,
    };
}
824
/**
 * OCR-style transcription of a document page through the vision retry helper.
 *
 * Sends a strict transcription prompt first and a terser OCR prompt on retry,
 * both with a 700-token budget, and returns the trimmed text.
 *
 * @param {string} imageBase64 - Base64-encoded page payload.
 * @param {string} mimeType - MIME type of the payload.
 * @param {object} config - LLM configuration passed through to the retry helper.
 * @returns {Promise<{ text: string, latencyMs: number, usedFallback: boolean, warnings: string[] }>}
 *   `warnings` carries a single note when the retry path produced the text.
 */
export async function transcribeDocumentImageText(imageBase64, mimeType, config) {
    const primaryPrompt = `Transcribe the visible text from this document page. Respond with plain text only.
- Preserve important line breaks when they help readability.
- Include borehole IDs, depths, coordinate text, dates, table values, and page markers if visible.
- Do not explain or summarize.
- If only fragments are legible, return the fragments you can read.`;
    const retryPrompt = `OCR this document image and return only the visible text. Preserve useful line breaks. Do not explain, analyze, summarize, infer, or guess missing text. If only fragments are legible, return only those fragments.`;
    const primarySystemPrompt = 'You are performing OCR-style transcription on a geotechnical document image. Return plain text only, with no markdown fences or commentary.';
    const retryOptions = {
        fallbackSystemPrompt: 'You are performing OCR-style transcription on a geotechnical document image. Return only the visible text. Do not explain, analyze, summarize, infer, or guess missing text. No markdown, bullets, or commentary.',
        fallbackTemperature: 0.15,
        fallbackMaxTokens: 700,
    };
    const { text, latencyMs, usedFallback } = await visionWithRetry(imageBase64, mimeType, config, primaryPrompt, retryPrompt, primarySystemPrompt, 700, retryOptions);
    const warnings = [];
    if (usedFallback) {
        warnings.push('OCR-style transcription required a fallback retry before returning text.');
    }
    return {
        text: text.trim(),
        latencyMs,
        usedFallback,
        warnings,
    };
}
843
/**
 * Metadata pass for a borehole log page: asks the vision model for the header
 * and title-block fields only (no layers), then merges the parsed JSON over a
 * regex fallback run on the same reply text.
 *
 * When the reply came from the retry path, a warning is added to the result,
 * matching what the text-based metadata pass and the OCR helper already do.
 *
 * @param {string} imageBase64 - Base64-encoded page payload.
 * @param {string} mimeType - MIME type of the payload.
 * @param {object} config - LLM configuration passed through to the retry helper.
 * @param {{ boreholeId?: string }} context - Caller context; `boreholeId` is the
 *   fallback ID when none is read from the page.
 * @returns {Promise<object>} Parse-safety fields plus boreholeId, totalDepth,
 *   projectName, dateDrilled, drillingMethod, groundElevation, pageInfo,
 *   layoutNotes, location, rawLLMText and latencyMs.
 */
async function extractBoreholeMetadata(imageBase64, mimeType, config, context) {
    const strictPrompt = `Extract only the borehole log metadata visible on this page. Respond with ONLY a JSON object:
{
"boreholeId": "<ID if visible, or 'BH-unknown'>",
"projectName": "<project name or null>",
"dateDrilled": "<date or date range or null>",
"drillingMethod": "<method or null>",
"groundElevation": <number in meters or null>,
"totalDepth": <number in meters or null>,
"pageInfo": "<page 1 of 3 or null>",
"layoutNotes": "<brief description of layout and depth scale placement or null>",
"coordinates": {
"easting": <number or null>,
"northing": <number or null>,
"latitude": <number or null>,
"longitude": <number or null>,
"coordinateSystem": "<declared CRS or null>",
"rawText": "<exact coordinate text or null>"
},
"confidence": <number 0-100>,
"warnings": ["<warning>", "<warning>"]
}`;
    const softPrompt = `Inspect only the borehole log header, title block, margins, and information boxes.
Extract:
1. Borehole ID
2. Project name
3. Date drilled
4. Drilling method
5. Ground elevation
6. Total depth if printed
7. Page information
8. Layout notes
9. Any coordinates exactly as printed, plus parsed numeric fields when visible
Do not extract layers or soil descriptions in this pass.`;
    const response = await visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, 'You are an expert geotechnical engineer extracting borehole log metadata. Focus on the header area, coordinate text, and document layout. Respond with JSON only.', 900);
    const parsed = parseJsonObject(response.text);
    const narrativeFallback = extractBoreholeMetadataFallback(response.text, context.boreholeId);
    // Parsed JSON wins over regex-extracted values for any key present in both.
    const mergedValue = {
        ...(narrativeFallback.value ?? {}),
        ...(parsed.value ?? {}),
    };
    const baseStatus = parsed.baseStatus !== 'failed' ? parsed.baseStatus : narrativeFallback.baseStatus;
    const warnings = [...parsed.warnings, ...narrativeFallback.warnings];
    if (response.usedFallback) {
        // The retry runs a softer prompt at a higher temperature, so flag it
        // for reviewers instead of dropping the signal.
        warnings.push('Metadata extraction required a vision fallback retry before structured parsing succeeded.');
    }
    const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? context.boreholeId ?? 'BH-unknown';
    const projectName = readOptionalString(mergedValue, 'projectName');
    const dateDrilled = readOptionalString(mergedValue, 'dateDrilled');
    const drillingMethod = readOptionalString(mergedValue, 'drillingMethod');
    const groundElevation = readOptionalNumber(mergedValue, 'groundElevation');
    const totalDepth = readOptionalNumber(mergedValue, 'totalDepth');
    const pageInfo = readOptionalString(mergedValue, 'pageInfo');
    const layoutNotes = readOptionalString(mergedValue, 'layoutNotes');
    const location = buildVisionBoreholeLocation(mergedValue, groundElevation, resolvedBoreholeId);
    const confidence = clampConfidence(mergedValue.confidence, baseStatus === 'parsed' ? 78 : baseStatus === 'partial' ? 60 : 0);
    // Count how many of the four key fields were recovered; the placeholder ID
    // does not count as evidence.
    const recoveredFieldCount = [
        resolvedBoreholeId !== 'BH-unknown' ? resolvedBoreholeId : null,
        totalDepth,
        location,
        pageInfo,
    ].filter((value) => value !== null).length;
    const status = deriveParseStatus(baseStatus, recoveredFieldCount, 2);
    const safety = createParseSafety(status, confidence, combineWarnings(warnings, normalizeWarnings(mergedValue.warnings)));
    return {
        ...safety,
        boreholeId: resolvedBoreholeId,
        totalDepth,
        projectName,
        dateDrilled,
        drillingMethod,
        groundElevation,
        pageInfo,
        layoutNotes,
        location,
        rawLLMText: response.text,
        latencyMs: response.latencyMs,
    };
}
914
/**
 * Decide whether a borehole page should be interpreted from its extracted
 * text instead of being sent to the model as a document.
 *
 * True only for PDF input that comes with a page-text hint of at least 24
 * trimmed characters, and only when the provider capability check reports no
 * native PDF document support.
 *
 * @param {string} mimeType - MIME type of the page payload.
 * @param {object} config - LLM configuration handed to the capability check.
 * @param {{ pageTextHint?: unknown }} context - Page context carrying the text hint.
 * @returns {boolean}
 */
function shouldUseTextOnlyBoreholeInterpretation(mimeType, config, context) {
    if (mimeType !== 'application/pdf') {
        return false;
    }
    const hint = context.pageTextHint;
    if (typeof hint !== 'string' || hint.trim().length < 24) {
        return false;
    }
    return !providerSupportsNativePdfDocuments(config);
}
920
/**
 * Metadata pass for a borehole log page driven by extracted page text rather
 * than pixels.
 *
 * Whitespace in `pageText` is collapsed first. Blank input short-circuits to a
 * failed result without calling the model. Otherwise the first 6000 characters
 * are embedded in the prompt, and the parsed JSON reply is merged over a regex
 * fallback run on the normalised page text.
 *
 * @param {string} pageText - OCR or native text of the page.
 * @param {object} config - LLM configuration passed through to the text retry helper.
 * @param {{ boreholeId?: string }} context - Caller context; `boreholeId` is the
 *   fallback ID when none is read from the text.
 * @returns {Promise<object>} Parse-safety fields plus boreholeId, totalDepth,
 *   projectName, dateDrilled, drillingMethod, groundElevation, pageInfo,
 *   layoutNotes, location, rawLLMText and latencyMs.
 */
async function extractBoreholeMetadataFromText(pageText, config, context) {
    const normalizedPageText = pageText.replace(/\s+/g, ' ').trim();
    if (normalizedPageText.length === 0) {
        const failedSafety = createParseSafety('failed', 0, ['No usable borehole page text was available for metadata extraction.']);
        return {
            ...failedSafety,
            boreholeId: context.boreholeId ?? 'BH-unknown',
            projectName: null,
            dateDrilled: null,
            drillingMethod: null,
            groundElevation: null,
            totalDepth: null,
            pageInfo: null,
            layoutNotes: null,
            location: null,
            rawLLMText: '',
            latencyMs: 0,
        };
    }
    const strictPrompt = `Extract only the borehole log metadata from this extracted page text. Respond with ONLY a JSON object:
{
"boreholeId": "<ID if visible, or 'BH-unknown'>",
"projectName": "<project name or null>",
"dateDrilled": "<date or date range or null>",
"drillingMethod": "<method or null>",
"groundElevation": <number in meters or null>,
"totalDepth": <number in meters or null>,
"pageInfo": "<page 1 of 3 or null>",
"layoutNotes": "<brief description of layout and depth scale placement or null>",
"coordinates": {
"easting": <number or null>,
"northing": <number or null>,
"latitude": <number or null>,
"longitude": <number or null>,
"coordinateSystem": "<declared CRS or null>",
"rawText": "<exact coordinate text or null>"
},
"confidence": <number 0-100>,
"warnings": ["<warning>", "<warning>"]
}

Extract only header/title-block/style metadata from this page text. Do not invent layers.

Page text:
${normalizedPageText.slice(0, 6000)}`;
    const systemPrompt = 'You are an expert geotechnical engineer extracting borehole log metadata from OCR/native page text. Respond with JSON only when possible.';
    const response = await textWithRetry(strictPrompt, config, systemPrompt, 900);
    const parsed = parseJsonObject(response.text);
    // The regex fallback scans the page text itself, not the model reply.
    const narrativeFallback = extractBoreholeMetadataFallback(normalizedPageText, context.boreholeId);
    // Parsed JSON wins over regex-extracted values for any key present in both.
    const mergedValue = { ...(narrativeFallback.value ?? {}), ...(parsed.value ?? {}) };
    let baseStatus = parsed.baseStatus;
    if (baseStatus === 'failed') {
        baseStatus = narrativeFallback.baseStatus;
    }
    const warnings = parsed.warnings.concat(narrativeFallback.warnings);
    if (response.usedFallback) {
        warnings.push('Metadata extraction required a text fallback retry before structured parsing succeeded.');
    }
    const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? context.boreholeId ?? 'BH-unknown';
    const projectName = readOptionalString(mergedValue, 'projectName');
    const dateDrilled = readOptionalString(mergedValue, 'dateDrilled');
    const drillingMethod = readOptionalString(mergedValue, 'drillingMethod');
    const groundElevation = readOptionalNumber(mergedValue, 'groundElevation');
    const totalDepth = readOptionalNumber(mergedValue, 'totalDepth');
    const pageInfo = readOptionalString(mergedValue, 'pageInfo');
    const layoutNotes = readOptionalString(mergedValue, 'layoutNotes');
    const location = buildVisionBoreholeLocation(mergedValue, groundElevation, resolvedBoreholeId);
    let defaultConfidence = 0;
    if (baseStatus === 'parsed') {
        defaultConfidence = 74;
    }
    else if (baseStatus === 'partial') {
        defaultConfidence = 58;
    }
    const confidence = clampConfidence(mergedValue.confidence, defaultConfidence);
    // Count how many of the four key fields were recovered; the placeholder ID
    // does not count as evidence.
    const evidence = [
        resolvedBoreholeId !== 'BH-unknown' ? resolvedBoreholeId : null,
        totalDepth,
        location,
        pageInfo,
    ];
    const recoveredFieldCount = evidence.filter((value) => value !== null).length;
    const status = deriveParseStatus(baseStatus, recoveredFieldCount, 2);
    const safety = createParseSafety(status, confidence, combineWarnings(warnings, normalizeWarnings(mergedValue.warnings)));
    return {
        ...safety,
        boreholeId: resolvedBoreholeId,
        totalDepth,
        projectName,
        dateDrilled,
        drillingMethod,
        groundElevation,
        pageInfo,
        layoutNotes,
        location,
        rawLLMText: response.text,
        latencyMs: response.latencyMs,
    };
}
1004
+ function buildBoreholeLayerPrompts(context, metadata) {
1005
+ const locationCode = metadata.location?.crs?.code ?? metadata.location?.crs?.name ?? null;
1006
+ const locationRawText = typeof metadata.location?.raw?.rawCoordinateText === 'string'
1007
+ ? metadata.location.raw.rawCoordinateText
1008
+ : null;
1009
+ const contextParts = [
1010
+ `Borehole ID: ${metadata.boreholeId}`,
1011
+ locationCode ? `Coordinate system: ${locationCode}` : null,
1012
+ locationRawText ? `Coordinate text: ${locationRawText}` : null,
1013
+ context.pageNumber != null && context.totalPages != null
1014
+ ? `Page ${context.pageNumber} of ${context.totalPages}`
1015
+ : null,
1016
+ context.pageClassification ? `Page classification: ${context.pageClassification}` : null,
1017
+ context.priorContinuationDepth != null
1018
+ ? `Previous pages continued to ${context.priorContinuationDepth.toFixed(2)} m depth. Continue from there unless the page clearly restarts at a new borehole.`
1019
+ : null,
1020
+ context.pageTextHint ? `Native text hint: ${context.pageTextHint.slice(0, 400)}` : null,
1021
+ metadata.rawLLMText ? `Metadata/layout notes: ${metadata.rawLLMText.slice(0, 300)}` : null,
1022
+ ].filter((value) => Boolean(value));
1023
+ const sharedContext = contextParts.join('\n');
1024
+ return {
1025
+ strictPrompt: `Extract structured borehole log layer data from this page. Respond with ONLY a JSON object:
1026
+ {
1027
+ "boreholeId": "<ID if visible, or ${JSON.stringify(metadata.boreholeId)}>",
1028
+ "totalDepth": <number in meters or null>,
647
1029
  "waterTableDepth": <number in meters or null>,
1030
+ "continuationDepth": <deepest visible depth on this page or null>,
648
1031
  "layers": [
649
1032
  {
650
1033
  "depthFrom": <m>,
@@ -656,26 +1039,67 @@ export async function interpretBoreholeLog(imageBase64, mimeType, config, boreho
656
1039
  "notes": "<any additional notes>"
657
1040
  }
658
1041
  ],
659
- "summary": "<brief engineering summary of the borehole>",
1042
+ "summary": "<brief engineering summary of this page and its strata>",
660
1043
  "confidence": <number 0-100>,
661
1044
  "warnings": ["<warning>", "<warning>"]
662
- }`;
663
- const softPrompt = `Read this borehole log carefully and extract the key structured data:
1045
+ }
1046
+
1047
+ Context:
1048
+ ${sharedContext}`,
1049
+ softPrompt: `Read this borehole log page carefully and extract:
664
1050
  1. Borehole ID if visible
665
- 2. Total depth in meters
1051
+ 2. Total depth on the page
666
1052
  3. Water table depth if shown
667
- 4. Each stratigraphic layer with depthFrom, depthTo, description, USCS symbol, SPT N, water content, and notes
668
- 5. A brief engineering summary
669
- Provide approximate values where necessary, but keep the structure complete.`;
670
- const response = await visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, 'You are an expert geotechnical engineer extracting data from borehole log documents. Be precise with depths, descriptions, and test values. Respond with JSON only.', 1500);
1053
+ 4. The deepest continuation depth visible on the page
1054
+ 5. Each stratigraphic layer with depthFrom, depthTo, description, USCS symbol, SPT N, water content, and notes
1055
+ 6. A brief engineering summary
1056
+
1057
+ Context:
1058
+ ${sharedContext}
1059
+
1060
+ Adapt to the visible layout. Continue from previous pages when the depth scale clearly carries on.`,
1061
+ systemPrompt: 'You are an expert geotechnical engineer extracting layer data from borehole log documents. Adapt to varying log layouts, preserve depth continuity, and respond with JSON only.',
1062
+ };
1063
+ }
1064
+ async function interpretBoreholeLogTextWithContext(pageText, config, context = {}) {
1065
+ const normalizedPageText = pageText.replace(/\s+/g, ' ').trim();
1066
+ if (!normalizedPageText) {
1067
+ return {
1068
+ ...createParseSafety('failed', 0, ['No usable borehole page text was available for text-based interpretation.']),
1069
+ boreholeId: context.boreholeId ?? 'BH-unknown',
1070
+ totalDepth: null,
1071
+ waterTableDepth: null,
1072
+ layers: [],
1073
+ summary: null,
1074
+ location: null,
1075
+ groundElevation: null,
1076
+ dateDrilled: null,
1077
+ drillingMethod: null,
1078
+ projectName: null,
1079
+ continuationDepth: null,
1080
+ pageNumber: context.pageNumber ?? null,
1081
+ totalPages: context.totalPages ?? null,
1082
+ rawLLMText: '',
1083
+ latencyMs: 0,
1084
+ };
1085
+ }
1086
+ const metadata = await extractBoreholeMetadataFromText(normalizedPageText, config, context);
1087
+ const prompts = buildBoreholeLayerPrompts({
1088
+ ...context,
1089
+ pageTextHint: normalizedPageText,
1090
+ }, metadata);
1091
+ const response = await textWithRetry(`${prompts.strictPrompt}\n\nBorehole page text:\n${normalizedPageText.slice(0, 7000)}`, config, `${prompts.systemPrompt} Work from OCR/native extracted page text rather than pixels when needed.`, 1500);
671
1092
  const parsed = parseJsonObject(response.text);
672
- const narrativeFallback = extractBoreholeFallback(response.text, boreholeId);
1093
+ const narrativeFallback = extractBoreholeFallback(normalizedPageText, metadata.boreholeId);
673
1094
  const mergedValue = {
674
1095
  ...(narrativeFallback.value ?? {}),
675
1096
  ...(parsed.value ?? {}),
676
1097
  };
677
1098
  const baseStatus = parsed.baseStatus !== 'failed' ? parsed.baseStatus : narrativeFallback.baseStatus;
678
- const warnings = [...parsed.warnings, ...narrativeFallback.warnings];
1099
+ const warnings = [...metadata.warnings, ...parsed.warnings, ...narrativeFallback.warnings];
1100
+ if (response.usedFallback) {
1101
+ warnings.push('Layer extraction required a text fallback retry before structured parsing succeeded.');
1102
+ }
679
1103
  const parsedLayers = Array.isArray(mergedValue.layers)
680
1104
  ? mergedValue.layers
681
1105
  : [];
@@ -696,12 +1120,104 @@ Provide approximate values where necessary, but keep the structure complete.`;
696
1120
  warnings.push(...layerWarnings.map((warning) => `Layer warning: ${warning}`));
697
1121
  return item;
698
1122
  });
699
- const totalDepth = readNumber(mergedValue, 'totalDepth', warnings);
1123
+ const totalDepth = readOptionalNumber(mergedValue, 'totalDepth') ?? metadata.totalDepth;
700
1124
  const waterTableDepth = mergedValue.waterTableDepth == null
701
1125
  ? null
702
1126
  : readNumber(mergedValue, 'waterTableDepth', warnings);
703
- const summary = readString(mergedValue, 'summary', warnings);
704
- const resolvedBoreholeId = readString(mergedValue, 'boreholeId', []) ?? boreholeId ?? 'BH-unknown';
1127
+ const summary = readOptionalString(mergedValue, 'summary');
1128
+ const continuationDepth = readOptionalNumber(mergedValue, 'continuationDepth')
1129
+ ?? layers.reduce((maxDepth, layer) => {
1130
+ if (layer.depthTo == null)
1131
+ return maxDepth;
1132
+ return maxDepth == null ? layer.depthTo : Math.max(maxDepth, layer.depthTo);
1133
+ }, null)
1134
+ ?? totalDepth;
1135
+ const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? metadata.boreholeId ?? context.boreholeId ?? 'BH-unknown';
1136
+ const confidence = clampConfidence(mergedValue.confidence, baseStatus === 'parsed' ? 72 : baseStatus === 'partial' ? 55 : 0);
1137
+ const status = deriveParseStatus(baseStatus, [totalDepth, summary, layers.length > 0 ? 'layers' : null].filter((value) => value !== null)
1138
+ .length, 3);
1139
+ const safety = createParseSafety(status, confidence, combineWarnings(warnings, normalizeWarnings(mergedValue.warnings)));
1140
+ return {
1141
+ ...safety,
1142
+ boreholeId: resolvedBoreholeId,
1143
+ totalDepth,
1144
+ waterTableDepth,
1145
+ layers,
1146
+ summary,
1147
+ location: metadata.location,
1148
+ groundElevation: metadata.groundElevation,
1149
+ dateDrilled: metadata.dateDrilled,
1150
+ drillingMethod: metadata.drillingMethod,
1151
+ projectName: metadata.projectName,
1152
+ continuationDepth,
1153
+ pageNumber: context.pageNumber ?? null,
1154
+ totalPages: context.totalPages ?? null,
1155
+ rawLLMText: [metadata.rawLLMText, response.text].filter(Boolean).join('\n\n'),
1156
+ latencyMs: metadata.latencyMs + response.latencyMs,
1157
+ };
1158
+ }
1159
/**
 * Rank a parsed borehole location so the most useful one can be picked when
 * several pages report a location. Higher scores are better.
 *
 * Scoring (additive):
 *  - `wgs84` present:      +100
 *  - `projected` present:  +70
 *  - `crs.confidence`:     +round(confidence * 20) when it is a finite number
 *  - non-empty `raw`:      +5
 *
 * @param {object|null|undefined} location Location object as produced by the
 *   borehole metadata extraction; falsy values score 0.
 * @returns {number} A finite score; never NaN.
 */
function scoreBoreholeLocation(location) {
    if (!location) {
        return 0;
    }
    let score = 0;
    if (location.wgs84) {
        score += 100;
    }
    if (location.projected) {
        score += 70;
    }
    const crsConfidence = location.crs?.confidence;
    if (crsConfidence != null) {
        // A non-numeric or NaN confidence must not poison the whole score:
        // a NaN score makes sort comparators return NaN, which ranks this
        // location as "equal" to every other one regardless of its coordinates.
        // NOTE(review): the weight of 20 suggests confidence is on a 0..1 scale - confirm.
        const numericConfidence = Number(crsConfidence);
        if (Number.isFinite(numericConfidence)) {
            score += Math.round(numericConfidence * 20);
        }
    }
    if (location.raw && Object.keys(location.raw).length > 0) {
        score += 5;
    }
    return score;
}
1173
+ export async function interpretBoreholeLogWithContext(imageBase64, mimeType, config, context = {}) {
1174
+ if (shouldUseTextOnlyBoreholeInterpretation(mimeType, config, context)) {
1175
+ return interpretBoreholeLogTextWithContext(context.pageTextHint ?? '', config, context);
1176
+ }
1177
+ const metadata = await extractBoreholeMetadata(imageBase64, mimeType, config, context);
1178
+ const prompts = buildBoreholeLayerPrompts(context, metadata);
1179
+ const response = await visionWithRetry(imageBase64, mimeType, config, prompts.strictPrompt, prompts.softPrompt, prompts.systemPrompt, 1500);
1180
+ const parsed = parseJsonObject(response.text);
1181
+ const narrativeFallback = extractBoreholeFallback(response.text, metadata.boreholeId);
1182
+ const mergedValue = {
1183
+ ...(narrativeFallback.value ?? {}),
1184
+ ...(parsed.value ?? {}),
1185
+ };
1186
+ const baseStatus = parsed.baseStatus !== 'failed' ? parsed.baseStatus : narrativeFallback.baseStatus;
1187
+ const warnings = [...metadata.warnings, ...parsed.warnings, ...narrativeFallback.warnings];
1188
+ const parsedLayers = Array.isArray(mergedValue.layers)
1189
+ ? mergedValue.layers
1190
+ : [];
1191
+ if (!Array.isArray(mergedValue.layers)) {
1192
+ warnings.push('Missing or invalid "layers" array.');
1193
+ }
1194
+ const layers = parsedLayers.map((layer) => {
1195
+ const layerWarnings = [];
1196
+ const item = {
1197
+ depthFrom: readNumber(layer, 'depthFrom', layerWarnings),
1198
+ depthTo: readNumber(layer, 'depthTo', layerWarnings),
1199
+ description: readString(layer, 'description', layerWarnings),
1200
+ uscsSymbol: readString(layer, 'uscsSymbol', []),
1201
+ sptN: layer.sptN == null ? null : readNumber(layer, 'sptN', layerWarnings),
1202
+ waterContent: layer.waterContent == null ? null : readNumber(layer, 'waterContent', layerWarnings),
1203
+ notes: readString(layer, 'notes', []),
1204
+ };
1205
+ warnings.push(...layerWarnings.map((warning) => `Layer warning: ${warning}`));
1206
+ return item;
1207
+ });
1208
+ const totalDepth = readOptionalNumber(mergedValue, 'totalDepth') ?? metadata.totalDepth;
1209
+ const waterTableDepth = mergedValue.waterTableDepth == null
1210
+ ? null
1211
+ : readNumber(mergedValue, 'waterTableDepth', warnings);
1212
+ const summary = readOptionalString(mergedValue, 'summary');
1213
+ const continuationDepth = readOptionalNumber(mergedValue, 'continuationDepth')
1214
+ ?? layers.reduce((maxDepth, layer) => {
1215
+ if (layer.depthTo == null)
1216
+ return maxDepth;
1217
+ return maxDepth == null ? layer.depthTo : Math.max(maxDepth, layer.depthTo);
1218
+ }, null)
1219
+ ?? totalDepth;
1220
+ const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? metadata.boreholeId ?? context.boreholeId ?? 'BH-unknown';
705
1221
  const confidence = clampConfidence(mergedValue.confidence, baseStatus === 'parsed' ? 75 : baseStatus === 'partial' ? 58 : 0);
706
1222
  const status = deriveParseStatus(baseStatus, [totalDepth, summary, layers.length > 0 ? 'layers' : null].filter((value) => value !== null)
707
1223
  .length, 3);
@@ -713,12 +1229,128 @@ Provide approximate values where necessary, but keep the structure complete.`;
713
1229
  waterTableDepth,
714
1230
  layers,
715
1231
  summary,
716
- rawLLMText: response.text,
717
- latencyMs: response.latencyMs,
1232
+ location: metadata.location,
1233
+ groundElevation: metadata.groundElevation,
1234
+ dateDrilled: metadata.dateDrilled,
1235
+ drillingMethod: metadata.drillingMethod,
1236
+ projectName: metadata.projectName,
1237
+ continuationDepth,
1238
+ pageNumber: context.pageNumber ?? null,
1239
+ totalPages: context.totalPages ?? null,
1240
+ rawLLMText: [metadata.rawLLMText, response.text].filter(Boolean).join('\n\n'),
1241
+ latencyMs: metadata.latencyMs + response.latencyMs,
1242
+ };
1243
+ }
1244
/**
 * Interpret a borehole log image when the only known context is an optional
 * borehole ID. Wraps the ID in a context object and delegates to
 * `interpretBoreholeLogWithContext`.
 *
 * @param {string} imageBase64 Base64-encoded page image.
 * @param {string} mimeType MIME type of the encoded image.
 * @param {object} config Configuration forwarded unchanged to the delegate.
 * @param {string} [boreholeId] Borehole identifier, if known.
 * @returns {Promise<object>} The result produced by `interpretBoreholeLogWithContext`.
 */
export async function interpretBoreholeLog(imageBase64, mimeType, config, boreholeId) {
    const context = { boreholeId };
    return interpretBoreholeLogWithContext(imageBase64, mimeType, config, context);
}
1247
/**
 * Combine per-page borehole interpretations into a single borehole log result.
 *
 * Pages that contributed nothing (no layers, total depth, summary or location)
 * are ignored for the merged values unless every page is empty, in which case
 * all pages are used. Warnings, raw LLM text and latency always cover every
 * supplied page.
 *
 * Merge rules:
 *  - layers are de-duplicated on (depthFrom, depthTo, description, uscsSymbol,
 *    sptN) and sorted by `depthFrom`, layers without a `depthFrom` last;
 *  - `totalDepth` / `continuationDepth` take the maximum, `waterTableDepth`
 *    the minimum of the reported values;
 *  - metadata (project name, drilling method, date, ground elevation) comes
 *    from the first page carrying any metadata, and each field that page
 *    lacks is filled from the first other page that has it;
 *  - `confidence` is the rounded mean of the contributing pages.
 *
 * @param {Array<{pageNumber: *, result: object}>} pages Per-page interpretation
 *   results, each tagged with the page number used in warning prefixes.
 * @param {string} [overrideBoreholeId] Caller-supplied ID that wins over any
 *   detected ID and suppresses the "multiple borehole IDs" warning.
 * @returns {object} Merged borehole result including `parseStatus`,
 *   `confidence`, `warnings` and `canAutoProceed`.
 */
export function mergeBoreholeLogPages(pages, overrideBoreholeId) {
    const validPages = pages.filter(({ result }) => (result.layers.length > 0
        || result.totalDepth != null
        || result.summary
        || result.location));
    const sourcePages = validPages.length > 0 ? validPages : pages;
    const results = sourcePages.map(({ result }) => result);

    // Fold the non-null values of one numeric field with `pick` (Math.max / Math.min).
    const foldField = (field, pick, seed = null) => results.reduce((selected, result) => {
        const value = result[field];
        if (value == null) {
            return selected;
        }
        return selected == null ? value : pick(selected, value);
    }, seed);
    // First usable value of a field across the contributing pages, in page order.
    const firstDefined = (field) => results
        .map((result) => result[field])
        .find((value) => value != null && value !== '');

    const distinctBoreholeIds = [
        ...new Set(results
            .map((result) => result.boreholeId)
            .filter((value) => value && value !== 'BH-unknown')),
    ];

    // The first occurrence of a layer wins; repeats on continuation pages are dropped.
    const deduped = new Map();
    for (const layer of results.flatMap((result) => result.layers)) {
        const key = [
            layer.depthFrom ?? 'na',
            layer.depthTo ?? 'na',
            (layer.description ?? '').trim().toLowerCase(),
            (layer.uscsSymbol ?? '').trim().toUpperCase(),
            layer.sptN ?? 'na',
        ].join('|');
        if (!deduped.has(key)) {
            deduped.set(key, layer);
        }
    }
    const mergedLayers = [...deduped.values()].sort((left, right) => {
        const leftDepth = left.depthFrom ?? Number.POSITIVE_INFINITY;
        const rightDepth = right.depthFrom ?? Number.POSITIVE_INFINITY;
        // Guard the tie explicitly: Infinity - Infinity would be NaN when both
        // layers lack a start depth.
        if (leftDepth === rightDepth) {
            return 0;
        }
        return leftDepth - rightDepth;
    });

    const summaries = [
        ...new Set(results
            .map((result) => result.summary?.trim())
            .filter((value) => Boolean(value))),
    ];
    const warnings = [
        ...new Set(pages.flatMap(({ pageNumber, result }) => result.warnings.map((warning) => `Page ${pageNumber}: ${warning}`))),
    ];
    if (distinctBoreholeIds.length > 1 && !overrideBoreholeId) {
        warnings.push(`Multiple borehole IDs were detected across the supplied pages (${distinctBoreholeIds.join(', ')}). Split the PDF by borehole for a safer interpretation.`);
    }

    const confidences = results.map((result) => result.confidence);
    const averageConfidence = confidences.length > 0
        ? Math.round(confidences.reduce((sum, value) => sum + value, 0) / confidences.length)
        : 0;

    const totalDepth = foldField('totalDepth', Math.max);
    const waterTableDepth = foldField('waterTableDepth', Math.min);
    // Never report a continuation depth shallower than the known total depth.
    const continuationDepth = foldField('continuationDepth', Math.max, totalDepth);

    const bestLocation = results
        .map((result) => result.location)
        .filter((value) => value != null)
        .sort((left, right) => scoreBoreholeLocation(right) - scoreBoreholeLocation(left))[0] ?? null;

    const firstMetadataPage = results.find((result) => result.projectName
        || result.drillingMethod
        || result.dateDrilled
        || result.groundElevation != null) ?? results[0];

    let parseStatus = 'failed';
    if (mergedLayers.length > 0 && totalDepth != null) {
        parseStatus = 'parsed';
    }
    else if (mergedLayers.length > 0 || summaries.length > 0 || totalDepth != null) {
        parseStatus = 'partial';
    }

    return {
        boreholeId: overrideBoreholeId
            ?? distinctBoreholeIds[0]
            ?? 'BH-unknown',
        totalDepth,
        waterTableDepth,
        layers: mergedLayers,
        summary: summaries.length > 0 ? summaries.join(' ') : null,
        location: bestLocation,
        // Location-derived elevations keep their original precedence; an
        // elevation found only on another page is the last resort.
        groundElevation: firstMetadataPage?.groundElevation
            ?? bestLocation?.groundLevel
            ?? bestLocation?.projected?.elevation
            ?? bestLocation?.wgs84?.elevation
            ?? firstDefined('groundElevation')
            ?? null,
        dateDrilled: firstMetadataPage?.dateDrilled ?? firstDefined('dateDrilled') ?? null,
        drillingMethod: firstMetadataPage?.drillingMethod ?? firstDefined('drillingMethod') ?? null,
        projectName: firstMetadataPage?.projectName ?? firstDefined('projectName') ?? null,
        continuationDepth,
        pageNumber: results[0]?.pageNumber ?? null,
        totalPages: results[0]?.totalPages ?? null,
        rawLLMText: pages.map(({ pageNumber, result }) => `[Page ${pageNumber}]\n${result.rawLLMText}`).join('\n\n'),
        latencyMs: pages.reduce((sum, { result }) => sum + result.latencyMs, 0),
        parseStatus,
        confidence: averageConfidence,
        warnings,
        canAutoProceed: parseStatus === 'parsed' && averageConfidence >= 70,
    };
}
720
1352
  export async function queryGBRDocument(question, documentBase64, mimeType, config) {
721
- const response = await visionWithRetry(documentBase64, mimeType, config, `Based on this Geotechnical Baseline Report, answer the following question with a concise, technically accurate response and specific page/section references where possible:\n\n${question}`, `Read this Geotechnical Baseline Report and answer the question directly:\n\n${question}\n\nUse specific values, limits, assumptions, and references if they are visible in the document.`, 'You are an expert geotechnical engineer analyzing a Geotechnical Baseline Report (GBR). Provide precise, actionable answers referencing specific data from the document.', 2000);
1353
+ const response = await visionWithRetry(documentBase64, mimeType, config, `Based on this Geotechnical Baseline Report, answer the following question with a concise, technically accurate response and specific page/section references where possible:\n\n${question}`, `Read this Geotechnical Baseline Report and answer the question directly:\n\n${question}\n\nUse specific values, limits, assumptions, and references if they are visible in the document.`, 'You are an expert geotechnical engineer analyzing a Geotechnical Baseline Report (GBR). Provide precise, actionable answers referencing specific data from the document.', getHostedBetaVisionMaxTokens(config, 'document-query', 2000));
722
1354
  if (!response.text.trim()) {
723
1355
  throw new Error('Hosted beta upstream returned no content. The document could not be interpreted. Try a smaller PNG or JPG export of the relevant pages.');
724
1356
  }