@geotechcli/core 0.4.22 → 0.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/brain.d.ts.map +1 -1
- package/dist/agents/brain.js +2 -1
- package/dist/agents/brain.js.map +1 -1
- package/dist/agents/data-tools.js +759 -0
- package/dist/agents/data-tools.js.map +1 -1
- package/dist/agents/swarm.d.ts.map +1 -1
- package/dist/agents/swarm.js +22 -2
- package/dist/agents/swarm.js.map +1 -1
- package/dist/agents/tool-runtime.d.ts +7 -0
- package/dist/agents/tool-runtime.d.ts.map +1 -0
- package/dist/agents/tool-runtime.js +9 -0
- package/dist/agents/tool-runtime.js.map +1 -0
- package/dist/config/index.d.ts +4 -4
- package/dist/config/index.js +1 -1
- package/dist/config/index.js.map +1 -1
- package/dist/geo/coordinates.d.ts +40 -0
- package/dist/geo/coordinates.d.ts.map +1 -0
- package/dist/geo/coordinates.js +461 -0
- package/dist/geo/coordinates.js.map +1 -0
- package/dist/geo/index.d.ts +1 -0
- package/dist/geo/index.d.ts.map +1 -1
- package/dist/geo/index.js +1 -0
- package/dist/geo/index.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/ingest/ags.d.ts +3 -0
- package/dist/ingest/ags.d.ts.map +1 -1
- package/dist/ingest/ags.js +98 -9
- package/dist/ingest/ags.js.map +1 -1
- package/dist/ingest/cpt.d.ts +4 -0
- package/dist/ingest/cpt.d.ts.map +1 -1
- package/dist/ingest/cpt.js +87 -25
- package/dist/ingest/cpt.js.map +1 -1
- package/dist/ingest/document-inputs.d.ts +37 -0
- package/dist/ingest/document-inputs.d.ts.map +1 -0
- package/dist/ingest/document-inputs.js +197 -0
- package/dist/ingest/document-inputs.js.map +1 -0
- package/dist/ingest/geotech-document.d.ts +118 -0
- package/dist/ingest/geotech-document.d.ts.map +1 -0
- package/dist/ingest/geotech-document.js +1006 -0
- package/dist/ingest/geotech-document.js.map +1 -0
- package/dist/ingest/geotech-extract.d.ts +86 -0
- package/dist/ingest/geotech-extract.d.ts.map +1 -0
- package/dist/ingest/geotech-extract.js +652 -0
- package/dist/ingest/geotech-extract.js.map +1 -0
- package/dist/ingest/geotech-schemas.d.ts +248 -0
- package/dist/ingest/geotech-schemas.d.ts.map +1 -0
- package/dist/ingest/geotech-schemas.js +150 -0
- package/dist/ingest/geotech-schemas.js.map +1 -0
- package/dist/ingest/index.d.ts +8 -0
- package/dist/ingest/index.d.ts.map +1 -1
- package/dist/ingest/index.js +8 -0
- package/dist/ingest/index.js.map +1 -1
- package/dist/ingest/ingest-job-child.d.ts +2 -0
- package/dist/ingest/ingest-job-child.d.ts.map +1 -0
- package/dist/ingest/ingest-job-child.js +45 -0
- package/dist/ingest/ingest-job-child.js.map +1 -0
- package/dist/ingest/job-store.d.ts +117 -0
- package/dist/ingest/job-store.d.ts.map +1 -0
- package/dist/ingest/job-store.js +541 -0
- package/dist/ingest/job-store.js.map +1 -0
- package/dist/ingest/job-worker.d.ts +24 -0
- package/dist/ingest/job-worker.d.ts.map +1 -0
- package/dist/ingest/job-worker.js +1129 -0
- package/dist/ingest/job-worker.js.map +1 -0
- package/dist/ingest/pdf.d.ts +102 -0
- package/dist/ingest/pdf.d.ts.map +1 -0
- package/dist/ingest/pdf.js +1544 -0
- package/dist/ingest/pdf.js.map +1 -0
- package/dist/ingest/review-store.d.ts +215 -0
- package/dist/ingest/review-store.d.ts.map +1 -0
- package/dist/ingest/review-store.js +1995 -0
- package/dist/ingest/review-store.js.map +1 -0
- package/dist/llm/capabilities.d.ts +8 -0
- package/dist/llm/capabilities.d.ts.map +1 -0
- package/dist/llm/capabilities.js +73 -0
- package/dist/llm/capabilities.js.map +1 -0
- package/dist/llm/index.d.ts +3 -2
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/providers/anthropic.d.ts +6 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -1
- package/dist/llm/providers/anthropic.js +10 -1
- package/dist/llm/providers/anthropic.js.map +1 -1
- package/dist/llm/providers/hosted-beta.d.ts +6 -0
- package/dist/llm/providers/hosted-beta.d.ts.map +1 -1
- package/dist/llm/providers/hosted-beta.js +40 -10
- package/dist/llm/providers/hosted-beta.js.map +1 -1
- package/dist/llm/providers/huggingface.d.ts +6 -0
- package/dist/llm/providers/huggingface.d.ts.map +1 -1
- package/dist/llm/providers/huggingface.js +21 -1
- package/dist/llm/providers/huggingface.js.map +1 -1
- package/dist/llm/providers/openai-compatible.d.ts +6 -0
- package/dist/llm/providers/openai-compatible.d.ts.map +1 -1
- package/dist/llm/providers/openai-compatible.js +21 -1
- package/dist/llm/providers/openai-compatible.js.map +1 -1
- package/dist/llm/providers/zhipu.d.ts +6 -0
- package/dist/llm/providers/zhipu.d.ts.map +1 -1
- package/dist/llm/providers/zhipu.js +15 -1
- package/dist/llm/providers/zhipu.js.map +1 -1
- package/dist/llm/router.d.ts +7 -0
- package/dist/llm/router.d.ts.map +1 -1
- package/dist/llm/router.js +33 -13
- package/dist/llm/router.js.map +1 -1
- package/dist/llm/types.d.ts +22 -4
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/llm/types.js.map +1 -1
- package/dist/meta/metadata.json +1 -1
- package/dist/report/html.d.ts +3 -0
- package/dist/report/html.d.ts.map +1 -0
- package/dist/report/html.js +626 -0
- package/dist/report/html.js.map +1 -0
- package/dist/report/index.d.ts +2 -0
- package/dist/report/index.d.ts.map +1 -1
- package/dist/report/index.js +2 -0
- package/dist/report/index.js.map +1 -1
- package/dist/report/ingest-dossier.d.ts +81 -0
- package/dist/report/ingest-dossier.d.ts.map +1 -0
- package/dist/report/ingest-dossier.js +324 -0
- package/dist/report/ingest-dossier.js.map +1 -0
- package/dist/storage/index.d.ts +5 -0
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +12 -6
- package/dist/storage/index.js.map +1 -1
- package/dist/vision/geotech-document.d.ts +46 -0
- package/dist/vision/geotech-document.d.ts.map +1 -0
- package/dist/vision/geotech-document.js +576 -0
- package/dist/vision/geotech-document.js.map +1 -0
- package/dist/vision/index.d.ts +31 -0
- package/dist/vision/index.d.ts.map +1 -1
- package/dist/vision/index.js +659 -27
- package/dist/vision/index.js.map +1 -1
- package/dist/vision/ocr.d.ts +29 -0
- package/dist/vision/ocr.d.ts.map +1 -0
- package/dist/vision/ocr.js +287 -0
- package/dist/vision/ocr.js.map +1 -0
- package/dist/vision/preprocess.d.ts +26 -0
- package/dist/vision/preprocess.d.ts.map +1 -0
- package/dist/vision/preprocess.js +194 -0
- package/dist/vision/preprocess.js.map +1 -0
- package/package.json +5 -1
package/dist/vision/index.js
CHANGED
|
@@ -1,6 +1,22 @@
|
|
|
1
|
-
import { generateVision, generateText } from '../llm/router.js';
|
|
1
|
+
import { generateDocumentVision, generateVision, generateText } from '../llm/router.js';
|
|
2
|
+
import { providerSupportsNativePdfDocuments } from '../llm/capabilities.js';
|
|
3
|
+
import { buildBoreholeLocation as buildStructuredBoreholeLocation } from '../geo/coordinates.js';
|
|
2
4
|
import { classifyRMR89 } from '../geo/classification.js';
|
|
3
5
|
import { clampConfidence, createParseSafety, deriveParseStatus, normalizeWarnings, parseJsonObject, readNumber, readString, } from './parse.js';
|
|
6
|
+
/**
 * Limits a requested completion budget when the configured provider is `hosted-beta`.
 *
 * Any other provider gets `requestedMaxTokens` back untouched. For `hosted-beta`
 * the result is the smaller of the request and the cap registered for `profile`.
 * A profile with no registered cap leaves the request unchanged; without that
 * guard `Math.min(n, undefined)` would evaluate to `NaN` and be sent upstream as
 * the token budget.
 *
 * @param {{ provider: string }} config - LLM configuration; only `provider` is read here.
 * @param {string} profile - Cap profile name, e.g. 'structured-vision' or 'ocr-vision'.
 * @param {number} requestedMaxTokens - Token budget the caller would like to use.
 * @returns {number} Token budget to pass on to the provider call.
 */
function getHostedBetaVisionMaxTokens(config, profile, requestedMaxTokens) {
    if (config.provider !== 'hosted-beta') {
        return requestedMaxTokens;
    }
    const capByProfile = {
        'structured-vision': 850,
        'fallback-vision': 950,
        'ocr-vision': 700,
        'structured-text': 700,
        'fallback-text': 850,
        'document-query': 900,
    };
    const cap = capByProfile[profile];
    // The typeof check also rejects inherited members such as 'toString',
    // which a plain-object lookup would otherwise return.
    if (typeof cap !== 'number') {
        return requestedMaxTokens;
    }
    return Math.min(requestedMaxTokens, cap);
}
|
|
4
20
|
// ---------------------------------------------------------------------------
|
|
5
21
|
// Vision retry helper — handles upstream empty-content failures
|
|
6
22
|
// ---------------------------------------------------------------------------
|
|
@@ -17,14 +33,19 @@ function isRecoverableVisionEmptyResponse(error) {
|
|
|
17
33
|
* Second attempt: softer plain-text prompt at higher temperature, same image.
|
|
18
34
|
* This handles hosted-beta vision models returning empty content on the first call.
|
|
19
35
|
*/
|
|
20
|
-
async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, systemPrompt, maxTokens) {
|
|
36
|
+
async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, softPrompt, systemPrompt, maxTokens, retryOptions) {
|
|
21
37
|
const start = Date.now();
|
|
38
|
+
const multimodalCall = mimeType === 'application/pdf'
|
|
39
|
+
? generateDocumentVision
|
|
40
|
+
: generateVision;
|
|
41
|
+
const primaryMaxTokens = getHostedBetaVisionMaxTokens(config, 'structured-vision', maxTokens);
|
|
42
|
+
const fallbackMaxTokens = getHostedBetaVisionMaxTokens(config, 'fallback-vision', retryOptions?.fallbackMaxTokens ?? (maxTokens + 200));
|
|
22
43
|
// Attempt 1: strict JSON prompt
|
|
23
44
|
try {
|
|
24
|
-
const r1 = await
|
|
45
|
+
const r1 = await multimodalCall(strictPrompt, imageBase64, mimeType, config, {
|
|
25
46
|
systemPrompt,
|
|
26
47
|
temperature: 0.1,
|
|
27
|
-
maxTokens,
|
|
48
|
+
maxTokens: primaryMaxTokens,
|
|
28
49
|
});
|
|
29
50
|
if (r1.text && r1.text.trim().length > 10) {
|
|
30
51
|
return { text: r1.text, latencyMs: r1.latencyMs, usedFallback: false };
|
|
@@ -37,10 +58,11 @@ async function visionWithRetry(imageBase64, mimeType, config, strictPrompt, soft
|
|
|
37
58
|
}
|
|
38
59
|
// Attempt 2: softer plain text prompt
|
|
39
60
|
try {
|
|
40
|
-
const r2 = await
|
|
41
|
-
systemPrompt:
|
|
42
|
-
|
|
43
|
-
|
|
61
|
+
const r2 = await multimodalCall(softPrompt, imageBase64, mimeType, config, {
|
|
62
|
+
systemPrompt: retryOptions?.fallbackSystemPrompt
|
|
63
|
+
?? `${systemPrompt} Be concise but thorough. You must provide values even if approximate.`,
|
|
64
|
+
temperature: retryOptions?.fallbackTemperature ?? 0.3,
|
|
65
|
+
maxTokens: fallbackMaxTokens,
|
|
44
66
|
});
|
|
45
67
|
if (r2.text && r2.text.trim().length > 0) {
|
|
46
68
|
return {
|
|
@@ -639,12 +661,373 @@ Classify the soil and estimate engineering properties. Respond with ONLY a JSON
|
|
|
639
661
|
rawLLMText: response.text,
|
|
640
662
|
};
|
|
641
663
|
}
|
|
642
|
-
|
|
643
|
-
const
|
|
664
|
+
/**
 * Reads `source[key]` as a trimmed, non-empty string.
 *
 * @param {object|null|undefined} source - Object to read from; may be nullish.
 * @param {string} key - Property name to look up.
 * @returns {string|null} The trimmed string, or `null` when the value is not a
 *   string or contains only whitespace.
 */
function readOptionalString(source, key) {
    const candidate = source?.[key];
    if (typeof candidate !== 'string') {
        return null;
    }
    const trimmed = candidate.trim();
    return trimmed ? trimmed : null;
}
|
|
668
|
+
/**
 * Reads `source[key]` as a finite number.
 *
 * Only real numbers and numeric strings are accepted. Everything else maps to
 * `null`, including `null`, `undefined`, booleans, arrays, and empty or
 * whitespace-only strings. Blanket `Number(value)` coercion turns `null`, `''`,
 * `false` and `[]` into `0`, which would report an absent elevation or
 * coordinate as a genuine zero.
 *
 * @param {object|null|undefined} source - Object to read from; may be nullish.
 * @param {string} key - Property name to look up.
 * @returns {number|null} The finite numeric value, or `null` when absent or not numeric.
 */
function readOptionalNumber(source, key) {
    const value = source?.[key];
    if (typeof value === 'number') {
        return Number.isFinite(value) ? value : null;
    }
    if (typeof value !== 'string') {
        return null;
    }
    const trimmed = value.trim();
    if (trimmed === '') {
        // Number('') is 0; an empty field means "no value", not zero.
        return null;
    }
    const parsed = Number(trimmed);
    return Number.isFinite(parsed) ? parsed : null;
}
|
|
673
|
+
/**
 * Tries each pattern against `rawText` in order and returns the first hit.
 *
 * @param {string} rawText - Text to search.
 * @param {RegExp[]} patterns - Patterns to try, highest priority first.
 * @returns {string|null} The trimmed full match of the first pattern that
 *   produces a non-empty match, or `null` when none does.
 */
function firstMatchingText(rawText, patterns) {
    for (let index = 0; index < patterns.length; index += 1) {
        const hit = rawText.match(patterns[index])?.[0];
        // An empty-string match counts as "no match" and the search continues.
        if (hit) {
            return hit.trim();
        }
    }
    return null;
}
|
|
682
|
+
/**
 * Builds a structured borehole location from vision-extracted metadata.
 *
 * Coordinate fields are read from `source.coordinates` when that is a non-null
 * object, otherwise from `source` itself. When no numeric coordinate and no raw
 * coordinate text is present, no location is built.
 *
 * @param {object|null|undefined} source - Extracted metadata object.
 * @param {number|null} groundElevation - Passed through as `groundLevel`.
 * @param {string} boreholeId - Borehole identifier attached to the location.
 * @returns {object|null} Result of `buildStructuredBoreholeLocation`, or `null`
 *   when there is nothing to build from or the builder returns a nullish value.
 */
function buildVisionBoreholeLocation(source, groundElevation, boreholeId) {
    let coordinateSource = source;
    if (source && typeof source.coordinates === 'object' && source.coordinates !== null) {
        coordinateSource = source.coordinates;
    }
    if (!coordinateSource) {
        return null;
    }
    const [latitude, longitude, easting, northing] = ['latitude', 'longitude', 'easting', 'northing']
        .map((field) => readOptionalNumber(coordinateSource, field));
    // Two key spellings are accepted for the verbatim coordinate text.
    const rawCoordinateText = readOptionalString(coordinateSource, 'rawText')
        ?? readOptionalString(coordinateSource, 'rawCoordinateText');
    const coordinateSystem = readOptionalString(coordinateSource, 'coordinateSystem');
    const hasNumericCoordinate = [latitude, longitude, easting, northing]
        .some((component) => component != null);
    if (!hasNumericCoordinate && !rawCoordinateText) {
        return null;
    }
    // Default confidence used when the source carries none of its own.
    const fallbackConfidence = (rawCoordinateText || latitude != null || easting != null) ? 64 : 0;
    const location = buildStructuredBoreholeLocation({
        boreholeId,
        source: 'vision',
        crs: coordinateSystem ?? undefined,
        easting,
        northing,
        latitude,
        longitude,
        groundLevel: groundElevation,
        raw: {
            rawCoordinateText,
            coordinateSystem,
            metadataConfidence: clampConfidence(coordinateSource.confidence, fallbackConfidence),
        },
    });
    return location ?? null;
}
|
|
719
|
+
/**
 * Regex-based extraction of borehole metadata from free text, used when the
 * model response is narrative rather than JSON.
 *
 * Fields that may be filled are `boreholeId` (always), `totalDepth`,
 * `groundElevation`, `pageInfo` and `coordinates`.
 *
 * Every coordinate-component pattern is anchored with `\b`. Without the anchor,
 * a search inside a matched "Latitude: 51.5, Longitude: 0.12" span picks up the
 * trailing "e: 51.5" of "Latitude" as an easting and the "n 0.12" of "Lon 0.12"
 * as a northing.
 *
 * @param {string} rawText - Text to scan.
 * @param {string|null|undefined} boreholeId - Identifier used when none is found in the text.
 * @returns {{ value: object|null, baseStatus: string, warnings: string[] }}
 *   `baseStatus` is 'failed' for blank input or when nothing beyond the id was
 *   found, otherwise 'partial'.
 */
function extractBoreholeMetadataFallback(rawText, boreholeId) {
    if (!rawText.trim()) {
        return { value: null, baseStatus: 'failed', warnings: [] };
    }
    const boreholeMatch = rawText.match(/\bBH[-_\s]?[A-Z0-9]+\b/i);
    // Prefer an explicit "total depth"; fall back to any "depth ... <n> m".
    const totalDepthMatch = rawText.match(/total depth[^0-9]*(\d+(?:\.\d+)?)\s*m/i)
        ?? rawText.match(/depth[^0-9]*(\d+(?:\.\d+)?)\s*m/i);
    const elevationMatch = rawText.match(/(?:ground level|ground elevation|gl|m\.?o\.?d\.?)[^0-9+-]*([+-]?\d+(?:\.\d+)?)/i);
    const pageInfo = firstMatchingText(rawText, [
        /page\s+\d+\s*(?:of|\/)\s*\d+/i,
        /sheet\s+\d+\s*(?:of|\/)\s*\d+/i,
    ]);
    const coordinateText = firstMatchingText(rawText, [
        /\bE(?:asting)?[:=]?\s*\d+(?:\.\d+)?\s*[,\s;]+N(?:orthing)?[:=]?\s*\d+(?:\.\d+)?/i,
        /\bLat(?:itude)?[:=]?\s*[-+]?\d+(?:\.\d+)?\s*[,\s;]+Lon(?:gitude)?[:=]?\s*[-+]?\d+(?:\.\d+)?/i,
        /\bUTM\s+Zone\s+\d{1,2}[NS]?\s+E[:=]?\s*\d+(?:\.\d+)?\s+N[:=]?\s*\d+(?:\.\d+)?/i,
    ]);
    // Look in the matched coordinate span first, then anywhere in the text.
    const findComponent = (pattern) => coordinateText?.match(pattern) ?? rawText.match(pattern);
    const eastingMatch = findComponent(/\bE(?:asting)?[:=]?\s*(\d+(?:\.\d+)?)/i);
    const northingMatch = findComponent(/\bN(?:orthing)?[:=]?\s*(\d+(?:\.\d+)?)/i);
    const latitudeMatch = findComponent(/\bLat(?:itude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i);
    const longitudeMatch = findComponent(/\bLon(?:gitude)?[:=]?\s*([-+]?\d+(?:\.\d+)?)/i);
    const coordinateSystem = firstMatchingText(rawText, [
        /\bUTM\s+Zone\s+\d{1,2}[NS]?\b/i,
        /\bWGS ?84\b/i,
        /\bBNG\b/i,
        /\bBritish National Grid\b/i,
        /\bMGA\s+Zone\s+\d{1,2}\b/i,
    ]);
    const value = {
        boreholeId: (boreholeMatch?.[0] ?? boreholeId)?.replace(/\s+/g, '') ?? boreholeId ?? 'BH-unknown',
    };
    if (totalDepthMatch?.[1]) {
        value.totalDepth = Number(totalDepthMatch[1]);
    }
    if (elevationMatch?.[1]) {
        value.groundElevation = Number(elevationMatch[1]);
    }
    if (pageInfo) {
        value.pageInfo = pageInfo;
    }
    if (coordinateText || eastingMatch?.[1] || northingMatch?.[1] || latitudeMatch?.[1] || longitudeMatch?.[1]) {
        value.coordinates = {
            easting: eastingMatch?.[1] ? Number(eastingMatch[1]) : null,
            northing: northingMatch?.[1] ? Number(northingMatch[1]) : null,
            latitude: latitudeMatch?.[1] ? Number(latitudeMatch[1]) : null,
            longitude: longitudeMatch?.[1] ? Number(longitudeMatch[1]) : null,
            coordinateSystem,
            rawText: coordinateText,
        };
    }
    // boreholeId is always present, so more than one key means something was found.
    const foundAnything = Object.keys(value).length > 1;
    return {
        value,
        baseStatus: foundAnything ? 'partial' : 'failed',
        warnings: foundAnything
            ? ['Metadata pass returned narrative text; extracted partial borehole metadata fields.']
            : [],
    };
}
|
|
778
|
+
/**
 * Runs a text-only generation with one relaxed retry.
 *
 * The first call asks for JSON mode at temperature 0.1 and is accepted when the
 * trimmed reply is longer than 10 characters. Otherwise, or after an error that
 * `isRecoverableVisionEmptyResponse` accepts, a second call is made without
 * JSON mode at temperature 0.25 and a larger token budget. Any other error is
 * rethrown.
 *
 * @param {string} prompt - User prompt, sent unchanged on both attempts.
 * @param {object} config - LLM configuration forwarded to `generateText`.
 * @param {string} systemPrompt - Base system prompt; the retry appends a best-effort instruction.
 * @param {number} maxTokens - Requested budget for the first attempt.
 * @returns {Promise<{ text: string, latencyMs: number, usedFallback: boolean }>}
 *   `text` is '' when both attempts came back empty.
 */
async function textWithRetry(prompt, config, systemPrompt, maxTokens) {
    const startedAt = Date.now();
    const strictBudget = getHostedBetaVisionMaxTokens(config, 'structured-text', maxTokens);
    const relaxedBudget = getHostedBetaVisionMaxTokens(config, 'fallback-text', maxTokens + 250);
    // Attempt 1: strict JSON mode
    try {
        const strictResult = await generateText(prompt, config, {
            systemPrompt,
            temperature: 0.1,
            jsonMode: true,
            maxTokens: strictBudget,
        });
        const strictText = strictResult.text;
        if (strictText && strictText.trim().length > 10) {
            return { text: strictText, latencyMs: strictResult.latencyMs, usedFallback: false };
        }
    }
    catch (error) {
        const recoverable = isRecoverableVisionEmptyResponse(error);
        if (!recoverable) {
            throw error;
        }
    }
    // Attempt 2: free-form reply, slightly warmer, larger budget
    try {
        const relaxedResult = await generateText(prompt, config, {
            systemPrompt: `${systemPrompt} Return best-effort structured information even if some fields are uncertain.`,
            temperature: 0.25,
            jsonMode: false,
            maxTokens: relaxedBudget,
        });
        const relaxedText = relaxedResult.text;
        if (relaxedText && relaxedText.trim().length > 0) {
            // Latency on this path covers both attempts.
            return { text: relaxedText, latencyMs: Date.now() - startedAt, usedFallback: true };
        }
    }
    catch (error) {
        const recoverable = isRecoverableVisionEmptyResponse(error);
        if (!recoverable) {
            throw error;
        }
    }
    return { text: '', latencyMs: Date.now() - startedAt, usedFallback: true };
}
|
|
824
|
+
/**
 * OCR-style transcription of a document page through the vision retry helper.
 *
 * @param {string} imageBase64 - Page payload, forwarded to `visionWithRetry`.
 * @param {string} mimeType - MIME type of the payload.
 * @param {object} config - LLM configuration.
 * @returns {Promise<{ text: string, latencyMs: number, usedFallback: boolean, warnings: string[] }>}
 *   Trimmed transcription, plus a warning when the fallback attempt was needed.
 */
export async function transcribeDocumentImageText(imageBase64, mimeType, config) {
    const primaryPrompt = `Transcribe the visible text from this document page. Respond with plain text only.
- Preserve important line breaks when they help readability.
- Include borehole IDs, depths, coordinate text, dates, table values, and page markers if visible.
- Do not explain or summarize.
- If only fragments are legible, return the fragments you can read.`;
    const retryPrompt = `OCR this document image and return only the visible text. Preserve useful line breaks. Do not explain, analyze, summarize, infer, or guess missing text. If only fragments are legible, return only those fragments.`;
    const primarySystemPrompt = 'You are performing OCR-style transcription on a geotechnical document image. Return plain text only, with no markdown fences or commentary.';
    const retryOptions = {
        fallbackSystemPrompt: 'You are performing OCR-style transcription on a geotechnical document image. Return only the visible text. Do not explain, analyze, summarize, infer, or guess missing text. No markdown, bullets, or commentary.',
        fallbackTemperature: 0.15,
        fallbackMaxTokens: 700,
    };
    const { text, latencyMs, usedFallback } = await visionWithRetry(imageBase64, mimeType, config, primaryPrompt, retryPrompt, primarySystemPrompt, 700, retryOptions);
    const warnings = [];
    if (usedFallback) {
        warnings.push('OCR-style transcription required a fallback retry before returning text.');
    }
    return {
        text: text.trim(),
        latencyMs,
        usedFallback,
        warnings,
    };
}
|
|
843
|
+
/**
 * Metadata-only vision pass over a borehole log page.
 *
 * The reply is parsed as JSON and also scanned with the regex fallback. The two
 * results are merged with JSON fields taking precedence, then normalised into a
 * flat metadata record with parse-safety fields.
 *
 * @param {string} imageBase64 - Page payload, forwarded to `visionWithRetry`.
 * @param {string} mimeType - MIME type of the payload.
 * @param {object} config - LLM configuration.
 * @param {{ boreholeId?: string }} context - Caller hints; `boreholeId` is used when none is extracted.
 * @returns {Promise<object>} Parse-safety fields plus boreholeId, totalDepth,
 *   projectName, dateDrilled, drillingMethod, groundElevation, pageInfo,
 *   layoutNotes, location, rawLLMText and latencyMs.
 */
async function extractBoreholeMetadata(imageBase64, mimeType, config, context) {
    const systemPrompt = 'You are an expert geotechnical engineer extracting borehole log metadata. Focus on the header area, coordinate text, and document layout. Respond with JSON only.';
    const jsonPrompt = `Extract only the borehole log metadata visible on this page. Respond with ONLY a JSON object:
{
"boreholeId": "<ID if visible, or 'BH-unknown'>",
"projectName": "<project name or null>",
"dateDrilled": "<date or date range or null>",
"drillingMethod": "<method or null>",
"groundElevation": <number in meters or null>,
"totalDepth": <number in meters or null>,
"pageInfo": "<page 1 of 3 or null>",
"layoutNotes": "<brief description of layout and depth scale placement or null>",
"coordinates": {
"easting": <number or null>,
"northing": <number or null>,
"latitude": <number or null>,
"longitude": <number or null>,
"coordinateSystem": "<declared CRS or null>",
"rawText": "<exact coordinate text or null>"
},
"confidence": <number 0-100>,
"warnings": ["<warning>", "<warning>"]
}`;
    const narrativePrompt = `Inspect only the borehole log header, title block, margins, and information boxes.
Extract:
1. Borehole ID
2. Project name
3. Date drilled
4. Drilling method
5. Ground elevation
6. Total depth if printed
7. Page information
8. Layout notes
9. Any coordinates exactly as printed, plus parsed numeric fields when visible
Do not extract layers or soil descriptions in this pass.`;
    const response = await visionWithRetry(imageBase64, mimeType, config, jsonPrompt, narrativePrompt, systemPrompt, 900);
    const structured = parseJsonObject(response.text);
    const regexFallback = extractBoreholeMetadataFallback(response.text, context.boreholeId);
    // Regex-derived fields form the base; anything the JSON supplies overrides them.
    const merged = {
        ...(regexFallback.value ?? {}),
        ...(structured.value ?? {}),
    };
    const baseStatus = structured.baseStatus === 'failed' ? regexFallback.baseStatus : structured.baseStatus;
    const collectedWarnings = [...structured.warnings, ...regexFallback.warnings];
    const boreholeId = readOptionalString(merged, 'boreholeId') ?? context.boreholeId ?? 'BH-unknown';
    const projectName = readOptionalString(merged, 'projectName');
    const dateDrilled = readOptionalString(merged, 'dateDrilled');
    const drillingMethod = readOptionalString(merged, 'drillingMethod');
    const groundElevation = readOptionalNumber(merged, 'groundElevation');
    const totalDepth = readOptionalNumber(merged, 'totalDepth');
    const pageInfo = readOptionalString(merged, 'pageInfo');
    const layoutNotes = readOptionalString(merged, 'layoutNotes');
    const location = buildVisionBoreholeLocation(merged, groundElevation, boreholeId);
    let defaultConfidence = 0;
    if (baseStatus === 'parsed') {
        defaultConfidence = 78;
    }
    else if (baseStatus === 'partial') {
        defaultConfidence = 60;
    }
    const confidence = clampConfidence(merged.confidence, defaultConfidence);
    // Count the fields that were actually recovered; the placeholder id does not count.
    const anchorFields = [boreholeId === 'BH-unknown' ? null : boreholeId, totalDepth, location, pageInfo];
    let recoveredCount = 0;
    for (const field of anchorFields) {
        if (field !== null) {
            recoveredCount += 1;
        }
    }
    const status = deriveParseStatus(baseStatus, recoveredCount, 2);
    const safety = createParseSafety(status, confidence, combineWarnings(collectedWarnings, normalizeWarnings(merged.warnings)));
    return {
        ...safety,
        boreholeId,
        totalDepth,
        projectName,
        dateDrilled,
        drillingMethod,
        groundElevation,
        pageInfo,
        layoutNotes,
        location,
        rawLLMText: response.text,
        latencyMs: response.latencyMs,
    };
}
|
|
914
|
+
/**
 * Decides whether a borehole page should be interpreted from its extracted
 * text instead of being sent as a document.
 *
 * True only when the page is a PDF, a text hint of at least 24 trimmed
 * characters is available, and `providerSupportsNativePdfDocuments(config)`
 * reports false.
 *
 * @param {string} mimeType - MIME type of the page payload.
 * @param {object} config - LLM configuration passed to the capability check.
 * @param {{ pageTextHint?: unknown }} context - Page context carrying the text hint.
 * @returns {boolean}
 */
function shouldUseTextOnlyBoreholeInterpretation(mimeType, config, context) {
    if (mimeType !== 'application/pdf') {
        return false;
    }
    const hint = context.pageTextHint;
    if (typeof hint !== 'string' || hint.trim().length < 24) {
        return false;
    }
    return !providerSupportsNativePdfDocuments(config);
}
|
|
920
|
+
/**
 * Metadata-only pass over extracted page text (no image input).
 *
 * Whitespace in `pageText` is collapsed first; blank input yields a 'failed'
 * record without calling the model. Otherwise the model reply is parsed as
 * JSON, the page text itself is scanned with the regex fallback, and the two
 * are merged with JSON fields taking precedence.
 *
 * @param {string} pageText - OCR/native text of the page.
 * @param {object} config - LLM configuration.
 * @param {{ boreholeId?: string }} context - Caller hints; `boreholeId` is used when none is extracted.
 * @returns {Promise<object>} Parse-safety fields plus boreholeId, totalDepth,
 *   projectName, dateDrilled, drillingMethod, groundElevation, pageInfo,
 *   layoutNotes, location, rawLLMText and latencyMs.
 */
async function extractBoreholeMetadataFromText(pageText, config, context) {
    const normalizedPageText = pageText.replace(/\s+/g, ' ').trim();
    if (normalizedPageText === '') {
        const failedSafety = createParseSafety('failed', 0, ['No usable borehole page text was available for metadata extraction.']);
        return {
            ...failedSafety,
            boreholeId: context.boreholeId ?? 'BH-unknown',
            projectName: null,
            dateDrilled: null,
            drillingMethod: null,
            groundElevation: null,
            totalDepth: null,
            pageInfo: null,
            layoutNotes: null,
            location: null,
            rawLLMText: '',
            latencyMs: 0,
        };
    }
    const systemPrompt = 'You are an expert geotechnical engineer extracting borehole log metadata from OCR/native page text. Respond with JSON only when possible.';
    // Only the first 6000 characters of the page text are embedded in the prompt.
    const jsonPrompt = `Extract only the borehole log metadata from this extracted page text. Respond with ONLY a JSON object:
{
"boreholeId": "<ID if visible, or 'BH-unknown'>",
"projectName": "<project name or null>",
"dateDrilled": "<date or date range or null>",
"drillingMethod": "<method or null>",
"groundElevation": <number in meters or null>,
"totalDepth": <number in meters or null>,
"pageInfo": "<page 1 of 3 or null>",
"layoutNotes": "<brief description of layout and depth scale placement or null>",
"coordinates": {
"easting": <number or null>,
"northing": <number or null>,
"latitude": <number or null>,
"longitude": <number or null>,
"coordinateSystem": "<declared CRS or null>",
"rawText": "<exact coordinate text or null>"
},
"confidence": <number 0-100>,
"warnings": ["<warning>", "<warning>"]
}

Extract only header/title-block/style metadata from this page text. Do not invent layers.

Page text:
${normalizedPageText.slice(0, 6000)}`;
    const response = await textWithRetry(jsonPrompt, config, systemPrompt, 900);
    const structured = parseJsonObject(response.text);
    // The regex fallback scans the page text itself, not the model reply.
    const regexFallback = extractBoreholeMetadataFallback(normalizedPageText, context.boreholeId);
    const merged = {
        ...(regexFallback.value ?? {}),
        ...(structured.value ?? {}),
    };
    const baseStatus = structured.baseStatus === 'failed' ? regexFallback.baseStatus : structured.baseStatus;
    const collectedWarnings = [
        ...structured.warnings,
        ...regexFallback.warnings,
        ...(response.usedFallback
            ? ['Metadata extraction required a text fallback retry before structured parsing succeeded.']
            : []),
    ];
    const boreholeId = readOptionalString(merged, 'boreholeId') ?? context.boreholeId ?? 'BH-unknown';
    const projectName = readOptionalString(merged, 'projectName');
    const dateDrilled = readOptionalString(merged, 'dateDrilled');
    const drillingMethod = readOptionalString(merged, 'drillingMethod');
    const groundElevation = readOptionalNumber(merged, 'groundElevation');
    const totalDepth = readOptionalNumber(merged, 'totalDepth');
    const pageInfo = readOptionalString(merged, 'pageInfo');
    const layoutNotes = readOptionalString(merged, 'layoutNotes');
    const location = buildVisionBoreholeLocation(merged, groundElevation, boreholeId);
    let defaultConfidence = 0;
    if (baseStatus === 'parsed') {
        defaultConfidence = 74;
    }
    else if (baseStatus === 'partial') {
        defaultConfidence = 58;
    }
    const confidence = clampConfidence(merged.confidence, defaultConfidence);
    // Count the fields that were actually recovered; the placeholder id does not count.
    const anchorFields = [boreholeId === 'BH-unknown' ? null : boreholeId, totalDepth, location, pageInfo];
    let recoveredCount = 0;
    for (const field of anchorFields) {
        if (field !== null) {
            recoveredCount += 1;
        }
    }
    const status = deriveParseStatus(baseStatus, recoveredCount, 2);
    const safety = createParseSafety(status, confidence, combineWarnings(collectedWarnings, normalizeWarnings(merged.warnings)));
    return {
        ...safety,
        boreholeId,
        totalDepth,
        projectName,
        dateDrilled,
        drillingMethod,
        groundElevation,
        pageInfo,
        layoutNotes,
        location,
        rawLLMText: response.text,
        latencyMs: response.latencyMs,
    };
}
|
|
1004
|
+
function buildBoreholeLayerPrompts(context, metadata) {
|
|
1005
|
+
const locationCode = metadata.location?.crs?.code ?? metadata.location?.crs?.name ?? null;
|
|
1006
|
+
const locationRawText = typeof metadata.location?.raw?.rawCoordinateText === 'string'
|
|
1007
|
+
? metadata.location.raw.rawCoordinateText
|
|
1008
|
+
: null;
|
|
1009
|
+
const contextParts = [
|
|
1010
|
+
`Borehole ID: ${metadata.boreholeId}`,
|
|
1011
|
+
locationCode ? `Coordinate system: ${locationCode}` : null,
|
|
1012
|
+
locationRawText ? `Coordinate text: ${locationRawText}` : null,
|
|
1013
|
+
context.pageNumber != null && context.totalPages != null
|
|
1014
|
+
? `Page ${context.pageNumber} of ${context.totalPages}`
|
|
1015
|
+
: null,
|
|
1016
|
+
context.pageClassification ? `Page classification: ${context.pageClassification}` : null,
|
|
1017
|
+
context.priorContinuationDepth != null
|
|
1018
|
+
? `Previous pages continued to ${context.priorContinuationDepth.toFixed(2)} m depth. Continue from there unless the page clearly restarts at a new borehole.`
|
|
1019
|
+
: null,
|
|
1020
|
+
context.pageTextHint ? `Native text hint: ${context.pageTextHint.slice(0, 400)}` : null,
|
|
1021
|
+
metadata.rawLLMText ? `Metadata/layout notes: ${metadata.rawLLMText.slice(0, 300)}` : null,
|
|
1022
|
+
].filter((value) => Boolean(value));
|
|
1023
|
+
const sharedContext = contextParts.join('\n');
|
|
1024
|
+
return {
|
|
1025
|
+
strictPrompt: `Extract structured borehole log layer data from this page. Respond with ONLY a JSON object:
|
|
1026
|
+
{
|
|
1027
|
+
"boreholeId": "<ID if visible, or ${JSON.stringify(metadata.boreholeId)}>",
|
|
1028
|
+
"totalDepth": <number in meters or null>,
|
|
647
1029
|
"waterTableDepth": <number in meters or null>,
|
|
1030
|
+
"continuationDepth": <deepest visible depth on this page or null>,
|
|
648
1031
|
"layers": [
|
|
649
1032
|
{
|
|
650
1033
|
"depthFrom": <m>,
|
|
@@ -656,26 +1039,67 @@ export async function interpretBoreholeLog(imageBase64, mimeType, config, boreho
|
|
|
656
1039
|
"notes": "<any additional notes>"
|
|
657
1040
|
}
|
|
658
1041
|
],
|
|
659
|
-
"summary": "<brief engineering summary of
|
|
1042
|
+
"summary": "<brief engineering summary of this page and its strata>",
|
|
660
1043
|
"confidence": <number 0-100>,
|
|
661
1044
|
"warnings": ["<warning>", "<warning>"]
|
|
662
|
-
}
|
|
663
|
-
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
Context:
|
|
1048
|
+
${sharedContext}`,
|
|
1049
|
+
softPrompt: `Read this borehole log page carefully and extract:
|
|
664
1050
|
1. Borehole ID if visible
|
|
665
|
-
2. Total depth
|
|
1051
|
+
2. Total depth on the page
|
|
666
1052
|
3. Water table depth if shown
|
|
667
|
-
4.
|
|
668
|
-
5.
|
|
669
|
-
|
|
670
|
-
|
|
1053
|
+
4. The deepest continuation depth visible on the page
|
|
1054
|
+
5. Each stratigraphic layer with depthFrom, depthTo, description, USCS symbol, SPT N, water content, and notes
|
|
1055
|
+
6. A brief engineering summary
|
|
1056
|
+
|
|
1057
|
+
Context:
|
|
1058
|
+
${sharedContext}
|
|
1059
|
+
|
|
1060
|
+
Adapt to the visible layout. Continue from previous pages when the depth scale clearly carries on.`,
|
|
1061
|
+
systemPrompt: 'You are an expert geotechnical engineer extracting layer data from borehole log documents. Adapt to varying log layouts, preserve depth continuity, and respond with JSON only.',
|
|
1062
|
+
};
|
|
1063
|
+
}
|
|
1064
|
+
async function interpretBoreholeLogTextWithContext(pageText, config, context = {}) {
|
|
1065
|
+
const normalizedPageText = pageText.replace(/\s+/g, ' ').trim();
|
|
1066
|
+
if (!normalizedPageText) {
|
|
1067
|
+
return {
|
|
1068
|
+
...createParseSafety('failed', 0, ['No usable borehole page text was available for text-based interpretation.']),
|
|
1069
|
+
boreholeId: context.boreholeId ?? 'BH-unknown',
|
|
1070
|
+
totalDepth: null,
|
|
1071
|
+
waterTableDepth: null,
|
|
1072
|
+
layers: [],
|
|
1073
|
+
summary: null,
|
|
1074
|
+
location: null,
|
|
1075
|
+
groundElevation: null,
|
|
1076
|
+
dateDrilled: null,
|
|
1077
|
+
drillingMethod: null,
|
|
1078
|
+
projectName: null,
|
|
1079
|
+
continuationDepth: null,
|
|
1080
|
+
pageNumber: context.pageNumber ?? null,
|
|
1081
|
+
totalPages: context.totalPages ?? null,
|
|
1082
|
+
rawLLMText: '',
|
|
1083
|
+
latencyMs: 0,
|
|
1084
|
+
};
|
|
1085
|
+
}
|
|
1086
|
+
const metadata = await extractBoreholeMetadataFromText(normalizedPageText, config, context);
|
|
1087
|
+
const prompts = buildBoreholeLayerPrompts({
|
|
1088
|
+
...context,
|
|
1089
|
+
pageTextHint: normalizedPageText,
|
|
1090
|
+
}, metadata);
|
|
1091
|
+
const response = await textWithRetry(`${prompts.strictPrompt}\n\nBorehole page text:\n${normalizedPageText.slice(0, 7000)}`, config, `${prompts.systemPrompt} Work from OCR/native extracted page text rather than pixels when needed.`, 1500);
|
|
671
1092
|
const parsed = parseJsonObject(response.text);
|
|
672
|
-
const narrativeFallback = extractBoreholeFallback(
|
|
1093
|
+
const narrativeFallback = extractBoreholeFallback(normalizedPageText, metadata.boreholeId);
|
|
673
1094
|
const mergedValue = {
|
|
674
1095
|
...(narrativeFallback.value ?? {}),
|
|
675
1096
|
...(parsed.value ?? {}),
|
|
676
1097
|
};
|
|
677
1098
|
const baseStatus = parsed.baseStatus !== 'failed' ? parsed.baseStatus : narrativeFallback.baseStatus;
|
|
678
|
-
const warnings = [...parsed.warnings, ...narrativeFallback.warnings];
|
|
1099
|
+
const warnings = [...metadata.warnings, ...parsed.warnings, ...narrativeFallback.warnings];
|
|
1100
|
+
if (response.usedFallback) {
|
|
1101
|
+
warnings.push('Layer extraction required a text fallback retry before structured parsing succeeded.');
|
|
1102
|
+
}
|
|
679
1103
|
const parsedLayers = Array.isArray(mergedValue.layers)
|
|
680
1104
|
? mergedValue.layers
|
|
681
1105
|
: [];
|
|
@@ -696,12 +1120,104 @@ Provide approximate values where necessary, but keep the structure complete.`;
|
|
|
696
1120
|
warnings.push(...layerWarnings.map((warning) => `Layer warning: ${warning}`));
|
|
697
1121
|
return item;
|
|
698
1122
|
});
|
|
699
|
-
const totalDepth =
|
|
1123
|
+
const totalDepth = readOptionalNumber(mergedValue, 'totalDepth') ?? metadata.totalDepth;
|
|
700
1124
|
const waterTableDepth = mergedValue.waterTableDepth == null
|
|
701
1125
|
? null
|
|
702
1126
|
: readNumber(mergedValue, 'waterTableDepth', warnings);
|
|
703
|
-
const summary =
|
|
704
|
-
const
|
|
1127
|
+
const summary = readOptionalString(mergedValue, 'summary');
|
|
1128
|
+
const continuationDepth = readOptionalNumber(mergedValue, 'continuationDepth')
|
|
1129
|
+
?? layers.reduce((maxDepth, layer) => {
|
|
1130
|
+
if (layer.depthTo == null)
|
|
1131
|
+
return maxDepth;
|
|
1132
|
+
return maxDepth == null ? layer.depthTo : Math.max(maxDepth, layer.depthTo);
|
|
1133
|
+
}, null)
|
|
1134
|
+
?? totalDepth;
|
|
1135
|
+
const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? metadata.boreholeId ?? context.boreholeId ?? 'BH-unknown';
|
|
1136
|
+
const confidence = clampConfidence(mergedValue.confidence, baseStatus === 'parsed' ? 72 : baseStatus === 'partial' ? 55 : 0);
|
|
1137
|
+
const status = deriveParseStatus(baseStatus, [totalDepth, summary, layers.length > 0 ? 'layers' : null].filter((value) => value !== null)
|
|
1138
|
+
.length, 3);
|
|
1139
|
+
const safety = createParseSafety(status, confidence, combineWarnings(warnings, normalizeWarnings(mergedValue.warnings)));
|
|
1140
|
+
return {
|
|
1141
|
+
...safety,
|
|
1142
|
+
boreholeId: resolvedBoreholeId,
|
|
1143
|
+
totalDepth,
|
|
1144
|
+
waterTableDepth,
|
|
1145
|
+
layers,
|
|
1146
|
+
summary,
|
|
1147
|
+
location: metadata.location,
|
|
1148
|
+
groundElevation: metadata.groundElevation,
|
|
1149
|
+
dateDrilled: metadata.dateDrilled,
|
|
1150
|
+
drillingMethod: metadata.drillingMethod,
|
|
1151
|
+
projectName: metadata.projectName,
|
|
1152
|
+
continuationDepth,
|
|
1153
|
+
pageNumber: context.pageNumber ?? null,
|
|
1154
|
+
totalPages: context.totalPages ?? null,
|
|
1155
|
+
rawLLMText: [metadata.rawLLMText, response.text].filter(Boolean).join('\n\n'),
|
|
1156
|
+
latencyMs: metadata.latencyMs + response.latencyMs,
|
|
1157
|
+
};
|
|
1158
|
+
}
|
|
1159
|
+
/**
 * Ranks a borehole location record so that the most usable one can be preferred
 * when several candidates are available.
 *
 * The score is additive:
 *  - +100 when `location.wgs84` is present
 *  - +70 when `location.projected` is present
 *  - +round(`location.crs.confidence` * 20) when that confidence is a finite number
 *  - +5 when `location.raw` has at least one own enumerable key
 *
 * @param {object|null|undefined} location - Candidate location record.
 * @returns {number} A finite score; 0 when `location` is missing.
 */
function scoreBoreholeLocation(location) {
    if (!location)
        return 0;
    let score = 0;
    if (location.wgs84)
        score += 100;
    if (location.projected)
        score += 70;
    if (location.crs?.confidence != null) {
        // Coerce the same way the multiplication would, but ignore NaN/Infinity:
        // a non-finite score turns `scoreA - scoreB` comparisons into NaN and
        // makes any ranking built on this function unreliable.
        const confidence = Number(location.crs.confidence);
        if (Number.isFinite(confidence))
            score += Math.round(confidence * 20);
    }
    if (location.raw && Object.keys(location.raw).length > 0)
        score += 5;
    return score;
}
|
|
1173
|
+
export async function interpretBoreholeLogWithContext(imageBase64, mimeType, config, context = {}) {
|
|
1174
|
+
if (shouldUseTextOnlyBoreholeInterpretation(mimeType, config, context)) {
|
|
1175
|
+
return interpretBoreholeLogTextWithContext(context.pageTextHint ?? '', config, context);
|
|
1176
|
+
}
|
|
1177
|
+
const metadata = await extractBoreholeMetadata(imageBase64, mimeType, config, context);
|
|
1178
|
+
const prompts = buildBoreholeLayerPrompts(context, metadata);
|
|
1179
|
+
const response = await visionWithRetry(imageBase64, mimeType, config, prompts.strictPrompt, prompts.softPrompt, prompts.systemPrompt, 1500);
|
|
1180
|
+
const parsed = parseJsonObject(response.text);
|
|
1181
|
+
const narrativeFallback = extractBoreholeFallback(response.text, metadata.boreholeId);
|
|
1182
|
+
const mergedValue = {
|
|
1183
|
+
...(narrativeFallback.value ?? {}),
|
|
1184
|
+
...(parsed.value ?? {}),
|
|
1185
|
+
};
|
|
1186
|
+
const baseStatus = parsed.baseStatus !== 'failed' ? parsed.baseStatus : narrativeFallback.baseStatus;
|
|
1187
|
+
const warnings = [...metadata.warnings, ...parsed.warnings, ...narrativeFallback.warnings];
|
|
1188
|
+
const parsedLayers = Array.isArray(mergedValue.layers)
|
|
1189
|
+
? mergedValue.layers
|
|
1190
|
+
: [];
|
|
1191
|
+
if (!Array.isArray(mergedValue.layers)) {
|
|
1192
|
+
warnings.push('Missing or invalid "layers" array.');
|
|
1193
|
+
}
|
|
1194
|
+
const layers = parsedLayers.map((layer) => {
|
|
1195
|
+
const layerWarnings = [];
|
|
1196
|
+
const item = {
|
|
1197
|
+
depthFrom: readNumber(layer, 'depthFrom', layerWarnings),
|
|
1198
|
+
depthTo: readNumber(layer, 'depthTo', layerWarnings),
|
|
1199
|
+
description: readString(layer, 'description', layerWarnings),
|
|
1200
|
+
uscsSymbol: readString(layer, 'uscsSymbol', []),
|
|
1201
|
+
sptN: layer.sptN == null ? null : readNumber(layer, 'sptN', layerWarnings),
|
|
1202
|
+
waterContent: layer.waterContent == null ? null : readNumber(layer, 'waterContent', layerWarnings),
|
|
1203
|
+
notes: readString(layer, 'notes', []),
|
|
1204
|
+
};
|
|
1205
|
+
warnings.push(...layerWarnings.map((warning) => `Layer warning: ${warning}`));
|
|
1206
|
+
return item;
|
|
1207
|
+
});
|
|
1208
|
+
const totalDepth = readOptionalNumber(mergedValue, 'totalDepth') ?? metadata.totalDepth;
|
|
1209
|
+
const waterTableDepth = mergedValue.waterTableDepth == null
|
|
1210
|
+
? null
|
|
1211
|
+
: readNumber(mergedValue, 'waterTableDepth', warnings);
|
|
1212
|
+
const summary = readOptionalString(mergedValue, 'summary');
|
|
1213
|
+
const continuationDepth = readOptionalNumber(mergedValue, 'continuationDepth')
|
|
1214
|
+
?? layers.reduce((maxDepth, layer) => {
|
|
1215
|
+
if (layer.depthTo == null)
|
|
1216
|
+
return maxDepth;
|
|
1217
|
+
return maxDepth == null ? layer.depthTo : Math.max(maxDepth, layer.depthTo);
|
|
1218
|
+
}, null)
|
|
1219
|
+
?? totalDepth;
|
|
1220
|
+
const resolvedBoreholeId = readOptionalString(mergedValue, 'boreholeId') ?? metadata.boreholeId ?? context.boreholeId ?? 'BH-unknown';
|
|
705
1221
|
const confidence = clampConfidence(mergedValue.confidence, baseStatus === 'parsed' ? 75 : baseStatus === 'partial' ? 58 : 0);
|
|
706
1222
|
const status = deriveParseStatus(baseStatus, [totalDepth, summary, layers.length > 0 ? 'layers' : null].filter((value) => value !== null)
|
|
707
1223
|
.length, 3);
|
|
@@ -713,12 +1229,128 @@ Provide approximate values where necessary, but keep the structure complete.`;
|
|
|
713
1229
|
waterTableDepth,
|
|
714
1230
|
layers,
|
|
715
1231
|
summary,
|
|
716
|
-
|
|
717
|
-
|
|
1232
|
+
location: metadata.location,
|
|
1233
|
+
groundElevation: metadata.groundElevation,
|
|
1234
|
+
dateDrilled: metadata.dateDrilled,
|
|
1235
|
+
drillingMethod: metadata.drillingMethod,
|
|
1236
|
+
projectName: metadata.projectName,
|
|
1237
|
+
continuationDepth,
|
|
1238
|
+
pageNumber: context.pageNumber ?? null,
|
|
1239
|
+
totalPages: context.totalPages ?? null,
|
|
1240
|
+
rawLLMText: [metadata.rawLLMText, response.text].filter(Boolean).join('\n\n'),
|
|
1241
|
+
latencyMs: metadata.latencyMs + response.latencyMs,
|
|
1242
|
+
};
|
|
1243
|
+
}
|
|
1244
|
+
/**
 * Interprets a single borehole log image when the only contextual hint is a
 * borehole identifier.
 *
 * Delegates to `interpretBoreholeLogWithContext`, passing a context object whose
 * sole property is `boreholeId`.
 *
 * @param {string} imageBase64 - Encoded page image, forwarded unchanged.
 * @param {string} mimeType - MIME type of the image, forwarded unchanged.
 * @param {object} config - Provider/runtime configuration, forwarded unchanged.
 * @param {string} [boreholeId] - Optional borehole identifier hint.
 * @returns {Promise<object>} Whatever `interpretBoreholeLogWithContext` resolves to.
 */
export async function interpretBoreholeLog(imageBase64, mimeType, config, boreholeId) {
    const context = { boreholeId };
    return interpretBoreholeLogWithContext(imageBase64, mimeType, config, context);
}
|
|
1247
|
+
/**
 * Combines per-page borehole interpretations into one borehole-level result.
 *
 * Pages that carry no layers, total depth, summary or location are ignored for
 * the merged values, unless every page is like that, in which case all pages are
 * used. Warnings, raw LLM text and latency are always collected from every
 * supplied page.
 *
 * Merge rules:
 *  - layers: de-duplicated on (depthFrom, depthTo, description, uscsSymbol, sptN)
 *    with description/USCS compared case-insensitively after trimming, then
 *    sorted by `depthFrom` (layers without a `depthFrom` go last)
 *  - totalDepth / continuationDepth: the largest value seen
 *  - waterTableDepth: the smallest value seen
 *  - location: the candidate with the highest `scoreBoreholeLocation` score
 *  - project/drilling metadata: taken from the first page that has any of
 *    projectName, drillingMethod, dateDrilled or groundElevation
 *  - confidence: rounded mean of the contributing pages' confidences
 *
 * @param {Array<{pageNumber: number, result: object}>} pages - Per-page results.
 * @param {string} [overrideBoreholeId] - When given, used as the merged borehole
 *   ID and suppresses the "multiple borehole IDs" warning.
 * @returns {object} Merged result including `parseStatus`, `confidence`,
 *   `warnings` and `canAutoProceed`.
 */
export function mergeBoreholeLogPages(pages, overrideBoreholeId) {
    const validPages = pages.filter(({ result }) => (result.layers.length > 0
        || result.totalDepth != null
        || result.summary
        || result.location));
    const sourcePages = validPages.length > 0 ? validPages : pages;
    const distinctBoreholeIds = [
        ...new Set(sourcePages
            .map(({ result }) => result.boreholeId)
            .filter((value) => value && value !== 'BH-unknown')),
    ];
    // First occurrence wins so that an earlier page's copy of a repeated layer is kept.
    const deduped = new Map();
    for (const layer of sourcePages.flatMap(({ result }) => result.layers)) {
        const key = [
            layer.depthFrom ?? 'na',
            layer.depthTo ?? 'na',
            (layer.description ?? '').trim().toLowerCase(),
            (layer.uscsSymbol ?? '').trim().toUpperCase(),
            layer.sptN ?? 'na',
        ].join('|');
        if (!deduped.has(key)) {
            deduped.set(key, layer);
        }
    }
    const mergedLayers = [...deduped.values()].sort((left, right) => {
        const leftDepth = left.depthFrom ?? Number.POSITIVE_INFINITY;
        const rightDepth = right.depthFrom ?? Number.POSITIVE_INFINITY;
        return leftDepth - rightDepth;
    });
    const summaries = [
        ...new Set(sourcePages
            .map(({ result }) => result.summary?.trim())
            .filter((value) => Boolean(value))),
    ];
    const warnings = [
        ...new Set(pages.flatMap(({ pageNumber, result }) => [
            ...result.warnings.map((warning) => `Page ${pageNumber}: ${warning}`),
        ])),
    ];
    if (distinctBoreholeIds.length > 1 && !overrideBoreholeId) {
        warnings.push(`Multiple borehole IDs were detected across the supplied pages (${distinctBoreholeIds.join(', ')}). Split the PDF by borehole for a safer interpretation.`);
    }
    const confidences = sourcePages.map(({ result }) => result.confidence);
    const averageConfidence = confidences.length > 0
        ? Math.round(confidences.reduce((sum, value) => sum + value, 0) / confidences.length)
        : 0;
    const totalDepth = sourcePages.reduce((maxDepth, { result }) => {
        if (result.totalDepth == null)
            return maxDepth;
        return maxDepth == null ? result.totalDepth : Math.max(maxDepth, result.totalDepth);
    }, null);
    const waterTableDepth = sourcePages.reduce((selected, { result }) => {
        if (result.waterTableDepth == null)
            return selected;
        return selected == null ? result.waterTableDepth : Math.min(selected, result.waterTableDepth);
    }, null);
    // `!= null` drops both null and undefined: an undefined entry scores 0 and,
    // with a stable sort, could otherwise sit ahead of a real location that also
    // scores 0 and cause the merged location to collapse to null.
    const bestLocation = sourcePages
        .map(({ result }) => result.location)
        .filter((value) => value != null)
        .sort((left, right) => scoreBoreholeLocation(right) - scoreBoreholeLocation(left))[0] ?? null;
    const firstMetadataPage = sourcePages
        .map(({ result }) => result)
        .find((result) => result.projectName
        || result.drillingMethod
        || result.dateDrilled
        || result.groundElevation != null) ?? sourcePages[0]?.result;
    // Seeded with totalDepth so the merged continuation depth is never shallower than it.
    const continuationDepth = sourcePages.reduce((maxDepth, { result }) => {
        if (result.continuationDepth == null)
            return maxDepth;
        return maxDepth == null ? result.continuationDepth : Math.max(maxDepth, result.continuationDepth);
    }, totalDepth);
    const parseStatus = mergedLayers.length > 0 && totalDepth != null
        ? 'parsed'
        : mergedLayers.length > 0 || summaries.length > 0 || totalDepth != null
            ? 'partial'
            : 'failed';
    return {
        // distinctBoreholeIds already holds every usable ID from sourcePages, in page order.
        boreholeId: overrideBoreholeId
            ?? distinctBoreholeIds[0]
            ?? 'BH-unknown',
        totalDepth,
        waterTableDepth,
        layers: mergedLayers,
        summary: summaries.length > 0 ? summaries.join(' ') : null,
        location: bestLocation,
        groundElevation: firstMetadataPage?.groundElevation
            ?? bestLocation?.groundLevel
            ?? bestLocation?.projected?.elevation
            ?? bestLocation?.wgs84?.elevation
            ?? null,
        dateDrilled: firstMetadataPage?.dateDrilled ?? null,
        drillingMethod: firstMetadataPage?.drillingMethod ?? null,
        projectName: firstMetadataPage?.projectName ?? null,
        continuationDepth,
        pageNumber: sourcePages[0]?.result.pageNumber ?? null,
        totalPages: sourcePages[0]?.result.totalPages ?? null,
        rawLLMText: pages.map(({ pageNumber, result }) => `[Page ${pageNumber}]\n${result.rawLLMText}`).join('\n\n'),
        latencyMs: pages.reduce((sum, { result }) => sum + result.latencyMs, 0),
        parseStatus,
        confidence: averageConfidence,
        warnings,
        canAutoProceed: parseStatus === 'parsed' && averageConfidence >= 70,
    };
}
|
|
720
1352
|
export async function queryGBRDocument(question, documentBase64, mimeType, config) {
|
|
721
|
-
const response = await visionWithRetry(documentBase64, mimeType, config, `Based on this Geotechnical Baseline Report, answer the following question with a concise, technically accurate response and specific page/section references where possible:\n\n${question}`, `Read this Geotechnical Baseline Report and answer the question directly:\n\n${question}\n\nUse specific values, limits, assumptions, and references if they are visible in the document.`, 'You are an expert geotechnical engineer analyzing a Geotechnical Baseline Report (GBR). Provide precise, actionable answers referencing specific data from the document.', 2000);
|
|
1353
|
+
const response = await visionWithRetry(documentBase64, mimeType, config, `Based on this Geotechnical Baseline Report, answer the following question with a concise, technically accurate response and specific page/section references where possible:\n\n${question}`, `Read this Geotechnical Baseline Report and answer the question directly:\n\n${question}\n\nUse specific values, limits, assumptions, and references if they are visible in the document.`, 'You are an expert geotechnical engineer analyzing a Geotechnical Baseline Report (GBR). Provide precise, actionable answers referencing specific data from the document.', getHostedBetaVisionMaxTokens(config, 'document-query', 2000));
|
|
722
1354
|
if (!response.text.trim()) {
|
|
723
1355
|
throw new Error('Hosted beta upstream returned no content. The document could not be interpreted. Try a smaller PNG or JPG export of the relevant pages.');
|
|
724
1356
|
}
|