@geotechcli/core 0.4.21 → 0.4.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/dist/agents/brain.d.ts +1 -5
  2. package/dist/agents/brain.d.ts.map +1 -1
  3. package/dist/agents/brain.js +4 -120
  4. package/dist/agents/brain.js.map +1 -1
  5. package/dist/agents/data-tools.js +759 -0
  6. package/dist/agents/data-tools.js.map +1 -1
  7. package/dist/agents/runtime-bootstrap.d.ts +6 -0
  8. package/dist/agents/runtime-bootstrap.d.ts.map +1 -0
  9. package/dist/agents/runtime-bootstrap.js +8 -0
  10. package/dist/agents/runtime-bootstrap.js.map +1 -0
  11. package/dist/agents/runtime-fallbacks.d.ts +7 -0
  12. package/dist/agents/runtime-fallbacks.d.ts.map +1 -0
  13. package/dist/agents/runtime-fallbacks.js +87 -0
  14. package/dist/agents/runtime-fallbacks.js.map +1 -0
  15. package/dist/agents/swarm.d.ts +1 -4
  16. package/dist/agents/swarm.d.ts.map +1 -1
  17. package/dist/agents/swarm.js +74 -8
  18. package/dist/agents/swarm.js.map +1 -1
  19. package/dist/agents/tool-runtime.d.ts +7 -0
  20. package/dist/agents/tool-runtime.d.ts.map +1 -0
  21. package/dist/agents/tool-runtime.js +9 -0
  22. package/dist/agents/tool-runtime.js.map +1 -0
  23. package/dist/config/index.d.ts +4 -4
  24. package/dist/config/index.js +1 -1
  25. package/dist/config/index.js.map +1 -1
  26. package/dist/geo/coordinates.d.ts +40 -0
  27. package/dist/geo/coordinates.d.ts.map +1 -0
  28. package/dist/geo/coordinates.js +461 -0
  29. package/dist/geo/coordinates.js.map +1 -0
  30. package/dist/geo/index.d.ts +1 -0
  31. package/dist/geo/index.d.ts.map +1 -1
  32. package/dist/geo/index.js +1 -0
  33. package/dist/geo/index.js.map +1 -1
  34. package/dist/index.d.ts +3 -2
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +3 -2
  37. package/dist/index.js.map +1 -1
  38. package/dist/ingest/ags.d.ts +3 -0
  39. package/dist/ingest/ags.d.ts.map +1 -1
  40. package/dist/ingest/ags.js +98 -9
  41. package/dist/ingest/ags.js.map +1 -1
  42. package/dist/ingest/cpt.d.ts +4 -0
  43. package/dist/ingest/cpt.d.ts.map +1 -1
  44. package/dist/ingest/cpt.js +87 -25
  45. package/dist/ingest/cpt.js.map +1 -1
  46. package/dist/ingest/document-inputs.d.ts +37 -0
  47. package/dist/ingest/document-inputs.d.ts.map +1 -0
  48. package/dist/ingest/document-inputs.js +197 -0
  49. package/dist/ingest/document-inputs.js.map +1 -0
  50. package/dist/ingest/geotech-document.d.ts +118 -0
  51. package/dist/ingest/geotech-document.d.ts.map +1 -0
  52. package/dist/ingest/geotech-document.js +1006 -0
  53. package/dist/ingest/geotech-document.js.map +1 -0
  54. package/dist/ingest/geotech-extract.d.ts +86 -0
  55. package/dist/ingest/geotech-extract.d.ts.map +1 -0
  56. package/dist/ingest/geotech-extract.js +652 -0
  57. package/dist/ingest/geotech-extract.js.map +1 -0
  58. package/dist/ingest/geotech-schemas.d.ts +248 -0
  59. package/dist/ingest/geotech-schemas.d.ts.map +1 -0
  60. package/dist/ingest/geotech-schemas.js +150 -0
  61. package/dist/ingest/geotech-schemas.js.map +1 -0
  62. package/dist/ingest/index.d.ts +8 -0
  63. package/dist/ingest/index.d.ts.map +1 -1
  64. package/dist/ingest/index.js +8 -0
  65. package/dist/ingest/index.js.map +1 -1
  66. package/dist/ingest/ingest-job-child.d.ts +2 -0
  67. package/dist/ingest/ingest-job-child.d.ts.map +1 -0
  68. package/dist/ingest/ingest-job-child.js +45 -0
  69. package/dist/ingest/ingest-job-child.js.map +1 -0
  70. package/dist/ingest/job-store.d.ts +117 -0
  71. package/dist/ingest/job-store.d.ts.map +1 -0
  72. package/dist/ingest/job-store.js +541 -0
  73. package/dist/ingest/job-store.js.map +1 -0
  74. package/dist/ingest/job-worker.d.ts +24 -0
  75. package/dist/ingest/job-worker.d.ts.map +1 -0
  76. package/dist/ingest/job-worker.js +1129 -0
  77. package/dist/ingest/job-worker.js.map +1 -0
  78. package/dist/ingest/pdf.d.ts +102 -0
  79. package/dist/ingest/pdf.d.ts.map +1 -0
  80. package/dist/ingest/pdf.js +1544 -0
  81. package/dist/ingest/pdf.js.map +1 -0
  82. package/dist/ingest/review-store.d.ts +215 -0
  83. package/dist/ingest/review-store.d.ts.map +1 -0
  84. package/dist/ingest/review-store.js +1995 -0
  85. package/dist/ingest/review-store.js.map +1 -0
  86. package/dist/llm/capabilities.d.ts +8 -0
  87. package/dist/llm/capabilities.d.ts.map +1 -0
  88. package/dist/llm/capabilities.js +73 -0
  89. package/dist/llm/capabilities.js.map +1 -0
  90. package/dist/llm/index.d.ts +3 -2
  91. package/dist/llm/index.d.ts.map +1 -1
  92. package/dist/llm/index.js +2 -1
  93. package/dist/llm/index.js.map +1 -1
  94. package/dist/llm/providers/anthropic.d.ts +6 -0
  95. package/dist/llm/providers/anthropic.d.ts.map +1 -1
  96. package/dist/llm/providers/anthropic.js +10 -1
  97. package/dist/llm/providers/anthropic.js.map +1 -1
  98. package/dist/llm/providers/hosted-beta.d.ts +6 -0
  99. package/dist/llm/providers/hosted-beta.d.ts.map +1 -1
  100. package/dist/llm/providers/hosted-beta.js +40 -10
  101. package/dist/llm/providers/hosted-beta.js.map +1 -1
  102. package/dist/llm/providers/huggingface.d.ts +6 -0
  103. package/dist/llm/providers/huggingface.d.ts.map +1 -1
  104. package/dist/llm/providers/huggingface.js +21 -1
  105. package/dist/llm/providers/huggingface.js.map +1 -1
  106. package/dist/llm/providers/openai-compatible.d.ts +6 -0
  107. package/dist/llm/providers/openai-compatible.d.ts.map +1 -1
  108. package/dist/llm/providers/openai-compatible.js +21 -1
  109. package/dist/llm/providers/openai-compatible.js.map +1 -1
  110. package/dist/llm/providers/zhipu.d.ts +6 -0
  111. package/dist/llm/providers/zhipu.d.ts.map +1 -1
  112. package/dist/llm/providers/zhipu.js +15 -1
  113. package/dist/llm/providers/zhipu.js.map +1 -1
  114. package/dist/llm/router.d.ts +7 -0
  115. package/dist/llm/router.d.ts.map +1 -1
  116. package/dist/llm/router.js +33 -13
  117. package/dist/llm/router.js.map +1 -1
  118. package/dist/llm/types.d.ts +22 -4
  119. package/dist/llm/types.d.ts.map +1 -1
  120. package/dist/llm/types.js.map +1 -1
  121. package/dist/meta/metadata.json +1 -1
  122. package/dist/report/html.d.ts +3 -0
  123. package/dist/report/html.d.ts.map +1 -0
  124. package/dist/report/html.js +626 -0
  125. package/dist/report/html.js.map +1 -0
  126. package/dist/report/index.d.ts +2 -0
  127. package/dist/report/index.d.ts.map +1 -1
  128. package/dist/report/index.js +2 -0
  129. package/dist/report/index.js.map +1 -1
  130. package/dist/report/ingest-dossier.d.ts +81 -0
  131. package/dist/report/ingest-dossier.d.ts.map +1 -0
  132. package/dist/report/ingest-dossier.js +324 -0
  133. package/dist/report/ingest-dossier.js.map +1 -0
  134. package/dist/storage/index.d.ts +5 -0
  135. package/dist/storage/index.d.ts.map +1 -1
  136. package/dist/storage/index.js +12 -6
  137. package/dist/storage/index.js.map +1 -1
  138. package/dist/vision/geotech-document.d.ts +46 -0
  139. package/dist/vision/geotech-document.d.ts.map +1 -0
  140. package/dist/vision/geotech-document.js +576 -0
  141. package/dist/vision/geotech-document.js.map +1 -0
  142. package/dist/vision/index.d.ts +31 -0
  143. package/dist/vision/index.d.ts.map +1 -1
  144. package/dist/vision/index.js +659 -27
  145. package/dist/vision/index.js.map +1 -1
  146. package/dist/vision/ocr.d.ts +29 -0
  147. package/dist/vision/ocr.d.ts.map +1 -0
  148. package/dist/vision/ocr.js +287 -0
  149. package/dist/vision/ocr.js.map +1 -0
  150. package/dist/vision/preprocess.d.ts +26 -0
  151. package/dist/vision/preprocess.d.ts.map +1 -0
  152. package/dist/vision/preprocess.js +194 -0
  153. package/dist/vision/preprocess.js.map +1 -0
  154. package/package.json +5 -1
@@ -0,0 +1,652 @@
1
+ import { interpretBoreholeLog, interpretBoreholeLogWithContext, mergeBoreholeLogPages, transcribeDocumentImageText, } from '../vision/index.js';
2
+ import { recoverDocumentTextHint } from '../vision/ocr.js';
3
/**
 * Returns the distinct non-blank strings from `values`, in first-seen order.
 *
 * Entries that are not strings, or that are empty / whitespace-only, are
 * dropped. Strings are compared exactly as given (they are not trimmed
 * before de-duplication).
 *
 * @param {Array<unknown>} values - Candidate values.
 * @returns {string[]} De-duplicated strings.
 */
function uniqueStrings(values) {
    const distinct = new Set();
    for (const entry of values) {
        if (typeof entry !== 'string' || entry.trim().length === 0) {
            continue;
        }
        distinct.add(entry);
    }
    return Array.from(distinct);
}
6
/**
 * Builds the identity key used to de-duplicate review findings.
 *
 * The key is the pipe-joined severity, scope, code, message, page number and
 * borehole ID; a missing page number or borehole ID contributes an empty
 * segment.
 *
 * @param {object} finding - Review finding.
 * @returns {string} Pipe-delimited key.
 */
function createFindingKey(finding) {
    const { severity, scope, code, message, pageNumber, boreholeId } = finding;
    const segments = [severity, scope, code, message, pageNumber ?? '', boreholeId ?? ''];
    return segments.join('|');
}
16
/**
 * Removes duplicate findings while preserving first-seen order.
 *
 * Two findings are duplicates when `createFindingKey` yields the same key
 * for both; the first occurrence is the one that is kept.
 *
 * @param {Iterable<object>} findings - Findings to de-duplicate.
 * @returns {object[]} Findings with duplicates removed.
 */
function uniqueFindings(findings) {
    // A Map keeps insertion order, so values() yields first occurrences in order.
    const firstByKey = new Map();
    for (const candidate of findings) {
        const identity = createFindingKey(candidate);
        if (!firstByKey.has(identity)) {
            firstByKey.set(identity, candidate);
        }
    }
    return [...firstByKey.values()];
}
29
/**
 * Tells whether a finding should trigger manual review.
 *
 * Every severity other than `'advisory'` counts as requiring review.
 *
 * @param {{ severity: string }} finding - Review finding.
 * @returns {boolean} `true` unless the finding is advisory.
 */
function findingRequiresReview(finding) {
    const isAdvisory = finding.severity === 'advisory';
    return !isAdvisory;
}
32
/**
 * Lists the distinct messages of all findings that require review.
 *
 * @param {Array<object>} findings - Review findings.
 * @returns {string[]} Unique, non-blank messages in first-seen order.
 */
function summarizeReviewReasons(findings) {
    const reasons = [];
    for (const finding of findings) {
        if (findingRequiresReview(finding)) {
            reasons.push(finding.message);
        }
    }
    return uniqueStrings(reasons);
}
37
/**
 * Normalizes a detected borehole ID.
 *
 * @param {unknown} value - Raw borehole ID.
 * @returns {string|null} The trimmed ID, or `null` when the value is not a
 *   string, is blank, or equals the `'BH-unknown'` placeholder.
 */
function normalizeKnownBoreholeId(value) {
    const candidate = typeof value === 'string' ? value.trim() : '';
    const isUsable = candidate !== '' && candidate !== 'BH-unknown';
    return isUsable ? candidate : null;
}
47
/**
 * Finds the shallowest depth mentioned by any layer of a result.
 *
 * Both `depthFrom` and `depthTo` of every layer are considered; values that
 * are nullish or not finite numbers are ignored.
 *
 * @param {{ layers: Iterable<{ depthFrom?: number|null, depthTo?: number|null }> }} result
 * @returns {number|null} Smallest finite depth, or `null` when there is none.
 */
function minimumLayerDepth(result) {
    let shallowest = null;
    for (const { depthFrom, depthTo } of result.layers) {
        for (const depth of [depthFrom, depthTo]) {
            if (depth == null || !Number.isFinite(depth)) {
                continue;
            }
            shallowest = shallowest == null ? depth : Math.min(shallowest, depth);
        }
    }
    return shallowest;
}
57
/**
 * Derives the key for a new page group.
 *
 * Groups with a borehole ID are keyed `borehole:<id>` and leave the anonymous
 * counter untouched; groups without one are keyed `unresolved:<n>` and
 * advance the counter by one.
 *
 * @param {string|null|undefined} boreholeId - Borehole ID, if known.
 * @param {number} anonymousIndex - Next index to use for an unresolved group.
 * @returns {{ key: string, nextAnonymousIndex: number }} Key plus the counter
 *   value the caller should carry forward.
 */
function createGroupKey(boreholeId, anonymousIndex) {
    const isAnonymous = !boreholeId;
    return isAnonymous
        ? { key: `unresolved:${anonymousIndex}`, nextAnonymousIndex: anonymousIndex + 1 }
        : { key: `borehole:${boreholeId}`, nextAnonymousIndex: anonymousIndex };
}
69
/**
 * Decides whether a page without a borehole ID should open a new unresolved
 * group instead of continuing the current one.
 *
 * A new group is started when the last page of the current group reported a
 * continuation depth of at least 3 while this page's shallowest layer depth
 * is at most 0.5, i.e. the depth sequence appears to restart near the surface.
 *
 * @param {object} result - Interpretation result for the current page.
 * @param {{ pages: Array<{ result?: object }> }|null} currentGroup - Group being built.
 * @returns {boolean} `true` when the page looks like the start of another log.
 */
function shouldStartNewAnonymousGroup(result, currentGroup) {
    if (!currentGroup || currentGroup.pages.length === 0) {
        return false;
    }
    const lastEntry = currentGroup.pages[currentGroup.pages.length - 1];
    const carriedDepth = lastEntry?.result?.continuationDepth ?? null;
    const startDepth = minimumLayerDepth(result);
    if (carriedDepth == null || startDepth == null) {
        return false;
    }
    return carriedDepth >= 3 && startDepth <= 0.5;
}
81
/**
 * Condenses a PDF inspection into per-document counters.
 *
 * @param {{ pages: Iterable<object> }|null|undefined} inspection - Inspection
 *   whose pages expose `classification`, `capabilities.nativeTextExtraction`
 *   and `degradation.level`.
 * @param {number} [ocrRecoveredPageCount=0] - Number of pages whose text was
 *   recovered through OCR; passed through unchanged.
 * @returns {object|null} Counters, or `null` when no inspection is given.
 */
function summarizeInspection(inspection, ocrRecoveredPageCount = 0) {
    if (!inspection) {
        return null;
    }
    const pageClassificationCounts = {};
    const tally = { imageHeavy: 0, nativeText: 0, degraded: 0 };
    for (const { classification, capabilities, degradation } of inspection.pages) {
        pageClassificationCounts[classification] = (pageClassificationCounts[classification] ?? 0) + 1;
        // Pages with no usable embedded text count as image-heavy.
        const isImageHeavy = classification === 'image-only' || classification === 'text-unreadable';
        if (isImageHeavy) {
            tally.imageHeavy += 1;
        }
        if (capabilities.nativeTextExtraction !== 'unavailable') {
            tally.nativeText += 1;
        }
        if (degradation.level !== 'none') {
            tally.degraded += 1;
        }
    }
    return {
        pageClassificationCounts,
        imageHeavyPageCount: tally.imageHeavy,
        nativeTextPageCount: tally.nativeText,
        degradedPageCount: tally.degraded,
        ocrRecoveredPageCount,
    };
}
109
/**
 * Collects document-level warnings from a PDF inspection.
 *
 * Starts from the inspection's own warnings and appends one warning for every
 * page classified as `image-only` or `text-unreadable`.
 *
 * @param {{ warnings: string[], pages: Array<object> }|null|undefined} inspection
 * @returns {string[]} Unique warnings; empty when no inspection is given.
 */
function buildInspectionWarnings(inspection) {
    if (!inspection) {
        return [];
    }
    const scanWarnings = [];
    for (const page of inspection.pages) {
        const lacksNativeText = page.classification === 'image-only' || page.classification === 'text-unreadable';
        if (!lacksNativeText) {
            continue;
        }
        scanWarnings.push(`PDF page ${page.pageNumber} is ${page.classification}. Native text was not recovered from the PDF parser, so an OCR-style transcription fallback may be needed.`);
    }
    return uniqueStrings([...inspection.warnings, ...scanWarnings]);
}
121
/**
 * Heuristically checks whether page text looks like it belongs to a borehole
 * log.
 *
 * The text matches when it contains the word "borehole", a BH/TP/CPT
 * identifier followed by digits, the token "SPT", a coordinate or groundwater
 * keyword, or a metre depth interval such as "1.5 - 3.0 m".
 *
 * @param {string|null|undefined} value - Page text hint.
 * @returns {boolean} `true` when any indicator matches; `false` for empty input.
 */
function pageTextHintLooksBoreholeLike(value) {
    if (!value) {
        return false;
    }
    const indicators = [
        /\bborehole\b/i,
        /\bBH[-\s_/]?\d+\b/i,
        /\bTP[-\s_/]?\d+\b/i,
        /\bCPT[-\s_/]?\d+\b/i,
        /\bSPT\b/i,
        /\b(?:easting|northing|latitude|longitude|groundwater)\b/i,
        /\b\d+(?:\.\d+)?\s*-\s*\d+(?:\.\d+)?\s*m\b/i,
    ];
    return indicators.some((pattern) => pattern.test(value));
}
133
/**
 * Tells whether an interpreted page carries any borehole data at all.
 *
 * A page has signal when a borehole ID was detected, when it has at least one
 * layer, or when any of total depth, water table depth, location, ground
 * elevation, drilling date or drilling method is present.
 *
 * @param {object} result - Interpretation result for the page.
 * @param {string|null|undefined} detectedBoreholeId - Normalized borehole ID.
 * @returns {boolean} `true` when at least one signal is present.
 */
function hasUsableBoreholeSignal(result, detectedBoreholeId) {
    if (detectedBoreholeId != null || result.layers.length > 0) {
        return true;
    }
    const metadataFields = [
        result.totalDepth,
        result.waterTableDepth,
        result.location,
        result.groundElevation,
        result.dateDrilled,
        result.drillingMethod,
    ];
    return metadataFields.some((field) => field != null);
}
143
/**
 * Decides whether a page should be excluded from borehole grouping.
 *
 * A page is ignored only when the interpretation carries no borehole signal
 * and its text hint does not look borehole-like either.
 *
 * @param {object} result - Interpretation result for the page.
 * @param {string|null|undefined} detectedBoreholeId - Normalized borehole ID.
 * @param {string|null|undefined} pageTextHint - Text hint for the page.
 * @returns {boolean} `true` when the page should be skipped.
 */
function shouldIgnoreNonLogPage(result, detectedBoreholeId, pageTextHint) {
    if (hasUsableBoreholeSignal(result, detectedBoreholeId)) {
        return false;
    }
    return !pageTextHintLooksBoreholeLike(pageTextHint);
}
147
/**
 * Computes the rounded mean of the `confidence` values of a list of results.
 *
 * @param {Array<{ confidence: number }>} results - Results to average.
 * @returns {number} Mean confidence rounded with `Math.round`, or 0 for an
 *   empty list.
 */
function averageConfidence(results) {
    const count = results.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const { confidence } of results) {
        total += confidence;
    }
    return Math.round(total / count);
}
153
/**
 * Combines two parse statuses, keeping the worse of the two.
 *
 * `'failed'` outranks `'partial'`, which outranks everything else; any other
 * combination yields `'parsed'`. When `next` is falsy, `current` is returned
 * unchanged.
 *
 * @param {string|null|undefined} current - Status so far.
 * @param {string|null|undefined} next - Status to merge in.
 * @returns {string|null|undefined} Combined status.
 */
function mergeParseStatuses(current, next) {
    if (!next) {
        return current;
    }
    const observed = [current, next];
    if (observed.includes('failed')) {
        return 'failed';
    }
    return observed.includes('partial') ? 'partial' : 'parsed';
}
165
/**
 * Combines two optional confidence caps, keeping the stricter (lower) one.
 *
 * @param {number|null|undefined} current - Cap so far.
 * @param {number|null|undefined} next - Cap to merge in.
 * @returns {number|null|undefined} The lower of the two when both are set;
 *   otherwise whichever one is set (or `next` as given when `current` is
 *   nullish).
 */
function mergeConfidenceCap(current, next) {
    const hasCurrent = current != null;
    const hasNext = next != null;
    if (hasCurrent && hasNext) {
        return Math.min(current, next);
    }
    return hasCurrent ? current : next;
}
174
/**
 * Finds the deepest depth mentioned by any layer of a result.
 *
 * Both `depthFrom` and `depthTo` of every layer are considered; values that
 * are nullish or not finite numbers are ignored.
 *
 * @param {{ layers: Iterable<{ depthFrom?: number|null, depthTo?: number|null }> }} result
 * @returns {number|null} Largest finite depth, or `null` when there is none.
 */
function maximumLayerDepth(result) {
    let deepest = null;
    for (const { depthFrom, depthTo } of result.layers) {
        for (const depth of [depthFrom, depthTo]) {
            if (depth == null || !Number.isFinite(depth)) {
                continue;
            }
            deepest = deepest == null ? depth : Math.max(deepest, depth);
        }
    }
    return deepest;
}
184
/**
 * Runs plausibility checks over a merged borehole result.
 *
 * Checks cover total depth, water table depth, ground elevation, per-layer
 * depths (negative, reversed, overlapping, gapped), SPT N and water content
 * ranges, agreement between the declared total depth and the deepest layer,
 * and coordinate sanity (missing or low-confidence CRS, WGS84 near 0,0).
 *
 * Every issue produces a borehole-scoped finding and a warning with the same
 * text, tightens the confidence cap, and may degrade the parse status.
 *
 * @param {object} result - Merged borehole; reads `boreholeId`, `totalDepth`,
 *   `waterTableDepth`, `groundElevation`, `layers` and `location`.
 * @returns {{ warnings: string[], findings: object[], confidenceCap: number|null, degradedParseStatus: string|null }}
 *   De-duplicated warnings and findings, the lowest cap requested by any
 *   issue (`null` when nothing was flagged) and the merged parse status
 *   (`null` when nothing was flagged).
 */
function validateMergedBorehole(result) {
    const collectedWarnings = [];
    const collectedFindings = [];
    let confidenceCap = null;
    let degradedParseStatus = null;
    // Records one issue. `degrade` is the parse status to merge in, or null
    // when the issue should leave the parse status alone.
    function report(code, severity, cap, degrade, message) {
        collectedFindings.push({
            code,
            severity,
            scope: 'borehole',
            message,
            boreholeId: result.boreholeId,
        });
        collectedWarnings.push(message);
        confidenceCap = mergeConfidenceCap(confidenceCap, cap);
        degradedParseStatus = mergeParseStatuses(degradedParseStatus ?? 'parsed', degrade);
    }
    const { totalDepth, waterTableDepth, groundElevation } = result;
    // --- Document-level depth and elevation checks ---
    if (totalDepth != null && totalDepth < 0) {
        report('negative_total_depth', 'blocking', 55, 'partial', `Borehole ${result.boreholeId} has a negative total depth (${result.totalDepth} m).`);
    }
    else if (totalDepth != null && totalDepth > 300) {
        report('unusually_large_total_depth', 'review', 68, null, `Borehole ${result.boreholeId} has an unusually large total depth (${result.totalDepth} m) that should be reviewed.`);
    }
    if (waterTableDepth != null && waterTableDepth < 0) {
        report('negative_water_table_depth', 'blocking', 55, 'partial', `Borehole ${result.boreholeId} has a negative water table depth (${result.waterTableDepth} m).`);
    }
    else if (waterTableDepth != null && totalDepth != null && waterTableDepth > totalDepth + 0.5) {
        // 0.5 m of slack before the water table is treated as below the hole.
        report('water_table_below_total_depth', 'blocking', 68, 'partial', `Borehole ${result.boreholeId} has a water table depth (${result.waterTableDepth} m) deeper than the total depth (${result.totalDepth} m).`);
    }
    const elevationOutOfRange = groundElevation != null
        && (groundElevation < -500 || groundElevation > 9000);
    if (elevationOutOfRange) {
        report('implausible_ground_elevation', 'review', 68, null, `Borehole ${result.boreholeId} has an implausible ground elevation (${result.groundElevation} m).`);
    }
    // --- Per-layer checks, tracking the bottom of the previous layer ---
    let previousDepthTo = null;
    for (const [index, layer] of result.layers.entries()) {
        const label = `Borehole ${result.boreholeId} layer ${index + 1}`;
        const { depthFrom, depthTo, sptN, waterContent } = layer;
        if (depthFrom != null && depthFrom < 0) {
            report('negative_layer_top_depth', 'blocking', 55, 'partial', `${label} has a negative top depth (${layer.depthFrom} m).`);
        }
        if (depthTo != null && depthTo < 0) {
            report('negative_layer_bottom_depth', 'blocking', 55, 'partial', `${label} has a negative bottom depth (${layer.depthTo} m).`);
        }
        if (depthFrom != null && depthTo != null && depthTo < depthFrom) {
            report('layer_depth_reversed', 'blocking', 55, 'partial', `${label} ends above where it starts (${layer.depthFrom} m to ${layer.depthTo} m).`);
        }
        if (previousDepthTo != null && depthFrom != null) {
            const overlapsPrevious = depthFrom < previousDepthTo - 0.25;
            const leavesGap = depthFrom > previousDepthTo + 1;
            if (overlapsPrevious) {
                report('layer_depth_restart', 'blocking', 55, 'partial', `Borehole ${result.boreholeId} has overlapping or restarted layer depths between ${previousDepthTo} m and ${layer.depthFrom} m.`);
            }
            else if (leavesGap) {
                report('layer_depth_gap', 'review', 68, 'partial', `Borehole ${result.boreholeId} has a depth gap between ${previousDepthTo} m and ${layer.depthFrom} m.`);
            }
        }
        if (sptN != null && (sptN < 0 || sptN > 200)) {
            // Negative values are impossible (blocking); very large ones only need review.
            const isNegative = sptN < 0;
            report('implausible_spt_n', isNegative ? 'blocking' : 'review', isNegative ? 55 : 68, 'partial', `${label} has an implausible SPT N value (${layer.sptN}).`);
        }
        if (waterContent != null && (waterContent < 0 || waterContent > 200)) {
            const isNegative = waterContent < 0;
            report('implausible_water_content', isNegative ? 'blocking' : 'review', isNegative ? 55 : 68, 'partial', `${label} has an implausible water content (${layer.waterContent}%).`);
        }
        // Carry the last known depth forward when this layer has none.
        previousDepthTo = depthTo ?? depthFrom ?? previousDepthTo;
    }
    // --- Declared total depth versus deepest parsed layer ---
    const deepestLayerDepth = maximumLayerDepth(result);
    const depthsDisagree = totalDepth != null
        && deepestLayerDepth != null
        && Math.abs(deepestLayerDepth - totalDepth) > 1.5;
    if (depthsDisagree) {
        report('total_depth_layer_mismatch', 'review', 68, 'partial', `Borehole ${result.boreholeId} has a declared total depth (${result.totalDepth} m) that does not align with the deepest parsed layer (${deepestLayerDepth} m).`);
    }
    // --- Coordinate checks ---
    const location = result.location;
    if (location?.projected) {
        if (!location.crs) {
            report('projected_coordinates_missing_crs', 'review', 68, null, `Borehole ${result.boreholeId} has projected coordinates but no coordinate reference system.`);
        }
        else if ((location.crs.confidence ?? 0) < 0.6) {
            const crsLabel = location.crs.code ?? location.crs.name ?? 'the detected CRS';
            report('low_crs_confidence', 'review', 68, null, `Borehole ${result.boreholeId} has projected coordinates with low CRS confidence (${Math.round((location.crs.confidence ?? 0) * 100)}%) for ${crsLabel}.`);
        }
    }
    const nearOrigin = Boolean(location?.wgs84)
        && Math.abs(location.wgs84.latitude) < 0.0001
        && Math.abs(location.wgs84.longitude) < 0.0001;
    if (nearOrigin) {
        report('wgs84_near_origin', 'review', 68, null, `Borehole ${result.boreholeId} resolved to WGS84 coordinates near 0,0 and should be reviewed.`);
    }
    return {
        warnings: uniqueStrings(collectedWarnings),
        findings: uniqueFindings(collectedFindings),
        confidenceCap,
        degradedParseStatus,
    };
}
354
/**
 * Public wrapper that summarizes a PDF inspection for borehole ingest.
 *
 * Delegates to `summarizeInspection` without an OCR-recovered page count, so
 * that function's default for the count applies.
 *
 * @param {object|null|undefined} inspection - PDF inspection result.
 * @returns {object|null} Whatever `summarizeInspection` returns for it.
 */
export function summarizeBoreholeIngestInspection(inspection) {
    const summary = summarizeInspection(inspection);
    return summary;
}
357
/**
 * Ingests a borehole log from either rendered PDF pages or a single image and
 * returns a `geotech-ingest-result` record.
 *
 * Page inputs are processed in ascending `pageNumber` order. For each page a
 * text hint is recovered, the page is interpreted with the context carried
 * over from the previous page, and the page is assigned to a borehole group
 * (or excluded when it shows no borehole signals). A page that throws is
 * recorded as a failure and does not stop the remaining pages. Groups are then
 * merged into boreholes, validated, and summarized into review findings.
 *
 * When `options.pages` is empty or absent, `options.image` is interpreted as a
 * one-page document.
 *
 * @param {object} options - Ingest options.
 * @param {Array<object>} [options.pages] - Page inputs; `pageNumber`,
 *   `totalPages`, `base64`, `mimeType` and `filePath` are read from each.
 * @param {{ base64: string, mimeType: string }} [options.image] - Single image input.
 * @param {object} options.config - Configuration forwarded to the interpreters;
 *   for page inputs its `timeout` is clamped to 60000-180000 (default 120000).
 * @param {object} [options.inspection] - PDF inspection; `pages` is indexed by
 *   `pageNumber - 1`.
 * @param {string} [options.overrideBoreholeId] - Forces every page into one
 *   borehole with this ID.
 * @param {object} [options.source] - Spread into the result's `source` field.
 * @param {Function} [options.interpretSingleImage] - Replacement for `interpretBoreholeLog`.
 * @param {Function} [options.interpretPageWithContext] - Replacement for
 *   `interpretBoreholeLogWithContext`.
 * @param {Function} [options.transcribePageImageText] - Replacement for
 *   `transcribeDocumentImageText`.
 * @param {() => Date} [options.now] - Clock used for `generatedAt`.
 * @returns {Promise<object>} The ingest result.
 * @throws {Error} When neither an image nor any page is supplied, or when no
 *   page could be assigned to a borehole group.
 */
export async function ingestBoreholeLogDocument(options) {
    const interpretSingleImage = options.interpretSingleImage ?? interpretBoreholeLog;
    const interpretPageWithContext = options.interpretPageWithContext ?? interpretBoreholeLogWithContext;
    const transcribePageImageText = options.transcribePageImageText ?? transcribeDocumentImageText;
    const now = options.now ?? (() => new Date());
    if (!options.image && (!options.pages || options.pages.length === 0)) {
        throw new Error('Borehole ingest requires either a single image input or one or more PDF page inputs.');
    }
    // Decides which branch runs below and, later, how many pages are reported.
    const hasPageInputs = Boolean(options.pages && options.pages.length > 0);
    const pageAudits = [];
    const pageFailures = [];
    const documentWarnings = buildInspectionWarnings(options.inspection);
    const reviewFindings = [];
    const groups = [];
    let currentGroup = null;
    let anonymousGroupIndex = 1;
    let lastResolvedBoreholeId = options.overrideBoreholeId;
    let priorContinuationDepth = null;
    const recoveredOcrPages = new Set();
    // Opens a new page group and registers it; unresolved groups consume the
    // next anonymous index.
    const ensureGroup = (boreholeId) => {
        const { key, nextAnonymousIndex } = createGroupKey(boreholeId, anonymousGroupIndex);
        anonymousGroupIndex = nextAnonymousIndex;
        const group = {
            key,
            boreholeId,
            pages: [],
        };
        groups.push(group);
        return group;
    };
    if (hasPageInputs) {
        // Sort a copy so the caller's array is left untouched.
        const pages = [...options.pages].sort((left, right) => left.pageNumber - right.pageNumber);
        for (const page of pages) {
            const inspectionPage = options.inspection?.pages[page.pageNumber - 1];
            let pageTextHint = inspectionPage?.normalizedArtifact?.nativeText
                ?? inspectionPage?.normalizedText
                ?? undefined;
            let textHintSource = pageTextHint ? 'native-text' : 'none';
            try {
                const pagePhaseConfig = {
                    ...options.config,
                    // Clamp the per-page timeout to between 60 s and 180 s.
                    timeout: Math.min(Math.max(options.config.timeout ?? 120000, 60000), 180000),
                };
                const recovery = await recoverDocumentTextHint({
                    existingTextHint: pageTextHint,
                    existingTextAccepted: inspectionPage?.normalizedArtifact?.textQuality.accepted ?? true,
                    imageBase64: page.base64,
                    mimeType: page.mimeType,
                    config: pagePhaseConfig,
                    pdfFilePath: page.filePath,
                    pdfPageNumber: page.pageNumber,
                    visionTranscribe: transcribePageImageText,
                });
                if (recovery.textHint) {
                    pageTextHint = recovery.textHint;
                }
                textHintSource = recovery.source;
                if (recovery.source === 'local-ocr' || recovery.source === 'vision-ocr') {
                    recoveredOcrPages.add(page.pageNumber);
                    documentWarnings.push(`Recovered ${recovery.source === 'local-ocr' ? 'local OCR' : 'OCR-style'} text hint for page ${page.pageNumber}.`);
                }
                else if (recovery.source === 'pdfjs-text') {
                    documentWarnings.push(`Recovered high-fidelity PDF text for page ${page.pageNumber} without a multimodal OCR call.`);
                }
                documentWarnings.push(...recovery.warnings.map((warning) => `Page ${page.pageNumber}: ${warning}`));
                const result = await interpretPageWithContext(page.base64, page.mimeType, pagePhaseConfig, {
                    boreholeId: lastResolvedBoreholeId,
                    pageNumber: page.pageNumber,
                    totalPages: page.totalPages,
                    priorContinuationDepth,
                    pageClassification: inspectionPage?.classification,
                    pageTextHint,
                });
                const detectedBoreholeId = normalizeKnownBoreholeId(result.boreholeId);
                if (shouldIgnoreNonLogPage(result, detectedBoreholeId, pageTextHint)) {
                    const warning = `Page ${page.pageNumber} did not contain usable borehole signals and was excluded from grouping.`;
                    documentWarnings.push(warning);
                    pageAudits.push({
                        pageNumber: page.pageNumber,
                        detectedBoreholeId,
                        assignedGroup: 'ignored:non-log',
                        classification: inspectionPage?.classification ?? null,
                        textHintSource,
                        parseStatus: result.parseStatus,
                        confidence: result.confidence,
                        continuationDepth: result.continuationDepth,
                        warnings: uniqueStrings([...result.warnings, warning]),
                    });
                    continue;
                }
                // Group selection: an override pins everything to one group;
                // otherwise a changed ID or a near-surface restart opens a new one.
                if (options.overrideBoreholeId) {
                    currentGroup ??= ensureGroup(options.overrideBoreholeId);
                }
                else if (!currentGroup) {
                    currentGroup = ensureGroup(detectedBoreholeId);
                }
                else if (detectedBoreholeId
                    && currentGroup.boreholeId
                    && detectedBoreholeId !== currentGroup.boreholeId) {
                    documentWarnings.push(`Page ${page.pageNumber} appears to start a new borehole (${detectedBoreholeId}) after ${currentGroup.boreholeId}.`);
                    currentGroup = ensureGroup(detectedBoreholeId);
                }
                else if (!detectedBoreholeId && shouldStartNewAnonymousGroup(result, currentGroup)) {
                    documentWarnings.push(`Page ${page.pageNumber} appears to restart near surface depth without a stable borehole ID. Started a new unresolved group for manual review.`);
                    currentGroup = ensureGroup(null);
                }
                else if (detectedBoreholeId && !currentGroup.boreholeId) {
                    // A later page supplied the ID that the group was missing.
                    currentGroup.boreholeId = detectedBoreholeId;
                }
                currentGroup ??= ensureGroup(detectedBoreholeId);
                if (detectedBoreholeId && !options.overrideBoreholeId) {
                    currentGroup.boreholeId = detectedBoreholeId;
                }
                currentGroup.pages.push({
                    pageNumber: page.pageNumber,
                    result,
                });
                pageAudits.push({
                    pageNumber: page.pageNumber,
                    detectedBoreholeId,
                    assignedGroup: currentGroup.key,
                    classification: inspectionPage?.classification ?? null,
                    textHintSource,
                    parseStatus: result.parseStatus,
                    confidence: result.confidence,
                    continuationDepth: result.continuationDepth,
                    warnings: result.warnings,
                });
                // Context handed to the next page's interpretation.
                lastResolvedBoreholeId = options.overrideBoreholeId ?? currentGroup.boreholeId ?? undefined;
                priorContinuationDepth = result.continuationDepth;
            }
            catch (error) {
                // A failing page is recorded and skipped so later pages still run.
                const message = error instanceof Error ? error.message : String(error);
                pageFailures.push(`Page ${page.pageNumber}: ${message}`);
                reviewFindings.push({
                    code: 'page_ingest_failed',
                    severity: 'blocking',
                    scope: 'page',
                    message: `Page ${page.pageNumber} failed during ingest: ${message}`,
                    pageNumber: page.pageNumber,
                });
                pageAudits.push({
                    pageNumber: page.pageNumber,
                    detectedBoreholeId: null,
                    assignedGroup: currentGroup?.key ?? 'unassigned',
                    classification: inspectionPage?.classification ?? null,
                    textHintSource,
                    parseStatus: 'failed',
                    confidence: 0,
                    continuationDepth: null,
                    warnings: [message],
                });
            }
        }
    }
    else if (options.image) {
        const result = await interpretSingleImage(options.image.base64, options.image.mimeType, options.config, options.overrideBoreholeId);
        const detectedBoreholeId = normalizeKnownBoreholeId(result.boreholeId);
        currentGroup = ensureGroup(options.overrideBoreholeId ?? detectedBoreholeId);
        currentGroup.pages.push({
            pageNumber: 1,
            result,
        });
        pageAudits.push({
            pageNumber: 1,
            detectedBoreholeId,
            assignedGroup: currentGroup.key,
            classification: null,
            textHintSource: 'none',
            parseStatus: result.parseStatus,
            confidence: result.confidence,
            continuationDepth: result.continuationDepth,
            warnings: result.warnings,
        });
    }
    if (groups.length === 0) {
        throw new Error(pageFailures.length > 0
            ? `No pages could be ingested successfully.\n${pageFailures.join('\n')}`
            : 'No pages could be ingested successfully.');
    }
    const mergedBoreholes = groups
        .filter((group) => group.pages.length > 0)
        .map((group) => mergeBoreholeLogPages(group.pages, options.overrideBoreholeId ?? group.boreholeId ?? undefined));
    const boreholeValidationFeedback = mergedBoreholes.map((borehole) => validateMergedBorehole(borehole));
    // Apply validation feedback: degrade status, cap confidence, gate auto-proceed.
    const boreholes = mergedBoreholes.map((borehole, index) => {
        const validation = boreholeValidationFeedback[index];
        const parseStatus = mergeParseStatuses(borehole.parseStatus, validation?.degradedParseStatus ?? null);
        const confidence = validation?.confidenceCap == null
            ? borehole.confidence
            : Math.min(borehole.confidence, validation.confidenceCap);
        return {
            ...borehole,
            parseStatus,
            confidence,
            warnings: uniqueStrings([...borehole.warnings, ...(validation?.warnings ?? [])]),
            canAutoProceed: borehole.canAutoProceed
                && !(validation?.findings.some(findingRequiresReview) ?? false)
                && parseStatus === 'parsed'
                && confidence >= 70,
        };
    });
    if (pageFailures.length > 0) {
        reviewFindings.push({
            code: 'page_failures_present',
            severity: 'blocking',
            scope: 'document',
            message: `${pageFailures.length} page(s) failed during ingest and should be reviewed.`,
        });
    }
    // Scanned pages for which no OCR-sourced text hint was recovered.
    const unrecoveredScanPages = options.inspection?.pages.filter((page) => (page.classification === 'image-only' || page.classification === 'text-unreadable')
        && !recoveredOcrPages.has(page.pageNumber)) ?? [];
    reviewFindings.push(...unrecoveredScanPages.map((page) => ({
        code: 'missing_ocr_text_hint',
        severity: 'review',
        scope: 'page',
        message: `Page ${page.pageNumber} did not yield a recovered OCR-style text hint and should be reviewed manually.`,
        pageNumber: page.pageNumber,
    })));
    if (unrecoveredScanPages.length > 0) {
        reviewFindings.push({
            code: 'unrecovered_scanned_pages_present',
            severity: 'review',
            scope: 'document',
            message: `${unrecoveredScanPages.length} scanned/image-heavy page(s) did not yield a recovered OCR-style text hint and should be reviewed manually.`,
        });
    }
    const partiallyParsedPageAudits = pageAudits.filter((audit) => audit.assignedGroup !== 'ignored:non-log' && audit.parseStatus === 'partial');
    reviewFindings.push(...partiallyParsedPageAudits.map((audit) => ({
        code: 'page_partial_parse',
        severity: 'review',
        scope: 'page',
        message: `Page ${audit.pageNumber} parsed only partially and should be reviewed.`,
        pageNumber: audit.pageNumber,
        boreholeId: audit.detectedBoreholeId ?? undefined,
    })));
    if (partiallyParsedPageAudits.length > 0 || pageFailures.length > 0) {
        reviewFindings.push({
            code: 'pages_incomplete',
            severity: 'review',
            scope: 'document',
            message: 'One or more pages parsed only partially or failed.',
        });
    }
    if (groups.some((group) => group.boreholeId == null)) {
        reviewFindings.push({
            code: 'unstable_borehole_group',
            severity: 'blocking',
            scope: 'document',
            message: 'At least one borehole group could not be assigned a stable borehole ID.',
        });
    }
    reviewFindings.push(...boreholeValidationFeedback.flatMap((validation) => validation.findings));
    if (boreholes.some((borehole) => borehole.confidence < 70 || borehole.parseStatus !== 'parsed')) {
        reviewFindings.push({
            code: 'merged_borehole_incomplete',
            severity: 'review',
            scope: 'document',
            message: 'At least one merged borehole result is low-confidence or incomplete.',
        });
    }
    const warnings = uniqueStrings([
        ...documentWarnings,
        ...pageFailures,
        ...boreholes.flatMap((borehole) => borehole.warnings),
    ]);
    const normalizedReviewFindings = uniqueFindings(reviewFindings);
    const uniqueReviewReasons = summarizeReviewReasons(normalizedReviewFindings);
    // The single-image path always handles exactly one page. `pages?.length ?? 1`
    // reported 0 for an image supplied with an empty `pages` array, because
    // `[].length` is 0 rather than nullish.
    const totalPages = hasPageInputs ? options.pages.length : 1;
    const inspectionSummary = summarizeInspection(options.inspection, recoveredOcrPages.size);
    const confidence = averageConfidence(boreholes);
    return {
        kind: 'geotech-ingest-result',
        schemaVersion: 1,
        documentType: 'borehole-log',
        generatedAt: now().toISOString(),
        source: {
            ...options.source,
            totalPages,
            successfulPages: pageAudits.filter((audit) => audit.parseStatus !== 'failed').length,
            failedPages: pageFailures.length,
        },
        inspection: options.inspection ?? null,
        inspectionSummary,
        boreholes,
        pageAudits,
        pageFailures,
        warnings,
        reviewFindings: normalizedReviewFindings,
        reviewReasons: uniqueReviewReasons,
        reviewRequired: uniqueReviewReasons.length > 0,
        confidence,
        canAutoProceed: boreholes.length > 0
            && uniqueReviewReasons.length === 0
            && boreholes.every((borehole) => borehole.canAutoProceed),
    };
}
652
+ //# sourceMappingURL=geotech-extract.js.map