@geotechcli/core 0.4.44 → 0.4.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/brain.d.ts.map +1 -1
- package/dist/agents/brain.js +9 -5
- package/dist/agents/brain.js.map +1 -1
- package/dist/agents/data-tools.js +44 -5
- package/dist/agents/data-tools.js.map +1 -1
- package/dist/agents/orchestrator.d.ts.map +1 -1
- package/dist/agents/orchestrator.js +25 -12
- package/dist/agents/orchestrator.js.map +1 -1
- package/dist/agents/provider-operating-contract.d.ts +20 -0
- package/dist/agents/provider-operating-contract.d.ts.map +1 -0
- package/dist/agents/provider-operating-contract.js +146 -0
- package/dist/agents/provider-operating-contract.js.map +1 -0
- package/dist/agents/safety.d.ts +1 -0
- package/dist/agents/safety.d.ts.map +1 -1
- package/dist/agents/safety.js +26 -0
- package/dist/agents/safety.js.map +1 -1
- package/dist/agents/swarm.d.ts.map +1 -1
- package/dist/agents/swarm.js +22 -11
- package/dist/agents/swarm.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest/document-evidence-packet.d.ts +814 -0
- package/dist/ingest/document-evidence-packet.d.ts.map +1 -0
- package/dist/ingest/document-evidence-packet.js +727 -0
- package/dist/ingest/document-evidence-packet.js.map +1 -0
- package/dist/ingest/geotech-document-benchmark.d.ts +15 -0
- package/dist/ingest/geotech-document-benchmark.d.ts.map +1 -1
- package/dist/ingest/geotech-document-benchmark.js +18 -0
- package/dist/ingest/geotech-document-benchmark.js.map +1 -1
- package/dist/ingest/geotech-document.d.ts +2 -0
- package/dist/ingest/geotech-document.d.ts.map +1 -1
- package/dist/ingest/geotech-document.js +12 -107
- package/dist/ingest/geotech-document.js.map +1 -1
- package/dist/ingest/index.d.ts +1 -0
- package/dist/ingest/index.d.ts.map +1 -1
- package/dist/ingest/index.js +1 -0
- package/dist/ingest/index.js.map +1 -1
- package/dist/ingest/job-worker.d.ts.map +1 -1
- package/dist/ingest/job-worker.js +4 -2
- package/dist/ingest/job-worker.js.map +1 -1
- package/dist/llm/capabilities.d.ts.map +1 -1
- package/dist/llm/capabilities.js +42 -0
- package/dist/llm/capabilities.js.map +1 -1
- package/dist/meta/metadata.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,727 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
export const DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION = 2;
|
|
3
|
+
/**
 * Closed set of extraction-method labels accepted on pages and observations.
 * `'none'` is one of the accepted labels alongside the four named methods.
 */
export const DocumentEvidenceMethodSchema = z.enum(
  ['native-pdf-text', 'layout-ocr', 'visual-reasoning', 'hybrid', 'none'],
);
|
|
10
|
+
/** Closed set of review states an individual observation can carry. */
export const DocumentEvidenceReviewStatusSchema = z.enum(
  ['verified', 'needs_review', 'missing', 'uncertain'],
);
|
|
16
|
+
/** Closed set of observation categories; mirrors the three observation groups of a packet. */
export const DocumentEvidenceObservationTypeSchema = z.enum(
  ['material', 'classification', 'parameter'],
);
|
|
21
|
+
/**
 * Shape of the `source` section of a packet: which input was processed and
 * how many of its pages succeeded or failed.
 */
const SourceSchema = z.object({
  // File identity is optional; either, both, or neither may be present.
  fileName: z.string().optional(),
  filePath: z.string().optional(),

  inputKind: z.enum(['image', 'pdf']),

  // Optional pair of positive integer page numbers.
  // NOTE(review): presumably [first, last]; ordering is not enforced here.
  pageRange: z
    .tuple([z.number().int().positive(), z.number().int().positive()])
    .optional(),

  // Non-negative integer page tallies. No cross-field consistency is checked.
  totalPages: z.number().int().nonnegative(),
  successfulPages: z.number().int().nonnegative(),
  failedPages: z.number().int().nonnegative(),
});
|
|
30
|
+
/** Shape of the `document` section of a packet: document-level status and gating flags. */
const DocumentSchema = z.object({
  // Both fields are required but may be null.
  title: z.string().nullable(),
  documentClass: z.string().nullable(),

  parseStatus: z.enum(['parsed', 'partial', 'failed']),

  // Confidence is bounded to the inclusive range 0..100.
  confidence: z.number().min(0).max(100),

  reviewRequired: z.boolean(),
  canAutoProceed: z.boolean(),
});
|
|
38
|
+
/** Shape of one entry in a packet's `pages` array: the per-page audit record. */
const PageSchema = z.object({
  pageNumber: z.number().int().positive(),
  classification: z.string().nullable(),
  parseStatus: z.enum(['parsed', 'partial', 'failed']),

  // Confidence is bounded to the inclusive range 0..100.
  confidence: z.number().min(0).max(100),

  // Normalized method label plus the free-form source string it accompanies.
  method: DocumentEvidenceMethodSchema,
  rawSource: z.string(),
  sourceCategory: z.enum(['native-text', 'layout-ocr', 'vision', 'none']),

  // Cache bookkeeping; the entry id is optional.
  cacheStatus: z.enum(['hit', 'miss', 'stored', 'skipped', 'unavailable']),
  cacheEntryId: z.string().optional(),

  // Non-negative integer counts, one per observation group.
  counts: z.object({
    materials: z.number().int().nonnegative(),
    classifications: z.number().int().nonnegative(),
    parameters: z.number().int().nonnegative(),
  }),

  warnings: z.array(z.string()),
});
|
|
55
|
+
/**
 * Shape shared by every observation (material, classification or parameter)
 * stored under a packet's `observations` section.
 */
const ObservationSchema = z.object({
  id: z.string(),
  type: DocumentEvidenceObservationTypeSchema,
  label: z.string(),

  // Primitive value; null is accepted.
  value: z.union([z.string(), z.number(), z.boolean(), z.null()]),

  // Descriptive extras: each may be omitted or explicitly null.
  numericValue: z.number().nullable().optional(),
  unit: z.string().nullable().optional(),
  material: z.string().nullable().optional(),
  context: z.string().nullable().optional(),

  // Provenance and review state. The page list may be empty.
  sourcePages: z.array(z.number().int().positive()),
  method: DocumentEvidenceMethodSchema,
  confidence: z.number().min(0).max(100),
  reviewStatus: DocumentEvidenceReviewStatusSchema,
  warnings: z.array(z.string()),
});
|
|
70
|
+
/** Shape of one entry in a packet's `contentChunks` array: a piece of retained source text. */
const ContentChunkSchema = z.object({
  chunkId: z.string(),

  // Required pair of positive integer page numbers (ordering is not enforced here).
  pageRange: z.tuple([z.number().int().positive(), z.number().int().positive()]),

  headingAncestry: z.array(z.string()),
  scope: z.enum(['page', 'table', 'figure', 'section']),

  // Optional free-form section label and optional numeric significance (unbounded).
  sectionType: z.string().optional(),
  significance: z.number().optional(),

  sourcePages: z.array(z.number().int().positive()),
  text: z.string(),
});
|
|
80
|
+
/** Shape of one entry in a packet's `review.findings` array. */
const ReviewFindingSchema = z.object({
  code: z.string(),
  severity: z.enum(['advisory', 'review', 'blocking']),
  scope: z.enum(['document', 'page', 'material']),
  message: z.string(),

  // Optional locators; the schema does not tie them to `scope`.
  pageNumber: z.number().int().positive().optional(),
  materialDescription: z.string().optional(),
});
|
|
88
|
+
/**
 * Shape of a packet's `synthesis` section. The whole section is nullable;
 * when present, every list is required and `latencyMs` is optional.
 */
const SynthesisSchema = z
  .object({
    takeaways: z.array(z.string()),
    groundModel: z.array(z.string()),
    keyParameters: z.array(z.string()),
    interpretation: z.array(z.string()),
    limitations: z.array(z.string()),
    sourcePages: z.array(z.number().int().positive()),
    // Non-negative number; need not be an integer as far as this schema is concerned.
    latencyMs: z.number().nonnegative().optional(),
  })
  .nullable();
|
|
97
|
+
/** Shape of a packet's `engineeringSignals` section: two required string lists. */
const EngineeringSignalsSchema = z.object({
  risks: z.array(z.string()),
  recommendations: z.array(z.string()),
});
|
|
101
|
+
/**
 * Top-level schema of a document evidence packet.
 *
 * `kind`, `schemaVersion`, `providerContract.providerNeutral` and
 * `providerContract.purpose` are literals, so a packet carrying any other
 * value for them fails validation.
 */
export const DocumentEvidencePacketSchema = z.object({
  // Envelope.
  kind: z.literal('document-evidence-packet'),
  schemaVersion: z.literal(DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION),
  generatedAt: z.string(), // any string; no timestamp format is enforced here

  providerContract: z.object({
    providerNeutral: z.literal(true),
    purpose: z.literal('byok-document-understanding'),
    normalizedMethods: z.array(DocumentEvidenceMethodSchema),
    reviewGates: z.array(z.string()),
  }),

  // Input and document-level summaries.
  source: SourceSchema,
  document: DocumentSchema,
  pages: z.array(PageSchema),

  // Extracted observations, one list per observation type.
  observations: z.object({
    materials: z.array(ObservationSchema),
    classifications: z.array(ObservationSchema),
    parameters: z.array(ObservationSchema),
  }),

  contentChunks: z.array(ContentChunkSchema),
  engineeringSignals: EngineeringSignalsSchema,
  synthesis: SynthesisSchema, // nullable

  review: z.object({
    warnings: z.array(z.string()),
    reviewReasons: z.array(z.string()),
    findings: z.array(ReviewFindingSchema),
  }),

  traceability: z.object({
    // Page-number lists (positive integers).
    sourcePages: z.array(z.number().int().positive()),
    pagesWithEvidence: z.array(z.number().int().positive()),
    directVisualPages: z.array(z.number().int().positive()),
    layoutOcrPages: z.array(z.number().int().positive()),
    nativeTextPages: z.array(z.number().int().positive()),

    boreholeIds: z.array(z.string()),
    maxDepthMeters: z.number().nullable(),

    // Parameter traceability tallies; the rate is a fraction in 0..1.
    parametersWithSourcePage: z.number().int().nonnegative(),
    parametersWithoutSourcePage: z.number().int().nonnegative(),
    parameterTraceabilityRate: z.number().min(0).max(1),

    // Arbitrary string keys mapped to non-negative integer counts.
    methodCounts: z.record(z.string(), z.number().int().nonnegative()),
  }),
});
|
|
141
|
+
/**
 * Builds a document evidence packet from an ingest result and validates it
 * against `DocumentEvidencePacketSchema`.
 *
 * @param {object} result - Ingest result. This function reads `pageAudits`,
 *   `generatedAt`, `source`, `title`, `documentClass`, `parseStatus`,
 *   `confidence`, `reviewRequired`, `canAutoProceed`, `materials`,
 *   `classifications`, `parameters`, `contentChunks` (optional), `risks`,
 *   `recommendations`, `synthesis` (optional), `warnings`, `reviewReasons`
 *   and `reviewFindings`.
 * @returns {object} The value returned by `DocumentEvidencePacketSchema.parse`.
 * @throws Whatever `DocumentEvidencePacketSchema.parse` throws when the
 *   assembled packet does not satisfy the schema.
 */
export function buildDocumentEvidencePacket(result) {
  const pages = result.pageAudits.map((audit) => buildEvidencePage(audit));
  const traceability = buildTraceability(result, pages);

  // Provenance/review fields shared by all three observation groups. A
  // "missing" observation is pinned to confidence 0 and never asks for a
  // page-derived confidence.
  const traceFields = (sourcePages, missing = false) => {
    const method = methodForSourcePages(sourcePages, pages);
    if (missing) {
      return {
        sourcePages,
        method,
        confidence: 0,
        reviewStatus: 'missing',
        warnings: ['Value is missing or not extracted.'],
      };
    }
    return {
      sourcePages,
      method,
      confidence: confidenceForSourcePages(sourcePages, pages, result.confidence),
      // An observation without any source page is routed to review.
      reviewStatus: sourcePages.length > 0 ? 'verified' : 'needs_review',
      warnings: [],
    };
  };

  const providerContract = {
    providerNeutral: true,
    purpose: 'byok-document-understanding',
    normalizedMethods: ['native-pdf-text', 'layout-ocr', 'visual-reasoning', 'hybrid', 'none'],
    reviewGates: buildReviewGates(result),
  };

  // Optional source fields are only copied when truthy so they stay absent otherwise.
  const input = result.source;
  const source = {
    ...(input.fileName ? { fileName: input.fileName } : {}),
    ...(input.filePath ? { filePath: input.filePath } : {}),
    inputKind: input.inputKind,
    ...(input.pageRange ? { pageRange: input.pageRange } : {}),
    totalPages: input.totalPages,
    successfulPages: input.successfulPages,
    failedPages: input.failedPages,
  };

  const documentSummary = {
    title: result.title,
    documentClass: result.documentClass,
    parseStatus: result.parseStatus,
    confidence: normalizeConfidence(result.confidence),
    reviewRequired: result.reviewRequired,
    canAutoProceed: result.canAutoProceed,
  };

  const materials = result.materials.map((material, index) => {
    const sourcePages = sourcePagesFromObservation(material.sourcePages, material.description);
    return {
      id: observationId('material', index, material.description),
      type: 'material',
      label: material.kind,
      value: material.description,
      material: material.lithology,
      context: material.uscsSymbol ? `USCS ${material.uscsSymbol}` : null,
      ...traceFields(sourcePages),
    };
  });

  const classifications = result.classifications.map((classification, index) => {
    const sourcePages = sourcePagesFromObservation(classification.sourcePages, classification.context);
    return {
      id: observationId('classification', index, `${classification.system}:${classification.value}`),
      type: 'classification',
      label: classification.system,
      value: classification.value,
      context: classification.context,
      ...traceFields(sourcePages),
    };
  });

  const parameters = result.parameters.map((parameter, index) => {
    const sourcePages = sourcePagesFromObservation(parameter.sourcePages, parameter.context);
    const missing = isMissingValue(parameter.valueText, parameter.numericValue);
    return {
      id: observationId('parameter', index, `${parameter.name}:${parameter.valueText}`),
      type: 'parameter',
      label: parameter.name,
      value: parameter.valueText,
      numericValue: parameter.numericValue,
      unit: parameter.unit,
      material: parameter.material,
      context: parameter.context,
      ...traceFields(sourcePages, missing),
    };
  });

  const contentChunks = (result.contentChunks ?? []).map(compactContentChunk);

  // Risks and recommendations are de-duplicated, then capped at 24 entries each.
  const engineeringSignals = {
    risks: uniqueNonEmpty(result.risks).slice(0, 24),
    recommendations: uniqueNonEmpty(result.recommendations).slice(0, 24),
  };

  let synthesis = null;
  if (result.synthesis) {
    const draft = result.synthesis;
    synthesis = {
      takeaways: draft.takeaways,
      groundModel: draft.groundModel,
      keyParameters: draft.keyParameters,
      interpretation: draft.interpretation,
      limitations: draft.limitations,
      sourcePages: normalizePages(draft.sourcePages),
      // Latency is only kept when finite; it is rounded and clamped at zero.
      ...(Number.isFinite(draft.latencyMs)
        ? { latencyMs: Math.max(0, Math.round(draft.latencyMs)) }
        : {}),
    };
  }

  const review = {
    warnings: result.warnings,
    reviewReasons: result.reviewReasons,
    findings: result.reviewFindings.map(compactReviewFinding),
  };

  return DocumentEvidencePacketSchema.parse({
    kind: 'document-evidence-packet',
    schemaVersion: DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION,
    generatedAt: result.generatedAt,
    providerContract,
    source,
    document: documentSummary,
    pages,
    observations: { materials, classifications, parameters },
    contentChunks,
    engineeringSignals,
    synthesis,
    review,
    traceability,
  });
}
|
|
253
|
+
/**
 * Returns a shallow copy of `result` with an `evidencePacket` property built
 * from it. The input object is not modified; an existing `evidencePacket`
 * on the input is replaced in the copy.
 *
 * @param {object} result - Ingest result accepted by `buildDocumentEvidencePacket`.
 * @returns {object} Copy of `result` plus `evidencePacket`.
 */
export function attachDocumentEvidencePacket(result) {
  const enriched = { ...result };
  enriched.evidencePacket = buildDocumentEvidencePacket(result);
  return enriched;
}
|
|
259
|
+
/**
 * Reports whether a packet carries anything worth synthesizing: at least one
 * observation, risk or recommendation, or a non-blank content chunk whose
 * section type is neither `'administrative'` nor `'visual-appendix'`.
 *
 * @param {object} packet - Document evidence packet.
 * @returns {boolean} True when any of the above is present.
 */
export function documentEvidencePacketHasEngineeringSignal(packet) {
  const { observations } = packet;
  if (observations.materials.length > 0) {
    return true;
  }
  if (observations.classifications.length > 0) {
    return true;
  }
  if (observations.parameters.length > 0) {
    return true;
  }

  const signals = packet.engineeringSignals;
  if (signals.risks.length > 0) {
    return true;
  }
  if (signals.recommendations.length > 0) {
    return true;
  }

  // Fall back to retained text, ignoring blank chunks and non-engineering sections.
  const ignoredSectionTypes = new Set(['administrative', 'visual-appendix']);
  return packet.contentChunks.some((chunk) => {
    if (chunk.text.trim().length === 0) {
      return false;
    }
    return !ignoredSectionTypes.has(chunk.sectionType);
  });
}
|
|
269
|
+
/**
 * Compiles the synthesis prompt for a document evidence packet.
 *
 * The evidence text is a header followed by titled sections (outline,
 * borehole continuity, parameters, materials, classifications, risks,
 * recommendations, review-gated parameters, high-signal chunks, page audit,
 * warnings/findings). It is passed through `compactText` with
 * `maxEvidenceChars` and embedded at the end of the prompt.
 *
 * @param {object} packet - Document evidence packet.
 * @param {object} [options] - Optional limits; each falls back to its default
 *   when null or undefined: `maxEvidenceChars` (9000), `maxOutlineChunks` (28),
 *   `maxHighSignalChunks` (18), `maxParameters` (48), `maxMaterials` (36),
 *   `maxClassifications` (28).
 * @returns {{systemPrompt: string, prompt: string, evidence: string,
 *   hasEngineeringSignal: boolean, sourcePages: number[], reviewGates: string[],
 *   schemaVersion: number}} Prompt parts plus packet metadata.
 */
export function compileDocumentEvidenceSynthesisPrompt(packet, options = {}) {
  const maxEvidenceChars = options.maxEvidenceChars ?? 9000;

  // Chunks that are non-blank and not administrative feed both chunk views.
  const substantiveChunks = packet.contentChunks.filter(
    (chunk) => chunk.text.trim().length > 0 && chunk.sectionType !== 'administrative',
  );
  // Outline view: ordered by first page, then last page.
  const outlineChunks = [...substantiveChunks]
    .sort((left, right) => (left.pageRange[0] - right.pageRange[0])
      || (left.pageRange[1] - right.pageRange[1]))
    .slice(0, options.maxOutlineChunks ?? 28)
    .map((chunk) => formatContentChunkForSynthesis(chunk, 240));
  // High-signal view: highest significance first (absent significance counts as 0).
  const highSignalChunks = [...substantiveChunks]
    .sort((left, right) => (right.significance ?? 0) - (left.significance ?? 0))
    .slice(0, options.maxHighSignalChunks ?? 18)
    .map((chunk) => formatContentChunkForSynthesis(chunk, 360));

  const describeAll = (observations) => observations
    .map((observation) => formatObservationForSynthesis(observation));
  const allParameters = packet.observations.parameters;
  const parameters = describeAll(allParameters.slice(0, options.maxParameters ?? 48));
  const materials = describeAll(packet.observations.materials.slice(0, options.maxMaterials ?? 36));
  const classifications = describeAll(
    packet.observations.classifications.slice(0, options.maxClassifications ?? 28),
  );

  const boreholeLines = buildBoreholeContinuityLines(packet);

  const pageLines = packet.pages
    .slice()
    .sort((left, right) => left.pageNumber - right.pageNumber)
    .map((page) => {
      const warningSuffix = page.warnings.length > 0
        ? `; warnings=${page.warnings.slice(0, 3).join(' | ')}`
        : '';
      return `Page ${page.pageNumber}: ${page.parseStatus}; method=${page.method}; source=${page.rawSource}; class=${page.classification ?? 'unknown'}; confidence=${page.confidence}%; counts m/c/p=${page.counts.materials}/${page.counts.classifications}/${page.counts.parameters}${warningSuffix}`;
    });

  // Review-gated parameters are drawn from the full list, not the capped one above.
  const missingParameters = describeAll(
    allParameters.filter((observation) => observation.reviewStatus === 'missing'),
  );
  const reviewParameters = describeAll(
    allParameters.filter((observation) => observation.reviewStatus === 'needs_review'
      || observation.reviewStatus === 'uncertain'),
  );

  const { source, traceability, providerContract } = packet;
  const summary = packet.document;
  const maxDepth = traceability.maxDepthMeters != null
    ? `${traceability.maxDepthMeters} m`
    : 'not extracted';

  const headerLines = [
    'Provider-neutral DocumentEvidencePacket synthesis evidence contract.',
    `Packet schema: v${packet.schemaVersion}; providerNeutral=${providerContract.providerNeutral}; purpose=${providerContract.purpose}.`,
    `Source: ${source.fileName ?? source.filePath ?? source.inputKind}; pages ${source.successfulPages}/${source.totalPages}; evidence pages ${pageList(traceability.pagesWithEvidence)}.`,
    `Document: ${summary.title ?? 'untitled'}; class=${summary.documentClass ?? 'unknown'}; status=${summary.parseStatus}; confidence=${summary.confidence}%; reviewRequired=${summary.reviewRequired ? 'yes' : 'no'}; canAutoProceed=${summary.canAutoProceed ? 'yes' : 'no'}.`,
    `Traceability: source pages ${pageList(traceability.sourcePages)}; native=${pageList(traceability.nativeTextPages)}; layout/OCR=${pageList(traceability.layoutOcrPages)}; visual=${pageList(traceability.directVisualPages)}; parameter source-page rate=${Math.round(traceability.parameterTraceabilityRate * 100)}%.`,
    `Review gates: ${providerContract.reviewGates.join('; ') || 'none'}.`,
    `Borehole summary: ${traceability.boreholeIds.join(', ') || 'not detected'}; max depth=${maxDepth}.`,
  ];

  const riskBullets = packet.engineeringSignals.risks
    .slice(0, 16)
    .map((risk) => `- ${compactText(risk, 220)}`);
  const recommendationBullets = packet.engineeringSignals.recommendations
    .slice(0, 16)
    .map((recommendation) => `- ${compactText(recommendation, 220)}`);
  const reviewLines = [
    ...packet.review.warnings.slice(0, 12).map((warning) => `Warning: ${compactText(warning, 220)}`),
    ...packet.review.findings.slice(0, 12).map((finding) => `${finding.severity.toUpperCase()} ${finding.code}${finding.pageNumber ? ` page ${finding.pageNumber}` : ''}: ${compactText(finding.message, 220)}`),
  ];

  // Each section renders as: blank line, title, body (or the fallback when the body is empty).
  const sections = [
    ['Ordered whole-report outline by source page:', outlineChunks, 'No ordered outline retained.'],
    ['Borehole continuity evidence:', boreholeLines, 'No borehole-specific continuity evidence detected.'],
    ['Engineering parameters:', parameters, 'None extracted.'],
    ['Material observations:', materials, 'None extracted.'],
    ['Classifications:', classifications, 'None extracted.'],
    ['Risks extracted from page evidence:', riskBullets, 'None extracted.'],
    ['Recommendations extracted from page evidence:', recommendationBullets, 'None extracted.'],
    ['Missing or review-gated parameters:', [...missingParameters.slice(0, 16), ...reviewParameters.slice(0, 16)], 'None flagged.'],
    ['High-signal source chunks:', highSignalChunks, 'No high-signal chunks retained.'],
    ['Page-level method and audit summary:', pageLines, 'No page audit retained.'],
    ['Warnings and review findings:', reviewLines, 'None.'],
  ];
  const sectionLines = sections.flatMap(
    ([title, lines, fallback]) => ['', title, lines.join('\n') || fallback],
  );

  const evidence = compactText([...headerLines, ...sectionLines].join('\n'), maxEvidenceChars);

  // NOTE(review): the template body below is reproduced as it appears in the
  // reviewed source, where leading whitespace was not visible — confirm the
  // indentation of the JSON skeleton against the published file.
  const prompt = `Create a concise engineering synthesis from the provider-neutral geotechnical document evidence packet below. Respond with ONLY a JSON object:
{
"takeaways": ["<report-level engineering takeaway with source-page wording where possible>"],
"groundModel": ["<depth-bounded soil/rock/groundwater model statement with source pages if known>"],
"keyParameters": ["<parameter, value, unit, material/context, source page if known>"],
"interpretation": ["<construction/design interpretation supported by extracted evidence>"],
"limitations": ["<uncertainty, missing evidence, OCR/layout/visual limitations>"],
"sourcePages": [<page numbers that support the synthesis>]
}

Instructions:
- Read the ordered whole-report outline first before summarizing individual extraction rows.
- Preserve borehole-log continuity when a borehole spans multiple pages; group depth intervals, lithology, SPT/RQD/recovery, groundwater, and source pages by borehole where evidence supports it.
- Cite source pages from the packet in every specific engineering claim where possible.
- Do not invent values, strata boundaries, groundwater, SPT, RQD, strength, density, or design parameters.
- Do not let synthesis raise extraction confidence. Confidence comes only from source evidence coverage and corroboration.
- Treat direct-visual-only, missing, needs-review, and uncertain values as review-gated evidence.
- Prefer explicit soil and rock mechanics parameters, groundwater observations, classification systems, and foundation/geohazard/construction implications.

Evidence:
${evidence}`;

  return {
    systemPrompt: 'You are a senior geotechnical engineer synthesizing provider-neutral document evidence into a review brief. Use cautious, evidence-bound language, cite source pages, preserve missing-data limitations, and respond with JSON only.',
    prompt,
    evidence,
    hasEngineeringSignal: documentEvidencePacketHasEngineeringSignal(packet),
    sourcePages: traceability.sourcePages,
    reviewGates: providerContract.reviewGates,
    schemaVersion: packet.schemaVersion,
  };
}
|
|
378
|
+
/**
 * Renders a packet as a compact, line-oriented text summary for agent context.
 *
 * Lines whose underlying list is empty are dropped, except the review-gate
 * and missing-parameter lines, which always appear with a "none" wording.
 * The joined text is passed through `compactText` with `maxContentChars`.
 *
 * @param {object} packet - Document evidence packet.
 * @param {object} [options] - Optional limits; each falls back to its default
 *   when null or undefined: `maxObservationsPerGroup` (8), `maxReviewGates` (8),
 *   `maxContentChars` (2400).
 * @returns {string} The value returned by `compactText` for the summary text.
 */
export function summarizeDocumentEvidencePacketForAgent(packet, options = {}) {
  const maxObservationsPerGroup = options.maxObservationsPerGroup ?? 8;
  const maxReviewGates = options.maxReviewGates ?? 8;
  const maxContentChars = options.maxContentChars ?? 2400;

  const pageOutcomes = countPageOutcomes(packet);
  const { source, traceability } = packet;
  const summary = packet.document;

  // Only methods with a positive page count are listed.
  const methods = Object.entries(traceability.methodCounts)
    .filter(([, count]) => count > 0)
    .map(([method, count]) => `${method}:${count}`)
    .join(', ') || 'none';

  const reviewGates = packet.providerContract.reviewGates.slice(0, maxReviewGates);

  // Parameters are grouped by review status, each group capped independently.
  const parametersWithStatus = (...statuses) => packet.observations.parameters
    .filter((parameter) => statuses.includes(parameter.reviewStatus))
    .slice(0, maxObservationsPerGroup);
  const missingParameters = parametersWithStatus('missing')
    .map((parameter) => parameter.label);
  const reviewParameters = parametersWithStatus('needs_review', 'uncertain')
    .map((parameter) => `${parameter.label}${parameter.context ? ` (${parameter.context})` : ''}`);
  const verifiedParameters = parametersWithStatus('verified')
    .map((parameter) => formatObservationForAgent(parameter));

  const materials = packet.observations.materials
    .slice(0, maxObservationsPerGroup)
    .map((material) => formatObservationForAgent(material));
  const classifications = packet.observations.classifications
    .slice(0, maxObservationsPerGroup)
    .map((classification) => formatObservationForAgent(classification));

  // A null/undefined synthesis section contributes nothing.
  const synthesisSection = packet.synthesis;
  const synthesis = synthesisSection == null
    ? []
    : [
      ...synthesisSection.takeaways.slice(0, 4),
      ...synthesisSection.groundModel.slice(0, 4),
      ...synthesisSection.interpretation.slice(0, 3),
      ...synthesisSection.limitations.slice(0, 3),
    ];

  // "<label>: a; b; c." when there are items, otherwise the fallback (empty by default,
  // which makes the line disappear in the final filter).
  const listLine = (label, items, separator = '; ', fallback = '') => (
    items.length > 0 ? `${label}: ${items.join(separator)}.` : fallback
  );

  const maxDepth = traceability.maxDepthMeters != null
    ? `${traceability.maxDepthMeters} m`
    : 'not extracted';

  const lines = [
    `DocumentEvidencePacket v${packet.schemaVersion} provider-neutral agent context.`,
    `Source: ${source.fileName ?? source.filePath ?? source.inputKind}; pages ${source.successfulPages}/${source.totalPages}; evidence pages ${pageList(traceability.pagesWithEvidence)}.`,
    `Document: ${summary.title ?? 'untitled'}; class ${summary.documentClass ?? 'unknown'}; status ${summary.parseStatus}; confidence ${summary.confidence}%; reviewRequired ${summary.reviewRequired ? 'yes' : 'no'}; canAutoProceed ${summary.canAutoProceed ? 'yes' : 'no'}.`,
    `Methods: ${methods}; native pages ${pageList(traceability.nativeTextPages)}; layout/OCR pages ${pageList(traceability.layoutOcrPages)}; direct visual pages ${pageList(traceability.directVisualPages)}.`,
    `Page outcomes: parsed ${pageOutcomes.parsed}, partial ${pageOutcomes.partial}, failed ${pageOutcomes.failed}.`,
    `Traceability: source pages ${pageList(traceability.sourcePages)}; parameter source-page rate ${Math.round(traceability.parameterTraceabilityRate * 100)}%; with source ${traceability.parametersWithSourcePage}, without source ${traceability.parametersWithoutSourcePage}.`,
    `Boreholes: ${traceability.boreholeIds.join(', ') || 'not detected'}; max depth ${maxDepth}.`,
    listLine('Review gates', reviewGates, '; ', 'Review gates: none.'),
    listLine('Missing parameters', missingParameters, '; ', 'Missing parameters: none flagged in packet.'),
    listLine('Parameters needing review', reviewParameters),
    listLine('Verified parameters', verifiedParameters),
    listLine('Materials', materials),
    listLine('Classifications', classifications),
    listLine('Synthesis evidence', synthesis.map((item) => compactText(item, 180)), ' | '),
    listLine('Risks', packet.engineeringSignals.risks.slice(0, 5).map((item) => compactText(item, 140))),
    listLine('Recommendations', packet.engineeringSignals.recommendations.slice(0, 5).map((item) => compactText(item, 140))),
    listLine('Warnings', packet.review.warnings.slice(0, 6)),
    'Agent rule: cite source pages from this packet; do not run deterministic calculations from missing, direct-visual-only, or needs-review evidence without explicit review/approval.',
  ];

  return compactText(lines.filter(Boolean).join('\n'), maxContentChars);
}
|
|
433
|
+
/**
 * Summarizes an ingest result for agent context, reusing the packet already
 * attached as `result.evidencePacket` and building one only when it is
 * null or undefined.
 *
 * @param {object} result - Ingest result, optionally carrying `evidencePacket`.
 * @param {object} [options] - Forwarded to `summarizeDocumentEvidencePacketForAgent`.
 * @returns {string} The summary produced by `summarizeDocumentEvidencePacketForAgent`.
 */
export function summarizeGeotechDocumentResultForAgent(result, options = {}) {
  const packet = result.evidencePacket ?? buildDocumentEvidencePacket(result);
  return summarizeDocumentEvidencePacketForAgent(packet, options);
}
|
|
436
|
+
/**
 * Converts one page audit into the page record stored in a packet.
 *
 * @param {object} audit - Page audit; reads `pageNumber`, `classification`,
 *   `parseStatus`, `confidence`, `textHintSource`, `evidenceCache` (optional),
 *   `materialCount`, `classificationCount`, `parameterCount` and `warnings`.
 * @returns {object} Page record. `cacheStatus` is `'unavailable'` when the
 *   audit has no cache status, and `cacheEntryId` is present only when the
 *   cache entry id is truthy.
 */
function buildEvidencePage(audit) {
  const {
    pageNumber,
    classification,
    parseStatus,
    textHintSource,
    evidenceCache,
    warnings,
  } = audit;

  return {
    pageNumber,
    classification,
    parseStatus,
    confidence: normalizeConfidence(audit.confidence),
    // The same text-hint source drives the method, raw source and category fields.
    method: methodFromTextHintSource(textHintSource),
    rawSource: textHintSource,
    sourceCategory: sourceCategoryFromTextHintSource(textHintSource),
    cacheStatus: evidenceCache?.status ?? 'unavailable',
    ...(evidenceCache?.entryId ? { cacheEntryId: evidenceCache.entryId } : {}),
    counts: {
      materials: audit.materialCount,
      classifications: audit.classificationCount,
      parameters: audit.parameterCount,
    },
    warnings,
  };
}
|
|
456
|
+
/**
 * Tallies the packet's pages by `parseStatus`.
 *
 * @param {object} packet - Packet whose `pages` each carry a `parseStatus`.
 * @returns {{parsed: number, partial: number, failed: number}} Page counts per status.
 */
function countPageOutcomes(packet) {
  const tally = { parsed: 0, partial: 0, failed: 0 };
  for (const page of packet.pages) {
    tally[page.parseStatus] += 1;
  }
  return tally;
}
|
|
462
|
+
/**
 * Formats a list of page numbers for display.
 *
 * @param {number[]} pages - Page numbers.
 * @returns {string} The pages joined with ", ", or "none" for an empty list.
 */
function pageList(pages) {
  const hasPages = pages.length > 0;
  if (!hasPages) {
    return 'none';
  }
  return pages.join(', ');
}
|
|
465
|
+
/**
 * Formats one observation as a compact `label=value` token for the agent summary.
 *
 * The unit is appended to the value only when the value is non-blank and is
 * not itself just the unit text. This is the same rule
 * `formatObservationForSynthesis` applies, so the two renderings agree and
 * output such as "kPa kPa" or a dangling " kPa" is avoided.
 *
 * @param {object} observation - Observation with `label`, `value`, `sourcePages`
 *   and `confidence`, plus optional `unit` and `material`.
 * @returns {string} `label=value[ unit][ material]` followed by ` p<pages>` (or
 *   ` no-source-page`) and, when confidence is above zero, ` <confidence>%`.
 */
function formatObservationForAgent(observation) {
  const rawValue = String(observation.value);
  const trimmedValue = rawValue.trim();
  const shouldAppendUnit = Boolean(observation.unit)
    && trimmedValue !== ''
    && trimmedValue !== observation.unit;
  const value = shouldAppendUnit ? `${rawValue} ${observation.unit}` : rawValue;

  const materialSuffix = observation.material ? ` ${observation.material}` : '';
  const pageSuffix = observation.sourcePages.length > 0
    ? ` p${observation.sourcePages.join(',')}`
    : ' no-source-page';
  // A zero confidence is omitted rather than printed as "0%".
  const confidenceSuffix = observation.confidence > 0 ? ` ${observation.confidence}%` : '';

  return `${observation.label}=${value}${materialSuffix}${pageSuffix}${confidenceSuffix}`;
}
|
|
471
|
+
/**
 * Formats one observation as a ` | `-separated evidence row for the synthesis prompt.
 *
 * The unit is appended to the value only when the value is non-blank and is
 * not itself just the unit text. Material, context and warnings segments are
 * included only when present; at most three warnings are shown.
 *
 * @param {object} observation - Packet observation.
 * @returns {string} Row of the form
 *   `label | value=… | material=… | context=… | pages=… | method=… | confidence=…% | review=… | warnings=…`.
 */
function formatObservationForSynthesis(observation) {
  const rawValue = String(observation.value);
  const trimmedValue = rawValue.trim();
  const appendUnit = observation.unit && trimmedValue && trimmedValue !== observation.unit;
  const value = appendUnit ? `${rawValue} ${observation.unit}` : rawValue;

  const segments = [observation.label, `value=${value}`];
  if (observation.material) {
    segments.push(`material=${observation.material}`);
  }
  if (observation.context) {
    segments.push(`context=${compactText(observation.context, 180)}`);
  }
  segments.push(
    `pages=${pageList(observation.sourcePages)}`,
    `method=${observation.method}`,
    `confidence=${observation.confidence}%`,
    `review=${observation.reviewStatus}`,
  );
  if (observation.warnings.length > 0) {
    segments.push(`warnings=${observation.warnings.slice(0, 3).join(' | ')}`);
  }

  // Drops an empty label, matching the behaviour for any falsy segment.
  return segments.filter(Boolean).join(' | ');
}
|
|
487
|
+
/**
 * Formats a content chunk as a ` | `-separated line: page range, section type
 * (default `general`), scope, heading trail (default `untitled`), source pages
 * and the compacted chunk text.
 *
 * @param {object} chunk - Content chunk to describe.
 * @param {number} maxTextLength - Length limit passed to `compactText` for the chunk text.
 * @returns {string} The ` | `-joined description.
 */
function formatContentChunkForSynthesis(chunk, maxTextLength) {
    const pageSpan = `Pages ${chunk.pageRange[0]}-${chunk.pageRange[1]}`;
    const section = chunk.sectionType ?? 'general';
    let heading = 'untitled';
    if (chunk.headingAncestry.length > 0) {
        heading = compactText(chunk.headingAncestry.join(' > '), 110);
    }
    const sources = `sources=${pageList(chunk.sourcePages)}`;
    const body = compactText(chunk.text, maxTextLength);
    // Joined as an array so a missing scope renders as an empty field.
    return [pageSpan, section, chunk.scope, heading, sources, body].join(' | ');
}
|
|
497
|
+
/**
 * Builds one summary line per borehole id in `packet.traceability.boreholeIds`
 * (first 20 ids only).
 *
 * Each line lists up to 12 matching observations (parameters, materials and
 * classifications) and up to 4 matching content chunks whose text mentions the
 * borehole. When nothing matches, a fixed fallback sentence is used.
 *
 * Ids of the form `BH<number>` are matched against the spellings that
 * `inferBoreholeIds` accepts (`BH1`, `BH-01`, `B.H. No. 1`, `Bore Hole No: 1`, ...).
 * Without this, an id normalised from `BH-01` to `BH1` would never be found in
 * the text it came from.
 *
 * @param {object} packet - Evidence packet with `observations`, `traceability.boreholeIds`
 *   and `contentChunks`.
 * @returns {Array<string>} One `"<id>: <evidence>"` line per borehole id.
 */
function buildBoreholeContinuityLines(packet) {
    const observations = [
        ...packet.observations.parameters,
        ...packet.observations.materials,
        ...packet.observations.classifications,
    ];
    // A mention must end at a word boundary, whitespace or a separator so that
    // `BH1` does not match inside `BH12`.
    const mentionTail = '(?:\\b|\\s|[-#:])';
    const mentionPattern = (id) => {
        const canonical = /^BH0*(\d+)$/i.exec(id);
        if (canonical) {
            // Tolerate dots, "No.", separators and zero padding between the prefix and the number.
            return new RegExp(`\\b(?:B\\.?\\s*H\\.?|BORE\\s*HOLE)\\s*(?:NO\\.?)?\\s*[:#-]?\\s*0*${canonical[1]}${mentionTail}`, 'i');
        }
        // Any other id shape: escape it and match it literally, as before.
        const escaped = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        return new RegExp(`\\b${escaped.replace(/^BH/i, 'B\\.?H\\.?\\s*')}${mentionTail}`, 'i');
    };
    return packet.traceability.boreholeIds.slice(0, 20).map((id) => {
        const pattern = mentionPattern(id);
        const matchingObservations = observations
            .filter((observation) => pattern.test([
            observation.label,
            String(observation.value),
            observation.material,
            observation.context,
        ].filter(Boolean).join(' ')))
            .slice(0, 12)
            .map((observation) => `${observation.label}=${String(observation.value)} pages=${pageList(observation.sourcePages)} review=${observation.reviewStatus}`);
        const matchingChunks = packet.contentChunks
            .filter((chunk) => pattern.test(`${chunk.headingAncestry.join(' ')} ${chunk.text}`))
            .slice(0, 4)
            .map((chunk) => `pages ${chunk.pageRange[0]}-${chunk.pageRange[1]}: ${compactText(chunk.text, 180)}`);
        return `${id}: ${[...matchingObservations, ...matchingChunks].join('; ') || 'borehole identifier detected, but no compact interval evidence retained.'}`;
    });
}
|
|
521
|
+
/**
 * Assembles the traceability summary for an extraction result.
 *
 * Collects the source pages referenced by parameters, materials, classifications,
 * the synthesis and content chunks. Groups page numbers by how each page was
 * read, infers borehole ids and the maximum depth from the combined evidence
 * text, and reports how many parameters can be traced to a source page.
 *
 * @param {object} result - Extraction result (`parameters`, `materials`,
 *   `classifications`, optional `synthesis` and `contentChunks`).
 * @param {Array<object>} pages - Per-page records with `pageNumber`, `counts`,
 *   `rawSource`, `sourceCategory` and `method`.
 * @returns {object} The traceability summary.
 */
function buildTraceability(result, pages) {
    const parameterCount = result.parameters.length;
    const pagesPerParameter = result.parameters.map(({ sourcePages, context }) => sourcePagesFromObservation(sourcePages, context));
    let parametersWithSourcePage = 0;
    for (const parameterPages of pagesPerParameter) {
        if (parameterPages.length > 0) {
            parametersWithSourcePage += 1;
        }
    }
    const materialPages = result.materials.flatMap((material) => sourcePagesFromObservation(material.sourcePages, material.description));
    const classificationPages = result.classifications.flatMap((classification) => sourcePagesFromObservation(classification.sourcePages, classification.context));
    const synthesisPages = result.synthesis?.sourcePages ?? [];
    const chunkPages = (result.contentChunks ?? []).flatMap((chunk) => chunk.sourcePages);
    const sourcePages = normalizePages([
        ...pagesPerParameter.flat(),
        ...materialPages,
        ...classificationPages,
        ...synthesisPages,
        ...chunkPages,
    ]);
    // Normalised page numbers of every page accepted by `predicate`.
    const pageNumbersWhere = (predicate) => normalizePages(pages.filter(predicate).map((page) => page.pageNumber));
    const pagesWithEvidence = pageNumbersWhere((page) => page.counts.materials + page.counts.classifications + page.counts.parameters > 0);
    const methodCounts = countMethods(pages);
    const evidenceText = collectEvidenceText(result);
    let parameterTraceabilityRate = 0;
    if (parameterCount > 0) {
        parameterTraceabilityRate = roundRatio(parametersWithSourcePage / parameterCount);
    }
    return {
        sourcePages,
        pagesWithEvidence,
        directVisualPages: pageNumbersWhere((page) => page.rawSource === 'vision-visual'),
        layoutOcrPages: pageNumbersWhere((page) => page.sourceCategory === 'layout-ocr'),
        nativeTextPages: pageNumbersWhere((page) => page.sourceCategory === 'native-text'),
        boreholeIds: inferBoreholeIds(evidenceText),
        maxDepthMeters: inferMaxDepthMeters(evidenceText),
        parametersWithSourcePage,
        parametersWithoutSourcePage: Math.max(0, parameterCount - parametersWithSourcePage),
        parameterTraceabilityRate,
        methodCounts,
    };
}
|
|
552
|
+
/**
 * Produces a trimmed copy of a content chunk.
 *
 * `sectionType` is kept only when truthy and `significance` only when it is a
 * finite number. Source pages are normalised and the text is compacted to at
 * most 1200 characters.
 *
 * @param {object} chunk - Content chunk to compact.
 * @returns {object} The compacted chunk.
 */
function compactContentChunk(chunk) {
    const compacted = {
        chunkId: chunk.chunkId,
        pageRange: chunk.pageRange,
        headingAncestry: chunk.headingAncestry,
        scope: chunk.scope,
    };
    if (chunk.sectionType) {
        compacted.sectionType = chunk.sectionType;
    }
    if (Number.isFinite(chunk.significance)) {
        compacted.significance = chunk.significance;
    }
    // Assigned last so the property order matches the original object literal.
    compacted.sourcePages = normalizePages(chunk.sourcePages);
    compacted.text = compactText(chunk.text, 1200);
    return compacted;
}
|
|
564
|
+
/**
 * Copies the reportable fields of a review finding.
 *
 * `pageNumber` is included when it is neither `null` nor `undefined` (so page 0
 * is kept). `materialDescription` is included only when truthy.
 *
 * @param {object} finding - Review finding to compact.
 * @returns {object} Object with `code`, `severity`, `scope`, `message` and the optional fields.
 */
function compactReviewFinding(finding) {
    const { code, severity, scope, message } = finding;
    const compacted = { code, severity, scope, message };
    if (finding.pageNumber != null) {
        compacted.pageNumber = finding.pageNumber;
    }
    if (finding.materialDescription) {
        compacted.materialDescription = finding.materialDescription;
    }
    return compacted;
}
|
|
574
|
+
/**
 * Maps a text-hint source identifier to an extraction method name.
 *
 * @param {string} source - Text-hint source identifier.
 * @returns {string} `native-pdf-text`, `layout-ocr`, `visual-reasoning`, or `none`
 *   for any other value.
 */
function methodFromTextHintSource(source) {
    switch (source) {
        case 'native-text':
        case 'pdfjs-text':
            return 'native-pdf-text';
        case 'glm-ocr':
        case 'local-ocr':
            return 'layout-ocr';
        case 'vision-ocr':
        case 'vision-visual':
            return 'visual-reasoning';
        default:
            return 'none';
    }
}
|
|
586
|
+
/**
 * Maps a text-hint source identifier to a source category.
 *
 * @param {string} source - Text-hint source identifier.
 * @returns {string} `native-text`, `layout-ocr`, `vision`, or `none` for any other value.
 */
function sourceCategoryFromTextHintSource(source) {
    // A Map keeps the lookup free of inherited Object.prototype keys.
    const categoryBySource = new Map([
        ['native-text', 'native-text'],
        ['pdfjs-text', 'native-text'],
        ['glm-ocr', 'layout-ocr'],
        ['local-ocr', 'layout-ocr'],
        ['vision-ocr', 'vision'],
        ['vision-visual', 'vision'],
    ]);
    return categoryBySource.get(source) ?? 'none';
}
|
|
598
|
+
/**
 * Determines the extraction method behind a set of source pages.
 *
 * @param {Array<number>} sourcePages - Page numbers an observation refers to.
 * @param {Array<{ pageNumber: number, method: string }>} pages - Per-page records.
 * @returns {string} `hybrid` when the referenced pages use more than one method
 *   other than `none`, the single method when there is exactly one, else `none`.
 */
function methodForSourcePages(sourcePages, pages) {
    const distinctMethods = new Set();
    for (const page of pages) {
        if (!sourcePages.includes(page.pageNumber)) {
            continue;
        }
        if (page.method !== 'none') {
            distinctMethods.add(page.method);
        }
    }
    if (distinctMethods.size > 1) {
        return 'hybrid';
    }
    const [onlyMethod] = distinctMethods;
    return onlyMethod ?? 'none';
}
|
|
608
|
+
/**
 * Averages the confidence of the pages an observation refers to.
 *
 * Only pages listed in `sourcePages` that carry a finite `confidence` take
 * part. When none qualify, `fallback` is used instead. The result is passed
 * through `normalizeConfidence`.
 *
 * @param {Array<number>} sourcePages - Page numbers an observation refers to.
 * @param {Array<{ pageNumber: number, confidence: number }>} pages - Per-page records.
 * @param {number} fallback - Confidence to use when no page qualifies.
 * @returns {number} The normalised confidence.
 */
function confidenceForSourcePages(sourcePages, pages, fallback) {
    let total = 0;
    let matched = 0;
    for (const page of pages) {
        if (sourcePages.includes(page.pageNumber) && Number.isFinite(page.confidence)) {
            total += page.confidence;
            matched += 1;
        }
    }
    if (matched === 0) {
        return normalizeConfidence(fallback);
    }
    return normalizeConfidence(total / matched);
}
|
|
617
|
+
/**
 * Counts pages per extraction method.
 *
 * The five known methods are always present in the result. Any other method
 * value gets its own counter.
 *
 * @param {Array<{ method: string }>} pages - Per-page records.
 * @returns {Record<string, number>} Number of pages per method.
 */
function countMethods(pages) {
    return pages.reduce((tally, { method }) => {
        tally[method] = (tally[method] ?? 0) + 1;
        return tally;
    }, {
        'native-pdf-text': 0,
        'layout-ocr': 0,
        'visual-reasoning': 0,
        hybrid: 0,
        none: 0,
    });
}
|
|
630
|
+
/**
 * Lists the review gates that apply to an extraction result, in a fixed order.
 *
 * @param {object} result - Extraction result with `reviewRequired`, `pageFailures`,
 *   `pageAudits` and `parameters`.
 * @returns {Array<string>} Gate identifiers for every condition that holds.
 */
function buildReviewGates(result) {
    const gates = [];
    if (result.reviewRequired) {
        gates.push('human-review-required');
    }
    if (result.pageFailures.length > 0) {
        gates.push('page-failures-present');
    }
    if (result.pageAudits.some((audit) => audit.parseStatus === 'partial')) {
        gates.push('partial-pages-present');
    }
    if (result.pageAudits.some((audit) => audit.textHintSource === 'vision-visual')) {
        gates.push('direct-visual-verification-required');
    }
    if (result.parameters.some((parameter) => isMissingValue(parameter.valueText, parameter.numericValue))) {
        gates.push('missing-parameters-present');
    }
    return gates;
}
|
|
639
|
+
/**
 * Combines explicitly declared source pages with page numbers mentioned in a
 * free-text context, then normalises the combined list.
 *
 * @param {Array<number>|null|undefined} sourcePages - Declared source pages, if any.
 * @param {string|null|undefined} context - Text that may mention page numbers.
 * @returns {Array<number>} Normalised page numbers.
 */
function sourcePagesFromObservation(sourcePages, context) {
    const declaredPages = sourcePages ?? [];
    const mentionedPages = extractPageNumbers(context);
    return normalizePages([...declaredPages, ...mentionedPages]);
}
|
|
645
|
+
/**
 * Extracts page numbers from text such as `page 12`, `p. 3` or `Page #7`.
 *
 * @param {string|null|undefined} value - Text to scan.
 * @returns {Array<number>} Normalised page numbers; empty when `value` is falsy.
 */
function extractPageNumbers(value) {
    if (!value) {
        return [];
    }
    const mentioned = [];
    for (const match of value.matchAll(/\b(?:page|p\.?)\s*#?\s*(\d{1,4})\b/gi)) {
        mentioned.push(Number(match[1]));
    }
    return normalizePages(mentioned);
}
|
|
652
|
+
/**
 * Normalises a list of page references.
 *
 * Each entry is converted with `Number`. Only positive integers are kept,
 * duplicates are removed and the result is sorted in ascending order.
 *
 * @param {Array<*>} values - Raw page references.
 * @returns {Array<number>} Unique positive integer page numbers, ascending.
 */
function normalizePages(values) {
    const validPages = values
        .map(Number)
        .filter((page) => page > 0 && Number.isInteger(page));
    const distinctPages = Array.from(new Set(validPages));
    distinctPages.sort((a, b) => a - b);
    return distinctPages;
}
|
|
657
|
+
/**
 * Tells whether a parameter carries no usable value.
 *
 * A finite `numericValue` always counts as present. Otherwise the value is
 * missing when the trimmed text is empty or is a placeholder such as `-`,
 * `n/a`, `nil`, `none`, `not reported`, `unavailable` or `missing`
 * (case-insensitive).
 *
 * @param {*} valueText - Textual value of the parameter.
 * @param {number|null|undefined} numericValue - Numeric value of the parameter, if any.
 * @returns {boolean} `true` when no usable value is present.
 */
function isMissingValue(valueText, numericValue) {
    const hasNumber = numericValue != null && Number.isFinite(numericValue);
    if (hasNumber) {
        return false;
    }
    const text = String(valueText ?? '').trim();
    if (!text) {
        return true;
    }
    return /^(?:-|--|—|n\/?a|nil|none|not\s+(?:reported|extracted|encountered|available)|unavailable|missing)$/i.test(text);
}
|
|
664
|
+
/**
 * Converts a confidence value to an integer percentage in the range 0-100.
 *
 * @param {*} value - Raw confidence; anything that is not a finite number counts as 0.
 * @returns {number} Rounded confidence clamped to 0-100.
 */
function normalizeConfidence(value) {
    const finiteValue = Number.isFinite(value) ? value : 0;
    const rounded = Math.round(finiteValue);
    const capped = Math.min(100, rounded);
    // Math.max also turns a rounded -0 into +0.
    return Math.max(0, capped);
}
|
|
667
|
+
/**
 * Clamps a ratio to the range 0-1 and rounds it to three decimal places.
 *
 * @param {number} value - Ratio to round.
 * @returns {number} The clamped, rounded ratio.
 */
function roundRatio(value) {
    const cappedAtOne = Math.min(1, value);
    const clamped = Math.max(0, cappedAtOne);
    const thousandths = Math.round(clamped * 1000);
    return thousandths / 1000;
}
|
|
670
|
+
/**
 * Compacts each value to at most 500 characters, drops the ones that end up
 * empty and removes duplicates, keeping first-seen order.
 *
 * @param {Array<*>} values - Values to clean up.
 * @returns {Array<string>} Unique non-empty compacted strings.
 */
function uniqueNonEmpty(values) {
    const seen = new Set();
    values.forEach((value) => {
        const text = compactText(value, 500);
        if (text.length > 0) {
            seen.add(text);
        }
    });
    return [...seen];
}
|
|
675
|
+
/**
 * Builds an observation identifier of the form `<type>-<index + 1>-<slug>`.
 *
 * The slug is the lower-cased label with every run of characters outside
 * `a-z0-9` replaced by a dash and leading/trailing dashes removed, cut to 28
 * characters. It falls back to `item` when nothing remains.
 *
 * @param {string} type - Observation type prefix.
 * @param {number} index - Zero-based position of the observation.
 * @param {string} label - Human-readable label to slugify.
 * @returns {string} The identifier.
 */
function observationId(type, index, label) {
    const lowered = label.toLowerCase();
    const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
    const stripped = dashed.replace(/^-+|-+$/g, '');
    const slug = stripped.slice(0, 28) || 'item';
    return `${type}-${index + 1}-${slug}`;
}
|
|
683
|
+
/**
 * Collapses whitespace in a value and truncates it with a trailing `...`.
 *
 * `null` and `undefined` are treated as an empty string. Text longer than
 * `maxLength` is cut to `maxLength - 3` characters (never fewer than zero),
 * right-trimmed, and suffixed with `...`.
 *
 * @param {*} value - Value to convert to compact text.
 * @param {number} maxLength - Length above which the text is truncated.
 * @returns {string} The compacted text.
 */
function compactText(value, maxLength) {
    const collapsed = String(value ?? '').replace(/\s+/g, ' ').trim();
    if (collapsed.length > maxLength) {
        const keep = Math.max(0, maxLength - 3);
        return `${collapsed.slice(0, keep).trimEnd()}...`;
    }
    return collapsed;
}
|
|
689
|
+
/**
 * Gathers every textual field of an extraction result into one newline-joined
 * string.
 *
 * The order is: title, summary, material fields, parameter fields,
 * classification fields, risks, recommendations, content-chunk headings and
 * text, then the synthesis lists. Entries that are not strings, or are blank,
 * are skipped.
 *
 * @param {object} result - Extraction result.
 * @returns {string} All non-blank string fields joined with `\n`.
 */
function collectEvidenceText(result) {
    const candidates = [result.title, result.summary];
    for (const material of result.materials) {
        candidates.push(material.description, material.uscsSymbol, material.lithology);
    }
    for (const parameter of result.parameters) {
        candidates.push(parameter.name, parameter.valueText, parameter.material, parameter.context);
    }
    for (const classification of result.classifications) {
        candidates.push(classification.system, classification.value, classification.context);
    }
    for (const risk of result.risks) {
        candidates.push(risk);
    }
    for (const recommendation of result.recommendations) {
        candidates.push(recommendation);
    }
    for (const chunk of result.contentChunks ?? []) {
        for (const heading of chunk.headingAncestry) {
            candidates.push(heading);
        }
        candidates.push(chunk.text);
    }
    if (result.synthesis) {
        const { takeaways, groundModel, keyParameters, interpretation, limitations } = result.synthesis;
        for (const section of [takeaways, groundModel, keyParameters, interpretation, limitations]) {
            for (const entry of section) {
                candidates.push(entry);
            }
        }
    }
    const lines = [];
    for (const candidate of candidates) {
        if (typeof candidate === 'string' && candidate.trim().length > 0) {
            lines.push(candidate);
        }
    }
    return lines.join('\n');
}
|
|
710
|
+
/**
 * Finds borehole identifiers mentioned in text and returns them as `BH<number>`.
 *
 * Recognises spellings such as `BH1`, `B.H. No. 12`, `BH-01` and
 * `Bore Hole No: 3`. Leading zeros are dropped, `BH0` is ignored, duplicates
 * are removed and the ids are sorted with numeric-aware comparison.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<string>} Sorted unique borehole ids.
 */
function inferBoreholeIds(text) {
    const patterns = [
        /\bB\.?\s*H\.?\s*(?:NO\.?)?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
        /\bBORE\s*HOLE\s*NO\.?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
        /\bBOREHOLE\s*NO\.?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
    ];
    const ids = new Set();
    for (const pattern of patterns) {
        for (const match of text.matchAll(pattern)) {
            const id = `BH${match[1]}`;
            if (!/^BH0$/.test(id)) {
                ids.add(id);
            }
        }
    }
    const sorted = [...ids];
    sorted.sort((left, right) => left.localeCompare(right, undefined, { numeric: true }));
    return sorted;
}
|
|
720
|
+
/**
 * Infers the greatest depth mentioned in text.
 *
 * Two kinds of mention are considered: a number followed by the unit `m`
 * (e.g. `25.5 m`), and a number that follows `maximum depth`, `termination`
 * or `terminated` within 60 non-digit characters. Only values greater than 0
 * and at most 120 are accepted.
 *
 * The maximum is tracked while scanning rather than via `Math.max(...values)`,
 * which throws a `RangeError` when a large document yields more matches than
 * the engine accepts as call arguments.
 *
 * @param {string} text - Evidence text to scan.
 * @returns {number|null} The greatest accepted depth, or `null` when none is found.
 */
function inferMaxDepthMeters(text) {
    const depthPatterns = [
        /\b(\d{1,3}(?:\.\d+)?)\s*m\b/gi,
        /\b(?:maximum\s+depth|termination|terminated)\b[^0-9]{0,60}(\d{1,3}(?:\.\d+)?)\b/gi,
    ];
    let maxDepth = null;
    for (const pattern of depthPatterns) {
        for (const match of text.matchAll(pattern)) {
            const depth = Number(match[1]);
            const isPlausible = Number.isFinite(depth) && depth > 0 && depth <= 120;
            if (isPlausible && (maxDepth === null || depth > maxDepth)) {
                maxDepth = depth;
            }
        }
    }
    return maxDepth;
}
|
|
727
|
+
//# sourceMappingURL=document-evidence-packet.js.map
|