@geotechcli/core 0.4.44 → 0.4.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/agents/brain.d.ts.map +1 -1
  2. package/dist/agents/brain.js +9 -5
  3. package/dist/agents/brain.js.map +1 -1
  4. package/dist/agents/data-tools.js +44 -5
  5. package/dist/agents/data-tools.js.map +1 -1
  6. package/dist/agents/orchestrator.d.ts.map +1 -1
  7. package/dist/agents/orchestrator.js +25 -12
  8. package/dist/agents/orchestrator.js.map +1 -1
  9. package/dist/agents/provider-operating-contract.d.ts +20 -0
  10. package/dist/agents/provider-operating-contract.d.ts.map +1 -0
  11. package/dist/agents/provider-operating-contract.js +146 -0
  12. package/dist/agents/provider-operating-contract.js.map +1 -0
  13. package/dist/agents/safety.d.ts +1 -0
  14. package/dist/agents/safety.d.ts.map +1 -1
  15. package/dist/agents/safety.js +26 -0
  16. package/dist/agents/safety.js.map +1 -1
  17. package/dist/agents/swarm.d.ts.map +1 -1
  18. package/dist/agents/swarm.js +22 -11
  19. package/dist/agents/swarm.js.map +1 -1
  20. package/dist/index.d.ts +1 -0
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +1 -0
  23. package/dist/index.js.map +1 -1
  24. package/dist/ingest/document-evidence-packet.d.ts +814 -0
  25. package/dist/ingest/document-evidence-packet.d.ts.map +1 -0
  26. package/dist/ingest/document-evidence-packet.js +727 -0
  27. package/dist/ingest/document-evidence-packet.js.map +1 -0
  28. package/dist/ingest/geotech-document-benchmark.d.ts +15 -0
  29. package/dist/ingest/geotech-document-benchmark.d.ts.map +1 -1
  30. package/dist/ingest/geotech-document-benchmark.js +18 -0
  31. package/dist/ingest/geotech-document-benchmark.js.map +1 -1
  32. package/dist/ingest/geotech-document.d.ts +2 -0
  33. package/dist/ingest/geotech-document.d.ts.map +1 -1
  34. package/dist/ingest/geotech-document.js +12 -107
  35. package/dist/ingest/geotech-document.js.map +1 -1
  36. package/dist/ingest/index.d.ts +1 -0
  37. package/dist/ingest/index.d.ts.map +1 -1
  38. package/dist/ingest/index.js +1 -0
  39. package/dist/ingest/index.js.map +1 -1
  40. package/dist/ingest/job-worker.d.ts.map +1 -1
  41. package/dist/ingest/job-worker.js +4 -2
  42. package/dist/ingest/job-worker.js.map +1 -1
  43. package/dist/llm/capabilities.d.ts.map +1 -1
  44. package/dist/llm/capabilities.js +42 -0
  45. package/dist/llm/capabilities.js.map +1 -1
  46. package/dist/meta/metadata.json +1 -1
  47. package/package.json +1 -1
@@ -0,0 +1,727 @@
1
+ import { z } from 'zod';
2
+ export const DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION = 2;
3
+ export const DocumentEvidenceMethodSchema = z.enum([
4
+ 'native-pdf-text',
5
+ 'layout-ocr',
6
+ 'visual-reasoning',
7
+ 'hybrid',
8
+ 'none',
9
+ ]);
10
+ export const DocumentEvidenceReviewStatusSchema = z.enum([
11
+ 'verified',
12
+ 'needs_review',
13
+ 'missing',
14
+ 'uncertain',
15
+ ]);
16
+ export const DocumentEvidenceObservationTypeSchema = z.enum([
17
+ 'material',
18
+ 'classification',
19
+ 'parameter',
20
+ ]);
21
+ const SourceSchema = z.object({
22
+ fileName: z.string().optional(),
23
+ filePath: z.string().optional(),
24
+ inputKind: z.enum(['image', 'pdf']),
25
+ pageRange: z.tuple([z.number().int().positive(), z.number().int().positive()]).optional(),
26
+ totalPages: z.number().int().nonnegative(),
27
+ successfulPages: z.number().int().nonnegative(),
28
+ failedPages: z.number().int().nonnegative(),
29
+ });
30
+ const DocumentSchema = z.object({
31
+ title: z.string().nullable(),
32
+ documentClass: z.string().nullable(),
33
+ parseStatus: z.enum(['parsed', 'partial', 'failed']),
34
+ confidence: z.number().min(0).max(100),
35
+ reviewRequired: z.boolean(),
36
+ canAutoProceed: z.boolean(),
37
+ });
38
+ const PageSchema = z.object({
39
+ pageNumber: z.number().int().positive(),
40
+ classification: z.string().nullable(),
41
+ parseStatus: z.enum(['parsed', 'partial', 'failed']),
42
+ confidence: z.number().min(0).max(100),
43
+ method: DocumentEvidenceMethodSchema,
44
+ rawSource: z.string(),
45
+ sourceCategory: z.enum(['native-text', 'layout-ocr', 'vision', 'none']),
46
+ cacheStatus: z.enum(['hit', 'miss', 'stored', 'skipped', 'unavailable']),
47
+ cacheEntryId: z.string().optional(),
48
+ counts: z.object({
49
+ materials: z.number().int().nonnegative(),
50
+ classifications: z.number().int().nonnegative(),
51
+ parameters: z.number().int().nonnegative(),
52
+ }),
53
+ warnings: z.array(z.string()),
54
+ });
55
+ const ObservationSchema = z.object({
56
+ id: z.string(),
57
+ type: DocumentEvidenceObservationTypeSchema,
58
+ label: z.string(),
59
+ value: z.union([z.string(), z.number(), z.boolean(), z.null()]),
60
+ numericValue: z.number().nullable().optional(),
61
+ unit: z.string().nullable().optional(),
62
+ material: z.string().nullable().optional(),
63
+ context: z.string().nullable().optional(),
64
+ sourcePages: z.array(z.number().int().positive()),
65
+ method: DocumentEvidenceMethodSchema,
66
+ confidence: z.number().min(0).max(100),
67
+ reviewStatus: DocumentEvidenceReviewStatusSchema,
68
+ warnings: z.array(z.string()),
69
+ });
70
+ const ContentChunkSchema = z.object({
71
+ chunkId: z.string(),
72
+ pageRange: z.tuple([z.number().int().positive(), z.number().int().positive()]),
73
+ headingAncestry: z.array(z.string()),
74
+ scope: z.enum(['page', 'table', 'figure', 'section']),
75
+ sectionType: z.string().optional(),
76
+ significance: z.number().optional(),
77
+ sourcePages: z.array(z.number().int().positive()),
78
+ text: z.string(),
79
+ });
80
+ const ReviewFindingSchema = z.object({
81
+ code: z.string(),
82
+ severity: z.enum(['advisory', 'review', 'blocking']),
83
+ scope: z.enum(['document', 'page', 'material']),
84
+ message: z.string(),
85
+ pageNumber: z.number().int().positive().optional(),
86
+ materialDescription: z.string().optional(),
87
+ });
88
+ const SynthesisSchema = z.object({
89
+ takeaways: z.array(z.string()),
90
+ groundModel: z.array(z.string()),
91
+ keyParameters: z.array(z.string()),
92
+ interpretation: z.array(z.string()),
93
+ limitations: z.array(z.string()),
94
+ sourcePages: z.array(z.number().int().positive()),
95
+ latencyMs: z.number().nonnegative().optional(),
96
+ }).nullable();
97
+ const EngineeringSignalsSchema = z.object({
98
+ risks: z.array(z.string()),
99
+ recommendations: z.array(z.string()),
100
+ });
101
+ export const DocumentEvidencePacketSchema = z.object({
102
+ kind: z.literal('document-evidence-packet'),
103
+ schemaVersion: z.literal(DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION),
104
+ generatedAt: z.string(),
105
+ providerContract: z.object({
106
+ providerNeutral: z.literal(true),
107
+ purpose: z.literal('byok-document-understanding'),
108
+ normalizedMethods: z.array(DocumentEvidenceMethodSchema),
109
+ reviewGates: z.array(z.string()),
110
+ }),
111
+ source: SourceSchema,
112
+ document: DocumentSchema,
113
+ pages: z.array(PageSchema),
114
+ observations: z.object({
115
+ materials: z.array(ObservationSchema),
116
+ classifications: z.array(ObservationSchema),
117
+ parameters: z.array(ObservationSchema),
118
+ }),
119
+ contentChunks: z.array(ContentChunkSchema),
120
+ engineeringSignals: EngineeringSignalsSchema,
121
+ synthesis: SynthesisSchema,
122
+ review: z.object({
123
+ warnings: z.array(z.string()),
124
+ reviewReasons: z.array(z.string()),
125
+ findings: z.array(ReviewFindingSchema),
126
+ }),
127
+ traceability: z.object({
128
+ sourcePages: z.array(z.number().int().positive()),
129
+ pagesWithEvidence: z.array(z.number().int().positive()),
130
+ directVisualPages: z.array(z.number().int().positive()),
131
+ layoutOcrPages: z.array(z.number().int().positive()),
132
+ nativeTextPages: z.array(z.number().int().positive()),
133
+ boreholeIds: z.array(z.string()),
134
+ maxDepthMeters: z.number().nullable(),
135
+ parametersWithSourcePage: z.number().int().nonnegative(),
136
+ parametersWithoutSourcePage: z.number().int().nonnegative(),
137
+ parameterTraceabilityRate: z.number().min(0).max(1),
138
+ methodCounts: z.record(z.string(), z.number().int().nonnegative()),
139
+ }),
140
+ });
141
+ export function buildDocumentEvidencePacket(result) {
142
+ const pages = result.pageAudits.map((audit) => buildEvidencePage(audit));
143
+ const traceability = buildTraceability(result, pages);
144
+ const packet = {
145
+ kind: 'document-evidence-packet',
146
+ schemaVersion: DOCUMENT_EVIDENCE_PACKET_SCHEMA_VERSION,
147
+ generatedAt: result.generatedAt,
148
+ providerContract: {
149
+ providerNeutral: true,
150
+ purpose: 'byok-document-understanding',
151
+ normalizedMethods: ['native-pdf-text', 'layout-ocr', 'visual-reasoning', 'hybrid', 'none'],
152
+ reviewGates: buildReviewGates(result),
153
+ },
154
+ source: {
155
+ ...(result.source.fileName ? { fileName: result.source.fileName } : {}),
156
+ ...(result.source.filePath ? { filePath: result.source.filePath } : {}),
157
+ inputKind: result.source.inputKind,
158
+ ...(result.source.pageRange ? { pageRange: result.source.pageRange } : {}),
159
+ totalPages: result.source.totalPages,
160
+ successfulPages: result.source.successfulPages,
161
+ failedPages: result.source.failedPages,
162
+ },
163
+ document: {
164
+ title: result.title,
165
+ documentClass: result.documentClass,
166
+ parseStatus: result.parseStatus,
167
+ confidence: normalizeConfidence(result.confidence),
168
+ reviewRequired: result.reviewRequired,
169
+ canAutoProceed: result.canAutoProceed,
170
+ },
171
+ pages,
172
+ observations: {
173
+ materials: result.materials.map((material, index) => {
174
+ const sourcePages = sourcePagesFromObservation(material.sourcePages, material.description);
175
+ return {
176
+ id: observationId('material', index, material.description),
177
+ type: 'material',
178
+ label: material.kind,
179
+ value: material.description,
180
+ material: material.lithology,
181
+ context: material.uscsSymbol ? `USCS ${material.uscsSymbol}` : null,
182
+ sourcePages,
183
+ method: methodForSourcePages(sourcePages, pages),
184
+ confidence: confidenceForSourcePages(sourcePages, pages, result.confidence),
185
+ reviewStatus: sourcePages.length > 0 ? 'verified' : 'needs_review',
186
+ warnings: [],
187
+ };
188
+ }),
189
+ classifications: result.classifications.map((classification, index) => {
190
+ const sourcePages = sourcePagesFromObservation(classification.sourcePages, classification.context);
191
+ return {
192
+ id: observationId('classification', index, `${classification.system}:${classification.value}`),
193
+ type: 'classification',
194
+ label: classification.system,
195
+ value: classification.value,
196
+ context: classification.context,
197
+ sourcePages,
198
+ method: methodForSourcePages(sourcePages, pages),
199
+ confidence: confidenceForSourcePages(sourcePages, pages, result.confidence),
200
+ reviewStatus: sourcePages.length > 0 ? 'verified' : 'needs_review',
201
+ warnings: [],
202
+ };
203
+ }),
204
+ parameters: result.parameters.map((parameter, index) => {
205
+ const sourcePages = sourcePagesFromObservation(parameter.sourcePages, parameter.context);
206
+ const missing = isMissingValue(parameter.valueText, parameter.numericValue);
207
+ return {
208
+ id: observationId('parameter', index, `${parameter.name}:${parameter.valueText}`),
209
+ type: 'parameter',
210
+ label: parameter.name,
211
+ value: parameter.valueText,
212
+ numericValue: parameter.numericValue,
213
+ unit: parameter.unit,
214
+ material: parameter.material,
215
+ context: parameter.context,
216
+ sourcePages,
217
+ method: methodForSourcePages(sourcePages, pages),
218
+ confidence: missing ? 0 : confidenceForSourcePages(sourcePages, pages, result.confidence),
219
+ reviewStatus: missing
220
+ ? 'missing'
221
+ : sourcePages.length > 0
222
+ ? 'verified'
223
+ : 'needs_review',
224
+ warnings: missing ? ['Value is missing or not extracted.'] : [],
225
+ };
226
+ }),
227
+ },
228
+ contentChunks: (result.contentChunks ?? []).map(compactContentChunk),
229
+ engineeringSignals: {
230
+ risks: uniqueNonEmpty(result.risks).slice(0, 24),
231
+ recommendations: uniqueNonEmpty(result.recommendations).slice(0, 24),
232
+ },
233
+ synthesis: result.synthesis
234
+ ? {
235
+ takeaways: result.synthesis.takeaways,
236
+ groundModel: result.synthesis.groundModel,
237
+ keyParameters: result.synthesis.keyParameters,
238
+ interpretation: result.synthesis.interpretation,
239
+ limitations: result.synthesis.limitations,
240
+ sourcePages: normalizePages(result.synthesis.sourcePages),
241
+ ...(Number.isFinite(result.synthesis.latencyMs) ? { latencyMs: Math.max(0, Math.round(result.synthesis.latencyMs)) } : {}),
242
+ }
243
+ : null,
244
+ review: {
245
+ warnings: result.warnings,
246
+ reviewReasons: result.reviewReasons,
247
+ findings: result.reviewFindings.map(compactReviewFinding),
248
+ },
249
+ traceability,
250
+ };
251
+ return DocumentEvidencePacketSchema.parse(packet);
252
+ }
253
+ export function attachDocumentEvidencePacket(result) {
254
+ return {
255
+ ...result,
256
+ evidencePacket: buildDocumentEvidencePacket(result),
257
+ };
258
+ }
259
+ export function documentEvidencePacketHasEngineeringSignal(packet) {
260
+ return packet.observations.materials.length > 0
261
+ || packet.observations.classifications.length > 0
262
+ || packet.observations.parameters.length > 0
263
+ || packet.engineeringSignals.risks.length > 0
264
+ || packet.engineeringSignals.recommendations.length > 0
265
+ || packet.contentChunks.some((chunk) => chunk.text.trim().length > 0
266
+ && chunk.sectionType !== 'administrative'
267
+ && chunk.sectionType !== 'visual-appendix');
268
+ }
269
+ export function compileDocumentEvidenceSynthesisPrompt(packet, options = {}) {
270
+ const maxEvidenceChars = options.maxEvidenceChars ?? 9000;
271
+ const outlineChunks = [...packet.contentChunks]
272
+ .filter((chunk) => chunk.text.trim().length > 0 && chunk.sectionType !== 'administrative')
273
+ .sort((left, right) => left.pageRange[0] - right.pageRange[0]
274
+ || left.pageRange[1] - right.pageRange[1])
275
+ .slice(0, options.maxOutlineChunks ?? 28)
276
+ .map((chunk) => formatContentChunkForSynthesis(chunk, 240));
277
+ const highSignalChunks = [...packet.contentChunks]
278
+ .filter((chunk) => chunk.text.trim().length > 0 && chunk.sectionType !== 'administrative')
279
+ .sort((left, right) => (right.significance ?? 0) - (left.significance ?? 0))
280
+ .slice(0, options.maxHighSignalChunks ?? 18)
281
+ .map((chunk) => formatContentChunkForSynthesis(chunk, 360));
282
+ const parameters = packet.observations.parameters
283
+ .slice(0, options.maxParameters ?? 48)
284
+ .map((observation) => formatObservationForSynthesis(observation));
285
+ const materials = packet.observations.materials
286
+ .slice(0, options.maxMaterials ?? 36)
287
+ .map((observation) => formatObservationForSynthesis(observation));
288
+ const classifications = packet.observations.classifications
289
+ .slice(0, options.maxClassifications ?? 28)
290
+ .map((observation) => formatObservationForSynthesis(observation));
291
+ const boreholeLines = buildBoreholeContinuityLines(packet);
292
+ const pageLines = packet.pages
293
+ .slice()
294
+ .sort((left, right) => left.pageNumber - right.pageNumber)
295
+ .map((page) => `Page ${page.pageNumber}: ${page.parseStatus}; method=${page.method}; source=${page.rawSource}; class=${page.classification ?? 'unknown'}; confidence=${page.confidence}%; counts m/c/p=${page.counts.materials}/${page.counts.classifications}/${page.counts.parameters}${page.warnings.length > 0 ? `; warnings=${page.warnings.slice(0, 3).join(' | ')}` : ''}`);
296
+ const missingParameters = packet.observations.parameters
297
+ .filter((observation) => observation.reviewStatus === 'missing')
298
+ .map((observation) => formatObservationForSynthesis(observation));
299
+ const reviewParameters = packet.observations.parameters
300
+ .filter((observation) => observation.reviewStatus === 'needs_review' || observation.reviewStatus === 'uncertain')
301
+ .map((observation) => formatObservationForSynthesis(observation));
302
+ const evidence = compactText([
303
+ 'Provider-neutral DocumentEvidencePacket synthesis evidence contract.',
304
+ `Packet schema: v${packet.schemaVersion}; providerNeutral=${packet.providerContract.providerNeutral}; purpose=${packet.providerContract.purpose}.`,
305
+ `Source: ${packet.source.fileName ?? packet.source.filePath ?? packet.source.inputKind}; pages ${packet.source.successfulPages}/${packet.source.totalPages}; evidence pages ${pageList(packet.traceability.pagesWithEvidence)}.`,
306
+ `Document: ${packet.document.title ?? 'untitled'}; class=${packet.document.documentClass ?? 'unknown'}; status=${packet.document.parseStatus}; confidence=${packet.document.confidence}%; reviewRequired=${packet.document.reviewRequired ? 'yes' : 'no'}; canAutoProceed=${packet.document.canAutoProceed ? 'yes' : 'no'}.`,
307
+ `Traceability: source pages ${pageList(packet.traceability.sourcePages)}; native=${pageList(packet.traceability.nativeTextPages)}; layout/OCR=${pageList(packet.traceability.layoutOcrPages)}; visual=${pageList(packet.traceability.directVisualPages)}; parameter source-page rate=${Math.round(packet.traceability.parameterTraceabilityRate * 100)}%.`,
308
+ `Review gates: ${packet.providerContract.reviewGates.join('; ') || 'none'}.`,
309
+ `Borehole summary: ${packet.traceability.boreholeIds.join(', ') || 'not detected'}; max depth=${packet.traceability.maxDepthMeters != null ? `${packet.traceability.maxDepthMeters} m` : 'not extracted'}.`,
310
+ '',
311
+ 'Ordered whole-report outline by source page:',
312
+ outlineChunks.join('\n') || 'No ordered outline retained.',
313
+ '',
314
+ 'Borehole continuity evidence:',
315
+ boreholeLines.join('\n') || 'No borehole-specific continuity evidence detected.',
316
+ '',
317
+ 'Engineering parameters:',
318
+ parameters.join('\n') || 'None extracted.',
319
+ '',
320
+ 'Material observations:',
321
+ materials.join('\n') || 'None extracted.',
322
+ '',
323
+ 'Classifications:',
324
+ classifications.join('\n') || 'None extracted.',
325
+ '',
326
+ 'Risks extracted from page evidence:',
327
+ packet.engineeringSignals.risks.slice(0, 16).map((risk) => `- ${compactText(risk, 220)}`).join('\n') || 'None extracted.',
328
+ '',
329
+ 'Recommendations extracted from page evidence:',
330
+ packet.engineeringSignals.recommendations.slice(0, 16).map((recommendation) => `- ${compactText(recommendation, 220)}`).join('\n') || 'None extracted.',
331
+ '',
332
+ 'Missing or review-gated parameters:',
333
+ [...missingParameters.slice(0, 16), ...reviewParameters.slice(0, 16)].join('\n') || 'None flagged.',
334
+ '',
335
+ 'High-signal source chunks:',
336
+ highSignalChunks.join('\n') || 'No high-signal chunks retained.',
337
+ '',
338
+ 'Page-level method and audit summary:',
339
+ pageLines.join('\n') || 'No page audit retained.',
340
+ '',
341
+ 'Warnings and review findings:',
342
+ [
343
+ ...packet.review.warnings.slice(0, 12).map((warning) => `Warning: ${compactText(warning, 220)}`),
344
+ ...packet.review.findings.slice(0, 12).map((finding) => `${finding.severity.toUpperCase()} ${finding.code}${finding.pageNumber ? ` page ${finding.pageNumber}` : ''}: ${compactText(finding.message, 220)}`),
345
+ ].join('\n') || 'None.',
346
+ ].join('\n'), maxEvidenceChars);
347
+ const prompt = `Create a concise engineering synthesis from the provider-neutral geotechnical document evidence packet below. Respond with ONLY a JSON object:
348
+ {
349
+ "takeaways": ["<report-level engineering takeaway with source-page wording where possible>"],
350
+ "groundModel": ["<depth-bounded soil/rock/groundwater model statement with source pages if known>"],
351
+ "keyParameters": ["<parameter, value, unit, material/context, source page if known>"],
352
+ "interpretation": ["<construction/design interpretation supported by extracted evidence>"],
353
+ "limitations": ["<uncertainty, missing evidence, OCR/layout/visual limitations>"],
354
+ "sourcePages": [<page numbers that support the synthesis>]
355
+ }
356
+
357
+ Instructions:
358
+ - Read the ordered whole-report outline first before summarizing individual extraction rows.
359
+ - Preserve borehole-log continuity when a borehole spans multiple pages; group depth intervals, lithology, SPT/RQD/recovery, groundwater, and source pages by borehole where evidence supports it.
360
+ - Cite source pages from the packet in every specific engineering claim where possible.
361
+ - Do not invent values, strata boundaries, groundwater, SPT, RQD, strength, density, or design parameters.
362
+ - Do not let synthesis raise extraction confidence. Confidence comes only from source evidence coverage and corroboration.
363
+ - Treat direct-visual-only, missing, needs-review, and uncertain values as review-gated evidence.
364
+ - Prefer explicit soil and rock mechanics parameters, groundwater observations, classification systems, and foundation/geohazard/construction implications.
365
+
366
+ Evidence:
367
+ ${evidence}`;
368
+ return {
369
+ systemPrompt: 'You are a senior geotechnical engineer synthesizing provider-neutral document evidence into a review brief. Use cautious, evidence-bound language, cite source pages, preserve missing-data limitations, and respond with JSON only.',
370
+ prompt,
371
+ evidence,
372
+ hasEngineeringSignal: documentEvidencePacketHasEngineeringSignal(packet),
373
+ sourcePages: packet.traceability.sourcePages,
374
+ reviewGates: packet.providerContract.reviewGates,
375
+ schemaVersion: packet.schemaVersion,
376
+ };
377
+ }
378
+ export function summarizeDocumentEvidencePacketForAgent(packet, options = {}) {
379
+ const maxObservationsPerGroup = options.maxObservationsPerGroup ?? 8;
380
+ const maxReviewGates = options.maxReviewGates ?? 8;
381
+ const maxContentChars = options.maxContentChars ?? 2400;
382
+ const pageOutcomes = countPageOutcomes(packet);
383
+ const methods = Object.entries(packet.traceability.methodCounts)
384
+ .filter(([, count]) => count > 0)
385
+ .map(([method, count]) => `${method}:${count}`)
386
+ .join(', ') || 'none';
387
+ const reviewGates = packet.providerContract.reviewGates.slice(0, maxReviewGates);
388
+ const missingParameters = packet.observations.parameters
389
+ .filter((parameter) => parameter.reviewStatus === 'missing')
390
+ .slice(0, maxObservationsPerGroup)
391
+ .map((parameter) => parameter.label);
392
+ const reviewParameters = packet.observations.parameters
393
+ .filter((parameter) => parameter.reviewStatus === 'needs_review' || parameter.reviewStatus === 'uncertain')
394
+ .slice(0, maxObservationsPerGroup)
395
+ .map((parameter) => `${parameter.label}${parameter.context ? ` (${parameter.context})` : ''}`);
396
+ const verifiedParameters = packet.observations.parameters
397
+ .filter((parameter) => parameter.reviewStatus === 'verified')
398
+ .slice(0, maxObservationsPerGroup)
399
+ .map((parameter) => formatObservationForAgent(parameter));
400
+ const materials = packet.observations.materials
401
+ .slice(0, maxObservationsPerGroup)
402
+ .map((material) => formatObservationForAgent(material));
403
+ const classifications = packet.observations.classifications
404
+ .slice(0, maxObservationsPerGroup)
405
+ .map((classification) => formatObservationForAgent(classification));
406
+ const synthesis = [
407
+ ...(packet.synthesis?.takeaways.slice(0, 4) ?? []),
408
+ ...(packet.synthesis?.groundModel.slice(0, 4) ?? []),
409
+ ...(packet.synthesis?.interpretation.slice(0, 3) ?? []),
410
+ ...(packet.synthesis?.limitations.slice(0, 3) ?? []),
411
+ ];
412
+ return compactText([
413
+ `DocumentEvidencePacket v${packet.schemaVersion} provider-neutral agent context.`,
414
+ `Source: ${packet.source.fileName ?? packet.source.filePath ?? packet.source.inputKind}; pages ${packet.source.successfulPages}/${packet.source.totalPages}; evidence pages ${pageList(packet.traceability.pagesWithEvidence)}.`,
415
+ `Document: ${packet.document.title ?? 'untitled'}; class ${packet.document.documentClass ?? 'unknown'}; status ${packet.document.parseStatus}; confidence ${packet.document.confidence}%; reviewRequired ${packet.document.reviewRequired ? 'yes' : 'no'}; canAutoProceed ${packet.document.canAutoProceed ? 'yes' : 'no'}.`,
416
+ `Methods: ${methods}; native pages ${pageList(packet.traceability.nativeTextPages)}; layout/OCR pages ${pageList(packet.traceability.layoutOcrPages)}; direct visual pages ${pageList(packet.traceability.directVisualPages)}.`,
417
+ `Page outcomes: parsed ${pageOutcomes.parsed}, partial ${pageOutcomes.partial}, failed ${pageOutcomes.failed}.`,
418
+ `Traceability: source pages ${pageList(packet.traceability.sourcePages)}; parameter source-page rate ${Math.round(packet.traceability.parameterTraceabilityRate * 100)}%; with source ${packet.traceability.parametersWithSourcePage}, without source ${packet.traceability.parametersWithoutSourcePage}.`,
419
+ `Boreholes: ${packet.traceability.boreholeIds.join(', ') || 'not detected'}; max depth ${packet.traceability.maxDepthMeters != null ? `${packet.traceability.maxDepthMeters} m` : 'not extracted'}.`,
420
+ reviewGates.length > 0 ? `Review gates: ${reviewGates.join('; ')}.` : 'Review gates: none.',
421
+ missingParameters.length > 0 ? `Missing parameters: ${missingParameters.join('; ')}.` : 'Missing parameters: none flagged in packet.',
422
+ reviewParameters.length > 0 ? `Parameters needing review: ${reviewParameters.join('; ')}.` : '',
423
+ verifiedParameters.length > 0 ? `Verified parameters: ${verifiedParameters.join('; ')}.` : '',
424
+ materials.length > 0 ? `Materials: ${materials.join('; ')}.` : '',
425
+ classifications.length > 0 ? `Classifications: ${classifications.join('; ')}.` : '',
426
+ synthesis.length > 0 ? `Synthesis evidence: ${synthesis.map((item) => compactText(item, 180)).join(' | ')}.` : '',
427
+ packet.engineeringSignals.risks.length > 0 ? `Risks: ${packet.engineeringSignals.risks.slice(0, 5).map((item) => compactText(item, 140)).join('; ')}.` : '',
428
+ packet.engineeringSignals.recommendations.length > 0 ? `Recommendations: ${packet.engineeringSignals.recommendations.slice(0, 5).map((item) => compactText(item, 140)).join('; ')}.` : '',
429
+ packet.review.warnings.length > 0 ? `Warnings: ${packet.review.warnings.slice(0, 6).join('; ')}.` : '',
430
+ 'Agent rule: cite source pages from this packet; do not run deterministic calculations from missing, direct-visual-only, or needs-review evidence without explicit review/approval.',
431
+ ].filter(Boolean).join('\n'), maxContentChars);
432
+ }
433
+ export function summarizeGeotechDocumentResultForAgent(result, options = {}) {
434
+ return summarizeDocumentEvidencePacketForAgent(result.evidencePacket ?? buildDocumentEvidencePacket(result), options);
435
+ }
436
+ function buildEvidencePage(audit) {
437
+ const cache = audit.evidenceCache;
438
+ return {
439
+ pageNumber: audit.pageNumber,
440
+ classification: audit.classification,
441
+ parseStatus: audit.parseStatus,
442
+ confidence: normalizeConfidence(audit.confidence),
443
+ method: methodFromTextHintSource(audit.textHintSource),
444
+ rawSource: audit.textHintSource,
445
+ sourceCategory: sourceCategoryFromTextHintSource(audit.textHintSource),
446
+ cacheStatus: cache?.status ?? 'unavailable',
447
+ ...(cache?.entryId ? { cacheEntryId: cache.entryId } : {}),
448
+ counts: {
449
+ materials: audit.materialCount,
450
+ classifications: audit.classificationCount,
451
+ parameters: audit.parameterCount,
452
+ },
453
+ warnings: audit.warnings,
454
+ };
455
+ }
456
+ function countPageOutcomes(packet) {
457
+ return packet.pages.reduce((counts, page) => {
458
+ counts[page.parseStatus] += 1;
459
+ return counts;
460
+ }, { parsed: 0, partial: 0, failed: 0 });
461
+ }
462
+ function pageList(pages) {
463
+ return pages.length > 0 ? pages.join(', ') : 'none';
464
+ }
465
+ function formatObservationForAgent(observation) {
466
+ const value = observation.unit ? `${String(observation.value)} ${observation.unit}` : String(observation.value);
467
+ const pageSuffix = observation.sourcePages.length > 0 ? ` p${observation.sourcePages.join(',')}` : ' no-source-page';
468
+ const confidenceSuffix = observation.confidence > 0 ? ` ${observation.confidence}%` : '';
469
+ return `${observation.label}=${value}${observation.material ? ` ${observation.material}` : ''}${pageSuffix}${confidenceSuffix}`;
470
+ }
471
+ function formatObservationForSynthesis(observation) {
472
+ const value = observation.unit && String(observation.value).trim() && String(observation.value).trim() !== observation.unit
473
+ ? `${String(observation.value)} ${observation.unit}`
474
+ : String(observation.value);
475
+ return [
476
+ observation.label,
477
+ `value=${value}`,
478
+ observation.material ? `material=${observation.material}` : null,
479
+ observation.context ? `context=${compactText(observation.context, 180)}` : null,
480
+ `pages=${pageList(observation.sourcePages)}`,
481
+ `method=${observation.method}`,
482
+ `confidence=${observation.confidence}%`,
483
+ `review=${observation.reviewStatus}`,
484
+ observation.warnings.length > 0 ? `warnings=${observation.warnings.slice(0, 3).join(' | ')}` : null,
485
+ ].filter(Boolean).join(' | ');
486
+ }
487
+ function formatContentChunkForSynthesis(chunk, maxTextLength) {
488
+ return [
489
+ `Pages ${chunk.pageRange[0]}-${chunk.pageRange[1]}`,
490
+ chunk.sectionType ?? 'general',
491
+ chunk.scope,
492
+ chunk.headingAncestry.length > 0 ? compactText(chunk.headingAncestry.join(' > '), 110) : 'untitled',
493
+ `sources=${pageList(chunk.sourcePages)}`,
494
+ compactText(chunk.text, maxTextLength),
495
+ ].join(' | ');
496
+ }
497
+ function buildBoreholeContinuityLines(packet) {
498
+ const observations = [
499
+ ...packet.observations.parameters,
500
+ ...packet.observations.materials,
501
+ ...packet.observations.classifications,
502
+ ];
503
+ return packet.traceability.boreholeIds.slice(0, 20).map((id) => {
504
+ const pattern = new RegExp(`\\b${id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/^BH/i, 'B\\.?H\\.?\\s*')}(?:\\b|\\s|[-#:])`, 'i');
505
+ const matchingObservations = observations
506
+ .filter((observation) => pattern.test([
507
+ observation.label,
508
+ String(observation.value),
509
+ observation.material,
510
+ observation.context,
511
+ ].filter(Boolean).join(' ')))
512
+ .slice(0, 12)
513
+ .map((observation) => `${observation.label}=${String(observation.value)} pages=${pageList(observation.sourcePages)} review=${observation.reviewStatus}`);
514
+ const matchingChunks = packet.contentChunks
515
+ .filter((chunk) => pattern.test(`${chunk.headingAncestry.join(' ')} ${chunk.text}`))
516
+ .slice(0, 4)
517
+ .map((chunk) => `pages ${chunk.pageRange[0]}-${chunk.pageRange[1]}: ${compactText(chunk.text, 180)}`);
518
+ return `${id}: ${[...matchingObservations, ...matchingChunks].join('; ') || 'borehole identifier detected, but no compact interval evidence retained.'}`;
519
+ });
520
+ }
521
/**
 * Assembles the traceability summary for an extraction result.
 *
 * Gathers every page referenced by parameters, materials, classifications,
 * the synthesis and the content chunks; lists which audited pages carry
 * evidence and how each page was read; and reports how many parameters can
 * be tied to at least one source page.
 *
 * @param {object} result - Extraction result (parameters, materials,
 *   classifications, optional synthesis and contentChunks).
 * @param {object[]} pages - Per-page records exposing pageNumber, counts,
 *   rawSource, sourceCategory and method.
 * @returns {object} Traceability record.
 */
function buildTraceability(result, pages) {
    const pagesPerParameter = result.parameters.map(({ sourcePages: declared, context }) => sourcePagesFromObservation(declared, context));
    let parametersWithSourcePage = 0;
    pagesPerParameter.forEach((pagesForParameter) => {
        if (pagesForParameter.length > 0) {
            parametersWithSourcePage += 1;
        }
    });
    const materialPages = result.materials.flatMap((material) => sourcePagesFromObservation(material.sourcePages, material.description));
    const classificationPages = result.classifications.flatMap((classification) => sourcePagesFromObservation(classification.sourcePages, classification.context));
    const synthesisPages = result.synthesis?.sourcePages ?? [];
    const chunkPages = (result.contentChunks ?? []).flatMap((chunk) => chunk.sourcePages);
    const sourcePages = normalizePages([
        ...pagesPerParameter.flat(),
        ...materialPages,
        ...classificationPages,
        ...synthesisPages,
        ...chunkPages,
    ]);
    // Page numbers of every audited page that satisfies the given predicate.
    const pageNumbersWhere = (predicate) => pages.filter(predicate).map((page) => page.pageNumber);
    const evidencePageNumbers = pageNumbersWhere(({ counts }) => counts.materials + counts.classifications + counts.parameters > 0);
    const methodCounts = countMethods(pages);
    const evidenceText = collectEvidenceText(result);
    const parameterTotal = result.parameters.length;
    return {
        sourcePages,
        pagesWithEvidence: normalizePages(evidencePageNumbers),
        directVisualPages: normalizePages(pageNumbersWhere((page) => page.rawSource === 'vision-visual')),
        layoutOcrPages: normalizePages(pageNumbersWhere((page) => page.sourceCategory === 'layout-ocr')),
        nativeTextPages: normalizePages(pageNumbersWhere((page) => page.sourceCategory === 'native-text')),
        boreholeIds: inferBoreholeIds(evidenceText),
        maxDepthMeters: inferMaxDepthMeters(evidenceText),
        parametersWithSourcePage,
        parametersWithoutSourcePage: Math.max(0, parameterTotal - parametersWithSourcePage),
        // With no parameters the rate is reported as 0 rather than dividing by zero.
        parameterTraceabilityRate: parameterTotal > 0
            ? roundRatio(parametersWithSourcePage / parameterTotal)
            : 0,
        methodCounts,
    };
}
552
/**
 * Produces a trimmed copy of a content chunk.
 *
 * Identification fields are copied as-is, `sectionType` is included only when
 * truthy, `significance` only when it is a finite number, source pages are
 * normalised and the text is compacted to at most 1200 characters.
 *
 * @param {object} chunk - Content chunk to compact.
 * @returns {object} Compact chunk record.
 */
function compactContentChunk(chunk) {
    const compacted = {
        chunkId: chunk.chunkId,
        pageRange: chunk.pageRange,
        headingAncestry: chunk.headingAncestry,
        scope: chunk.scope,
    };
    if (chunk.sectionType) {
        compacted.sectionType = chunk.sectionType;
    }
    if (Number.isFinite(chunk.significance)) {
        compacted.significance = chunk.significance;
    }
    compacted.sourcePages = normalizePages(chunk.sourcePages);
    compacted.text = compactText(chunk.text, 1200);
    return compacted;
}
564
/**
 * Reduces a review finding to the fields kept in the compact record.
 *
 * `code`, `severity`, `scope` and `message` are always copied. `pageNumber`
 * is kept whenever it is not null/undefined (so page 0 survives), and
 * `materialDescription` only when it is truthy.
 *
 * @param {object} finding - Review finding to compact.
 * @returns {object} Compact finding record.
 */
function compactReviewFinding(finding) {
    const { code, severity, scope, message, pageNumber, materialDescription } = finding;
    const compacted = { code, severity, scope, message };
    if (pageNumber != null) {
        compacted.pageNumber = pageNumber;
    }
    if (materialDescription) {
        compacted.materialDescription = materialDescription;
    }
    return compacted;
}
574
/**
 * Maps a text-hint source identifier to an extraction method label.
 *
 * @param {string} source - Text-hint source identifier.
 * @returns {string} 'native-pdf-text', 'layout-ocr', 'visual-reasoning', or
 *   'none' for any other value.
 */
function methodFromTextHintSource(source) {
    switch (source) {
        case 'native-text':
        case 'pdfjs-text':
            return 'native-pdf-text';
        case 'glm-ocr':
        case 'local-ocr':
            return 'layout-ocr';
        case 'vision-ocr':
        case 'vision-visual':
            return 'visual-reasoning';
        default:
            return 'none';
    }
}
586
/**
 * Maps a text-hint source identifier to a source category label.
 *
 * @param {string} source - Text-hint source identifier.
 * @returns {string} 'native-text', 'layout-ocr', 'vision', or 'none' for any
 *   other value.
 */
function sourceCategoryFromTextHintSource(source) {
    const categories = [
        [['native-text', 'pdfjs-text'], 'native-text'],
        [['glm-ocr', 'local-ocr'], 'layout-ocr'],
        [['vision-ocr', 'vision-visual'], 'vision'],
    ];
    const hit = categories.find(([sources]) => sources.includes(source));
    return hit ? hit[1] : 'none';
}
598
/**
 * Determines the extraction method behind a set of source pages.
 *
 * Looks at the pages whose number appears in `sourcePages`, ignores pages
 * whose method is 'none', and collapses the remaining methods.
 *
 * @param {number[]} sourcePages - Page numbers an observation came from.
 * @param {object[]} pages - Page records exposing pageNumber and method.
 * @returns {string} 'hybrid' when more than one distinct method is involved,
 *   the single method when there is exactly one, otherwise 'none'.
 */
function methodForSourcePages(sourcePages, pages) {
    const distinctMethods = new Set();
    pages.forEach((page) => {
        if (!sourcePages.includes(page.pageNumber)) {
            return;
        }
        if (page.method !== 'none') {
            distinctMethods.add(page.method);
        }
    });
    if (distinctMethods.size > 1) {
        return 'hybrid';
    }
    const [onlyMethod] = distinctMethods;
    return onlyMethod ?? 'none';
}
608
/**
 * Averages the confidence of the pages an observation was sourced from.
 *
 * Only pages listed in `sourcePages` that carry a finite `confidence`
 * contribute. When none qualify, `fallback` is used instead. The outcome is
 * passed through normalizeConfidence either way.
 *
 * @param {number[]} sourcePages - Page numbers an observation came from.
 * @param {object[]} pages - Page records exposing pageNumber and confidence.
 * @param {number} fallback - Confidence to use when no page qualifies.
 * @returns {number} Normalised confidence value.
 */
function confidenceForSourcePages(sourcePages, pages, fallback) {
    let total = 0;
    let contributing = 0;
    pages.forEach((page) => {
        if (sourcePages.includes(page.pageNumber) && Number.isFinite(page.confidence)) {
            total += page.confidence;
            contributing += 1;
        }
    });
    if (contributing === 0) {
        return normalizeConfidence(fallback);
    }
    return normalizeConfidence(total / contributing);
}
617
/**
 * Tallies how many pages were read with each extraction method.
 *
 * The five known methods always appear in the result (starting at 0); any
 * other method value found on a page gets its own counter.
 *
 * @param {Iterable<object>} pages - Page records exposing `method`.
 * @returns {Object<string, number>} Page count per method.
 */
function countMethods(pages) {
    const knownMethods = ['native-pdf-text', 'layout-ocr', 'visual-reasoning', 'hybrid', 'none'];
    const tally = Object.fromEntries(knownMethods.map((method) => [method, 0]));
    for (const { method } of pages) {
        const previous = tally[method] ?? 0;
        tally[method] = previous + 1;
    }
    return tally;
}
630
/**
 * Lists the review gates raised by an extraction result.
 *
 * Each gate is included only when its condition holds, and the gates keep a
 * fixed order: human review, page failures, partial pages, direct visual
 * verification, missing parameters.
 *
 * @param {object} result - Extraction result exposing reviewRequired,
 *   pageFailures, pageAudits and parameters.
 * @returns {string[]} Names of the gates that apply.
 */
function buildReviewGates(result) {
    const { pageAudits, parameters } = result;
    const checks = [
        ['human-review-required', result.reviewRequired],
        ['page-failures-present', result.pageFailures.length > 0],
        ['partial-pages-present', pageAudits.some((audit) => audit.parseStatus === 'partial')],
        ['direct-visual-verification-required', pageAudits.some((audit) => audit.textHintSource === 'vision-visual')],
        ['missing-parameters-present', parameters.some((parameter) => isMissingValue(parameter.valueText, parameter.numericValue))],
    ];
    return checks
        .filter(([, raised]) => raised)
        .map(([gate]) => gate);
}
639
/**
 * Combines an observation's declared source pages with any page numbers
 * mentioned in its free-text context.
 *
 * @param {Iterable<number>|null|undefined} sourcePages - Declared pages;
 *   null/undefined is treated as none.
 * @param {string|null|undefined} context - Text scanned for page references.
 * @returns {number[]} Normalised page list.
 */
function sourcePagesFromObservation(sourcePages, context) {
    const declared = sourcePages ?? [];
    const mentioned = extractPageNumbers(context);
    return normalizePages([...declared, ...mentioned]);
}
645
/**
 * Pulls page numbers out of free text.
 *
 * Recognises "page" or "p"/"p." (case-insensitive), optionally followed by
 * "#", then a number of one to four digits.
 *
 * @param {string|null|undefined} value - Text to scan; falsy input yields [].
 * @returns {number[]} Normalised page numbers found in the text.
 */
function extractPageNumbers(value) {
    if (!value) {
        return [];
    }
    const pageMention = /\b(?:page|p\.?)\s*#?\s*(\d{1,4})\b/gi;
    const mentioned = Array.from(value.matchAll(pageMention), (match) => Number(match[1]));
    return normalizePages(mentioned);
}
652
/**
 * Cleans a list of page references.
 *
 * Every entry is coerced with Number(); only positive integers are kept,
 * duplicates are dropped and the result is sorted ascending.
 *
 * @param {Array<*>} values - Raw page references.
 * @returns {number[]} Unique positive integer page numbers in ascending order.
 */
function normalizePages(values) {
    const isPositiveInteger = (candidate) => Number.isInteger(candidate) && candidate > 0;
    const distinct = new Set(values
        .map((raw) => Number(raw))
        .filter(isPositiveInteger));
    return Array.from(distinct).sort((a, b) => a - b);
}
657
/**
 * Tells whether a parameter carries no usable value.
 *
 * A finite numeric value always counts as present. Otherwise the textual
 * value is missing when it is blank or consists solely of a placeholder such
 * as "-", "--", "—", "n/a", "nil", "none", "not reported", "unavailable" or
 * "missing" (case-insensitive).
 *
 * @param {*} valueText - Textual value; null/undefined is treated as empty.
 * @param {*} numericValue - Parsed numeric value, if any.
 * @returns {boolean} True when the value should be treated as missing.
 */
function isMissingValue(valueText, numericValue) {
    const hasNumber = numericValue != null && Number.isFinite(numericValue);
    if (hasNumber) {
        return false;
    }
    const trimmed = String(valueText ?? '').trim();
    if (trimmed.length === 0) {
        return true;
    }
    return /^(?:-|--|—|n\/?a|nil|none|not\s+(?:reported|extracted|encountered|available)|unavailable|missing)$/i.test(trimmed);
}
664
/**
 * Converts a raw confidence into an integer between 0 and 100.
 *
 * Anything that is not a finite number is treated as 0; finite values are
 * rounded to the nearest integer and clamped to the range.
 *
 * @param {*} value - Raw confidence.
 * @returns {number} Integer confidence in [0, 100].
 */
function normalizeConfidence(value) {
    const finite = Number.isFinite(value) ? value : 0;
    const rounded = Math.round(finite);
    const capped = Math.min(100, rounded);
    return Math.max(0, capped);
}
667
/**
 * Clamps a ratio to [0, 1] and rounds it to three decimal places.
 *
 * @param {number} value - Raw ratio.
 * @returns {number} Clamped ratio rounded to the nearest thousandth.
 */
function roundRatio(value) {
    const scale = 1000;
    const clamped = Math.max(0, Math.min(1, value));
    return Math.round(clamped * scale) / scale;
}
670
/**
 * Compacts each value to at most 500 characters, discards the ones that end
 * up empty and removes duplicates while keeping first-seen order.
 *
 * @param {Array<*>} values - Raw text values.
 * @returns {string[]} Distinct, non-empty compacted strings.
 */
function uniqueNonEmpty(values) {
    const compacted = values.map((entry) => compactText(entry, 500));
    const distinct = new Set(compacted.filter((entry) => entry.length > 0));
    return Array.from(distinct);
}
675
/**
 * Builds an observation identifier of the form `<type>-<n>-<slug>`.
 *
 * `n` is the one-based position. The slug is the lower-cased label with every
 * run of characters outside a-z/0-9 replaced by a hyphen, leading and
 * trailing hyphens stripped, then cut to 28 characters; 'item' is used when
 * nothing is left.
 *
 * @param {string} type - Observation type prefix.
 * @param {number} index - Zero-based position of the observation.
 * @param {string} label - Label the slug is derived from.
 * @returns {string} Observation identifier.
 */
function observationId(type, index, label) {
    const hyphenated = label.toLowerCase().replace(/[^a-z0-9]+/g, '-');
    const stripped = hyphenated.replace(/^-+|-+$/g, '');
    const slug = stripped.slice(0, 28) || 'item';
    const position = index + 1;
    return `${type}-${position}-${slug}`;
}
683
/**
 * Collapses whitespace and shortens text to a maximum length.
 *
 * Null/undefined becomes an empty string and other values are stringified.
 * Runs of whitespace collapse to a single space and the ends are trimmed.
 * Text longer than `maxLength` is cut to `maxLength - 3` characters (never
 * fewer than zero), stripped of trailing whitespace and suffixed with "...".
 *
 * @param {*} value - Value to compact.
 * @param {number} maxLength - Longest text returned without truncation.
 * @returns {string} Compacted text.
 */
function compactText(value, maxLength) {
    const collapsed = String(value ?? '').replace(/\s+/g, ' ').trim();
    if (collapsed.length > maxLength) {
        const keep = Math.max(0, maxLength - 3);
        const head = collapsed.slice(0, keep).trimEnd();
        return `${head}...`;
    }
    return collapsed;
}
689
/**
 * Flattens the textual content of an extraction result into one string.
 *
 * Fragments are taken, in order, from the title and summary, each material,
 * parameter and classification, the risks and recommendations, every content
 * chunk (heading ancestry followed by text) and, when present, the synthesis
 * lists. Anything that is not a non-blank string is discarded and the rest is
 * joined with newlines.
 *
 * @param {object} result - Extraction result.
 * @returns {string} Newline-separated evidence text.
 */
function collectEvidenceText(result) {
    const fragments = [result.title, result.summary];
    // Appends every item of an iterable to the fragment list, preserving order.
    const addAll = (items) => {
        for (const item of items) {
            fragments.push(item);
        }
    };
    result.materials.forEach(({ description, uscsSymbol, lithology }) => {
        fragments.push(description, uscsSymbol, lithology);
    });
    result.parameters.forEach(({ name, valueText, material, context }) => {
        fragments.push(name, valueText, material, context);
    });
    result.classifications.forEach(({ system, value, context }) => {
        fragments.push(system, value, context);
    });
    addAll(result.risks);
    addAll(result.recommendations);
    (result.contentChunks ?? []).forEach((chunk) => {
        addAll(chunk.headingAncestry);
        fragments.push(chunk.text);
    });
    const { synthesis } = result;
    if (synthesis) {
        addAll(synthesis.takeaways);
        addAll(synthesis.groundModel);
        addAll(synthesis.keyParameters);
        addAll(synthesis.interpretation);
        addAll(synthesis.limitations);
    }
    const isNonBlankString = (fragment) => typeof fragment === 'string' && fragment.trim().length > 0;
    return fragments.filter(isNonBlankString).join('\n');
}
710
/**
 * Finds borehole identifiers mentioned in free text.
 *
 * Recognises "BH"/"B.H." (optionally followed by "No.") as well as
 * "BORE HOLE NO." and "BOREHOLE NO.", case-insensitively, followed by a
 * number of up to three digits after any leading zeros. Each hit becomes
 * `BH<number>`; `BH0` is discarded.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Distinct identifiers sorted in natural numeric order.
 */
function inferBoreholeIds(text) {
    const patterns = [
        /\bB\.?\s*H\.?\s*(?:NO\.?)?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
        /\bBORE\s*HOLE\s*NO\.?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
        /\bBOREHOLE\s*NO\.?\s*[:#-]?\s*0*(\d{1,3})\b/gi,
    ];
    const found = new Set();
    for (const pattern of patterns) {
        for (const match of text.matchAll(pattern)) {
            const id = `BH${match[1]}`;
            if (!/^BH0$/.test(id)) {
                found.add(id);
            }
        }
    }
    const byNaturalOrder = (left, right) => left.localeCompare(right, undefined, { numeric: true });
    return Array.from(found).sort(byNaturalOrder);
}
720
/**
 * Estimates the greatest depth, in metres, mentioned in free text.
 *
 * Two kinds of mention are considered: a number of up to three integer digits
 * (with optional decimals) directly followed by the unit "m", and such a
 * number appearing within 60 non-digit characters after "maximum depth",
 * "termination" or "terminated". Only values greater than 0 and no larger
 * than 120 are accepted.
 *
 * @param {string|null|undefined} text - Text to scan; nullish input is
 *   treated as empty.
 * @returns {number|null} Largest accepted depth, or null when none is found.
 */
function inferMaxDepthMeters(text) {
    const haystack = String(text ?? '');
    const depthPatterns = [
        /\b(\d{1,3}(?:\.\d+)?)\s*m\b/gi,
        /\b(?:maximum\s+depth|termination|terminated)\b[^0-9]{0,60}(\d{1,3}(?:\.\d+)?)\b/gi,
    ];
    // Track a running maximum instead of calling Math.max(...depths): spreading
    // one argument per match can exceed the engine's argument limit and throw a
    // RangeError when the text contains a very large number of depth mentions.
    let maxDepth = null;
    for (const pattern of depthPatterns) {
        for (const match of haystack.matchAll(pattern)) {
            const depth = Number(match[1]);
            const isPlausible = Number.isFinite(depth) && depth > 0 && depth <= 120;
            if (isPlausible && (maxDepth === null || depth > maxDepth)) {
                maxDepth = depth;
            }
        }
    }
    return maxDepth;
}
727
+ //# sourceMappingURL=document-evidence-packet.js.map