@geotechcli/core 0.4.21 → 0.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/brain.d.ts +1 -5
- package/dist/agents/brain.d.ts.map +1 -1
- package/dist/agents/brain.js +4 -120
- package/dist/agents/brain.js.map +1 -1
- package/dist/agents/data-tools.js +759 -0
- package/dist/agents/data-tools.js.map +1 -1
- package/dist/agents/runtime-bootstrap.d.ts +6 -0
- package/dist/agents/runtime-bootstrap.d.ts.map +1 -0
- package/dist/agents/runtime-bootstrap.js +8 -0
- package/dist/agents/runtime-bootstrap.js.map +1 -0
- package/dist/agents/runtime-fallbacks.d.ts +7 -0
- package/dist/agents/runtime-fallbacks.d.ts.map +1 -0
- package/dist/agents/runtime-fallbacks.js +87 -0
- package/dist/agents/runtime-fallbacks.js.map +1 -0
- package/dist/agents/swarm.d.ts +1 -4
- package/dist/agents/swarm.d.ts.map +1 -1
- package/dist/agents/swarm.js +74 -8
- package/dist/agents/swarm.js.map +1 -1
- package/dist/agents/tool-runtime.d.ts +7 -0
- package/dist/agents/tool-runtime.d.ts.map +1 -0
- package/dist/agents/tool-runtime.js +9 -0
- package/dist/agents/tool-runtime.js.map +1 -0
- package/dist/config/index.d.ts +4 -4
- package/dist/config/index.js +1 -1
- package/dist/config/index.js.map +1 -1
- package/dist/geo/coordinates.d.ts +40 -0
- package/dist/geo/coordinates.d.ts.map +1 -0
- package/dist/geo/coordinates.js +461 -0
- package/dist/geo/coordinates.js.map +1 -0
- package/dist/geo/index.d.ts +1 -0
- package/dist/geo/index.d.ts.map +1 -1
- package/dist/geo/index.js +1 -0
- package/dist/geo/index.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/ingest/ags.d.ts +3 -0
- package/dist/ingest/ags.d.ts.map +1 -1
- package/dist/ingest/ags.js +98 -9
- package/dist/ingest/ags.js.map +1 -1
- package/dist/ingest/cpt.d.ts +4 -0
- package/dist/ingest/cpt.d.ts.map +1 -1
- package/dist/ingest/cpt.js +87 -25
- package/dist/ingest/cpt.js.map +1 -1
- package/dist/ingest/document-inputs.d.ts +37 -0
- package/dist/ingest/document-inputs.d.ts.map +1 -0
- package/dist/ingest/document-inputs.js +197 -0
- package/dist/ingest/document-inputs.js.map +1 -0
- package/dist/ingest/geotech-document.d.ts +118 -0
- package/dist/ingest/geotech-document.d.ts.map +1 -0
- package/dist/ingest/geotech-document.js +1006 -0
- package/dist/ingest/geotech-document.js.map +1 -0
- package/dist/ingest/geotech-extract.d.ts +86 -0
- package/dist/ingest/geotech-extract.d.ts.map +1 -0
- package/dist/ingest/geotech-extract.js +652 -0
- package/dist/ingest/geotech-extract.js.map +1 -0
- package/dist/ingest/geotech-schemas.d.ts +248 -0
- package/dist/ingest/geotech-schemas.d.ts.map +1 -0
- package/dist/ingest/geotech-schemas.js +150 -0
- package/dist/ingest/geotech-schemas.js.map +1 -0
- package/dist/ingest/index.d.ts +8 -0
- package/dist/ingest/index.d.ts.map +1 -1
- package/dist/ingest/index.js +8 -0
- package/dist/ingest/index.js.map +1 -1
- package/dist/ingest/ingest-job-child.d.ts +2 -0
- package/dist/ingest/ingest-job-child.d.ts.map +1 -0
- package/dist/ingest/ingest-job-child.js +45 -0
- package/dist/ingest/ingest-job-child.js.map +1 -0
- package/dist/ingest/job-store.d.ts +117 -0
- package/dist/ingest/job-store.d.ts.map +1 -0
- package/dist/ingest/job-store.js +541 -0
- package/dist/ingest/job-store.js.map +1 -0
- package/dist/ingest/job-worker.d.ts +24 -0
- package/dist/ingest/job-worker.d.ts.map +1 -0
- package/dist/ingest/job-worker.js +1129 -0
- package/dist/ingest/job-worker.js.map +1 -0
- package/dist/ingest/pdf.d.ts +102 -0
- package/dist/ingest/pdf.d.ts.map +1 -0
- package/dist/ingest/pdf.js +1544 -0
- package/dist/ingest/pdf.js.map +1 -0
- package/dist/ingest/review-store.d.ts +215 -0
- package/dist/ingest/review-store.d.ts.map +1 -0
- package/dist/ingest/review-store.js +1995 -0
- package/dist/ingest/review-store.js.map +1 -0
- package/dist/llm/capabilities.d.ts +8 -0
- package/dist/llm/capabilities.d.ts.map +1 -0
- package/dist/llm/capabilities.js +73 -0
- package/dist/llm/capabilities.js.map +1 -0
- package/dist/llm/index.d.ts +3 -2
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/providers/anthropic.d.ts +6 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -1
- package/dist/llm/providers/anthropic.js +10 -1
- package/dist/llm/providers/anthropic.js.map +1 -1
- package/dist/llm/providers/hosted-beta.d.ts +6 -0
- package/dist/llm/providers/hosted-beta.d.ts.map +1 -1
- package/dist/llm/providers/hosted-beta.js +40 -10
- package/dist/llm/providers/hosted-beta.js.map +1 -1
- package/dist/llm/providers/huggingface.d.ts +6 -0
- package/dist/llm/providers/huggingface.d.ts.map +1 -1
- package/dist/llm/providers/huggingface.js +21 -1
- package/dist/llm/providers/huggingface.js.map +1 -1
- package/dist/llm/providers/openai-compatible.d.ts +6 -0
- package/dist/llm/providers/openai-compatible.d.ts.map +1 -1
- package/dist/llm/providers/openai-compatible.js +21 -1
- package/dist/llm/providers/openai-compatible.js.map +1 -1
- package/dist/llm/providers/zhipu.d.ts +6 -0
- package/dist/llm/providers/zhipu.d.ts.map +1 -1
- package/dist/llm/providers/zhipu.js +15 -1
- package/dist/llm/providers/zhipu.js.map +1 -1
- package/dist/llm/router.d.ts +7 -0
- package/dist/llm/router.d.ts.map +1 -1
- package/dist/llm/router.js +33 -13
- package/dist/llm/router.js.map +1 -1
- package/dist/llm/types.d.ts +22 -4
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/llm/types.js.map +1 -1
- package/dist/meta/metadata.json +1 -1
- package/dist/report/html.d.ts +3 -0
- package/dist/report/html.d.ts.map +1 -0
- package/dist/report/html.js +626 -0
- package/dist/report/html.js.map +1 -0
- package/dist/report/index.d.ts +2 -0
- package/dist/report/index.d.ts.map +1 -1
- package/dist/report/index.js +2 -0
- package/dist/report/index.js.map +1 -1
- package/dist/report/ingest-dossier.d.ts +81 -0
- package/dist/report/ingest-dossier.d.ts.map +1 -0
- package/dist/report/ingest-dossier.js +324 -0
- package/dist/report/ingest-dossier.js.map +1 -0
- package/dist/storage/index.d.ts +5 -0
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +12 -6
- package/dist/storage/index.js.map +1 -1
- package/dist/vision/geotech-document.d.ts +46 -0
- package/dist/vision/geotech-document.d.ts.map +1 -0
- package/dist/vision/geotech-document.js +576 -0
- package/dist/vision/geotech-document.js.map +1 -0
- package/dist/vision/index.d.ts +31 -0
- package/dist/vision/index.d.ts.map +1 -1
- package/dist/vision/index.js +659 -27
- package/dist/vision/index.js.map +1 -1
- package/dist/vision/ocr.d.ts +29 -0
- package/dist/vision/ocr.d.ts.map +1 -0
- package/dist/vision/ocr.js +287 -0
- package/dist/vision/ocr.js.map +1 -0
- package/dist/vision/preprocess.d.ts +26 -0
- package/dist/vision/preprocess.d.ts.map +1 -0
- package/dist/vision/preprocess.js +194 -0
- package/dist/vision/preprocess.js.map +1 -0
- package/package.json +5 -1
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import type { LLMConfig } from '../llm/types.js';
|
|
2
|
+
import type { PdfDocumentInspection, PdfPageClassification } from './pdf.js';
|
|
3
|
+
import type { BoreholeDocumentIngestResult } from './geotech-extract.js';
|
|
4
|
+
import type { GeotechDocumentIngestResult } from './geotech-document.js';
|
|
5
|
+
/** Kinds of document an ingest job can be created for. */
export type PersistedIngestJobDocumentType = 'borehole-log' | 'geotech-document';
/** Lifecycle states of a persisted ingest job. */
export type PersistedIngestJobStatus = 'queued' | 'running' | 'completed' | 'failed' | 'canceled';
/** Processing states of a single page checkpoint. */
export type PersistedIngestJobPageStatus = 'pending' | 'completed' | 'failed';
|
|
8
|
+
/** Per-page progress entry stored under `checkpoints.pages` of a job record. */
export interface PersistedIngestJobPageCheckpoint {
    /** Page number taken from the PDF inspection. */
    pageNumber: number;
    /** Classification reported by the PDF inspection for this page, or null. */
    classification: PdfPageClassification | null;
    /** How the page is fed to extraction. */
    sourceKind: 'pdf-page' | 'raster-image';
    /** Relative processing cost of the page. */
    weight: number;
    /** Current processing state of the page. */
    status: PersistedIngestJobPageStatus;
    /** Attempt counter. */
    attempts: number;
    /** Timestamp string of the last change to this checkpoint. */
    updatedAt: string;
    completedAt?: string;
    /** Error text recorded for the page, if any. */
    error?: string;
    downgraded?: boolean;
    ocrTextHint?: string;
    ocrSource?: 'native-text' | 'pdfjs-text' | 'local-ocr' | 'vision-ocr' | 'none';
    ocrWarnings?: string[];
    /** Opaque per-page payload; its shape is not described by this declaration. */
    result?: unknown;
}
|
|
24
|
+
/** Final outcome attached to a job record under `result`. */
export interface PersistedIngestJobResultRecord {
    /** Ingest result for either supported document type. */
    ingestResult: BoreholeDocumentIngestResult | GeotechDocumentIngestResult;
    /** Identifiers of a persisted review, when one is recorded. */
    persistedReview?: {
        datasetName: string;
        reviewId: string;
        createdAt?: string;
    };
}
|
|
32
|
+
/** On-disk representation of one ingest job. */
export interface PersistedIngestJobRecord {
    /** Discriminator identifying the JSON document as an ingest job record. */
    kind: 'geotech-ingest-job-record';
    /** Schema version of the record layout. */
    schemaVersion: 1;
    jobId: string;
    documentType: PersistedIngestJobDocumentType;
    status: PersistedIngestJobStatus;
    createdAt: string;
    updatedAt: string;
    startedAt?: string;
    completedAt?: string;
    canceledAt?: string;
    /** Description of the input document. */
    source: {
        filePath: string;
        fileName: string;
        /** Only PDF inputs are representable. */
        inputKind: 'pdf';
        totalPages: number;
        weightedPageCost: number;
    };
    /** Subset of the LLM configuration captured with the job. */
    config: {
        provider: LLMConfig['provider'];
        baseUrl?: string;
        modelId?: string;
        visionModelId?: string;
        timeout?: number;
    };
    /** Concurrency settings recorded for the job. */
    processing: {
        pagePreprocessingConcurrency: number;
        chunkExtractionConcurrency: number;
    };
    /** Caller-supplied request options. */
    request: {
        projectId?: string;
        overrideBoreholeId?: string;
        reviewTitle?: string;
    };
    /** PDF inspection captured at creation time, or null when none is stored. */
    inspection: PdfDocumentInspection | null;
    /** Runtime bookkeeping for the process executing the job. */
    execution: {
        runCount: number;
        pid?: number;
        lastHeartbeatAt?: string;
        lastError?: string;
        cancelRequested?: boolean;
    };
    /** Per-page progress checkpoints. */
    checkpoints: {
        pages: PersistedIngestJobPageCheckpoint[];
    };
    /** Final outcome, when one has been recorded. */
    result?: PersistedIngestJobResultRecord;
}
|
|
79
|
+
/** Inputs accepted by `createPersistedIngestJob` and `createAndStartPersistedIngestJob`. */
export interface CreatePersistedIngestJobOptions {
    documentType: PersistedIngestJobDocumentType;
    /** Path of the document to ingest. */
    filePath: string;
    /** PDF inspection result, or null when none is available. */
    inspection: PdfDocumentInspection | null;
    /** LLM settings to capture with the job. */
    config: Pick<LLMConfig, 'provider' | 'baseUrl' | 'modelId' | 'visionModelId' | 'timeout'>;
    projectId?: string;
    overrideBoreholeId?: string;
    reviewTitle?: string;
    /** Optional clock override used for timestamps. */
    now?: () => Date;
}
|
|
89
|
+
/** Computes a numeric page cost for an inspection (which may be null or undefined). */
export declare function computeWeightedPdfPageCost(inspection: PdfDocumentInspection | null | undefined): number;
/** Decides whether a document should be processed through an asynchronous ingest job. */
export declare function shouldUseAsyncIngestJob(inspection: PdfDocumentInspection | null | undefined, totalPagesFallback?: number): boolean;
/** Resolves the extraction concurrency for the given provider/model configuration. */
export declare function resolvePersistedIngestJobExtractionConcurrency(config: Pick<LLMConfig, 'provider' | 'modelId' | 'visionModelId'>): number;
/** Creates a job record from the given options. */
export declare function createPersistedIngestJob(options: CreatePersistedIngestJobOptions): PersistedIngestJobRecord;
/** Loads a job record by id; the return type allows null. */
export declare function loadPersistedIngestJob(jobId: string): PersistedIngestJobRecord | null;
/** Saves a job record and returns a record. */
export declare function savePersistedIngestJob(record: PersistedIngestJobRecord): PersistedIngestJobRecord;
/** Applies `updater` to the job record identified by `jobId` and returns a record. */
export declare function updatePersistedIngestJob(jobId: string, updater: (record: PersistedIngestJobRecord) => PersistedIngestJobRecord): PersistedIngestJobRecord;
/** Records a failure message against a job. */
export declare function failPersistedIngestJob(jobId: string, message: string, options?: {
    now?: () => Date;
}): PersistedIngestJobRecord;
/** Reports whether a status is terminal. */
export declare function isPersistedIngestJobTerminalStatus(status: PersistedIngestJobStatus): boolean;
/** Reports whether the process with the given pid appears to be alive. */
export declare function isPersistedIngestJobProcessAlive(pid?: number): boolean;
// NOTE(review): the implementations of the declarations below are not visible
// alongside this declaration file; confirm their behavior against job-store.js.
export declare function startPersistedIngestJob(jobId: string, options?: {
    now?: () => Date;
}): PersistedIngestJobRecord;
export declare function createAndStartPersistedIngestJob(options: CreatePersistedIngestJobOptions): PersistedIngestJobRecord;
export declare function resumePersistedIngestJob(jobId: string, options?: {
    now?: () => Date;
}): PersistedIngestJobRecord;
export declare function cancelPersistedIngestJob(jobId: string, options?: {
    now?: () => Date;
}): PersistedIngestJobRecord;
export declare function waitForPersistedIngestJob(jobId: string, options?: {
    pollMs?: number;
    timeoutMs?: number;
}): Promise<PersistedIngestJobRecord>;
export declare function loadPersistedIngestJobResult(jobId: string): PersistedIngestJobResultRecord | null;
export declare function deletePersistedIngestJob(jobId: string): void;
|
|
117
|
+
//# sourceMappingURL=job-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"job-store.d.ts","sourceRoot":"","sources":["../../src/ingest/job-store.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAC7E,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,sBAAsB,CAAC;AACzE,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,uBAAuB,CAAC;AAEzE,MAAM,MAAM,8BAA8B,GAAG,cAAc,GAAG,kBAAkB,CAAC;AACjF,MAAM,MAAM,wBAAwB,GAAG,QAAQ,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,CAAC;AAClG,MAAM,MAAM,4BAA4B,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE9E,MAAM,WAAW,gCAAgC;IAC/C,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC7C,UAAU,EAAE,UAAU,GAAG,cAAc,CAAC;IACxC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,4BAA4B,CAAC;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,aAAa,GAAG,YAAY,GAAG,WAAW,GAAG,YAAY,GAAG,MAAM,CAAC;IAC/E,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,8BAA8B;IAC7C,YAAY,EAAE,4BAA4B,GAAG,2BAA2B,CAAC;IACzE,eAAe,CAAC,EAAE;QAChB,WAAW,EAAE,MAAM,CAAC;QACpB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED,MAAM,WAAW,wBAAwB;IACvC,IAAI,EAAE,2BAA2B,CAAC;IAClC,aAAa,EAAE,CAAC,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,8BAA8B,CAAC;IAC7C,MAAM,EAAE,wBAAwB,CAAC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE;QACN,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,KAAK,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,MAAM,EAAE;QACN,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,CAAC;QAChC,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,UAAU,EAAE;QACV,4BAA4B,EAAE,MAAM,CAAC;QACrC,0BAA0B,EAAE,MAAM,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;Q
AC5B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,UAAU,EAAE,qBAAqB,GAAG,IAAI,CAAC;IACzC,SAAS,EAAE;QACT,QAAQ,EAAE,MAAM,CAAC;QACjB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,eAAe,CAAC,EAAE,OAAO,CAAC;KAC3B,CAAC;IACF,WAAW,EAAE;QACX,KAAK,EAAE,gCAAgC,EAAE,CAAC;KAC3C,CAAC;IACF,MAAM,CAAC,EAAE,8BAA8B,CAAC;CACzC;AAED,MAAM,WAAW,+BAA+B;IAC9C,YAAY,EAAE,8BAA8B,CAAC;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,qBAAqB,GAAG,IAAI,CAAC;IACzC,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE,UAAU,GAAG,SAAS,GAAG,SAAS,GAAG,eAAe,GAAG,SAAS,CAAC,CAAC;IAC1F,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CAClB;AA2PD,wBAAgB,0BAA0B,CAAC,UAAU,EAAE,qBAAqB,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAMvG;AAED,wBAAgB,uBAAuB,CACrC,UAAU,EAAE,qBAAqB,GAAG,IAAI,GAAG,SAAS,EACpD,kBAAkB,CAAC,EAAE,MAAM,GAC1B,OAAO,CAIT;AAED,wBAAgB,8CAA8C,CAC5D,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE,UAAU,GAAG,SAAS,GAAG,eAAe,CAAC,GAChE,MAAM,CAaR;AAED,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,+BAA+B,GACvC,wBAAwB,CA0D1B;AAED,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,wBAAwB,GAAG,IAAI,CAQrF;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,wBAAwB,GAAG,wBAAwB,CAGjG;AAED,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,CAAC,MAAM,EAAE,wBAAwB,KAAK,wBAAwB,GACtE,wBAAwB,CAQ1B;AAED,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CAClB,GACA,wBAAwB,CAmB1B;AAED,wBAAgB,kCAAkC,CAAC,MAAM,EAAE,wBAAwB,GAAG,OAAO,CAE5F;AAED,wBAAgB,gCAAgC,CAAC,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAWtE;AA4BD,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,IAAI,CAAA;CAAE,GAC7B,wBAAwB,CA2E1B;AAED,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,+BAA+B,GACvC,wBAAwB,CAG1B;AAED,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,IAAI,CAAA;CAAE,GAC7B,wBAAwB,CAsC1B;AAED,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,IAAI,CAAA;CAAE,GAC7B,wBAAwB,CAgC1B;AAED,wBAAsB,yBAAyB,CAC7C,KAAK,EAAE,MAAM,EACb,OAAO
,CAAC,EAAE;IACR,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GACA,OAAO,CAAC,wBAAwB,CAAC,CA6BnC;AAED,wBAAgB,4BAA4B,CAAC,KAAK,EAAE,MAAM,GAAG,8BAA8B,GAAG,IAAI,CAEjG;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAK5D"}
|
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
3
|
+
import { basename, dirname, join, resolve } from 'node:path';
|
|
4
|
+
import { spawn } from 'node:child_process';
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
// Version stamp written into every job record; records carrying a different
// value are rejected when they are normalized on load.
const JOB_SCHEMA_VERSION = 1;
// Page preprocessing concurrency stored in new job records and used as the
// fallback when a loaded record does not carry a numeric value.
const PAGE_PREPROCESSING_CONCURRENCY = 2;
// Lower bound (10 minutes, in milliseconds) for the stale-heartbeat threshold.
const STALE_HEARTBEAT_FLOOR_MS = 10 * 60 * 1000;
|
|
9
|
+
/**
 * Returns the current time as an ISO-8601 string.
 *
 * @param {(() => Date) | null | undefined} now Optional clock; when nullish, `new Date()` is used.
 * @returns {string} The clock's date formatted with `toISOString()`.
 */
function nowIso(now) {
    const clock = now ?? (() => new Date());
    const current = clock();
    return current.toISOString();
}
|
|
12
|
+
/**
 * Resolves the root configuration directory.
 *
 * @returns {string} The value of `GEOTECHCLI_CONFIG_DIR` when that variable is
 *   set (even to an empty string), otherwise `<home>/.geotechcli`.
 */
function getConfigRoot() {
    const override = process.env.GEOTECHCLI_CONFIG_DIR;
    if (override != null) {
        return override;
    }
    return join(homedir(), '.geotechcli');
}
|
|
15
|
+
/**
 * Returns the `ingest-jobs` directory under the config root, creating it
 * (recursively) when it does not exist yet.
 *
 * @returns {string} Path of the ingest jobs directory.
 */
function getIngestJobsDir() {
    const jobsDir = join(getConfigRoot(), 'ingest-jobs');
    const alreadyPresent = existsSync(jobsDir);
    if (alreadyPresent === false) {
        mkdirSync(jobsDir, { recursive: true });
    }
    return jobsDir;
}
|
|
22
|
+
/**
 * Turns arbitrary text into a lowercase token made of `a-z`, `0-9` and `-`.
 *
 * Runs of other characters collapse into a single dash, one leading and one
 * trailing dash are stripped, and the result is cut to 48 characters.
 *
 * @param {string} value Text to sanitize.
 * @returns {string} The sanitized token (possibly empty).
 */
function sanitizeToken(value) {
    const lowered = value.toLowerCase();
    const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
    const trimmed = dashed.replace(/^-|-$/g, '');
    return trimmed.slice(0, 48);
}
|
|
29
|
+
/**
 * Converts a timestamp string into a token that keeps only digits and the
 * letters `t`/`z`; every other run of characters becomes a single dash and
 * one leading/trailing dash is removed.
 *
 * @param {string} value Timestamp text (e.g. an ISO-8601 string).
 * @returns {string} The timestamp token.
 */
function buildTimestampToken(value) {
    const lowered = value.toLowerCase();
    const dashed = lowered.replace(/[^0-9tz]+/g, '-');
    return dashed.replace(/^-|-$/g, '');
}
|
|
35
|
+
/**
 * Derives a job id from the creation timestamp and the file's base name.
 *
 * @param {string} filePath Path of the source document.
 * @param {string} createdAt Creation timestamp string.
 * @returns {string} Id of the form `ingest-job-<timestamp>-<file token>`;
 *   the file token is `document` when the base name sanitizes to nothing.
 */
function buildJobId(filePath, createdAt) {
    let fileToken = sanitizeToken(basename(filePath));
    if (!fileToken) {
        fileToken = 'document';
    }
    const timestampToken = buildTimestampToken(createdAt);
    return `ingest-job-${timestampToken}-${fileToken}`;
}
|
|
39
|
+
/**
 * Resolves the directory that stores a job's files.
 *
 * The directory name is the sanitized job id. Ids that sanitize to an empty
 * token (for example `""` or `".."`) are rejected: falling back to the raw id
 * would let the path resolve to the jobs root itself or to a location outside
 * of it.
 *
 * @param {string} jobId Job identifier.
 * @returns {string} Path of the job directory inside the ingest jobs directory.
 * @throws {Error} When `jobId` is not a string or contains no usable characters.
 */
function getJobDir(jobId) {
    const token = typeof jobId === 'string' ? sanitizeToken(jobId) : '';
    if (!token) {
        throw new Error(`Invalid ingest job id "${String(jobId)}".`);
    }
    return join(getIngestJobsDir(), token);
}
|
|
42
|
+
/**
 * Returns the path of the `job.json` record file for a job.
 *
 * @param {string} jobId Job identifier.
 * @returns {string} Path to `job.json` inside the job's directory.
 */
function getJobRecordPath(jobId) {
    const jobDir = getJobDir(jobId);
    return join(jobDir, 'job.json');
}
|
|
45
|
+
/**
 * Serializes `value` as pretty-printed JSON and writes it to `filePath`,
 * creating the parent directory when needed.
 *
 * The data is first written to a temporary sibling file and then renamed over
 * the target. The temporary name includes the current process id so that two
 * processes writing the same record cannot clobber each other's temp file.
 * The temporary file is removed whenever the rename did not succeed.
 *
 * @param {string} filePath Destination file.
 * @param {unknown} value JSON-serializable value.
 * @returns {void}
 * @throws Any file-system error other than the tolerated rename failures.
 */
function atomicWriteJson(filePath, value) {
    const dir = dirname(filePath);
    if (dir && !existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
    }
    const serialized = JSON.stringify(value, null, 2);
    const tempPath = `${filePath}.${process.pid}.tmp`;
    let renamed = false;
    try {
        writeFileSync(tempPath, serialized, 'utf-8');
        try {
            renameSync(tempPath, filePath);
            renamed = true;
        }
        catch (error) {
            const code = error?.code;
            if (code !== 'EPERM' && code !== 'EBUSY' && code !== 'EACCES') {
                throw error;
            }
            // Windows can reject the atomic rename when another process is briefly
            // reading the current job file; fall back to an in-place overwrite so the
            // async ingest worker can keep checkpointing progress.
            writeFileSync(filePath, serialized, 'utf-8');
        }
    }
    finally {
        if (!renamed) {
            // Covers the fallback path and every failure path, so no temp file is left behind.
            rmSync(tempPath, { force: true });
        }
    }
}
|
|
68
|
+
/**
 * Checks whether a value is a non-null, non-array object.
 *
 * @param {unknown} value Value to test.
 * @returns {boolean} True for objects that are neither null nor arrays.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
71
|
+
/**
 * Returns the trimmed text of a string value, or `undefined` when the value
 * is not a string or is blank after trimming.
 *
 * @param {unknown} value Candidate value.
 * @returns {string | undefined}
 */
function asOptionalString(value) {
    if (typeof value !== 'string') {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
}
|
|
74
|
+
/**
 * Returns the value when it is a finite number, otherwise `undefined`.
 *
 * @param {unknown} value Candidate value.
 * @returns {number | undefined}
 */
function asOptionalNumber(value) {
    // Number.isFinite is false for every non-number, so no separate typeof check is needed.
    if (Number.isFinite(value)) {
        return value;
    }
    return undefined;
}
|
|
77
|
+
/**
 * Checks whether a value is one of the known job status strings.
 *
 * @param {unknown} value Candidate value.
 * @returns {boolean}
 */
function isPersistedIngestJobStatus(value) {
    switch (value) {
        case 'queued':
        case 'running':
        case 'completed':
        case 'failed':
        case 'canceled':
            return true;
        default:
            return false;
    }
}
|
|
80
|
+
/**
 * Checks whether a value is one of the known page checkpoint status strings.
 *
 * @param {unknown} value Candidate value.
 * @returns {boolean}
 */
function isPersistedIngestJobPageStatus(value) {
    switch (value) {
        case 'pending':
        case 'completed':
        case 'failed':
            return true;
        default:
            return false;
    }
}
|
|
83
|
+
/**
 * Validates and normalizes one raw page checkpoint read from disk.
 *
 * @param {unknown} value Raw checkpoint value.
 * @param {number} index Position of the checkpoint in its array (not used by the normalization).
 * @param {string} now Timestamp used when the checkpoint has no usable `updatedAt`.
 * @returns {object | null} The normalized checkpoint, or null when the value is
 *   not an object, the page number is not an integer, the status or source kind
 *   is unknown, or the weight is not a finite number.
 */
function normalizePageCheckpoint(value, index, now) {
    if (!isRecord(value)) {
        return null;
    }
    const { status } = value;
    const pageNumber = asOptionalNumber(value.pageNumber);
    const classification = asOptionalString(value.classification);
    const sourceKind = asOptionalString(value.sourceKind);
    const weight = asOptionalNumber(value.weight);
    const hasIntegerPageNumber = pageNumber != null && Number.isInteger(pageNumber);
    const hasKnownSourceKind = sourceKind === 'pdf-page' || sourceKind === 'raster-image';
    if (!hasIntegerPageNumber || !isPersistedIngestJobPageStatus(status) || weight == null || !hasKnownSourceKind) {
        return null;
    }
    // Keep only warnings that are non-blank strings; a non-array value maps to undefined.
    let ocrWarnings;
    if (Array.isArray(value.ocrWarnings)) {
        ocrWarnings = value.ocrWarnings
            .map((item) => asOptionalString(item))
            .filter((item) => Boolean(item));
    }
    return {
        pageNumber,
        classification: classification ?? null,
        sourceKind,
        weight,
        status,
        attempts: asOptionalNumber(value.attempts) ?? 0,
        updatedAt: asOptionalString(value.updatedAt) ?? now,
        completedAt: asOptionalString(value.completedAt),
        error: asOptionalString(value.error),
        downgraded: value.downgraded === true,
        ocrTextHint: asOptionalString(value.ocrTextHint),
        ocrSource: asOptionalString(value.ocrSource),
        ocrWarnings,
        result: value.result,
    };
}
|
|
121
|
+
/**
 * Validates a parsed `job.json` payload and rebuilds it as a normalized job record.
 *
 * @param {unknown} value Parsed JSON value.
 * @returns {object | null} The normalized record, or null when the payload is
 *   not a job record of the current schema version or is missing a required
 *   field (job id, known document type and status, timestamps, `source`,
 *   `config`, `processing`, `checkpoints`, and the mandatory `source` fields).
 */
function normalizePersistedIngestJobRecord(value) {
    if (!isRecord(value)) {
        return null;
    }
    if (value.kind !== 'geotech-ingest-job-record' || value.schemaVersion !== JOB_SCHEMA_VERSION) {
        return null;
    }
    const recordOr = (candidate, fallback) => (isRecord(candidate) ? candidate : fallback);
    const { status } = value;
    const jobId = asOptionalString(value.jobId);
    const documentType = asOptionalString(value.documentType);
    const createdAt = asOptionalString(value.createdAt);
    const updatedAt = asOptionalString(value.updatedAt);
    const source = recordOr(value.source, null);
    const config = recordOr(value.config, null);
    const processing = recordOr(value.processing, null);
    const checkpoints = recordOr(value.checkpoints, null);
    // `request` and `execution` are optional sections; missing ones behave like empty objects.
    const request = recordOr(value.request, {});
    const execution = recordOr(value.execution, {});
    const hasKnownDocumentType = documentType === 'borehole-log' || documentType === 'geotech-document';
    if (!jobId || !hasKnownDocumentType || !isPersistedIngestJobStatus(status) || !createdAt || !updatedAt) {
        return null;
    }
    if (!source || !config || !processing || !checkpoints) {
        return null;
    }
    const filePath = asOptionalString(source.filePath);
    const fileName = asOptionalString(source.fileName);
    const inputKind = asOptionalString(source.inputKind);
    const totalPages = asOptionalNumber(source.totalPages);
    const weightedPageCost = asOptionalNumber(source.weightedPageCost);
    if (!filePath || !fileName || inputKind !== 'pdf' || totalPages == null || weightedPageCost == null) {
        return null;
    }
    // Invalid checkpoints are dropped; the remaining ones are ordered by page number.
    const fallbackTimestamp = new Date().toISOString();
    const pages = [];
    if (Array.isArray(checkpoints.pages)) {
        checkpoints.pages.forEach((page, index) => {
            const normalizedPage = normalizePageCheckpoint(page, index, fallbackTimestamp);
            if (normalizedPage !== null) {
                pages.push(normalizedPage);
            }
        });
        pages.sort((left, right) => left.pageNumber - right.pageNumber);
    }
    let result;
    if (isRecord(value.result)) {
        const review = value.result.persistedReview;
        result = {
            ingestResult: value.result.ingestResult,
            persistedReview: isRecord(review)
                ? {
                    datasetName: asOptionalString(review.datasetName) ?? '',
                    reviewId: asOptionalString(review.reviewId) ?? '',
                    createdAt: asOptionalString(review.createdAt),
                }
                : undefined,
        };
    }
    return {
        kind: 'geotech-ingest-job-record',
        schemaVersion: JOB_SCHEMA_VERSION,
        jobId,
        documentType,
        status,
        createdAt,
        updatedAt,
        startedAt: asOptionalString(value.startedAt),
        completedAt: asOptionalString(value.completedAt),
        canceledAt: asOptionalString(value.canceledAt),
        source: {
            filePath,
            fileName,
            inputKind: 'pdf',
            totalPages,
            weightedPageCost,
        },
        config: {
            provider: asOptionalString(config.provider) ?? 'hosted-beta',
            baseUrl: asOptionalString(config.baseUrl),
            modelId: asOptionalString(config.modelId),
            visionModelId: asOptionalString(config.visionModelId),
            timeout: asOptionalNumber(config.timeout),
        },
        processing: {
            pagePreprocessingConcurrency: asOptionalNumber(processing.pagePreprocessingConcurrency) ?? PAGE_PREPROCESSING_CONCURRENCY,
            chunkExtractionConcurrency: asOptionalNumber(processing.chunkExtractionConcurrency) ?? 2,
        },
        request: {
            projectId: asOptionalString(request.projectId),
            overrideBoreholeId: asOptionalString(request.overrideBoreholeId),
            reviewTitle: asOptionalString(request.reviewTitle),
        },
        inspection: isRecord(value.inspection) ? value.inspection : null,
        execution: {
            runCount: asOptionalNumber(execution.runCount) ?? 0,
            pid: asOptionalNumber(execution.pid),
            lastHeartbeatAt: asOptionalString(execution.lastHeartbeatAt),
            lastError: asOptionalString(execution.lastError),
            cancelRequested: execution.cancelRequested === true,
        },
        checkpoints: {
            pages,
        },
        result,
    };
}
|
|
221
|
+
/**
 * Returns the processing weight of a page classification.
 *
 * @param {unknown} classification Page classification value.
 * @returns {number} 2 for `image-only` and `text-unreadable`, 1 for anything else.
 */
function pageWeightForClassification(classification) {
    switch (classification) {
        case 'image-only':
        case 'text-unreadable':
            return 2;
        default:
            return 1;
    }
}
|
|
224
|
+
/**
 * Sums the per-page weights of a PDF inspection.
 *
 * @param {{ pages: Array<{ classification: unknown }> } | null | undefined} inspection Inspection result.
 * @returns {number} Total of `pageWeightForClassification` over all pages;
 *   0 when the inspection is missing or has no pages.
 */
export function computeWeightedPdfPageCost(inspection) {
    const pages = inspection ? inspection.pages : [];
    return pages.reduce((total, page) => total + pageWeightForClassification(page.classification), 0);
}
|
|
230
|
+
/**
 * Decides whether a document is large enough to run as an asynchronous ingest job.
 *
 * @param {object | null | undefined} inspection PDF inspection, when available.
 * @param {number} [totalPagesFallback] Page count used when the inspection has no `totalPages`.
 * @returns {boolean} True when the page count or the weighted page cost exceeds 5.
 *   Without an inspection the weighted cost equals the page count.
 */
export function shouldUseAsyncIngestJob(inspection, totalPagesFallback) {
    const totalPages = inspection?.totalPages ?? totalPagesFallback ?? 0;
    let weightedPageCost = totalPages;
    if (inspection) {
        weightedPageCost = computeWeightedPdfPageCost(inspection);
    }
    const tooManyPages = totalPages > 5;
    const tooCostly = weightedPageCost > 5;
    return tooManyPages || tooCostly;
}
|
|
235
|
+
/**
 * Chooses the chunk extraction concurrency for a provider/model configuration.
 *
 * @param {{ provider: string, modelId?: string, visionModelId?: string }} config LLM settings.
 * @returns {number} 3 when the provider is not `hosted-beta` and either the
 *   provider is `zhipu` or one of the model ids (case-insensitive) contains
 *   `qwen`, `internvl`, `glm` or `qvq`; otherwise 2.
 */
export function resolvePersistedIngestJobExtractionConcurrency(config) {
    const { provider } = config;
    const visionModel = config.visionModelId?.toLowerCase() ?? '';
    const model = config.modelId?.toLowerCase() ?? '';
    if (provider === 'hosted-beta') {
        return 2;
    }
    const multimodalFamily = /qwen|internvl|glm|qvq/;
    const advertisesSafeMultimodalSupport = provider === 'zhipu'
        || multimodalFamily.test(visionModel)
        || multimodalFamily.test(model);
    if (advertisesSafeMultimodalSupport) {
        return 3;
    }
    return 2;
}
|
|
245
|
+
/**
 * Builds a new `queued` ingest job record for a document and writes it to the
 * job's `job.json` file.
 *
 * The job id is derived from the creation timestamp and the resolved file
 * name. An inspection whose `totalPages` is not greater than zero is stored
 * as null. One `pending` checkpoint is created per inspected page.
 *
 * @param {object} options Creation options (`documentType`, `filePath`,
 *   `inspection`, `config`, optional `projectId`, `overrideBoreholeId`,
 *   `reviewTitle` and `now`).
 * @returns {object} The record that was written.
 */
export function createPersistedIngestJob(options) {
    const createdAt = nowIso(options.now);
    const resolvedFilePath = resolve(options.filePath);
    const jobId = buildJobId(resolvedFilePath, createdAt);
    let inspection = null;
    if (options.inspection && options.inspection.totalPages > 0) {
        inspection = options.inspection;
    }
    const totalPages = inspection?.totalPages ?? 0;
    const weightedPageCost = inspection ? computeWeightedPdfPageCost(inspection) : totalPages;
    const { config } = options;
    // Blank or missing request strings are stored as undefined.
    const trimmedOrUndefined = (text) => text?.trim() || undefined;
    const needsRasterImage = (classification) => classification === 'image-only' || classification === 'text-unreadable';
    const toPendingCheckpoint = (page) => ({
        pageNumber: page.pageNumber,
        classification: page.classification,
        sourceKind: needsRasterImage(page.classification) ? 'raster-image' : 'pdf-page',
        weight: pageWeightForClassification(page.classification),
        status: 'pending',
        attempts: 0,
        updatedAt: createdAt,
    });
    const pages = inspection ? inspection.pages.map((page) => toPendingCheckpoint(page)) : [];
    const record = {
        kind: 'geotech-ingest-job-record',
        schemaVersion: JOB_SCHEMA_VERSION,
        jobId,
        documentType: options.documentType,
        status: 'queued',
        createdAt,
        updatedAt: createdAt,
        source: {
            filePath: resolvedFilePath,
            fileName: basename(resolvedFilePath),
            inputKind: 'pdf',
            totalPages,
            weightedPageCost,
        },
        config: {
            provider: config.provider,
            baseUrl: config.baseUrl,
            modelId: config.modelId,
            visionModelId: config.visionModelId,
            timeout: config.timeout,
        },
        processing: {
            pagePreprocessingConcurrency: PAGE_PREPROCESSING_CONCURRENCY,
            chunkExtractionConcurrency: resolvePersistedIngestJobExtractionConcurrency(config),
        },
        request: {
            projectId: trimmedOrUndefined(options.projectId),
            overrideBoreholeId: trimmedOrUndefined(options.overrideBoreholeId),
            reviewTitle: trimmedOrUndefined(options.reviewTitle),
        },
        inspection,
        execution: {
            runCount: 0,
            cancelRequested: false,
        },
        checkpoints: {
            pages,
        },
    };
    atomicWriteJson(getJobRecordPath(jobId), record);
    return record;
}
|
|
303
|
+
/**
 * Loads and validates the persisted record of a job.
 *
 * @param {string} jobId Job identifier.
 * @returns {object | null} The normalized record, or null when the record file
 *   does not exist, disappears before it can be read, does not contain
 *   parseable JSON, or fails record validation.
 * @throws File-system errors other than a missing file.
 */
export function loadPersistedIngestJob(jobId) {
    const filePath = getJobRecordPath(jobId);
    if (!existsSync(filePath)) {
        return null;
    }
    let text;
    try {
        text = readFileSync(filePath, 'utf-8');
    }
    catch (error) {
        // The file can be removed between the existence check and the read.
        if (error?.code === 'ENOENT') {
            return null;
        }
        throw error;
    }
    let raw;
    try {
        raw = JSON.parse(text);
    }
    catch (error) {
        // Treat unparseable content like any other invalid record, matching
        // the null returned by the normalizer for malformed payloads.
        if (error instanceof SyntaxError) {
            return null;
        }
        throw error;
    }
    return normalizePersistedIngestJobRecord(raw);
}
|
|
311
|
+
/**
 * Writes a job record to its `job.json` file.
 *
 * @param {object} record Job record; its `jobId` selects the target file.
 * @returns {object} The same record that was passed in.
 */
export function savePersistedIngestJob(record) {
    const { jobId } = record;
    const recordPath = getJobRecordPath(jobId);
    atomicWriteJson(recordPath, record);
    return record;
}
|
|
315
|
+
/**
 * Loads a job record, passes it through `updater`, and saves the result.
 *
 * @param {string} jobId Job identifier.
 * @param {(record: object) => object} updater Produces the record to persist.
 * @returns {object} The record returned by `updater`, after it was saved.
 * @throws {Error} When no record can be loaded for `jobId`.
 */
export function updatePersistedIngestJob(jobId, updater) {
    const current = loadPersistedIngestJob(jobId);
    if (current) {
        const next = updater(current);
        return savePersistedIngestJob(next);
    }
    throw new Error(`No persisted ingest job named "${jobId}" was found.`);
}
|
|
323
|
+
/**
 * Mark a job as failed and record the failure message.
 *
 * Records that are already in a terminal status are written back unchanged.
 *
 * @param {string} jobId - Identifier of the job to fail.
 * @param message - Value stored as `execution.lastError`.
 * @param [options] - Optional settings; `options.now` is forwarded to `nowIso`.
 * @returns The saved record.
 * @throws {Error} When no record exists for `jobId`.
 */
export function failPersistedIngestJob(jobId, message, options) {
    const markFailed = (record) => {
        if (isPersistedIngestJobTerminalStatus(record.status)) {
            return record;
        }
        const failedAt = nowIso(options?.now);
        const execution = {
            ...record.execution,
            // The pid is cleared so later liveness checks do not probe it.
            pid: undefined,
            lastHeartbeatAt: failedAt,
            lastError: message,
        };
        return { ...record, status: 'failed', updatedAt: failedAt, execution };
    };
    return updatePersistedIngestJob(jobId, markFailed);
}
|
|
342
|
+
/**
 * Report whether a job status is one of the terminal states.
 *
 * @param status - Status value to classify.
 * @returns {boolean} `true` for `'completed'`, `'failed'` or `'canceled'`;
 *   `false` for anything else.
 */
export function isPersistedIngestJobTerminalStatus(status) {
    switch (status) {
        case 'completed':
        case 'failed':
        case 'canceled':
            return true;
        default:
            return false;
    }
}
|
|
345
|
+
/**
 * Probe whether a process with the given pid can currently be signalled.
 *
 * @param pid - Process id read from a persisted job record; may be absent.
 * @returns {boolean} `true` when `process.kill(pid, 0)` succeeds for a
 *   positive integer pid; `false` for missing or invalid pids and whenever
 *   the probe throws.
 */
export function isPersistedIngestJobProcessAlive(pid) {
    // Only a positive integer names a single process. With kill(2) semantics,
    // pid 0 addresses the caller's own process group and negative pids address
    // groups or every signalable process, so probing them would report "alive"
    // and let callers go on to signal the wrong targets.
    if (!Number.isInteger(pid) || pid <= 0) {
        return false;
    }
    try {
        // Signal 0 performs the existence/permission check without delivering a signal.
        process.kill(pid, 0);
        return true;
    }
    catch {
        return false;
    }
}
|
|
357
|
+
/**
 * Compute the heartbeat age beyond which a job is treated as stale.
 *
 * The result is six times the configured timeout (negative values are
 * clamped to zero, a missing value counts as zero), but never less than
 * `STALE_HEARTBEAT_FLOOR_MS`.
 *
 * @param record - Job record; reads `record.config.timeout`
 *   (presumably milliseconds — confirm against the config schema).
 * @returns {number} Staleness threshold.
 */
function resolvePersistedIngestJobStaleHeartbeatMs(record) {
    const rawTimeout = record.config.timeout ?? 0;
    const nonNegativeTimeout = Math.max(0, rawTimeout);
    const scaledTimeout = nonNegativeTimeout * 6;
    return Math.max(STALE_HEARTBEAT_FLOOR_MS, scaledTimeout);
}
|
|
361
|
+
/**
 * Decide whether a job's last heartbeat is older than its staleness threshold.
 *
 * @param record - Job record; reads `record.execution.lastHeartbeatAt`.
 * @param [options] - Optional settings; `options.now()` supplies the current
 *   `Date` when provided, otherwise `new Date()` is used.
 * @returns {boolean} `false` when the heartbeat is missing or cannot be parsed
 *   by `Date.parse`; otherwise whether the elapsed time exceeds the threshold
 *   from `resolvePersistedIngestJobStaleHeartbeatMs`.
 */
function isPersistedIngestJobHeartbeatStale(record, options) {
    const { lastHeartbeatAt } = record.execution;
    // A missing heartbeat is treated the same as an unparseable one: not stale.
    const heartbeatMs = lastHeartbeatAt ? Date.parse(lastHeartbeatAt) : Number.NaN;
    if (!Number.isFinite(heartbeatMs)) {
        return false;
    }
    const currentDate = options?.now?.() ?? new Date();
    const elapsedMs = currentDate.getTime() - heartbeatMs;
    return elapsedMs > resolvePersistedIngestJobStaleHeartbeatMs(record);
}
|
|
372
|
+
/**
 * Resolve the filesystem path of `ingest-job-child.js`, located next to this module.
 *
 * @returns {string} Path converted from the module-relative file URL.
 */
function getIngestJobChildScriptPath() {
    const childScriptUrl = new URL('./ingest-job-child.js', import.meta.url);
    return fileURLToPath(childScriptUrl);
}
|
|
375
|
+
/**
 * Launch the detached child runner for a persisted ingest job.
 *
 * A completed job, or a running job whose process is alive with a fresh
 * heartbeat, is returned unchanged. A running job whose process is alive but
 * whose heartbeat is stale is signalled with `process.kill` before a new
 * child is spawned.
 *
 * @param {string} jobId - Identifier of the job to start.
 * @param [options] - Optional settings; `options.now` is forwarded to `nowIso`
 *   and to the heartbeat staleness check.
 * @returns The job record as last saved by this call (or the unchanged
 *   record when nothing had to be started).
 * @throws {Error} When no record exists for `jobId`, or when the child runner
 *   script is missing on disk.
 */
export function startPersistedIngestJob(jobId, options) {
    const current = loadPersistedIngestJob(jobId);
    if (!current) {
        throw new Error(`No persisted ingest job named "${jobId}" was found.`);
    }
    if (current.status === 'completed') {
        return current;
    }
    const alive = isPersistedIngestJobProcessAlive(current.execution.pid);
    const staleHeartbeat = isPersistedIngestJobHeartbeatStale(current, options);
    if (current.status === 'running' && alive && !staleHeartbeat) {
        return current;
    }
    if (current.status === 'running' && alive && staleHeartbeat && current.execution.pid) {
        try {
            process.kill(current.execution.pid);
        }
        catch {
            // no-op: if the process exits between checks we can proceed normally.
        }
    }
    const childScript = getIngestJobChildScriptPath();
    if (!existsSync(childScript)) {
        throw new Error(`Ingest job child runner was not found at ${childScript}. Build @geotechcli/core before launching background ingest jobs.`);
    }
    const timestamp = nowIso(options?.now);
    // Persist the "running" state and bump runCount before spawning, so the
    // follow-up writes below can tell whether they still belong to this run.
    const primed = updatePersistedIngestJob(jobId, (record) => ({
        ...record,
        status: 'running',
        startedAt: record.startedAt ?? timestamp,
        updatedAt: timestamp,
        canceledAt: undefined,
        execution: {
            ...record.execution,
            runCount: record.execution.runCount + 1,
            pid: undefined,
            lastHeartbeatAt: timestamp,
            lastError: undefined,
            cancelRequested: false,
        },
    }));
    const child = spawn(process.execPath, [childScript, jobId], {
        detached: true,
        stdio: 'ignore',
        windowsHide: true,
    });
    // A failed spawn is reported asynchronously through the 'error' event.
    // Without a listener that event is thrown as an uncaught exception and the
    // record would stay "running" with no pid; record the failure instead.
    child.once('error', (error) => {
        try {
            updatePersistedIngestJob(jobId, (record) => {
                if (isPersistedIngestJobTerminalStatus(record.status)
                    || record.execution.runCount !== primed.execution.runCount) {
                    return record;
                }
                const failedAt = nowIso(options?.now);
                return {
                    ...record,
                    status: 'failed',
                    updatedAt: failedAt,
                    execution: {
                        ...record.execution,
                        pid: undefined,
                        lastHeartbeatAt: failedAt,
                        lastError: `Ingest job child process error: ${error?.message ?? String(error)}`,
                    },
                };
            });
        }
        catch {
            // best-effort: the record may have been deleted in the meantime.
        }
    });
    child.unref();
    return updatePersistedIngestJob(jobId, (record) => {
        // Leave the record alone if the job already finished or a newer run took over.
        if (isPersistedIngestJobTerminalStatus(record.status)
            || record.execution.runCount !== primed.execution.runCount) {
            return record;
        }
        return {
            ...record,
            updatedAt: nowIso(options?.now),
            execution: {
                ...record.execution,
                pid: child.pid,
                lastHeartbeatAt: record.execution.lastHeartbeatAt ?? timestamp,
            },
        };
    });
}
|
|
438
|
+
/**
 * Create a persisted ingest job and immediately start it.
 *
 * @param options - Passed unchanged to `createPersistedIngestJob`; only
 *   `options.now` is forwarded to `startPersistedIngestJob`.
 * @returns The record returned by `startPersistedIngestJob`.
 */
export function createAndStartPersistedIngestJob(options) {
    const { jobId } = createPersistedIngestJob(options);
    const startOptions = { now: options.now };
    return startPersistedIngestJob(jobId, startOptions);
}
|
|
442
|
+
/**
 * Re-queue a persisted ingest job and start it again.
 *
 * A completed job, or a running job whose process is alive with a fresh
 * heartbeat, is returned unchanged. A running job whose process is alive but
 * whose heartbeat is stale is signalled with `process.kill` first.
 *
 * @param {string} jobId - Identifier of the job to resume.
 * @param [options] - Optional settings; `options.now` is forwarded to `nowIso`,
 *   to the staleness check and to `startPersistedIngestJob`.
 * @returns The record returned by `startPersistedIngestJob`, or the unchanged
 *   record when nothing had to be resumed.
 * @throws {Error} When no record exists for `jobId`.
 */
export function resumePersistedIngestJob(jobId, options) {
    const snapshot = loadPersistedIngestJob(jobId);
    if (!snapshot) {
        throw new Error(`No persisted ingest job named "${jobId}" was found.`);
    }
    if (snapshot.status === 'completed') {
        return snapshot;
    }
    const processAlive = isPersistedIngestJobProcessAlive(snapshot.execution.pid);
    const heartbeatStale = isPersistedIngestJobHeartbeatStale(snapshot, options);
    const runningWithLiveProcess = snapshot.status === 'running' && processAlive;
    if (runningWithLiveProcess) {
        if (!heartbeatStale) {
            return snapshot;
        }
        if (snapshot.execution.pid) {
            try {
                process.kill(snapshot.execution.pid);
            }
            catch {
                // no-op: if the process exits between checks we can proceed normally.
            }
        }
    }
    // Reset the record to a clean queued state before handing over to the starter.
    const requeue = (record) => ({
        ...record,
        status: 'queued',
        updatedAt: nowIso(options?.now),
        canceledAt: undefined,
        execution: {
            ...record.execution,
            cancelRequested: false,
            lastError: undefined,
            pid: undefined,
        },
    });
    updatePersistedIngestJob(jobId, requeue);
    return startPersistedIngestJob(jobId, options);
}
|
|
477
|
+
/**
 * Cancel a persisted ingest job.
 *
 * If the recorded pid is alive it is signalled with `process.kill` (default
 * signal), then the record is marked `'canceled'`. A completed job is
 * returned unchanged.
 *
 * @param {string} jobId - Identifier of the job to cancel.
 * @param [options] - Optional settings; `options.now` is forwarded to `nowIso`.
 * @returns The saved record.
 * @throws {Error} When no record exists for `jobId`.
 */
export function cancelPersistedIngestJob(jobId, options) {
    const current = loadPersistedIngestJob(jobId);
    if (!current) {
        throw new Error(`No persisted ingest job named "${jobId}" was found.`);
    }
    if (current.status === 'completed') {
        return current;
    }
    if (isPersistedIngestJobProcessAlive(current.execution.pid)) {
        try {
            process.kill(current.execution.pid);
        }
        catch {
            // no-op: the persisted state still reflects cancellation
        }
    }
    return updatePersistedIngestJob(jobId, (record) => {
        // The record is re-read here, so the job may have completed since the
        // first check above; a completed job must not be relabelled as canceled.
        if (record.status === 'completed') {
            return record;
        }
        const timestamp = nowIso(options?.now);
        return {
            ...record,
            status: 'canceled',
            updatedAt: timestamp,
            canceledAt: timestamp,
            execution: {
                ...record.execution,
                pid: undefined,
                cancelRequested: true,
            },
        };
    });
}
|
|
508
|
+
/**
 * Poll a persisted ingest job until it reaches a terminal status.
 *
 * @param {string} jobId - Identifier of the job to wait for.
 * @param [options] - Optional settings:
 *   `pollMs` is the delay between polls (default 1000, minimum 250);
 *   `timeoutMs` is the maximum wait, where 0 or absent disables the timeout.
 * @returns {Promise} Resolves with the record once its status is terminal.
 * @throws {Error} When the record is missing, when a running job's recorded
 *   process is no longer alive, when its heartbeat is stale, or when the
 *   timeout elapses.
 */
export async function waitForPersistedIngestJob(jobId, options) {
    const intervalMs = Math.max(250, options?.pollMs ?? 1000);
    const deadlineMs = Math.max(0, options?.timeoutMs ?? 0);
    const waitStartedAt = Date.now();
    const pause = () => new Promise((wake) => setTimeout(wake, intervalMs));
    for (;;) {
        const snapshot = loadPersistedIngestJob(jobId);
        if (!snapshot) {
            throw new Error(`No persisted ingest job named "${jobId}" was found.`);
        }
        if (isPersistedIngestJobTerminalStatus(snapshot.status)) {
            return snapshot;
        }
        const { pid } = snapshot.execution;
        // Liveness and heartbeat are only checked once a pid has been recorded.
        if (snapshot.status === 'running' && pid) {
            if (!isPersistedIngestJobProcessAlive(pid)) {
                throw new Error(`Persisted ingest job "${jobId}" is no longer running. Resume it with geotech ingest resume ${jobId}.`);
            }
            if (isPersistedIngestJobHeartbeatStale(snapshot)) {
                throw new Error(`Persisted ingest job "${jobId}" stopped heartbeating and may be wedged. Resume it with geotech ingest resume ${jobId}.`);
            }
        }
        const waitedMs = Date.now() - waitStartedAt;
        if (deadlineMs > 0 && waitedMs > deadlineMs) {
            throw new Error(`Timed out while waiting for persisted ingest job "${jobId}".`);
        }
        await pause();
    }
}
|
|
532
|
+
/**
 * Fetch only the `result` field of a persisted ingest job.
 *
 * @param {string} jobId - Identifier of the job to read.
 * @returns The record's `result`, or `null` when the record does not exist
 *   or has no result.
 */
export function loadPersistedIngestJobResult(jobId) {
    const job = loadPersistedIngestJob(jobId);
    if (job == null) {
        return null;
    }
    return job.result ?? null;
}
|
|
535
|
+
/**
 * Remove a job's directory and everything in it, if the directory exists.
 *
 * @param {string} jobId - Identifier used to resolve the job directory.
 */
export function deletePersistedIngestJob(jobId) {
    const jobDir = getJobDir(jobId);
    if (!existsSync(jobDir)) {
        return;
    }
    rmSync(jobDir, { recursive: true, force: true });
}
|
|
541
|
+
//# sourceMappingURL=job-store.js.map
|