@malloy-publisher/server 0.0.198-dev1 → 0.0.198-dev3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +12 -22
- package/dist/instrumentation.mjs +57 -36
- package/dist/server.mjs +2259 -3180
- package/dist/service/schema_worker.mjs +61 -0
- package/package.json +2 -3
- package/src/health.ts +0 -13
- package/src/instrumentation.ts +50 -0
- package/src/server.ts +5 -0
- package/src/service/environment_store.ts +33 -3
- package/src/service/model.ts +3 -226
- package/src/service/package.spec.ts +11 -7
- package/src/service/package.ts +49 -53
- package/src/service/process_stats_reporter.ts +169 -0
- package/src/service/schema_worker.ts +123 -0
- package/src/service/schema_worker_pool.ts +287 -0
- package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +235 -0
- package/dist/compile_worker.mjs +0 -628
- package/src/compile/compile_pool.spec.ts +0 -227
- package/src/compile/compile_pool.ts +0 -729
- package/src/compile/compile_worker.ts +0 -683
- package/src/compile/protocol.ts +0 -251
- package/src/service/model_worker_path.spec.ts +0 -125
|
package/dist/service/schema_worker.mjs
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// src/service/schema_worker.ts
|
|
2
|
+
import { DuckDBConnection } from "@malloydata/db-duckdb";
|
|
3
|
+
import"@malloydata/db-duckdb/native";
|
|
4
|
+
import {
|
|
5
|
+
ConnectionRuntime,
|
|
6
|
+
EmptyURLReader
|
|
7
|
+
} from "@malloydata/malloy";
|
|
8
|
+
import * as path from "path";
|
|
9
|
+
import { parentPort } from "worker_threads";
|
|
10
|
+
// This module only makes sense inside a worker thread: without a parent
// port there is nobody to receive requests from or reply to.
if (!parentPort) {
  throw new Error("schema_worker.ts loaded outside a worker thread");
}

// Single in-memory DuckDB connection shared by every request this worker
// handles, limited to one DuckDB thread and 256MB of memory.
var connection = new DuckDBConnection({
  name: "duckdb",
  databasePath: ":memory:",
  threads: 1,
  memoryLimit: "256MB"
});
|
|
19
|
+
/**
 * Introspects one database file inside a package and reports its schema.
 *
 * Builds a throwaway Malloy model whose only source (`temp`) is a DuckDB
 * table over the file, reads the column names/types from the compiled model,
 * then runs a `count()` query to obtain the row count.
 *
 * @param req Request message from the parent thread. The fields read here are
 *   `id`, `packagePath` and `databasePath`.
 * @returns On success `{ id, ok: true, result: { name, rowCount, columns } }`;
 *   on any failure `{ id, ok: false, error: { message, stack } }`. Every
 *   failure is reported through the return value rather than by rejecting.
 */
async function handleRequest(req) {
  try {
    const fullPath = path.join(req.packagePath, req.databasePath);
    // Use forward slashes so Windows separators are not read as escapes.
    const normalizedPath = fullPath.replace(/\\/g, "/");
    // The path is embedded in a single-quoted Malloy string literal below;
    // escape embedded quotes so a file name such as "o'brien.csv" cannot
    // terminate the literal early. No backslashes remain at this point, so
    // the escape cannot be doubled.
    const quotedPath = normalizedPath.replace(/'/g, "\\'");
    const runtime = new ConnectionRuntime({
      urlReader: new EmptyURLReader(),
      connections: [connection]
    });
    const model = runtime.loadModel(
      `source: temp is duckdb.table('${quotedPath}')`
    );
    const modelDef = await model.getModel();
    const fields = modelDef._modelDef.contents["temp"].fields;
    const columns = fields.map((field) => ({
      type: String(field.type),
      name: field.name
    }));
    const runner = model.loadQuery(
      "run: temp->{aggregate: row_count is count()}"
    );
    const result = await runner.run();
    const rowCount = result.data.value[0].row_count?.valueOf();
    return {
      id: req.id,
      ok: true,
      result: { name: req.databasePath, rowCount, columns }
    };
  } catch (err) {
    const error = err instanceof Error ? err : new Error(String(err));
    return {
      // `req` itself may be null/undefined (malformed message); reading
      // `req.id` directly would throw again from inside the catch and
      // surface as an unhandled rejection in the message listener.
      id: req?.id,
      ok: false,
      error: { message: error.message, stack: error.stack }
    };
  }
}
|
|
51
|
+
// Each incoming message is one request; reply with whatever handleRequest
// produced for it.
parentPort.on("message", async (request) => {
  const reply = await handleRequest(request);
  parentPort.postMessage(reply);
});

// Close the DuckDB connection on a best-effort basis (errors are ignored),
// then exit with status 0 regardless of whether the close succeeded.
var shutdown = async () => {
  try {
    await connection.close();
  } catch {
    // Ignored on purpose: we are exiting either way.
  } finally {
    process.exit(0);
  }
};

// Shut down once the port to the parent has closed.
parentPort.on("close", () => {
  void shutdown();
});
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@malloy-publisher/server",
|
|
3
3
|
"description": "Malloy Publisher Server",
|
|
4
|
-
"version": "0.0.198-dev1",
|
|
4
|
+
"version": "0.0.198-dev3",
|
|
5
5
|
"main": "dist/server.mjs",
|
|
6
6
|
"bin": {
|
|
7
7
|
"malloy-publisher": "dist/server.mjs"
|
|
@@ -51,7 +51,6 @@
|
|
|
51
51
|
"@opentelemetry/sdk-metrics": "^2.0.0",
|
|
52
52
|
"@opentelemetry/sdk-node": "^0.200.0",
|
|
53
53
|
"@opentelemetry/sdk-trace-node": "^2.0.0",
|
|
54
|
-
"adm-zip": "^0.5.16",
|
|
55
54
|
"async-mutex": "^0.5.0",
|
|
56
55
|
"aws-sdk": "^2.1692.0",
|
|
57
56
|
"body-parser": "^1.20.2",
|
|
@@ -61,6 +60,7 @@
|
|
|
61
60
|
"cors": "^2.8.5",
|
|
62
61
|
"duckdb": "1.4.4",
|
|
63
62
|
"express": "^4.21.0",
|
|
63
|
+
"extract-zip": "^2.0.1",
|
|
64
64
|
"globals": "^15.9.0",
|
|
65
65
|
"handlebars": "^4.7.8",
|
|
66
66
|
"http-proxy-middleware": "^3.0.5",
|
|
@@ -76,7 +76,6 @@
|
|
|
76
76
|
"@eslint/eslintrc": "^3.3.1",
|
|
77
77
|
"@eslint/js": "^9.23.0",
|
|
78
78
|
"@faker-js/faker": "^9.4.0",
|
|
79
|
-
"@types/adm-zip": "^0.5.7",
|
|
80
79
|
"@types/bun": "^1.2.20",
|
|
81
80
|
"@types/cors": "^2.8.12",
|
|
82
81
|
"@types/express": "^4.17.14",
|
package/src/health.ts
CHANGED
|
@@ -143,19 +143,6 @@ export async function performGracefulShutdownAfterDrain(
|
|
|
143
143
|
/* do nothing */
|
|
144
144
|
}
|
|
145
145
|
|
|
146
|
-
try {
|
|
147
|
-
// Drain in-flight compiles and terminate worker_threads before
|
|
148
|
-
// we exit so a slow compile doesn't leave orphan worker
|
|
149
|
-
// processes. Lazy-imported to avoid pulling the pool module
|
|
150
|
-
// into the health.ts dep graph for tests that don't exercise
|
|
151
|
-
// the compile path.
|
|
152
|
-
const { getCompilePool } = await import("./compile/compile_pool");
|
|
153
|
-
await getCompilePool().shutdown();
|
|
154
|
-
logger.info("Malloy compile worker pool shut down");
|
|
155
|
-
} catch (_error) {
|
|
156
|
-
/* do nothing */
|
|
157
|
-
}
|
|
158
|
-
|
|
159
146
|
if (shutdownGracefulCloseTimeoutSeconds > 0) {
|
|
160
147
|
logger.info(
|
|
161
148
|
`Waiting ${shutdownGracefulCloseTimeoutSeconds} seconds after server close before exit...`,
|
package/src/instrumentation.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { monitorEventLoopDelay } from "node:perf_hooks";
|
|
1
2
|
import { metrics } from "@opentelemetry/api";
|
|
2
3
|
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
3
4
|
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto";
|
|
@@ -116,6 +117,55 @@ const httpRequestCount = meter.createCounter("http_server_requests_total", {
|
|
|
116
117
|
description: "Total number of HTTP requests",
|
|
117
118
|
});
|
|
118
119
|
|
|
120
|
+
// Event-loop-delay metrics. The /health/liveness probe is a pure synchronous
// 200 handler, so under K8s it can only fail when the event loop is blocked.
// Exposing p50/p99/max lets an operator line up liveness restarts with
// sustained event-loop pressure (large Malloy compiles, GC, etc.).
const eventLoopHistogram = monitorEventLoopDelay({ resolution: 20 });
eventLoopHistogram.enable();

/** Creates one millisecond gauge for a single event-loop-delay statistic. */
const createEventLoopLagGauge = (name: string, statistic: string) =>
   meter.createObservableGauge(name, {
      description: `Event loop delay ${statistic} since the last scrape, in milliseconds`,
      unit: "ms",
   });

const eventLoopLagP50 = createEventLoopLagGauge(
   "publisher_event_loop_lag_p50_ms",
   "p50",
);
const eventLoopLagP99 = createEventLoopLagGauge(
   "publisher_event_loop_lag_p99_ms",
   "p99",
);
const eventLoopLagMax = createEventLoopLagGauge(
   "publisher_event_loop_lag_max_ms",
   "max",
);

/** Scales a raw histogram reading down by 1e6 to match the gauges' "ms" unit. */
const toMilliseconds = (raw: number): number => raw / 1e6;

// All three gauges are read in a single batch callback, and the histogram is
// reset only after the last read, so the reset can't race the reads.
meter.addBatchObservableCallback(
   (observableResult) => {
      const p50 = toMilliseconds(eventLoopHistogram.percentile(50));
      const p99 = toMilliseconds(eventLoopHistogram.percentile(99));
      const max = toMilliseconds(eventLoopHistogram.max);
      observableResult.observe(eventLoopLagP50, p50);
      observableResult.observe(eventLoopLagP99, p99);
      observableResult.observe(eventLoopLagMax, max);
      eventLoopHistogram.reset();
   },
   [eventLoopLagP50, eventLoopLagP99, eventLoopLagMax],
);
|
|
168
|
+
|
|
119
169
|
const IGNORED_PATHS = new Set([
|
|
120
170
|
"/health",
|
|
121
171
|
"/health/liveness",
|
package/src/server.ts
CHANGED
|
@@ -43,6 +43,7 @@ import { EnvironmentStore } from "./service/environment_store";
|
|
|
43
43
|
import { ManifestService } from "./service/manifest_service";
|
|
44
44
|
import { MaterializationService } from "./service/materialization_service";
|
|
45
45
|
import { PackageMemoryGovernor } from "./service/package_memory_governor";
|
|
46
|
+
import { ProcessStatsReporter } from "./service/process_stats_reporter";
|
|
46
47
|
|
|
47
48
|
/** Normalize an Express query param into a string[] or undefined. */
|
|
48
49
|
export function normalizeQueryArray(value: unknown): string[] | undefined {
|
|
@@ -172,6 +173,10 @@ const memoryGovernor = memoryGovernorConfig
|
|
|
172
173
|
: null;
|
|
173
174
|
memoryGovernor?.start();
|
|
174
175
|
environmentStore.setMemoryGovernor(memoryGovernor);
|
|
176
|
+
// Always-on process-stats heartbeat so we can correlate RSS / thread
|
|
177
|
+
// counts / heap usage with traffic in prod. Logs every 30s at info.
|
|
178
|
+
const processStatsReporter = new ProcessStatsReporter(memoryGovernor);
|
|
179
|
+
processStatsReporter.start();
|
|
175
180
|
const packageController = new PackageController(
|
|
176
181
|
environmentStore,
|
|
177
182
|
manifestService,
|
|
package/src/service/environment_store.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { GetObjectCommand, S3 } from "@aws-sdk/client-s3";
|
|
2
2
|
import { Storage } from "@google-cloud/storage";
|
|
3
|
-
import AdmZip from "adm-zip";
|
|
4
3
|
import { Mutex } from "async-mutex";
|
|
5
4
|
import crypto from "crypto";
|
|
5
|
+
import extract from "extract-zip";
|
|
6
6
|
import * as fs from "fs";
|
|
7
7
|
import * as path from "path";
|
|
8
8
|
import simpleGit from "simple-git";
|
|
@@ -703,6 +703,15 @@ export class EnvironmentStore {
|
|
|
703
703
|
}
|
|
704
704
|
|
|
705
705
|
public async getStatus() {
|
|
706
|
+
const memoryGovernorStatus = this.memoryGovernor?.getStatus() ?? null;
|
|
707
|
+
// Log every /status hit so we have a trace of RSS / back-pressure
|
|
708
|
+
// state to correlate against pod OOMs and request-driven leaks.
|
|
709
|
+
// Logged at info so it shows up in prod without LOG_LEVEL changes;
|
|
710
|
+
// the endpoint is low-frequency (monitoring/UI), so volume is fine.
|
|
711
|
+
logger.info("Memory governor status", {
|
|
712
|
+
memoryGovernor: memoryGovernorStatus,
|
|
713
|
+
});
|
|
714
|
+
|
|
706
715
|
const status = {
|
|
707
716
|
timestamp: Date.now(),
|
|
708
717
|
environments: [] as Array<components["schemas"]["Environment"]>,
|
|
@@ -884,6 +893,7 @@ export class EnvironmentStore {
|
|
|
884
893
|
}
|
|
885
894
|
|
|
886
895
|
public async unzipEnvironment(absoluteEnvironmentPath: string) {
|
|
896
|
+
const startedAt = Date.now();
|
|
887
897
|
logger.info(
|
|
888
898
|
`Detected zip file at "${absoluteEnvironmentPath}". Unzipping...`,
|
|
889
899
|
);
|
|
@@ -897,8 +907,28 @@ export class EnvironmentStore {
|
|
|
897
907
|
});
|
|
898
908
|
await fs.promises.mkdir(unzippedEnvironmentPath, { recursive: true });
|
|
899
909
|
|
|
900
|
-
|
|
901
|
-
|
|
910
|
+
// Stream-extract via yauzl (wrapped by extract-zip). Each entry's
|
|
911
|
+
// inflate and write are dispatched to the libuv thread pool, so the
|
|
912
|
+
// main event loop stays responsive even for very large archives.
|
|
913
|
+
// The previous adm-zip path used fs.readFileSync + zlib.inflateRawSync
|
|
914
|
+
// on the main thread, which parked the loop long enough on multi-
|
|
915
|
+
// hundred-MB packages to fail Kubernetes liveness probes mid-extract.
|
|
916
|
+
let entryCount = 0;
|
|
917
|
+
let totalUncompressedBytes = 0;
|
|
918
|
+
await extract(absoluteEnvironmentPath, {
|
|
919
|
+
dir: path.resolve(unzippedEnvironmentPath),
|
|
920
|
+
onEntry: (entry) => {
|
|
921
|
+
entryCount += 1;
|
|
922
|
+
totalUncompressedBytes += entry.uncompressedSize ?? 0;
|
|
923
|
+
},
|
|
924
|
+
});
|
|
925
|
+
|
|
926
|
+
const mib = (totalUncompressedBytes / (1024 * 1024)).toFixed(1);
|
|
927
|
+
logger.info(
|
|
928
|
+
`Unzipped "${absoluteEnvironmentPath}" -> "${unzippedEnvironmentPath}" ` +
|
|
929
|
+
`(${entryCount} entries, ${mib} MiB uncompressed) in ` +
|
|
930
|
+
`${formatDuration(Date.now() - startedAt)}`,
|
|
931
|
+
);
|
|
902
932
|
|
|
903
933
|
return unzippedEnvironmentPath;
|
|
904
934
|
}
|
package/src/service/model.ts
CHANGED
|
@@ -29,7 +29,6 @@ import * as fs from "fs/promises";
|
|
|
29
29
|
import { createRequire } from "module";
|
|
30
30
|
import * as path from "path";
|
|
31
31
|
import { components } from "../api";
|
|
32
|
-
import { getCompilePool } from "../compile/compile_pool";
|
|
33
32
|
import {
|
|
34
33
|
MODEL_FILE_SUFFIX,
|
|
35
34
|
NOTEBOOK_FILE_SUFFIX,
|
|
@@ -141,31 +140,12 @@ interface RunnableNotebookCell {
|
|
|
141
140
|
queryInfo?: Malloy.QueryInfo;
|
|
142
141
|
}
|
|
143
142
|
|
|
144
|
-
/**
|
|
145
|
-
* Lazily produces a `ModelMaterializer` on demand. Used by the worker-
|
|
146
|
-
* compile path: the worker returns a fully-built `modelDef` but cannot
|
|
147
|
-
* ship the materializer (it binds to a Runtime that holds live native
|
|
148
|
-
* connection handles and would not survive a structured-clone). The
|
|
149
|
-
* first query that actually needs to execute calls this builder,
|
|
150
|
-
* which constructs the materializer in-process. After construction
|
|
151
|
-
* Malloy caches the compiled model internally on the materializer,
|
|
152
|
-
* so subsequent queries pay no recompile cost.
|
|
153
|
-
*/
|
|
154
|
-
type MaterializerBuilder = () => Promise<ModelMaterializer>;
|
|
155
|
-
|
|
156
143
|
export class Model {
|
|
157
144
|
private packageName: string;
|
|
158
145
|
private modelPath: string;
|
|
159
146
|
private dataStyles: DataStyles;
|
|
160
147
|
private modelType: ModelType;
|
|
161
148
|
private modelMaterializer: ModelMaterializer | undefined;
|
|
162
|
-
/**
|
|
163
|
-
* Lazy builder used when the model was compiled in a worker_threads
|
|
164
|
-
* worker. The first `getQueryResults`/`executeNotebookCell` call
|
|
165
|
-
* invokes this and caches the result in `modelMaterializer`.
|
|
166
|
-
*/
|
|
167
|
-
private materializerBuilder: MaterializerBuilder | undefined;
|
|
168
|
-
private materializerBuildPromise: Promise<ModelMaterializer> | undefined;
|
|
169
149
|
private modelDef: ModelDef | undefined;
|
|
170
150
|
private modelInfo: Malloy.ModelInfo | undefined;
|
|
171
151
|
private sources: ApiSource[] | undefined;
|
|
@@ -179,8 +159,6 @@ export class Model {
|
|
|
179
159
|
* `Model.givens` already collapses inheritance; we just stash the list
|
|
180
160
|
* for surfacing on the compiled-model response. */
|
|
181
161
|
private givens: ApiGiven[] | undefined;
|
|
182
|
-
/** Cached responses from `getStandardModel()` so we don't re-stringify a multi-MB modelDef on every GET. */
|
|
183
|
-
private cachedStandardModel: ApiCompiledModel | undefined;
|
|
184
162
|
private meter = metrics.getMeter("publisher");
|
|
185
163
|
private queryExecutionHistogram = this.meter.createHistogram(
|
|
186
164
|
"malloy_model_query_duration",
|
|
@@ -205,7 +183,6 @@ export class Model {
|
|
|
205
183
|
compilationError: MalloyError | Error | undefined,
|
|
206
184
|
filterMap?: Map<string, FilterDefinition[]>,
|
|
207
185
|
givens?: ApiGiven[],
|
|
208
|
-
materializerBuilder?: MaterializerBuilder,
|
|
209
186
|
) {
|
|
210
187
|
this.packageName = packageName;
|
|
211
188
|
this.modelPath = modelPath;
|
|
@@ -213,7 +190,6 @@ export class Model {
|
|
|
213
190
|
this.modelType = modelType;
|
|
214
191
|
this.modelDef = modelDef;
|
|
215
192
|
this.modelMaterializer = modelMaterializer;
|
|
216
|
-
this.materializerBuilder = materializerBuilder;
|
|
217
193
|
this.sources = sources;
|
|
218
194
|
this.queries = queries;
|
|
219
195
|
this.sourceInfos = sourceInfos;
|
|
@@ -226,28 +202,6 @@ export class Model {
|
|
|
226
202
|
: undefined;
|
|
227
203
|
}
|
|
228
204
|
|
|
229
|
-
/**
|
|
230
|
-
* Resolve the in-process `ModelMaterializer`, building it lazily if
|
|
231
|
-
* the model was compiled in a worker_threads worker. Memoizes both
|
|
232
|
-
* the materializer and the in-flight build promise so concurrent
|
|
233
|
-
* queries on the same model share a single construction.
|
|
234
|
-
*/
|
|
235
|
-
private async ensureMaterializer(): Promise<ModelMaterializer> {
|
|
236
|
-
if (this.modelMaterializer) return this.modelMaterializer;
|
|
237
|
-
if (!this.materializerBuilder) {
|
|
238
|
-
throw new BadRequestError("Model has no queryable entities.");
|
|
239
|
-
}
|
|
240
|
-
if (!this.materializerBuildPromise) {
|
|
241
|
-
this.materializerBuildPromise = this.materializerBuilder().then(
|
|
242
|
-
(mm) => {
|
|
243
|
-
this.modelMaterializer = mm;
|
|
244
|
-
return mm;
|
|
245
|
-
},
|
|
246
|
-
);
|
|
247
|
-
}
|
|
248
|
-
return this.materializerBuildPromise;
|
|
249
|
-
}
|
|
250
|
-
|
|
251
205
|
/**
|
|
252
206
|
* Get the parsed filter definitions for a given source name.
|
|
253
207
|
* Returns an empty array if no filters are declared.
|
|
@@ -273,158 +227,6 @@ export class Model {
|
|
|
273
227
|
modelPath: string,
|
|
274
228
|
malloyConfig: ModelConnectionInput,
|
|
275
229
|
options?: { buildManifest?: BuildManifest["entries"] },
|
|
276
|
-
): Promise<Model> {
|
|
277
|
-
// Worker-pool fast path for plain `.malloy` files. Notebooks
|
|
278
|
-
// stay in-process for v1 — their per-cell ModelMaterializer
|
|
279
|
-
// chain is too entangled to ship across a worker boundary.
|
|
280
|
-
// The MALLOY_COMPILE_WORKERS=0 kill switch / pool.enabled check
|
|
281
|
-
// funnels everything through the legacy in-process path when
|
|
282
|
-
// the pool is disabled, so this is safe to land dark.
|
|
283
|
-
const pool = getCompilePool();
|
|
284
|
-
if (pool.enabled && modelPath.endsWith(MODEL_FILE_SUFFIX)) {
|
|
285
|
-
try {
|
|
286
|
-
return await Model.createViaWorker(
|
|
287
|
-
packageName,
|
|
288
|
-
packagePath,
|
|
289
|
-
modelPath,
|
|
290
|
-
malloyConfig,
|
|
291
|
-
pool,
|
|
292
|
-
options,
|
|
293
|
-
);
|
|
294
|
-
} catch (poolError) {
|
|
295
|
-
// Real compile errors propagate to the caller as a Model
|
|
296
|
-
// with `compilationError` populated, matching the
|
|
297
|
-
// in-process path's contract.
|
|
298
|
-
if (
|
|
299
|
-
poolError instanceof ModelCompilationError ||
|
|
300
|
-
poolError instanceof MalloyError
|
|
301
|
-
) {
|
|
302
|
-
return Model.makeErrorModel(
|
|
303
|
-
packageName,
|
|
304
|
-
modelPath,
|
|
305
|
-
poolError instanceof MalloyError
|
|
306
|
-
? new ModelCompilationError(poolError)
|
|
307
|
-
: poolError,
|
|
308
|
-
);
|
|
309
|
-
}
|
|
310
|
-
// Anything else (worker exited, RPC timeout) — fall back
|
|
311
|
-
// to in-process compile so a transient pool failure
|
|
312
|
-
// doesn't take a package down.
|
|
313
|
-
logger.warn(
|
|
314
|
-
"Compile worker failed; falling back to in-process compile",
|
|
315
|
-
{ packageName, modelPath, error: poolError },
|
|
316
|
-
);
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
return Model.createInProcess(
|
|
320
|
-
packageName,
|
|
321
|
-
packagePath,
|
|
322
|
-
modelPath,
|
|
323
|
-
malloyConfig,
|
|
324
|
-
options,
|
|
325
|
-
);
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
/**
|
|
329
|
-
* Compile via the {@link CompileWorkerPool}. Builds a `Model` whose
|
|
330
|
-
* `modelDef` / `sources` / `queries` / `sourceInfos` / `givens` are
|
|
331
|
-
* populated up-front, but whose `ModelMaterializer` is constructed
|
|
332
|
-
* lazily on the first query through {@link ensureMaterializer}.
|
|
333
|
-
* This keeps the heavy CPU work (parse, type-check, IR build) off
|
|
334
|
-
* the main event loop so the K8s liveness probe stays responsive.
|
|
335
|
-
*/
|
|
336
|
-
private static async createViaWorker(
|
|
337
|
-
packageName: string,
|
|
338
|
-
packagePath: string,
|
|
339
|
-
modelPath: string,
|
|
340
|
-
malloyConfig: ModelConnectionInput,
|
|
341
|
-
pool: ReturnType<typeof getCompilePool>,
|
|
342
|
-
options?: { buildManifest?: BuildManifest["entries"] },
|
|
343
|
-
): Promise<Model> {
|
|
344
|
-
const resolvedConfig = Model.toMalloyConfig(malloyConfig);
|
|
345
|
-
const outcome = await pool.compile({
|
|
346
|
-
packagePath,
|
|
347
|
-
modelPath,
|
|
348
|
-
malloyConfig: resolvedConfig,
|
|
349
|
-
// Package-level configs wrap a "duckdb" default; matches
|
|
350
|
-
// Package.buildPackageMalloyConfig.
|
|
351
|
-
defaultConnectionName: "duckdb",
|
|
352
|
-
urlReader: URL_READER,
|
|
353
|
-
buildManifest: options?.buildManifest,
|
|
354
|
-
});
|
|
355
|
-
|
|
356
|
-
// Materializer construction is deferred until a query actually
|
|
357
|
-
// runs. Build it the same way the in-process path does so
|
|
358
|
-
// execution semantics stay identical.
|
|
359
|
-
const materializerBuilder: MaterializerBuilder = async () => {
|
|
360
|
-
const { runtime, modelURL, importBaseURL } =
|
|
361
|
-
await Model.getModelRuntime(
|
|
362
|
-
packagePath,
|
|
363
|
-
modelPath,
|
|
364
|
-
malloyConfig,
|
|
365
|
-
options,
|
|
366
|
-
);
|
|
367
|
-
return Model.getStandardModelMaterializer(
|
|
368
|
-
runtime,
|
|
369
|
-
importBaseURL,
|
|
370
|
-
modelURL,
|
|
371
|
-
modelPath,
|
|
372
|
-
);
|
|
373
|
-
};
|
|
374
|
-
|
|
375
|
-
return new Model(
|
|
376
|
-
packageName,
|
|
377
|
-
modelPath,
|
|
378
|
-
{} as DataStyles,
|
|
379
|
-
"model",
|
|
380
|
-
undefined, // modelMaterializer — built lazily
|
|
381
|
-
outcome.modelDef,
|
|
382
|
-
outcome.sources as ApiSource[],
|
|
383
|
-
outcome.queries as ApiQuery[],
|
|
384
|
-
outcome.sourceInfos.length > 0 ? outcome.sourceInfos : undefined,
|
|
385
|
-
undefined, // runnableNotebookCells — .malloy is not a notebook
|
|
386
|
-
undefined, // compilationError
|
|
387
|
-
outcome.filterMap,
|
|
388
|
-
outcome.givens as ApiGiven[] | undefined,
|
|
389
|
-
materializerBuilder,
|
|
390
|
-
);
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
private static makeErrorModel(
|
|
394
|
-
packageName: string,
|
|
395
|
-
modelPath: string,
|
|
396
|
-
error: Error,
|
|
397
|
-
): Model {
|
|
398
|
-
const isNotebook = modelPath.endsWith(NOTEBOOK_FILE_SUFFIX);
|
|
399
|
-
return new Model(
|
|
400
|
-
packageName,
|
|
401
|
-
modelPath,
|
|
402
|
-
{} as DataStyles,
|
|
403
|
-
isNotebook ? "notebook" : "model",
|
|
404
|
-
undefined,
|
|
405
|
-
undefined,
|
|
406
|
-
undefined,
|
|
407
|
-
undefined,
|
|
408
|
-
undefined,
|
|
409
|
-
undefined,
|
|
410
|
-
error,
|
|
411
|
-
);
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
/**
|
|
415
|
-
* Legacy in-process compile path. Retained for:
|
|
416
|
-
* - notebooks (`.malloynb`), whose per-cell materializer chain
|
|
417
|
-
* is too coupled to the Runtime to ship to a worker for v1.
|
|
418
|
-
* - environments where MALLOY_COMPILE_WORKERS=0.
|
|
419
|
-
* - fallback when the worker pool encounters a non-compile
|
|
420
|
-
* failure (worker exit, RPC timeout).
|
|
421
|
-
*/
|
|
422
|
-
private static async createInProcess(
|
|
423
|
-
packageName: string,
|
|
424
|
-
packagePath: string,
|
|
425
|
-
modelPath: string,
|
|
426
|
-
malloyConfig: ModelConnectionInput,
|
|
427
|
-
options?: { buildManifest?: BuildManifest["entries"] },
|
|
428
230
|
): Promise<Model> {
|
|
429
231
|
// getModelRuntime might throw a ModelNotFoundError. It's the callers responsibility
|
|
430
232
|
// to pass a valid model path or handle the error.
|
|
@@ -635,25 +437,9 @@ export class Model {
|
|
|
635
437
|
);
|
|
636
438
|
}
|
|
637
439
|
let runnable: QueryMaterializer;
|
|
638
|
-
if (!this.modelDef || !this.modelInfo)
|
|
440
|
+
if (!this.modelMaterializer || !this.modelDef || !this.modelInfo)
|
|
639
441
|
throw new BadRequestError("Model has no queryable entities.");
|
|
640
442
|
|
|
641
|
-
// Resolve the materializer — either already-built (in-process
|
|
642
|
-
// create path, or a previous query on this Model) or lazily
|
|
643
|
-
// constructed now (worker-compile path on first query).
|
|
644
|
-
let materializer: ModelMaterializer;
|
|
645
|
-
try {
|
|
646
|
-
materializer = await this.ensureMaterializer();
|
|
647
|
-
} catch (error) {
|
|
648
|
-
if (error instanceof BadRequestError) throw error;
|
|
649
|
-
if (error instanceof MalloyError) throw error;
|
|
650
|
-
throw new BadRequestError(
|
|
651
|
-
error instanceof Error
|
|
652
|
-
? `Failed to prepare model: ${error.message}`
|
|
653
|
-
: "Failed to prepare model.",
|
|
654
|
-
);
|
|
655
|
-
}
|
|
656
|
-
|
|
657
443
|
// Wrap loadQuery calls in try-catch to handle query parsing errors
|
|
658
444
|
try {
|
|
659
445
|
let queryString: string;
|
|
@@ -694,7 +480,7 @@ export class Model {
|
|
|
694
480
|
}
|
|
695
481
|
}
|
|
696
482
|
|
|
697
|
-
runnable =
|
|
483
|
+
runnable = this.modelMaterializer.loadQuery(queryString);
|
|
698
484
|
} catch (error) {
|
|
699
485
|
// Re-throw BadRequestError as-is
|
|
700
486
|
if (error instanceof BadRequestError) {
|
|
@@ -783,14 +569,7 @@ export class Model {
|
|
|
783
569
|
}
|
|
784
570
|
|
|
785
571
|
private getStandardModel(): ApiCompiledModel {
|
|
786
|
-
|
|
787
|
-
// (potentially multi-MB) JSON.stringify result can be memoised.
|
|
788
|
-
// Without this cache every `GET /environments/:e/packages/:p/
|
|
789
|
-
// models/:m` re-stringifies the whole tree on the main thread —
|
|
790
|
-
// a known source of multi-hundred-ms event-loop pauses that
|
|
791
|
-
// chips away at the K8s liveness budget.
|
|
792
|
-
if (this.cachedStandardModel) return this.cachedStandardModel;
|
|
793
|
-
const compiled: ApiCompiledModel = {
|
|
572
|
+
return {
|
|
794
573
|
type: "source",
|
|
795
574
|
packageName: this.packageName,
|
|
796
575
|
modelPath: this.modelPath,
|
|
@@ -807,8 +586,6 @@ export class Model {
|
|
|
807
586
|
queries: this.queries,
|
|
808
587
|
givens: this.givens,
|
|
809
588
|
} as ApiCompiledModel;
|
|
810
|
-
this.cachedStandardModel = compiled;
|
|
811
|
-
return compiled;
|
|
812
589
|
}
|
|
813
590
|
|
|
814
591
|
private async getNotebookModel(): Promise<ApiRawNotebook> {
|
|
package/src/service/package.spec.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
|
|
2
|
-
import { Stats } from "fs";
|
|
3
2
|
import fs from "fs/promises";
|
|
4
3
|
import { join, resolve } from "path";
|
|
5
4
|
import sinon from "sinon";
|
|
@@ -336,12 +335,17 @@ describe("service/package", () => {
|
|
|
336
335
|
});
|
|
337
336
|
});
|
|
338
337
|
|
|
339
|
-
describe("
|
|
340
|
-
it("
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
//
|
|
344
|
-
|
|
338
|
+
describe("schema introspection (via worker pool)", () => {
|
|
339
|
+
it("returns columns and rowCount for a csv database", async () => {
|
|
340
|
+
// Schema introspection moved off the main thread into a
|
|
341
|
+
// worker pool to isolate DuckDB's native thread pool (see
|
|
342
|
+
// schema_worker_pool.ts). Hit the pool directly here so
|
|
343
|
+
// the test exercises the same code path prod uses.
|
|
344
|
+
const { getSchemaWorkerPool } = await import(
|
|
345
|
+
"./schema_worker_pool"
|
|
346
|
+
);
|
|
347
|
+
const pool = getSchemaWorkerPool();
|
|
348
|
+
const info = await pool.submit(
|
|
345
349
|
testPackageDirectory,
|
|
346
350
|
"database.csv",
|
|
347
351
|
);
|