@malloy-publisher/server 0.0.198-dev4 → 0.0.198-dev6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +17 -13
- package/dist/instrumentation.mjs +21 -0
- package/dist/package_load_worker.mjs +12213 -0
- package/dist/server.mjs +2026 -2622
- package/package.json +2 -3
- package/src/health.ts +5 -3
- package/src/instrumentation.ts +50 -0
- package/src/package_load/package_load_pool.spec.ts +252 -0
- package/src/package_load/package_load_pool.ts +920 -0
- package/src/{compile/compile_worker.ts → package_load/package_load_worker.ts} +505 -246
- package/src/package_load/protocol.ts +336 -0
- package/src/server.ts +12 -0
- package/src/service/environment_store.ts +24 -3
- package/src/service/given.ts +80 -0
- package/src/service/model.ts +255 -291
- package/src/service/package.spec.ts +10 -0
- package/src/service/package.ts +268 -259
- package/src/service/package_worker_path.spec.ts +196 -0
- package/dist/compile_worker.mjs +0 -633
- package/src/compile/compile_pool.spec.ts +0 -292
- package/src/compile/compile_pool.ts +0 -796
- package/src/compile/protocol.ts +0 -270
- package/src/service/model_worker_path.spec.ts +0 -133
package/src/service/package.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import * as fs from "fs/promises";
|
|
2
2
|
import * as path from "path";
|
|
3
3
|
|
|
4
|
-
import { DuckDBConnection } from "@malloydata/db-duckdb";
|
|
5
4
|
import "@malloydata/db-duckdb/native";
|
|
6
5
|
import {
|
|
7
6
|
Connection,
|
|
@@ -10,19 +9,24 @@ import {
|
|
|
10
9
|
EmptyURLReader,
|
|
11
10
|
FixedConnectionMap,
|
|
12
11
|
MalloyConfig,
|
|
12
|
+
MalloyError,
|
|
13
13
|
SourceDef,
|
|
14
14
|
} from "@malloydata/malloy";
|
|
15
15
|
import { metrics } from "@opentelemetry/api";
|
|
16
16
|
import recursive from "recursive-readdir";
|
|
17
17
|
import { components } from "../api";
|
|
18
|
-
import {
|
|
18
|
+
import { getPackageLoadPool } from "../package_load/package_load_pool";
|
|
19
19
|
import {
|
|
20
20
|
API_PREFIX,
|
|
21
21
|
MODEL_FILE_SUFFIX,
|
|
22
22
|
NOTEBOOK_FILE_SUFFIX,
|
|
23
23
|
PACKAGE_MANIFEST_NAME,
|
|
24
24
|
} from "../constants";
|
|
25
|
-
import {
|
|
25
|
+
import {
|
|
26
|
+
ModelCompilationError,
|
|
27
|
+
PackageNotFoundError,
|
|
28
|
+
ServiceUnavailableError,
|
|
29
|
+
} from "../errors";
|
|
26
30
|
import { formatDuration, logger } from "../logger";
|
|
27
31
|
import { BuildManifest } from "../storage/DatabaseInterface";
|
|
28
32
|
import { ignoreDotfiles } from "../utils";
|
|
@@ -95,23 +99,12 @@ export class Package {
|
|
|
95
99
|
});
|
|
96
100
|
|
|
97
101
|
try {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
),
|
|
105
|
-
});
|
|
106
|
-
packageConfig.resource = `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`;
|
|
107
|
-
|
|
108
|
-
const databases = await Package.readDatabases(packagePath);
|
|
109
|
-
const databasesTime = performance.now();
|
|
110
|
-
logger.info("Databases read completed", {
|
|
111
|
-
packageName,
|
|
112
|
-
databaseCount: databases.length,
|
|
113
|
-
duration: formatDuration(databasesTime - packageConfigTime),
|
|
114
|
-
});
|
|
102
|
+
// The MalloyConfig is always built on the main thread — it
|
|
103
|
+
// owns the live native connection handles the package needs
|
|
104
|
+
// to *serve queries* after load (workers can't share native
|
|
105
|
+
// handles across the V8 isolate boundary). The worker proxies
|
|
106
|
+
// non-duckdb connection lookups back through this MalloyConfig
|
|
107
|
+
// during compile.
|
|
115
108
|
const malloyConfig = Package.buildPackageMalloyConfig(
|
|
116
109
|
packagePath,
|
|
117
110
|
typeof environmentMalloyConfig === "function"
|
|
@@ -119,68 +112,29 @@ export class Package {
|
|
|
119
112
|
: () => Package.toMalloyConfig(environmentMalloyConfig),
|
|
120
113
|
);
|
|
121
114
|
|
|
122
|
-
|
|
123
|
-
packageName,
|
|
124
|
-
packagePath,
|
|
125
|
-
malloyConfig,
|
|
126
|
-
);
|
|
127
|
-
const modelsTime = performance.now();
|
|
128
|
-
logger.info("Models loaded", {
|
|
129
|
-
packageName,
|
|
130
|
-
modelCount: models.size,
|
|
131
|
-
duration: formatDuration(modelsTime - databasesTime),
|
|
132
|
-
});
|
|
133
|
-
for (const [modelPath, model] of models.entries()) {
|
|
134
|
-
const maybeModel = model as unknown as {
|
|
135
|
-
compilationError?: unknown;
|
|
136
|
-
};
|
|
137
|
-
if (maybeModel.compilationError) {
|
|
138
|
-
const err = maybeModel.compilationError;
|
|
139
|
-
const message =
|
|
140
|
-
err instanceof Error
|
|
141
|
-
? err.message
|
|
142
|
-
: `Unknown compilation error in ${modelPath}`;
|
|
143
|
-
|
|
144
|
-
logger.error("Model compilation failed", {
|
|
145
|
-
packageName,
|
|
146
|
-
modelPath,
|
|
147
|
-
error: message,
|
|
148
|
-
});
|
|
149
|
-
|
|
150
|
-
this.packageLoadHistogram.record(performance.now() - startTime, {
|
|
151
|
-
malloy_package_name: packageName,
|
|
152
|
-
status: "compilation_error",
|
|
153
|
-
});
|
|
154
|
-
throw err;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
const endTime = performance.now();
|
|
158
|
-
const executionTime = endTime - startTime;
|
|
159
|
-
this.packageLoadHistogram.record(executionTime, {
|
|
160
|
-
malloy_package_name: packageName,
|
|
161
|
-
status: "success",
|
|
162
|
-
});
|
|
163
|
-
logger.info(`Successfully loaded package ${packageName}`, {
|
|
164
|
-
packageName,
|
|
165
|
-
duration: formatDuration(executionTime),
|
|
166
|
-
});
|
|
167
|
-
return new Package(
|
|
115
|
+
return await Package.loadViaWorker(
|
|
168
116
|
environmentName,
|
|
169
117
|
packageName,
|
|
170
118
|
packagePath,
|
|
171
|
-
packageConfig,
|
|
172
|
-
databases,
|
|
173
|
-
models,
|
|
174
119
|
malloyConfig,
|
|
120
|
+
startTime,
|
|
121
|
+
manifestValidationTime,
|
|
175
122
|
);
|
|
176
123
|
} catch (error) {
|
|
177
124
|
logger.error(`Error loading package ${packageName}`, { error });
|
|
178
125
|
console.error(error);
|
|
179
126
|
const endTime = performance.now();
|
|
180
127
|
const executionTime = endTime - startTime;
|
|
128
|
+
const status =
|
|
129
|
+
error instanceof ModelCompilationError ||
|
|
130
|
+
error instanceof MalloyError
|
|
131
|
+
? "compilation_error"
|
|
132
|
+
: error instanceof ServiceUnavailableError
|
|
133
|
+
? "pool_unavailable"
|
|
134
|
+
: "error";
|
|
181
135
|
this.packageLoadHistogram.record(executionTime, {
|
|
182
136
|
malloy_package_name: packageName,
|
|
183
|
-
status
|
|
137
|
+
status,
|
|
184
138
|
});
|
|
185
139
|
// Clean up package directory on failure
|
|
186
140
|
try {
|
|
@@ -198,6 +152,141 @@ export class Package {
|
|
|
198
152
|
}
|
|
199
153
|
}
|
|
200
154
|
|
|
155
|
+
/**
|
|
156
|
+
* Load the package via the package-load worker pool. The worker
|
|
157
|
+
* performs the CPU-bound bulk of the load off-thread (manifest
|
|
158
|
+
* read, every `.malloy` / `.malloynb` compile) and ships back a
|
|
159
|
+
* structured-clonable `LoadPackageOutcome`. Database probes
|
|
160
|
+
* (`.parquet` / `.csv`) run on the main thread, in parallel with
|
|
161
|
+
* the worker compile, against the package's existing DuckDB
|
|
162
|
+
* connection — they're async-IO-bound and don't compete with the
|
|
163
|
+
* worker for CPU.
|
|
164
|
+
*
|
|
165
|
+
* Pool-infrastructure failures (worker crash, RPC timeout, pool
|
|
166
|
+
* shutting down) are rewrapped as `ServiceUnavailableError` so
|
|
167
|
+
* the HTTP layer responds 503 (transient, retryable). Real compile
|
|
168
|
+
* errors (`MalloyError` / `ModelCompilationError`) propagate
|
|
169
|
+
* unchanged so they keep their 4xx mapping.
|
|
170
|
+
*/
|
|
171
|
+
private static async loadViaWorker(
|
|
172
|
+
environmentName: string,
|
|
173
|
+
packageName: string,
|
|
174
|
+
packagePath: string,
|
|
175
|
+
malloyConfig: MalloyConfig,
|
|
176
|
+
startTime: number,
|
|
177
|
+
manifestValidationTime: number,
|
|
178
|
+
): Promise<Package> {
|
|
179
|
+
const pool = getPackageLoadPool();
|
|
180
|
+
const dispatchTime = performance.now();
|
|
181
|
+
// Submit the worker job and run database probing on the main
|
|
182
|
+
// thread in parallel. We isolate the worker-job promise inside
|
|
183
|
+
// a wrapper so we can map pool-infrastructure failures (worker
|
|
184
|
+
// crash, RPC timeout, pool shutting down) to a 503 without
|
|
185
|
+
// accidentally re-mapping `readDatabases`'s own errors.
|
|
186
|
+
const workerOutcome = pool
|
|
187
|
+
.loadPackage({
|
|
188
|
+
packagePath,
|
|
189
|
+
packageName,
|
|
190
|
+
malloyConfig,
|
|
191
|
+
defaultConnectionName: "duckdb",
|
|
192
|
+
})
|
|
193
|
+
.catch((err: unknown) => {
|
|
194
|
+
// Compile errors surface in-band via
|
|
195
|
+
// `LoadPackageOutcome.models[i].compilationError`; if the
|
|
196
|
+
// pool itself rejects, it's an infra-side failure
|
|
197
|
+
// (shutting down, worker spawn failed, worker crashed,
|
|
198
|
+
// RPC timeout) and the client should retry. Real Malloy
|
|
199
|
+
// compile errors deserialised by the pool still carry
|
|
200
|
+
// their MalloyError / ModelCompilationError identity —
|
|
201
|
+
// let those bubble untouched so they keep their 4xx
|
|
202
|
+
// mapping in `errors.ts`.
|
|
203
|
+
const realError =
|
|
204
|
+
err instanceof Error
|
|
205
|
+
? err
|
|
206
|
+
: new Error(
|
|
207
|
+
`Package-load worker pool failure: ${String(err)}`,
|
|
208
|
+
);
|
|
209
|
+
if (
|
|
210
|
+
realError instanceof MalloyError ||
|
|
211
|
+
realError instanceof ModelCompilationError
|
|
212
|
+
) {
|
|
213
|
+
throw realError;
|
|
214
|
+
}
|
|
215
|
+
throw new ServiceUnavailableError(
|
|
216
|
+
`Package-load worker pool unavailable: ${realError.message}`,
|
|
217
|
+
);
|
|
218
|
+
});
|
|
219
|
+
const [outcome, databases] = await Promise.all([
|
|
220
|
+
workerOutcome,
|
|
221
|
+
Package.readDatabases(packagePath, malloyConfig),
|
|
222
|
+
]);
|
|
223
|
+
const workerDoneTime = performance.now();
|
|
224
|
+
logger.info("Package load via worker pool completed", {
|
|
225
|
+
packageName,
|
|
226
|
+
manifestValidationMs: dispatchTime - manifestValidationTime,
|
|
227
|
+
workerDurationMs: outcome.loadDurationMs,
|
|
228
|
+
dispatchOverheadMs:
|
|
229
|
+
workerDoneTime - dispatchTime - outcome.loadDurationMs,
|
|
230
|
+
modelCount: outcome.models.length,
|
|
231
|
+
databaseCount: databases.length,
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Override the manifest-derived resource URI — the worker only
|
|
235
|
+
// returns name/description from publisher.json, but the rest of
|
|
236
|
+
// the API surface expects a `resource` field too.
|
|
237
|
+
const packageConfig: ApiPackage = {
|
|
238
|
+
name: outcome.packageMetadata.name,
|
|
239
|
+
description: outcome.packageMetadata.description,
|
|
240
|
+
resource: `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`,
|
|
241
|
+
};
|
|
242
|
+
|
|
243
|
+
// Build live `Model`s from worker output. Any per-model compile
|
|
244
|
+
// failure aborts the load — matches the historical behaviour of
|
|
245
|
+
// `Package.create` failing the whole package on the first model
|
|
246
|
+
// error. (`Package.reloadAllModels` keeps the failed-model
|
|
247
|
+
// placeholders instead; that branch goes through a different
|
|
248
|
+
// hydration path.)
|
|
249
|
+
const models = new Map<string, Model>();
|
|
250
|
+
for (const sm of outcome.models) {
|
|
251
|
+
if (sm.compilationError) {
|
|
252
|
+
const err = Model.deserializeCompilationError(sm.compilationError);
|
|
253
|
+
logger.error("Model compilation failed", {
|
|
254
|
+
packageName,
|
|
255
|
+
modelPath: sm.modelPath,
|
|
256
|
+
error: err.message,
|
|
257
|
+
});
|
|
258
|
+
// The outer catch in Package.create records the metric +
|
|
259
|
+
// cleans the package directory.
|
|
260
|
+
throw err;
|
|
261
|
+
}
|
|
262
|
+
models.set(
|
|
263
|
+
sm.modelPath,
|
|
264
|
+
Model.fromSerialized(packageName, packagePath, malloyConfig, sm),
|
|
265
|
+
);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
const endTime = performance.now();
|
|
269
|
+
const executionTime = endTime - startTime;
|
|
270
|
+
this.packageLoadHistogram.record(executionTime, {
|
|
271
|
+
malloy_package_name: packageName,
|
|
272
|
+
status: "success",
|
|
273
|
+
});
|
|
274
|
+
logger.info(`Successfully loaded package ${packageName}`, {
|
|
275
|
+
packageName,
|
|
276
|
+
duration: formatDuration(executionTime),
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
return new Package(
|
|
280
|
+
environmentName,
|
|
281
|
+
packageName,
|
|
282
|
+
packagePath,
|
|
283
|
+
packageConfig,
|
|
284
|
+
databases,
|
|
285
|
+
models,
|
|
286
|
+
malloyConfig,
|
|
287
|
+
);
|
|
288
|
+
}
|
|
289
|
+
|
|
201
290
|
public getPackageName(): string {
|
|
202
291
|
return this.packageName;
|
|
203
292
|
}
|
|
@@ -232,6 +321,21 @@ export class Package {
|
|
|
232
321
|
return Array.from(this.models.keys());
|
|
233
322
|
}
|
|
234
323
|
|
|
324
|
+
/**
|
|
325
|
+
* Re-compile every model in the package against a new build
|
|
326
|
+
* manifest (called after a materialization build commits new
|
|
327
|
+
* physicalised tables). Runs through the package-load worker pool
|
|
328
|
+
* — same off-main-thread compile path as initial `Package.create`
|
|
329
|
+
* — so a reload of a large package can't block the K8s liveness
|
|
330
|
+
* probe.
|
|
331
|
+
*
|
|
332
|
+
* Unlike `Package.create`, a per-model compile failure here does
|
|
333
|
+
* NOT abort the reload: we keep the failed model as a placeholder
|
|
334
|
+
* (`Model.fromCompilationError`) in `this.models`, matching the
|
|
335
|
+
* historical reload semantics. Whole-pool failures (worker crash,
|
|
336
|
+
* timeout, pool shutting down) propagate as `ServiceUnavailableError`
|
|
337
|
+
* — the caller (manifest service) decides how to retry.
|
|
338
|
+
*/
|
|
235
339
|
public async reloadAllModels(
|
|
236
340
|
buildManifest: BuildManifest["entries"],
|
|
237
341
|
): Promise<void> {
|
|
@@ -241,20 +345,62 @@ export class Package {
|
|
|
241
345
|
modelCount: modelPaths.length,
|
|
242
346
|
manifestEntryCount: Object.keys(buildManifest).length,
|
|
243
347
|
});
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
348
|
+
|
|
349
|
+
const pool = getPackageLoadPool();
|
|
350
|
+
let outcome;
|
|
351
|
+
try {
|
|
352
|
+
outcome = await pool.loadPackage({
|
|
353
|
+
packagePath: this.packagePath,
|
|
354
|
+
packageName: this.packageName,
|
|
355
|
+
malloyConfig: this.malloyConfig,
|
|
356
|
+
defaultConnectionName: "duckdb",
|
|
357
|
+
buildManifest,
|
|
358
|
+
});
|
|
359
|
+
} catch (err) {
|
|
360
|
+
const realError =
|
|
361
|
+
err instanceof Error
|
|
362
|
+
? err
|
|
363
|
+
: new Error(`Package-load worker pool failure: ${String(err)}`);
|
|
364
|
+
if (
|
|
365
|
+
realError instanceof MalloyError ||
|
|
366
|
+
realError instanceof ModelCompilationError
|
|
367
|
+
) {
|
|
368
|
+
throw realError;
|
|
369
|
+
}
|
|
370
|
+
throw new ServiceUnavailableError(
|
|
371
|
+
`Package-load worker pool unavailable: ${realError.message}`,
|
|
372
|
+
);
|
|
373
|
+
}
|
|
374
|
+
|
|
255
375
|
const nextModels = new Map<string, Model>();
|
|
256
|
-
for (const
|
|
257
|
-
|
|
376
|
+
for (const sm of outcome.models) {
|
|
377
|
+
if (sm.compilationError) {
|
|
378
|
+
const err = Model.deserializeCompilationError(sm.compilationError);
|
|
379
|
+
logger.warn("Model compilation failed during reload", {
|
|
380
|
+
packageName: this.packageName,
|
|
381
|
+
modelPath: sm.modelPath,
|
|
382
|
+
error: err.message,
|
|
383
|
+
});
|
|
384
|
+
nextModels.set(
|
|
385
|
+
sm.modelPath,
|
|
386
|
+
Model.fromCompilationError(
|
|
387
|
+
this.packageName,
|
|
388
|
+
sm.modelPath,
|
|
389
|
+
sm.modelType,
|
|
390
|
+
err,
|
|
391
|
+
),
|
|
392
|
+
);
|
|
393
|
+
} else {
|
|
394
|
+
nextModels.set(
|
|
395
|
+
sm.modelPath,
|
|
396
|
+
Model.fromSerialized(
|
|
397
|
+
this.packageName,
|
|
398
|
+
this.packagePath,
|
|
399
|
+
this.malloyConfig,
|
|
400
|
+
sm,
|
|
401
|
+
),
|
|
402
|
+
);
|
|
403
|
+
}
|
|
258
404
|
}
|
|
259
405
|
this.models = nextModels;
|
|
260
406
|
}
|
|
@@ -317,20 +463,6 @@ export class Package {
|
|
|
317
463
|
);
|
|
318
464
|
}
|
|
319
465
|
|
|
320
|
-
private static async loadModels(
|
|
321
|
-
packageName: string,
|
|
322
|
-
packagePath: string,
|
|
323
|
-
malloyConfig: MalloyConfig,
|
|
324
|
-
): Promise<Map<string, Model>> {
|
|
325
|
-
const modelPaths = await Package.getModelPaths(packagePath);
|
|
326
|
-
const models = await Promise.all(
|
|
327
|
-
modelPaths.map((modelPath) =>
|
|
328
|
-
Model.create(packageName, packagePath, modelPath, malloyConfig),
|
|
329
|
-
),
|
|
330
|
-
);
|
|
331
|
-
return new Map(models.map((model) => [model.getPath(), model]));
|
|
332
|
-
}
|
|
333
|
-
|
|
334
466
|
private static buildPackageMalloyConfig(
|
|
335
467
|
packagePath: string,
|
|
336
468
|
getEnvironmentMalloyConfig: () => MalloyConfig,
|
|
@@ -380,27 +512,6 @@ export class Package {
|
|
|
380
512
|
return malloyConfig;
|
|
381
513
|
}
|
|
382
514
|
|
|
383
|
-
private static async getModelPaths(packagePath: string): Promise<string[]> {
|
|
384
|
-
let files = undefined;
|
|
385
|
-
try {
|
|
386
|
-
files = await recursive(packagePath, [ignoreDotfiles]);
|
|
387
|
-
} catch (error) {
|
|
388
|
-
logger.error(error);
|
|
389
|
-
throw new PackageNotFoundError(
|
|
390
|
-
`Package config for ${packagePath} does not exist.`,
|
|
391
|
-
);
|
|
392
|
-
}
|
|
393
|
-
return files
|
|
394
|
-
.map((fullPath: string) => {
|
|
395
|
-
return path.relative(packagePath, fullPath).replace(/\\/g, "/");
|
|
396
|
-
})
|
|
397
|
-
.filter(
|
|
398
|
-
(modelPath: string) =>
|
|
399
|
-
modelPath.endsWith(MODEL_FILE_SUFFIX) ||
|
|
400
|
-
modelPath.endsWith(NOTEBOOK_FILE_SUFFIX),
|
|
401
|
-
);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
515
|
private static async validatePackageManifestExistsOrThrowError(
|
|
405
516
|
packagePath: string,
|
|
406
517
|
) {
|
|
@@ -415,37 +526,32 @@ export class Package {
|
|
|
415
526
|
}
|
|
416
527
|
}
|
|
417
528
|
|
|
418
|
-
private static async readPackageConfig(
|
|
419
|
-
packagePath: string,
|
|
420
|
-
): Promise<ApiPackage> {
|
|
421
|
-
const packageConfigPath = path.join(packagePath, PACKAGE_MANIFEST_NAME);
|
|
422
|
-
const packageConfigContents = await fs.readFile(packageConfigPath);
|
|
423
|
-
// TODO: Validate package manifest. Define manifest type in public API.
|
|
424
|
-
const packageManifest = JSON.parse(packageConfigContents.toString());
|
|
425
|
-
return {
|
|
426
|
-
name: packageManifest.name,
|
|
427
|
-
description: packageManifest.description,
|
|
428
|
-
};
|
|
429
|
-
}
|
|
430
|
-
|
|
431
529
|
private static async readDatabases(
|
|
432
530
|
packagePath: string,
|
|
531
|
+
malloyConfig: MalloyConfig,
|
|
433
532
|
): Promise<ApiDatabase[]> {
|
|
533
|
+
const databasePaths = await Package.getDatabasePaths(packagePath);
|
|
534
|
+
if (databasePaths.length === 0) {
|
|
535
|
+
return [];
|
|
536
|
+
}
|
|
537
|
+
// Resolve the package's duckdb connection ONCE and reuse it for
|
|
538
|
+
// every schema/row-count probe in this package. Malloy caches the
|
|
539
|
+
// materialized connection on the MalloyConfig so the same instance
|
|
540
|
+
// will be returned to model compiles later in `Package.create`.
|
|
541
|
+
// This is the substantive optimization over the previous code:
|
|
542
|
+
// we go from `databasePaths.length` separate DuckDBConnections
|
|
543
|
+
// (each doing its own native init + extension load) to one.
|
|
544
|
+
const conn = await malloyConfig.connections.lookupConnection("duckdb");
|
|
434
545
|
return await Promise.all(
|
|
435
|
-
(
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
info: databaseInfo,
|
|
445
|
-
type: "embedded",
|
|
446
|
-
};
|
|
447
|
-
},
|
|
448
|
-
),
|
|
546
|
+
databasePaths.map(async (databasePath) => ({
|
|
547
|
+
path: databasePath,
|
|
548
|
+
info: await Package.getDatabaseInfo(
|
|
549
|
+
packagePath,
|
|
550
|
+
databasePath,
|
|
551
|
+
conn,
|
|
552
|
+
),
|
|
553
|
+
type: "embedded" as const,
|
|
554
|
+
})),
|
|
449
555
|
);
|
|
450
556
|
}
|
|
451
557
|
|
|
@@ -466,135 +572,38 @@ export class Package {
|
|
|
466
572
|
private static async getDatabaseInfo(
|
|
467
573
|
packagePath: string,
|
|
468
574
|
databasePath: string,
|
|
575
|
+
conn: Connection,
|
|
469
576
|
): Promise<ApiTableDescription> {
|
|
470
577
|
const fullPath = path.join(packagePath, databasePath);
|
|
471
|
-
// Normalize path to use forward slashes for cross-platform compatibility.
|
|
472
|
-
// DuckDB on Windows supports forward slashes, which avoids escaping issues.
|
|
473
|
-
const normalizedPath = fullPath.replace(/\\/g, "/");
|
|
474
|
-
|
|
475
|
-
// One DuckDB connection per file (matches the historical
|
|
476
|
-
// ConnectionRuntime shape). Reused for both the schema probe and
|
|
477
|
-
// the row-count SQL so we only pay native init once per call.
|
|
478
|
-
const conn = new DuckDBConnection("duckdb");
|
|
479
|
-
|
|
480
|
-
// Schema probe. We need Malloy's view of the column types
|
|
481
|
-
// (consumers of the API rely on Malloy type strings, not DuckDB
|
|
482
|
-
// native types), so this stays a Malloy compile. The compile is
|
|
483
|
-
// CPU-heavy on the main thread relative to the work it produces,
|
|
484
|
-
// so when the worker pool is enabled we ship the synthetic
|
|
485
|
-
// `source: temp is duckdb.table(…)` snippet to a worker and read
|
|
486
|
-
// the resulting modelDef back here. Schema-fetch RPCs from the
|
|
487
|
-
// worker proxy through the pool against `conn` below.
|
|
488
|
-
const pool = getCompilePool();
|
|
489
|
-
let schema: ApiColumn[];
|
|
490
|
-
if (pool.enabled) {
|
|
491
|
-
schema = await Package.getSchemaViaPool(
|
|
492
|
-
pool,
|
|
493
|
-
packagePath,
|
|
494
|
-
normalizedPath,
|
|
495
|
-
conn,
|
|
496
|
-
);
|
|
497
|
-
} else {
|
|
498
|
-
schema = await Package.getSchemaInProcess(normalizedPath, conn);
|
|
499
|
-
}
|
|
500
578
|
|
|
501
|
-
//
|
|
502
|
-
//
|
|
503
|
-
//
|
|
504
|
-
//
|
|
505
|
-
//
|
|
506
|
-
//
|
|
507
|
-
//
|
|
508
|
-
const escapedPath = normalizedPath.replace(/'/g, "''");
|
|
509
|
-
const sqlResult = await conn.runSQL(
|
|
510
|
-
`SELECT count(*)::BIGINT AS row_count FROM '${escapedPath}'`,
|
|
511
|
-
);
|
|
512
|
-
const firstRow = sqlResult.rows[0] as { row_count?: bigint | number };
|
|
513
|
-
const rowCount = Number(firstRow.row_count ?? 0);
|
|
514
|
-
|
|
515
|
-
return { name: databasePath, rowCount, columns: schema };
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
/**
|
|
519
|
-
* In-process schema probe (legacy / kill-switch path). Builds a
|
|
520
|
-
* minimal ConnectionRuntime + compiles the synthetic snippet on the
|
|
521
|
-
* main thread. Same behaviour as the pre-worker-pool implementation.
|
|
522
|
-
*/
|
|
523
|
-
private static async getSchemaInProcess(
|
|
524
|
-
normalizedPath: string,
|
|
525
|
-
conn: DuckDBConnection,
|
|
526
|
-
): Promise<ApiColumn[]> {
|
|
579
|
+
// Create a DuckDB source then:
|
|
580
|
+
// 1. Load the model and get the table schema from model
|
|
581
|
+
// 2. Run a query to get the row count from the table
|
|
582
|
+
// ConnectionRuntime is cheap (just a wrapper), and creating one
|
|
583
|
+
// per call keeps each probe's compile state isolated. The
|
|
584
|
+
// expensive piece — the underlying DuckDBConnection — is shared
|
|
585
|
+
// across all probes via `conn` (resolved once in readDatabases).
|
|
527
586
|
const runtime = new ConnectionRuntime({
|
|
528
587
|
urlReader: new EmptyURLReader(),
|
|
529
588
|
connections: [conn],
|
|
530
589
|
});
|
|
590
|
+
// Normalize path to use forward slashes for cross-platform compatibility
|
|
591
|
+
// DuckDB on Windows supports forward slashes, and this avoids escaping issues
|
|
592
|
+
const normalizedPath = fullPath.replace(/\\/g, "/");
|
|
531
593
|
const model = runtime.loadModel(
|
|
532
594
|
`source: temp is duckdb.table('${normalizedPath}')`,
|
|
533
595
|
);
|
|
534
596
|
const modelDef = await model.getModel();
|
|
535
597
|
const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
|
|
536
|
-
|
|
598
|
+
const schema = fields.map((field): ApiColumn => {
|
|
537
599
|
return { type: field.type, name: field.name };
|
|
538
600
|
});
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
/**
|
|
542
|
-
* Worker-pool schema probe. The synthetic Malloy snippet is compiled
|
|
543
|
-
* in a worker_threads worker; the worker's schema-fetch RPC bounces
|
|
544
|
-
* back to the main thread, which services it against the
|
|
545
|
-
* `MalloyConfig` we hold on `conn` below. The returned modelDef has
|
|
546
|
-
* the resolved field list ready to read.
|
|
547
|
-
*/
|
|
548
|
-
private static async getSchemaViaPool(
|
|
549
|
-
pool: ReturnType<typeof getCompilePool>,
|
|
550
|
-
packagePath: string,
|
|
551
|
-
normalizedPath: string,
|
|
552
|
-
conn: DuckDBConnection,
|
|
553
|
-
): Promise<ApiColumn[]> {
|
|
554
|
-
// Keep schema-fetch RPCs from the worker routed to *this* conn
|
|
555
|
-
// (same instance used by the row-count SQL) by wrapping it in a
|
|
556
|
-
// MalloyConfig. We don't ship this config across the worker
|
|
557
|
-
// boundary; the pool holds it on the main side.
|
|
558
|
-
const malloyConfig = new MalloyConfig(
|
|
559
|
-
{ connections: {} },
|
|
560
|
-
{ config: contextOverlay({ rootDirectory: packagePath }) },
|
|
601
|
+
const runner = model.loadQuery(
|
|
602
|
+
"run: temp->{aggregate: row_count is count()}",
|
|
561
603
|
);
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
}));
|
|
566
|
-
|
|
567
|
-
try {
|
|
568
|
-
const outcome = await pool.compileInline({
|
|
569
|
-
packagePath,
|
|
570
|
-
source: `source: temp is duckdb.table('${normalizedPath}')`,
|
|
571
|
-
malloyConfig,
|
|
572
|
-
defaultConnectionName: "duckdb",
|
|
573
|
-
});
|
|
574
|
-
const modelDef = outcome.modelDef as unknown as {
|
|
575
|
-
contents: Record<string, SourceDef>;
|
|
576
|
-
};
|
|
577
|
-
const fields = modelDef.contents["temp"].fields;
|
|
578
|
-
return fields.map((field): ApiColumn => {
|
|
579
|
-
return { type: field.type, name: field.name };
|
|
580
|
-
});
|
|
581
|
-
} catch (error) {
|
|
582
|
-
// Transient pool issues (worker exit, RPC timeout) shouldn't
|
|
583
|
-
// break package loading. Compile errors here would mean the
|
|
584
|
-
// file isn't a readable parquet/csv, which in-process would
|
|
585
|
-
// also throw — so let those propagate.
|
|
586
|
-
if (
|
|
587
|
-
error instanceof Error &&
|
|
588
|
-
!/timed out|exited unexpectedly|shutting down/i.test(error.message)
|
|
589
|
-
) {
|
|
590
|
-
throw error;
|
|
591
|
-
}
|
|
592
|
-
logger.warn(
|
|
593
|
-
"Compile worker failed for database probe; falling back to in-process",
|
|
594
|
-
{ normalizedPath, error: (error as Error).message },
|
|
595
|
-
);
|
|
596
|
-
return Package.getSchemaInProcess(normalizedPath, conn);
|
|
597
|
-
}
|
|
604
|
+
const result = await runner.run();
|
|
605
|
+
const rowCount = result.data.value[0].row_count?.valueOf() as number;
|
|
606
|
+
return { name: databasePath, rowCount, columns: schema };
|
|
598
607
|
}
|
|
599
608
|
|
|
600
609
|
public setName(name: string) {
|