@malloy-publisher/server 0.0.198-dev3 → 0.0.198-dev6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +26 -12
- package/dist/instrumentation.mjs +37 -37
- package/dist/package_load_worker.mjs +12213 -0
- package/dist/server.mjs +1091 -706
- package/package.json +1 -1
- package/src/health.ts +15 -0
- package/src/package_load/package_load_pool.spec.ts +252 -0
- package/src/package_load/package_load_pool.ts +920 -0
- package/src/package_load/package_load_worker.ts +980 -0
- package/src/package_load/protocol.ts +336 -0
- package/src/server.ts +12 -5
- package/src/service/environment_store.ts +0 -9
- package/src/service/given.ts +80 -0
- package/src/service/model.ts +253 -66
- package/src/service/package.spec.ts +17 -11
- package/src/service/package.ts +294 -178
- package/src/service/package_worker_path.spec.ts +196 -0
- package/dist/service/schema_worker.mjs +0 -61
- package/src/service/process_stats_reporter.ts +0 -169
- package/src/service/schema_worker.ts +0 -123
- package/src/service/schema_worker_pool.ts +0 -287
- package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +0 -235
package/src/service/package.ts
CHANGED
|
@@ -1,32 +1,43 @@
|
|
|
1
1
|
import * as fs from "fs/promises";
|
|
2
2
|
import * as path from "path";
|
|
3
3
|
|
|
4
|
+
import "@malloydata/db-duckdb/native";
|
|
4
5
|
import {
|
|
5
6
|
Connection,
|
|
7
|
+
ConnectionRuntime,
|
|
6
8
|
contextOverlay,
|
|
9
|
+
EmptyURLReader,
|
|
7
10
|
FixedConnectionMap,
|
|
8
11
|
MalloyConfig,
|
|
12
|
+
MalloyError,
|
|
13
|
+
SourceDef,
|
|
9
14
|
} from "@malloydata/malloy";
|
|
10
15
|
import { metrics } from "@opentelemetry/api";
|
|
11
16
|
import recursive from "recursive-readdir";
|
|
12
17
|
import { components } from "../api";
|
|
18
|
+
import { getPackageLoadPool } from "../package_load/package_load_pool";
|
|
13
19
|
import {
|
|
14
20
|
API_PREFIX,
|
|
15
21
|
MODEL_FILE_SUFFIX,
|
|
16
22
|
NOTEBOOK_FILE_SUFFIX,
|
|
17
23
|
PACKAGE_MANIFEST_NAME,
|
|
18
24
|
} from "../constants";
|
|
19
|
-
import {
|
|
25
|
+
import {
|
|
26
|
+
ModelCompilationError,
|
|
27
|
+
PackageNotFoundError,
|
|
28
|
+
ServiceUnavailableError,
|
|
29
|
+
} from "../errors";
|
|
20
30
|
import { formatDuration, logger } from "../logger";
|
|
21
31
|
import { BuildManifest } from "../storage/DatabaseInterface";
|
|
22
32
|
import { ignoreDotfiles } from "../utils";
|
|
23
33
|
import { Model } from "./model";
|
|
24
|
-
import { getSchemaWorkerPool } from "./schema_worker_pool";
|
|
25
34
|
|
|
26
35
|
type ApiDatabase = components["schemas"]["Database"];
|
|
27
36
|
type ApiModel = components["schemas"]["Model"];
|
|
28
37
|
type ApiNotebook = components["schemas"]["Notebook"];
|
|
29
38
|
export type ApiPackage = components["schemas"]["Package"];
|
|
39
|
+
type ApiColumn = components["schemas"]["Column"];
|
|
40
|
+
type ApiTableDescription = components["schemas"]["TableDescription"];
|
|
30
41
|
// A thunk lets callers pass a live reference to the *current* environment
|
|
31
42
|
// MalloyConfig so the package wrapper resolves environment connections against the
|
|
32
43
|
// generation that's active at lookup time, not the one that was current when
|
|
@@ -87,26 +98,13 @@ export class Package {
|
|
|
87
98
|
duration: formatDuration(manifestValidationTime - startTime),
|
|
88
99
|
});
|
|
89
100
|
|
|
90
|
-
let packageMalloyConfig: MalloyConfig | undefined;
|
|
91
|
-
|
|
92
101
|
try {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
),
|
|
100
|
-
});
|
|
101
|
-
packageConfig.resource = `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`;
|
|
102
|
-
|
|
103
|
-
const databases = await Package.readDatabases(packagePath);
|
|
104
|
-
const databasesTime = performance.now();
|
|
105
|
-
logger.info("Databases read completed", {
|
|
106
|
-
packageName,
|
|
107
|
-
databaseCount: databases.length,
|
|
108
|
-
duration: formatDuration(databasesTime - packageConfigTime),
|
|
109
|
-
});
|
|
102
|
+
// The MalloyConfig is always built on the main thread — it
|
|
103
|
+
// owns the live native connection handles the package needs
|
|
104
|
+
// to *serve queries* after load (workers can't share native
|
|
105
|
+
// handles across the V8 isolate boundary). The worker proxies
|
|
106
|
+
// non-duckdb connection lookups back through this MalloyConfig
|
|
107
|
+
// during compile.
|
|
110
108
|
const malloyConfig = Package.buildPackageMalloyConfig(
|
|
111
109
|
packagePath,
|
|
112
110
|
typeof environmentMalloyConfig === "function"
|
|
@@ -114,80 +112,30 @@ export class Package {
|
|
|
114
112
|
: () => Package.toMalloyConfig(environmentMalloyConfig),
|
|
115
113
|
);
|
|
116
114
|
|
|
117
|
-
|
|
118
|
-
packageName,
|
|
119
|
-
packagePath,
|
|
120
|
-
malloyConfig,
|
|
121
|
-
);
|
|
122
|
-
const modelsTime = performance.now();
|
|
123
|
-
logger.info("Models loaded", {
|
|
124
|
-
packageName,
|
|
125
|
-
modelCount: models.size,
|
|
126
|
-
duration: formatDuration(modelsTime - databasesTime),
|
|
127
|
-
});
|
|
128
|
-
for (const [modelPath, model] of models.entries()) {
|
|
129
|
-
const maybeModel = model as unknown as {
|
|
130
|
-
compilationError?: unknown;
|
|
131
|
-
};
|
|
132
|
-
if (maybeModel.compilationError) {
|
|
133
|
-
const err = maybeModel.compilationError;
|
|
134
|
-
const message =
|
|
135
|
-
err instanceof Error
|
|
136
|
-
? err.message
|
|
137
|
-
: `Unknown compilation error in ${modelPath}`;
|
|
138
|
-
|
|
139
|
-
logger.error("Model compilation failed", {
|
|
140
|
-
packageName,
|
|
141
|
-
modelPath,
|
|
142
|
-
error: message,
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
this.packageLoadHistogram.record(performance.now() - startTime, {
|
|
146
|
-
malloy_package_name: packageName,
|
|
147
|
-
status: "compilation_error",
|
|
148
|
-
});
|
|
149
|
-
throw err;
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
const endTime = performance.now();
|
|
153
|
-
const executionTime = endTime - startTime;
|
|
154
|
-
this.packageLoadHistogram.record(executionTime, {
|
|
155
|
-
malloy_package_name: packageName,
|
|
156
|
-
status: "success",
|
|
157
|
-
});
|
|
158
|
-
logger.info(`Successfully loaded package ${packageName}`, {
|
|
159
|
-
packageName,
|
|
160
|
-
duration: formatDuration(executionTime),
|
|
161
|
-
});
|
|
162
|
-
return new Package(
|
|
115
|
+
return await Package.loadViaWorker(
|
|
163
116
|
environmentName,
|
|
164
117
|
packageName,
|
|
165
118
|
packagePath,
|
|
166
|
-
packageConfig,
|
|
167
|
-
databases,
|
|
168
|
-
models,
|
|
169
119
|
malloyConfig,
|
|
120
|
+
startTime,
|
|
121
|
+
manifestValidationTime,
|
|
170
122
|
);
|
|
171
123
|
} catch (error) {
|
|
172
124
|
logger.error(`Error loading package ${packageName}`, { error });
|
|
173
125
|
console.error(error);
|
|
174
126
|
const endTime = performance.now();
|
|
175
127
|
const executionTime = endTime - startTime;
|
|
128
|
+
const status =
|
|
129
|
+
error instanceof ModelCompilationError ||
|
|
130
|
+
error instanceof MalloyError
|
|
131
|
+
? "compilation_error"
|
|
132
|
+
: error instanceof ServiceUnavailableError
|
|
133
|
+
? "pool_unavailable"
|
|
134
|
+
: "error";
|
|
176
135
|
this.packageLoadHistogram.record(executionTime, {
|
|
177
136
|
malloy_package_name: packageName,
|
|
178
|
-
status
|
|
137
|
+
status,
|
|
179
138
|
});
|
|
180
|
-
|
|
181
|
-
if (packageMalloyConfig) {
|
|
182
|
-
try {
|
|
183
|
-
await packageMalloyConfig.shutdown("close");
|
|
184
|
-
} catch (releaseError) {
|
|
185
|
-
logger.warn(
|
|
186
|
-
`Failed to release package-local DuckDB for ${packageName}`,
|
|
187
|
-
{ error: releaseError },
|
|
188
|
-
);
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
139
|
// Clean up package directory on failure
|
|
192
140
|
try {
|
|
193
141
|
await fs.rm(packagePath, {
|
|
@@ -204,6 +152,141 @@ export class Package {
|
|
|
204
152
|
}
|
|
205
153
|
}
|
|
206
154
|
|
|
155
|
+
/**
|
|
156
|
+
* Load the package via the package-load worker pool. The worker
|
|
157
|
+
* performs the CPU-bound bulk of the load off-thread (manifest
|
|
158
|
+
* read, every `.malloy` / `.malloynb` compile) and ships back a
|
|
159
|
+
* structured-clonable `LoadPackageOutcome`. Database probes
|
|
160
|
+
* (`.parquet` / `.csv`) run on the main thread, in parallel with
|
|
161
|
+
* the worker compile, against the package's existing DuckDB
|
|
162
|
+
* connection — they're async-IO-bound and don't compete with the
|
|
163
|
+
* worker for CPU.
|
|
164
|
+
*
|
|
165
|
+
* Pool-infrastructure failures (worker crash, RPC timeout, pool
|
|
166
|
+
* shutting down) are rewrapped as `ServiceUnavailableError` so
|
|
167
|
+
* the HTTP layer responds 503 (transient, retryable). Real compile
|
|
168
|
+
* errors (`MalloyError` / `ModelCompilationError`) propagate
|
|
169
|
+
* unchanged so they keep their 4xx mapping.
|
|
170
|
+
*/
|
|
171
|
+
private static async loadViaWorker(
|
|
172
|
+
environmentName: string,
|
|
173
|
+
packageName: string,
|
|
174
|
+
packagePath: string,
|
|
175
|
+
malloyConfig: MalloyConfig,
|
|
176
|
+
startTime: number,
|
|
177
|
+
manifestValidationTime: number,
|
|
178
|
+
): Promise<Package> {
|
|
179
|
+
const pool = getPackageLoadPool();
|
|
180
|
+
const dispatchTime = performance.now();
|
|
181
|
+
// Submit the worker job and run database probing on the main
|
|
182
|
+
// thread in parallel. We isolate the worker-job promise inside
|
|
183
|
+
// a wrapper so we can map pool-infrastructure failures (worker
|
|
184
|
+
// crash, RPC timeout, pool shutting down) to a 503 without
|
|
185
|
+
// accidentally re-mapping `readDatabases`'s own errors.
|
|
186
|
+
const workerOutcome = pool
|
|
187
|
+
.loadPackage({
|
|
188
|
+
packagePath,
|
|
189
|
+
packageName,
|
|
190
|
+
malloyConfig,
|
|
191
|
+
defaultConnectionName: "duckdb",
|
|
192
|
+
})
|
|
193
|
+
.catch((err: unknown) => {
|
|
194
|
+
// Compile errors surface in-band via
|
|
195
|
+
// `LoadPackageOutcome.models[i].compilationError`; if the
|
|
196
|
+
// pool itself rejects, it's an infra-side failure
|
|
197
|
+
// (shutting down, worker spawn failed, worker crashed,
|
|
198
|
+
// RPC timeout) and the client should retry. Real Malloy
|
|
199
|
+
// compile errors deserialised by the pool still carry
|
|
200
|
+
// their MalloyError / ModelCompilationError identity —
|
|
201
|
+
// let those bubble untouched so they keep their 4xx
|
|
202
|
+
// mapping in `errors.ts`.
|
|
203
|
+
const realError =
|
|
204
|
+
err instanceof Error
|
|
205
|
+
? err
|
|
206
|
+
: new Error(
|
|
207
|
+
`Package-load worker pool failure: ${String(err)}`,
|
|
208
|
+
);
|
|
209
|
+
if (
|
|
210
|
+
realError instanceof MalloyError ||
|
|
211
|
+
realError instanceof ModelCompilationError
|
|
212
|
+
) {
|
|
213
|
+
throw realError;
|
|
214
|
+
}
|
|
215
|
+
throw new ServiceUnavailableError(
|
|
216
|
+
`Package-load worker pool unavailable: ${realError.message}`,
|
|
217
|
+
);
|
|
218
|
+
});
|
|
219
|
+
const [outcome, databases] = await Promise.all([
|
|
220
|
+
workerOutcome,
|
|
221
|
+
Package.readDatabases(packagePath, malloyConfig),
|
|
222
|
+
]);
|
|
223
|
+
const workerDoneTime = performance.now();
|
|
224
|
+
logger.info("Package load via worker pool completed", {
|
|
225
|
+
packageName,
|
|
226
|
+
manifestValidationMs: dispatchTime - manifestValidationTime,
|
|
227
|
+
workerDurationMs: outcome.loadDurationMs,
|
|
228
|
+
dispatchOverheadMs:
|
|
229
|
+
workerDoneTime - dispatchTime - outcome.loadDurationMs,
|
|
230
|
+
modelCount: outcome.models.length,
|
|
231
|
+
databaseCount: databases.length,
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Override the manifest-derived resource URI — the worker only
|
|
235
|
+
// returns name/description from publisher.json, but the rest of
|
|
236
|
+
// the API surface expects a `resource` field too.
|
|
237
|
+
const packageConfig: ApiPackage = {
|
|
238
|
+
name: outcome.packageMetadata.name,
|
|
239
|
+
description: outcome.packageMetadata.description,
|
|
240
|
+
resource: `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`,
|
|
241
|
+
};
|
|
242
|
+
|
|
243
|
+
// Build live `Model`s from worker output. Any per-model compile
|
|
244
|
+
// failure aborts the load — matches the historical behaviour of
|
|
245
|
+
// `Package.create` failing the whole package on the first model
|
|
246
|
+
// error. (`Package.reloadAllModels` keeps the failed-model
|
|
247
|
+
// placeholders instead; that branch goes through a different
|
|
248
|
+
// hydration path.)
|
|
249
|
+
const models = new Map<string, Model>();
|
|
250
|
+
for (const sm of outcome.models) {
|
|
251
|
+
if (sm.compilationError) {
|
|
252
|
+
const err = Model.deserializeCompilationError(sm.compilationError);
|
|
253
|
+
logger.error("Model compilation failed", {
|
|
254
|
+
packageName,
|
|
255
|
+
modelPath: sm.modelPath,
|
|
256
|
+
error: err.message,
|
|
257
|
+
});
|
|
258
|
+
// The outer catch in Package.create records the metric +
|
|
259
|
+
// cleans the package directory.
|
|
260
|
+
throw err;
|
|
261
|
+
}
|
|
262
|
+
models.set(
|
|
263
|
+
sm.modelPath,
|
|
264
|
+
Model.fromSerialized(packageName, packagePath, malloyConfig, sm),
|
|
265
|
+
);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
const endTime = performance.now();
|
|
269
|
+
const executionTime = endTime - startTime;
|
|
270
|
+
this.packageLoadHistogram.record(executionTime, {
|
|
271
|
+
malloy_package_name: packageName,
|
|
272
|
+
status: "success",
|
|
273
|
+
});
|
|
274
|
+
logger.info(`Successfully loaded package ${packageName}`, {
|
|
275
|
+
packageName,
|
|
276
|
+
duration: formatDuration(executionTime),
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
return new Package(
|
|
280
|
+
environmentName,
|
|
281
|
+
packageName,
|
|
282
|
+
packagePath,
|
|
283
|
+
packageConfig,
|
|
284
|
+
databases,
|
|
285
|
+
models,
|
|
286
|
+
malloyConfig,
|
|
287
|
+
);
|
|
288
|
+
}
|
|
289
|
+
|
|
207
290
|
public getPackageName(): string {
|
|
208
291
|
return this.packageName;
|
|
209
292
|
}
|
|
@@ -238,6 +321,21 @@ export class Package {
|
|
|
238
321
|
return Array.from(this.models.keys());
|
|
239
322
|
}
|
|
240
323
|
|
|
324
|
+
/**
|
|
325
|
+
* Re-compile every model in the package against a new build
|
|
326
|
+
* manifest (called after a materialization build commits new
|
|
327
|
+
* physicalised tables). Runs through the package-load worker pool
|
|
328
|
+
* — same off-main-thread compile path as initial `Package.create`
|
|
329
|
+
* — so a reload of a large package can't block the K8s liveness
|
|
330
|
+
* probe.
|
|
331
|
+
*
|
|
332
|
+
* Unlike `Package.create`, a per-model compile failure here does
|
|
333
|
+
* NOT abort the reload: we keep the failed model as a placeholder
|
|
334
|
+
* (`Model.fromCompilationError`) in `this.models`, matching the
|
|
335
|
+
* historical reload semantics. Whole-pool failures (worker crash,
|
|
336
|
+
* timeout, pool shutting down) propagate as `ServiceUnavailableError`
|
|
337
|
+
* — the caller (manifest service) decides how to retry.
|
|
338
|
+
*/
|
|
241
339
|
public async reloadAllModels(
|
|
242
340
|
buildManifest: BuildManifest["entries"],
|
|
243
341
|
): Promise<void> {
|
|
@@ -247,20 +345,62 @@ export class Package {
|
|
|
247
345
|
modelCount: modelPaths.length,
|
|
248
346
|
manifestEntryCount: Object.keys(buildManifest).length,
|
|
249
347
|
});
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
348
|
+
|
|
349
|
+
const pool = getPackageLoadPool();
|
|
350
|
+
let outcome;
|
|
351
|
+
try {
|
|
352
|
+
outcome = await pool.loadPackage({
|
|
353
|
+
packagePath: this.packagePath,
|
|
354
|
+
packageName: this.packageName,
|
|
355
|
+
malloyConfig: this.malloyConfig,
|
|
356
|
+
defaultConnectionName: "duckdb",
|
|
357
|
+
buildManifest,
|
|
358
|
+
});
|
|
359
|
+
} catch (err) {
|
|
360
|
+
const realError =
|
|
361
|
+
err instanceof Error
|
|
362
|
+
? err
|
|
363
|
+
: new Error(`Package-load worker pool failure: ${String(err)}`);
|
|
364
|
+
if (
|
|
365
|
+
realError instanceof MalloyError ||
|
|
366
|
+
realError instanceof ModelCompilationError
|
|
367
|
+
) {
|
|
368
|
+
throw realError;
|
|
369
|
+
}
|
|
370
|
+
throw new ServiceUnavailableError(
|
|
371
|
+
`Package-load worker pool unavailable: ${realError.message}`,
|
|
372
|
+
);
|
|
373
|
+
}
|
|
374
|
+
|
|
261
375
|
const nextModels = new Map<string, Model>();
|
|
262
|
-
for (const
|
|
263
|
-
|
|
376
|
+
for (const sm of outcome.models) {
|
|
377
|
+
if (sm.compilationError) {
|
|
378
|
+
const err = Model.deserializeCompilationError(sm.compilationError);
|
|
379
|
+
logger.warn("Model compilation failed during reload", {
|
|
380
|
+
packageName: this.packageName,
|
|
381
|
+
modelPath: sm.modelPath,
|
|
382
|
+
error: err.message,
|
|
383
|
+
});
|
|
384
|
+
nextModels.set(
|
|
385
|
+
sm.modelPath,
|
|
386
|
+
Model.fromCompilationError(
|
|
387
|
+
this.packageName,
|
|
388
|
+
sm.modelPath,
|
|
389
|
+
sm.modelType,
|
|
390
|
+
err,
|
|
391
|
+
),
|
|
392
|
+
);
|
|
393
|
+
} else {
|
|
394
|
+
nextModels.set(
|
|
395
|
+
sm.modelPath,
|
|
396
|
+
Model.fromSerialized(
|
|
397
|
+
this.packageName,
|
|
398
|
+
this.packagePath,
|
|
399
|
+
this.malloyConfig,
|
|
400
|
+
sm,
|
|
401
|
+
),
|
|
402
|
+
);
|
|
403
|
+
}
|
|
264
404
|
}
|
|
265
405
|
this.models = nextModels;
|
|
266
406
|
}
|
|
@@ -323,20 +463,6 @@ export class Package {
|
|
|
323
463
|
);
|
|
324
464
|
}
|
|
325
465
|
|
|
326
|
-
private static async loadModels(
|
|
327
|
-
packageName: string,
|
|
328
|
-
packagePath: string,
|
|
329
|
-
malloyConfig: MalloyConfig,
|
|
330
|
-
): Promise<Map<string, Model>> {
|
|
331
|
-
const modelPaths = await Package.getModelPaths(packagePath);
|
|
332
|
-
const models = await Promise.all(
|
|
333
|
-
modelPaths.map((modelPath) =>
|
|
334
|
-
Model.create(packageName, packagePath, modelPath, malloyConfig),
|
|
335
|
-
),
|
|
336
|
-
);
|
|
337
|
-
return new Map(models.map((model) => [model.getPath(), model]));
|
|
338
|
-
}
|
|
339
|
-
|
|
340
466
|
private static buildPackageMalloyConfig(
|
|
341
467
|
packagePath: string,
|
|
342
468
|
getEnvironmentMalloyConfig: () => MalloyConfig,
|
|
@@ -386,27 +512,6 @@ export class Package {
|
|
|
386
512
|
return malloyConfig;
|
|
387
513
|
}
|
|
388
514
|
|
|
389
|
-
private static async getModelPaths(packagePath: string): Promise<string[]> {
|
|
390
|
-
let files = undefined;
|
|
391
|
-
try {
|
|
392
|
-
files = await recursive(packagePath, [ignoreDotfiles]);
|
|
393
|
-
} catch (error) {
|
|
394
|
-
logger.error(error);
|
|
395
|
-
throw new PackageNotFoundError(
|
|
396
|
-
`Package config for ${packagePath} does not exist.`,
|
|
397
|
-
);
|
|
398
|
-
}
|
|
399
|
-
return files
|
|
400
|
-
.map((fullPath: string) => {
|
|
401
|
-
return path.relative(packagePath, fullPath).replace(/\\/g, "/");
|
|
402
|
-
})
|
|
403
|
-
.filter(
|
|
404
|
-
(modelPath: string) =>
|
|
405
|
-
modelPath.endsWith(MODEL_FILE_SUFFIX) ||
|
|
406
|
-
modelPath.endsWith(NOTEBOOK_FILE_SUFFIX),
|
|
407
|
-
);
|
|
408
|
-
}
|
|
409
|
-
|
|
410
515
|
private static async validatePackageManifestExistsOrThrowError(
|
|
411
516
|
packagePath: string,
|
|
412
517
|
) {
|
|
@@ -421,59 +526,33 @@ export class Package {
|
|
|
421
526
|
}
|
|
422
527
|
}
|
|
423
528
|
|
|
424
|
-
private static async readPackageConfig(
|
|
425
|
-
packagePath: string,
|
|
426
|
-
): Promise<ApiPackage> {
|
|
427
|
-
const packageConfigPath = path.join(packagePath, PACKAGE_MANIFEST_NAME);
|
|
428
|
-
const packageConfigContents = await fs.readFile(packageConfigPath);
|
|
429
|
-
// TODO: Validate package manifest. Define manifest type in public API.
|
|
430
|
-
const packageManifest = JSON.parse(packageConfigContents.toString());
|
|
431
|
-
return {
|
|
432
|
-
name: packageManifest.name,
|
|
433
|
-
description: packageManifest.description,
|
|
434
|
-
};
|
|
435
|
-
}
|
|
436
|
-
|
|
437
529
|
private static async readDatabases(
|
|
438
530
|
packagePath: string,
|
|
531
|
+
malloyConfig: MalloyConfig,
|
|
439
532
|
): Promise<ApiDatabase[]> {
|
|
440
533
|
const databasePaths = await Package.getDatabasePaths(packagePath);
|
|
441
|
-
if (databasePaths.length === 0)
|
|
442
|
-
|
|
443
|
-
// Off-main-thread: schema introspection runs in the
|
|
444
|
-
// SchemaWorkerPool so DuckDB's native thread pool lives inside
|
|
445
|
-
// a worker we control. This is the leak class that OOM-killed
|
|
446
|
-
// prod (466 leaked Bun Pool threads on worker-76b49bdb89-8bsv4)
|
|
447
|
-
// — worker isolation puts a hard ceiling on per-package native
|
|
448
|
-
// thread usage and the worker's connection is reused across all
|
|
449
|
-
// schema queries for the life of the process.
|
|
450
|
-
const pool = getSchemaWorkerPool();
|
|
451
|
-
const settled = await Promise.allSettled(
|
|
452
|
-
databasePaths.map((databasePath) =>
|
|
453
|
-
pool.submit(packagePath, databasePath),
|
|
454
|
-
),
|
|
455
|
-
);
|
|
456
|
-
|
|
457
|
-
const results: ApiDatabase[] = [];
|
|
458
|
-
for (let i = 0; i < settled.length; i++) {
|
|
459
|
-
const outcome = settled[i];
|
|
460
|
-
if (outcome.status === "fulfilled") {
|
|
461
|
-
results.push({
|
|
462
|
-
path: databasePaths[i],
|
|
463
|
-
info: outcome.value,
|
|
464
|
-
type: "embedded",
|
|
465
|
-
});
|
|
466
|
-
} else {
|
|
467
|
-
// A single bad parquet (corrupt footer, unsupported type)
|
|
468
|
-
// must not fail the whole package load. Log and skip.
|
|
469
|
-
logger.warn("Schema introspection failed for database", {
|
|
470
|
-
packagePath,
|
|
471
|
-
databasePath: databasePaths[i],
|
|
472
|
-
error: outcome.reason,
|
|
473
|
-
});
|
|
474
|
-
}
|
|
534
|
+
if (databasePaths.length === 0) {
|
|
535
|
+
return [];
|
|
475
536
|
}
|
|
476
|
-
|
|
537
|
+
// Resolve the package's duckdb connection ONCE and reuse it for
|
|
538
|
+
// every schema/row-count probe in this package. Malloy caches the
|
|
539
|
+
// materialized connection on the MalloyConfig so the same instance
|
|
540
|
+
// will be returned to model compiles later in `Package.create`.
|
|
541
|
+
// This is the substantive optimization over the previous code:
|
|
542
|
+
// we go from `databasePaths.length` separate DuckDBConnections
|
|
543
|
+
// (each doing its own native init + extension load) to one.
|
|
544
|
+
const conn = await malloyConfig.connections.lookupConnection("duckdb");
|
|
545
|
+
return await Promise.all(
|
|
546
|
+
databasePaths.map(async (databasePath) => ({
|
|
547
|
+
path: databasePath,
|
|
548
|
+
info: await Package.getDatabaseInfo(
|
|
549
|
+
packagePath,
|
|
550
|
+
databasePath,
|
|
551
|
+
conn,
|
|
552
|
+
),
|
|
553
|
+
type: "embedded" as const,
|
|
554
|
+
})),
|
|
555
|
+
);
|
|
477
556
|
}
|
|
478
557
|
|
|
479
558
|
private static async getDatabasePaths(
|
|
@@ -490,6 +569,43 @@ export class Package {
|
|
|
490
569
|
);
|
|
491
570
|
}
|
|
492
571
|
|
|
572
|
+
private static async getDatabaseInfo(
|
|
573
|
+
packagePath: string,
|
|
574
|
+
databasePath: string,
|
|
575
|
+
conn: Connection,
|
|
576
|
+
): Promise<ApiTableDescription> {
|
|
577
|
+
const fullPath = path.join(packagePath, databasePath);
|
|
578
|
+
|
|
579
|
+
// Create a DuckDB source then:
|
|
580
|
+
// 1. Load the model and get the table schema from model
|
|
581
|
+
// 2. Run a query to get the row count from the table
|
|
582
|
+
// ConnectionRuntime is cheap (just a wrapper), and creating one
|
|
583
|
+
// per call keeps each probe's compile state isolated. The
|
|
584
|
+
// expensive piece — the underlying DuckDBConnection — is shared
|
|
585
|
+
// across all probes via `conn` (resolved once in readDatabases).
|
|
586
|
+
const runtime = new ConnectionRuntime({
|
|
587
|
+
urlReader: new EmptyURLReader(),
|
|
588
|
+
connections: [conn],
|
|
589
|
+
});
|
|
590
|
+
// Normalize path to use forward slashes for cross-platform compatibility
|
|
591
|
+
// DuckDB on Windows supports forward slashes, and this avoids escaping issues
|
|
592
|
+
const normalizedPath = fullPath.replace(/\\/g, "/");
|
|
593
|
+
const model = runtime.loadModel(
|
|
594
|
+
`source: temp is duckdb.table('${normalizedPath}')`,
|
|
595
|
+
);
|
|
596
|
+
const modelDef = await model.getModel();
|
|
597
|
+
const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
|
|
598
|
+
const schema = fields.map((field): ApiColumn => {
|
|
599
|
+
return { type: field.type, name: field.name };
|
|
600
|
+
});
|
|
601
|
+
const runner = model.loadQuery(
|
|
602
|
+
"run: temp->{aggregate: row_count is count()}",
|
|
603
|
+
);
|
|
604
|
+
const result = await runner.run();
|
|
605
|
+
const rowCount = result.data.value[0].row_count?.valueOf() as number;
|
|
606
|
+
return { name: databasePath, rowCount, columns: schema };
|
|
607
|
+
}
|
|
608
|
+
|
|
493
609
|
public setName(name: string) {
|
|
494
610
|
this.packageName = name;
|
|
495
611
|
}
|