@malloy-publisher/server 0.0.198 → 0.0.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +51 -0
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-Dpee_Kn6.js} +1 -1
  4. package/dist/app/assets/{HomePage-DwkH7OrS.js → HomePage-DLRWTNoL.js} +1 -1
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-DsVt5QGM.js} +1 -1
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-AwAugZ37.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-XQ-EWGTC.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-3Mv8JQw7.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DHYYpcYc.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-DfcpQGVP.es-DQggNOdX.js} +1 -1
  11. package/dist/app/assets/{index-DNofXMxi.js → index-BUp81Qdm.js} +1 -1
  12. package/dist/app/assets/{index-DL6BZTuw.js → index-D1pdwrUW.js} +1 -1
  13. package/dist/app/assets/{index-U38AyjJL.js → index-Dv5bF4Ii.js} +4 -4
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-CQH4LZU8.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +2807 -2729
  19. package/package.json +2 -3
  20. package/src/controller/compile.controller.ts +3 -1
  21. package/src/controller/model.controller.ts +8 -1
  22. package/src/controller/query.controller.ts +3 -0
  23. package/src/health.spec.ts +90 -0
  24. package/src/health.ts +88 -45
  25. package/src/instrumentation.ts +50 -0
  26. package/src/mcp/tools/execute_query_tool.ts +12 -0
  27. package/src/package_load/package_load_pool.spec.ts +252 -0
  28. package/src/package_load/package_load_pool.ts +920 -0
  29. package/src/package_load/package_load_worker.ts +980 -0
  30. package/src/package_load/protocol.ts +336 -0
  31. package/src/query_param_utils.ts +18 -0
  32. package/src/server-old.ts +1 -1
  33. package/src/server.ts +36 -10
  34. package/src/service/db_utils.spec.ts +1 -1
  35. package/src/service/environment.ts +3 -2
  36. package/src/service/environment_store.ts +24 -3
  37. package/src/service/filter_integration.spec.ts +110 -0
  38. package/src/service/given.ts +80 -0
  39. package/src/service/givens_integration.spec.ts +192 -0
  40. package/src/service/model.spec.ts +105 -0
  41. package/src/service/model.ts +287 -16
  42. package/src/service/package.spec.ts +10 -0
  43. package/src/service/package.ts +257 -145
  44. package/src/service/package_worker_path.spec.ts +196 -0
  45. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
@@ -1,7 +1,6 @@
1
1
  import * as fs from "fs/promises";
2
2
  import * as path from "path";
3
3
 
4
- import { DuckDBConnection } from "@malloydata/db-duckdb";
5
4
  import "@malloydata/db-duckdb/native";
6
5
  import {
7
6
  Connection,
@@ -10,18 +9,24 @@ import {
10
9
  EmptyURLReader,
11
10
  FixedConnectionMap,
12
11
  MalloyConfig,
12
+ MalloyError,
13
13
  SourceDef,
14
14
  } from "@malloydata/malloy";
15
15
  import { metrics } from "@opentelemetry/api";
16
16
  import recursive from "recursive-readdir";
17
17
  import { components } from "../api";
18
+ import { getPackageLoadPool } from "../package_load/package_load_pool";
18
19
  import {
19
20
  API_PREFIX,
20
21
  MODEL_FILE_SUFFIX,
21
22
  NOTEBOOK_FILE_SUFFIX,
22
23
  PACKAGE_MANIFEST_NAME,
23
24
  } from "../constants";
24
- import { PackageNotFoundError } from "../errors";
25
+ import {
26
+ ModelCompilationError,
27
+ PackageNotFoundError,
28
+ ServiceUnavailableError,
29
+ } from "../errors";
25
30
  import { formatDuration, logger } from "../logger";
26
31
  import { BuildManifest } from "../storage/DatabaseInterface";
27
32
  import { ignoreDotfiles } from "../utils";
@@ -94,23 +99,12 @@ export class Package {
94
99
  });
95
100
 
96
101
  try {
97
- const packageConfig = await Package.readPackageConfig(packagePath);
98
- const packageConfigTime = performance.now();
99
- logger.info("Package config read completed", {
100
- packageName,
101
- duration: formatDuration(
102
- packageConfigTime - manifestValidationTime,
103
- ),
104
- });
105
- packageConfig.resource = `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`;
106
-
107
- const databases = await Package.readDatabases(packagePath);
108
- const databasesTime = performance.now();
109
- logger.info("Databases read completed", {
110
- packageName,
111
- databaseCount: databases.length,
112
- duration: formatDuration(databasesTime - packageConfigTime),
113
- });
102
+ // The MalloyConfig is always built on the main thread — it
103
+ // owns the live native connection handles the package needs
104
+ // to *serve queries* after load (workers can't share native
105
+ // handles across the V8 isolate boundary). The worker proxies
106
+ // non-duckdb connection lookups back through this MalloyConfig
107
+ // during compile.
114
108
  const malloyConfig = Package.buildPackageMalloyConfig(
115
109
  packagePath,
116
110
  typeof environmentMalloyConfig === "function"
@@ -118,68 +112,29 @@ export class Package {
118
112
  : () => Package.toMalloyConfig(environmentMalloyConfig),
119
113
  );
120
114
 
121
- const models = await Package.loadModels(
122
- packageName,
123
- packagePath,
124
- malloyConfig,
125
- );
126
- const modelsTime = performance.now();
127
- logger.info("Models loaded", {
128
- packageName,
129
- modelCount: models.size,
130
- duration: formatDuration(modelsTime - databasesTime),
131
- });
132
- for (const [modelPath, model] of models.entries()) {
133
- const maybeModel = model as unknown as {
134
- compilationError?: unknown;
135
- };
136
- if (maybeModel.compilationError) {
137
- const err = maybeModel.compilationError;
138
- const message =
139
- err instanceof Error
140
- ? err.message
141
- : `Unknown compilation error in ${modelPath}`;
142
-
143
- logger.error("Model compilation failed", {
144
- packageName,
145
- modelPath,
146
- error: message,
147
- });
148
-
149
- this.packageLoadHistogram.record(performance.now() - startTime, {
150
- malloy_package_name: packageName,
151
- status: "compilation_error",
152
- });
153
- throw err;
154
- }
155
- }
156
- const endTime = performance.now();
157
- const executionTime = endTime - startTime;
158
- this.packageLoadHistogram.record(executionTime, {
159
- malloy_package_name: packageName,
160
- status: "success",
161
- });
162
- logger.info(`Successfully loaded package ${packageName}`, {
163
- packageName,
164
- duration: formatDuration(executionTime),
165
- });
166
- return new Package(
115
+ return await Package.loadViaWorker(
167
116
  environmentName,
168
117
  packageName,
169
118
  packagePath,
170
- packageConfig,
171
- databases,
172
- models,
173
119
  malloyConfig,
120
+ startTime,
121
+ manifestValidationTime,
174
122
  );
175
123
  } catch (error) {
176
124
  logger.error(`Error loading package ${packageName}`, { error });
177
125
  console.error(error);
178
126
  const endTime = performance.now();
179
127
  const executionTime = endTime - startTime;
128
+ const status =
129
+ error instanceof ModelCompilationError ||
130
+ error instanceof MalloyError
131
+ ? "compilation_error"
132
+ : error instanceof ServiceUnavailableError
133
+ ? "pool_unavailable"
134
+ : "error";
180
135
  this.packageLoadHistogram.record(executionTime, {
181
136
  malloy_package_name: packageName,
182
- status: "error",
137
+ status,
183
138
  });
184
139
  // Clean up package directory on failure
185
140
  try {
@@ -197,6 +152,141 @@ export class Package {
197
152
  }
198
153
  }
199
154
 
155
+ /**
156
+ * Load the package via the package-load worker pool. The worker
157
+ * performs the CPU-bound bulk of the load off-thread (manifest
158
+ * read, every `.malloy` / `.malloynb` compile) and ships back a
159
+ * structured-clonable `LoadPackageOutcome`. Database probes
160
+ * (`.parquet` / `.csv`) run on the main thread, in parallel with
161
+ * the worker compile, against the package's existing DuckDB
162
+ * connection — they're async-IO-bound and don't compete with the
163
+ * worker for CPU.
164
+ *
165
+ * Pool-infrastructure failures (worker crash, RPC timeout, pool
166
+ * shutting down) are rewrapped as `ServiceUnavailableError` so
167
+ * the HTTP layer responds 503 (transient, retryable). Real compile
168
+ * errors (`MalloyError` / `ModelCompilationError`) propagate
169
+ * unchanged so they keep their 4xx mapping.
170
+ */
171
+ private static async loadViaWorker(
172
+ environmentName: string,
173
+ packageName: string,
174
+ packagePath: string,
175
+ malloyConfig: MalloyConfig,
176
+ startTime: number,
177
+ manifestValidationTime: number,
178
+ ): Promise<Package> {
179
+ const pool = getPackageLoadPool();
180
+ const dispatchTime = performance.now();
181
+ // Submit the worker job and run database probing on the main
182
+ // thread in parallel. We isolate the worker-job promise inside
183
+ // a wrapper so we can map pool-infrastructure failures (worker
184
+ // crash, RPC timeout, pool shutting down) to a 503 without
185
+ // accidentally re-mapping `readDatabases`'s own errors.
186
+ const workerOutcome = pool
187
+ .loadPackage({
188
+ packagePath,
189
+ packageName,
190
+ malloyConfig,
191
+ defaultConnectionName: "duckdb",
192
+ })
193
+ .catch((err: unknown) => {
194
+ // Compile errors surface in-band via
195
+ // `LoadPackageOutcome.models[i].compilationError`; if the
196
+ // pool itself rejects, it's an infra-side failure
197
+ // (shutting down, worker spawn failed, worker crashed,
198
+ // RPC timeout) and the client should retry. Real Malloy
199
+ // compile errors deserialised by the pool still carry
200
+ // their MalloyError / ModelCompilationError identity —
201
+ // let those bubble untouched so they keep their 4xx
202
+ // mapping in `errors.ts`.
203
+ const realError =
204
+ err instanceof Error
205
+ ? err
206
+ : new Error(
207
+ `Package-load worker pool failure: ${String(err)}`,
208
+ );
209
+ if (
210
+ realError instanceof MalloyError ||
211
+ realError instanceof ModelCompilationError
212
+ ) {
213
+ throw realError;
214
+ }
215
+ throw new ServiceUnavailableError(
216
+ `Package-load worker pool unavailable: ${realError.message}`,
217
+ );
218
+ });
219
+ const [outcome, databases] = await Promise.all([
220
+ workerOutcome,
221
+ Package.readDatabases(packagePath, malloyConfig),
222
+ ]);
223
+ const workerDoneTime = performance.now();
224
+ logger.info("Package load via worker pool completed", {
225
+ packageName,
226
+ manifestValidationMs: dispatchTime - manifestValidationTime,
227
+ workerDurationMs: outcome.loadDurationMs,
228
+ dispatchOverheadMs:
229
+ workerDoneTime - dispatchTime - outcome.loadDurationMs,
230
+ modelCount: outcome.models.length,
231
+ databaseCount: databases.length,
232
+ });
233
+
234
+ // Override the manifest-derived resource URI — the worker only
235
+ // returns name/description from publisher.json, but the rest of
236
+ // the API surface expects a `resource` field too.
237
+ const packageConfig: ApiPackage = {
238
+ name: outcome.packageMetadata.name,
239
+ description: outcome.packageMetadata.description,
240
+ resource: `${API_PREFIX}/environments/${environmentName}/packages/${packageName}`,
241
+ };
242
+
243
+ // Build live `Model`s from worker output. Any per-model compile
244
+ // failure aborts the load — matches the historical behaviour of
245
+ // `Package.create` failing the whole package on the first model
246
+ // error. (`Package.reloadAllModels` keeps the failed-model
247
+ // placeholders instead; that branch goes through a different
248
+ // hydration path.)
249
+ const models = new Map<string, Model>();
250
+ for (const sm of outcome.models) {
251
+ if (sm.compilationError) {
252
+ const err = Model.deserializeCompilationError(sm.compilationError);
253
+ logger.error("Model compilation failed", {
254
+ packageName,
255
+ modelPath: sm.modelPath,
256
+ error: err.message,
257
+ });
258
+ // The outer catch in Package.create records the metric +
259
+ // cleans the package directory.
260
+ throw err;
261
+ }
262
+ models.set(
263
+ sm.modelPath,
264
+ Model.fromSerialized(packageName, packagePath, malloyConfig, sm),
265
+ );
266
+ }
267
+
268
+ const endTime = performance.now();
269
+ const executionTime = endTime - startTime;
270
+ this.packageLoadHistogram.record(executionTime, {
271
+ malloy_package_name: packageName,
272
+ status: "success",
273
+ });
274
+ logger.info(`Successfully loaded package ${packageName}`, {
275
+ packageName,
276
+ duration: formatDuration(executionTime),
277
+ });
278
+
279
+ return new Package(
280
+ environmentName,
281
+ packageName,
282
+ packagePath,
283
+ packageConfig,
284
+ databases,
285
+ models,
286
+ malloyConfig,
287
+ );
288
+ }
289
+
200
290
  public getPackageName(): string {
201
291
  return this.packageName;
202
292
  }
@@ -231,6 +321,21 @@ export class Package {
231
321
  return Array.from(this.models.keys());
232
322
  }
233
323
 
324
+ /**
325
+ * Re-compile every model in the package against a new build
326
+ * manifest (called after a materialization build commits new
327
+ * physicalised tables). Runs through the package-load worker pool
328
+ * — same off-main-thread compile path as initial `Package.create`
329
+ * — so a reload of a large package can't block the K8s liveness
330
+ * probe.
331
+ *
332
+ * Unlike `Package.create`, a per-model compile failure here does
333
+ * NOT abort the reload: we keep the failed model as a placeholder
334
+ * (`Model.fromCompilationError`) in `this.models`, matching the
335
+ * historical reload semantics. Whole-pool failures (worker crash,
336
+ * timeout, pool shutting down) propagate as `ServiceUnavailableError`
337
+ * — the caller (manifest service) decides how to retry.
338
+ */
234
339
  public async reloadAllModels(
235
340
  buildManifest: BuildManifest["entries"],
236
341
  ): Promise<void> {
@@ -240,20 +345,62 @@ export class Package {
240
345
  modelCount: modelPaths.length,
241
346
  manifestEntryCount: Object.keys(buildManifest).length,
242
347
  });
243
- const reloaded = await Promise.all(
244
- modelPaths.map((modelPath) =>
245
- Model.create(
246
- this.packageName,
247
- this.packagePath,
248
- modelPath,
249
- this.malloyConfig,
250
- { buildManifest },
251
- ),
252
- ),
253
- );
348
+
349
+ const pool = getPackageLoadPool();
350
+ let outcome;
351
+ try {
352
+ outcome = await pool.loadPackage({
353
+ packagePath: this.packagePath,
354
+ packageName: this.packageName,
355
+ malloyConfig: this.malloyConfig,
356
+ defaultConnectionName: "duckdb",
357
+ buildManifest,
358
+ });
359
+ } catch (err) {
360
+ const realError =
361
+ err instanceof Error
362
+ ? err
363
+ : new Error(`Package-load worker pool failure: ${String(err)}`);
364
+ if (
365
+ realError instanceof MalloyError ||
366
+ realError instanceof ModelCompilationError
367
+ ) {
368
+ throw realError;
369
+ }
370
+ throw new ServiceUnavailableError(
371
+ `Package-load worker pool unavailable: ${realError.message}`,
372
+ );
373
+ }
374
+
254
375
  const nextModels = new Map<string, Model>();
255
- for (const model of reloaded) {
256
- nextModels.set(model.getPath(), model);
376
+ for (const sm of outcome.models) {
377
+ if (sm.compilationError) {
378
+ const err = Model.deserializeCompilationError(sm.compilationError);
379
+ logger.warn("Model compilation failed during reload", {
380
+ packageName: this.packageName,
381
+ modelPath: sm.modelPath,
382
+ error: err.message,
383
+ });
384
+ nextModels.set(
385
+ sm.modelPath,
386
+ Model.fromCompilationError(
387
+ this.packageName,
388
+ sm.modelPath,
389
+ sm.modelType,
390
+ err,
391
+ ),
392
+ );
393
+ } else {
394
+ nextModels.set(
395
+ sm.modelPath,
396
+ Model.fromSerialized(
397
+ this.packageName,
398
+ this.packagePath,
399
+ this.malloyConfig,
400
+ sm,
401
+ ),
402
+ );
403
+ }
257
404
  }
258
405
  this.models = nextModels;
259
406
  }
@@ -316,20 +463,6 @@ export class Package {
316
463
  );
317
464
  }
318
465
 
319
- private static async loadModels(
320
- packageName: string,
321
- packagePath: string,
322
- malloyConfig: MalloyConfig,
323
- ): Promise<Map<string, Model>> {
324
- const modelPaths = await Package.getModelPaths(packagePath);
325
- const models = await Promise.all(
326
- modelPaths.map((modelPath) =>
327
- Model.create(packageName, packagePath, modelPath, malloyConfig),
328
- ),
329
- );
330
- return new Map(models.map((model) => [model.getPath(), model]));
331
- }
332
-
333
466
  private static buildPackageMalloyConfig(
334
467
  packagePath: string,
335
468
  getEnvironmentMalloyConfig: () => MalloyConfig,
@@ -379,27 +512,6 @@ export class Package {
379
512
  return malloyConfig;
380
513
  }
381
514
 
382
- private static async getModelPaths(packagePath: string): Promise<string[]> {
383
- let files = undefined;
384
- try {
385
- files = await recursive(packagePath, [ignoreDotfiles]);
386
- } catch (error) {
387
- logger.error(error);
388
- throw new PackageNotFoundError(
389
- `Package config for ${packagePath} does not exist.`,
390
- );
391
- }
392
- return files
393
- .map((fullPath: string) => {
394
- return path.relative(packagePath, fullPath).replace(/\\/g, "/");
395
- })
396
- .filter(
397
- (modelPath: string) =>
398
- modelPath.endsWith(MODEL_FILE_SUFFIX) ||
399
- modelPath.endsWith(NOTEBOOK_FILE_SUFFIX),
400
- );
401
- }
402
-
403
515
  private static async validatePackageManifestExistsOrThrowError(
404
516
  packagePath: string,
405
517
  ) {
@@ -414,37 +526,32 @@ export class Package {
414
526
  }
415
527
  }
416
528
 
417
- private static async readPackageConfig(
418
- packagePath: string,
419
- ): Promise<ApiPackage> {
420
- const packageConfigPath = path.join(packagePath, PACKAGE_MANIFEST_NAME);
421
- const packageConfigContents = await fs.readFile(packageConfigPath);
422
- // TODO: Validate package manifest. Define manifest type in public API.
423
- const packageManifest = JSON.parse(packageConfigContents.toString());
424
- return {
425
- name: packageManifest.name,
426
- description: packageManifest.description,
427
- };
428
- }
429
-
430
529
  private static async readDatabases(
431
530
  packagePath: string,
531
+ malloyConfig: MalloyConfig,
432
532
  ): Promise<ApiDatabase[]> {
533
+ const databasePaths = await Package.getDatabasePaths(packagePath);
534
+ if (databasePaths.length === 0) {
535
+ return [];
536
+ }
537
+ // Resolve the package's duckdb connection ONCE and reuse it for
538
+ // every schema/row-count probe in this package. Malloy caches the
539
+ // materialized connection on the MalloyConfig so the same instance
540
+ // will be returned to model compiles later in `Package.create`.
541
+ // This is the substantive optimization over the previous code:
542
+ // we go from `databasePaths.length` separate DuckDBConnections
543
+ // (each doing its own native init + extension load) to one.
544
+ const conn = await malloyConfig.connections.lookupConnection("duckdb");
433
545
  return await Promise.all(
434
- (await Package.getDatabasePaths(packagePath)).map(
435
- async (databasePath) => {
436
- const databaseInfo = await Package.getDatabaseInfo(
437
- packagePath,
438
- databasePath,
439
- );
440
-
441
- return {
442
- path: databasePath,
443
- info: databaseInfo,
444
- type: "embedded",
445
- };
446
- },
447
- ),
546
+ databasePaths.map(async (databasePath) => ({
547
+ path: databasePath,
548
+ info: await Package.getDatabaseInfo(
549
+ packagePath,
550
+ databasePath,
551
+ conn,
552
+ ),
553
+ type: "embedded" as const,
554
+ })),
448
555
  );
449
556
  }
450
557
 
@@ -465,15 +572,20 @@ export class Package {
465
572
  private static async getDatabaseInfo(
466
573
  packagePath: string,
467
574
  databasePath: string,
575
+ conn: Connection,
468
576
  ): Promise<ApiTableDescription> {
469
577
  const fullPath = path.join(packagePath, databasePath);
470
578
 
471
579
  // Create a DuckDB source then:
472
580
  // 1. Load the model and get the table schema from model
473
581
  // 2. Run a query to get the row count from the table
582
+ // ConnectionRuntime is cheap (just a wrapper), and creating one
583
+ // per call keeps each probe's compile state isolated. The
584
+ // expensive piece — the underlying DuckDBConnection — is shared
585
+ // across all probes via `conn` (resolved once in readDatabases).
474
586
  const runtime = new ConnectionRuntime({
475
587
  urlReader: new EmptyURLReader(),
476
- connections: [new DuckDBConnection("duckdb")],
588
+ connections: [conn],
477
589
  });
478
590
  // Normalize path to use forward slashes for cross-platform compatibility
479
591
  // DuckDB on Windows supports forward slashes, and this avoids escaping issues