@malloy-publisher/server 0.0.198-dev → 0.0.198-dev2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +32 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/default-publisher.config.json +23 -0
  19. package/dist/instrumentation.mjs +22 -3
  20. package/dist/server.mjs +1522 -651
  21. package/dist/service/schema_worker.mjs +61 -0
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/config.spec.ts +306 -0
  27. package/src/config.ts +222 -2
  28. package/src/controller/compile.controller.ts +3 -1
  29. package/src/controller/connection.controller.ts +1 -1
  30. package/src/controller/model.controller.ts +8 -1
  31. package/src/controller/package.controller.ts +70 -29
  32. package/src/controller/query.controller.ts +3 -0
  33. package/src/default-publisher.config.json +23 -0
  34. package/src/errors.spec.ts +42 -0
  35. package/src/errors.ts +21 -0
  36. package/src/health.spec.ts +90 -0
  37. package/src/health.ts +73 -45
  38. package/src/instrumentation.ts +50 -0
  39. package/src/logger.ts +1 -3
  40. package/src/mcp/tools/discovery_tools.ts +6 -2
  41. package/src/mcp/tools/execute_query_tool.ts +12 -0
  42. package/src/path_safety.spec.ts +158 -0
  43. package/src/path_safety.ts +140 -0
  44. package/src/pg_helpers.spec.ts +226 -0
  45. package/src/pg_helpers.ts +129 -0
  46. package/src/server-old.ts +3 -23
  47. package/src/server.ts +54 -0
  48. package/src/service/connection.spec.ts +6 -4
  49. package/src/service/connection.ts +8 -3
  50. package/src/service/connection_config.ts +2 -2
  51. package/src/service/environment.ts +621 -176
  52. package/src/service/environment_admission.spec.ts +180 -0
  53. package/src/service/environment_store.ts +31 -0
  54. package/src/service/filter_integration.spec.ts +110 -0
  55. package/src/service/givens_integration.spec.ts +192 -0
  56. package/src/service/manifest_service.spec.ts +7 -2
  57. package/src/service/manifest_service.ts +8 -2
  58. package/src/service/materialization_service.ts +14 -3
  59. package/src/service/model.spec.ts +105 -0
  60. package/src/service/model.ts +91 -7
  61. package/src/service/package.spec.ts +11 -7
  62. package/src/service/package.ts +53 -56
  63. package/src/service/package_memory_governor.spec.ts +173 -0
  64. package/src/service/package_memory_governor.ts +233 -0
  65. package/src/service/package_race.spec.ts +208 -0
  66. package/src/service/process_stats_reporter.ts +169 -0
  67. package/src/service/schema_worker.ts +123 -0
  68. package/src/service/schema_worker_pool.ts +278 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +235 -0
  74. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  75. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  76. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  77. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  78. package/tests/unit/storage/StorageManager.test.ts +166 -0
  79. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  80. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  81. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  82. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  83. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  84. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  85. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  86. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  87. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -234,6 +234,111 @@ describe("service/model", () => {
234
234
 
235
235
  sinon.restore();
236
236
  });
237
+
238
+ it("forwards givens to runnable.getPreparedResult and .run", async () => {
239
+ const givensArg = { region: "EU" };
240
+ const preparedResultStub = sinon
241
+ .stub()
242
+ .resolves({ resultExplore: { limit: 10 } });
243
+ const runStub = sinon
244
+ .stub()
245
+ .rejects(new MalloyError("stub-stop", []));
246
+ const modelMaterializer = {
247
+ loadQuery: sinon.stub().returns({
248
+ getPreparedResult: preparedResultStub,
249
+ run: runStub,
250
+ }),
251
+ };
252
+
253
+ const model = new Model(
254
+ packageName,
255
+ mockModelPath,
256
+ {},
257
+ "model",
258
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
259
+ modelMaterializer as any,
260
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
261
+ { contents: {}, exports: [], queryList: [] } as any,
262
+ undefined,
263
+ undefined,
264
+ undefined,
265
+ undefined,
266
+ undefined,
267
+ );
268
+
269
+ await expect(
270
+ model.getQueryResults(
271
+ undefined,
272
+ undefined,
273
+ "run: orders -> summary",
274
+ undefined,
275
+ undefined,
276
+ givensArg,
277
+ ),
278
+ ).rejects.toThrow(MalloyError);
279
+
280
+ expect(preparedResultStub.calledOnce).toBe(true);
281
+ expect(preparedResultStub.firstCall.args[0]).toEqual({
282
+ givens: givensArg,
283
+ });
284
+ expect(runStub.firstCall.args[0]).toMatchObject({
285
+ givens: givensArg,
286
+ });
287
+
288
+ sinon.restore();
289
+ });
290
+ });
291
+
292
+ describe("executeNotebookCell", () => {
293
+ it("forwards givens to runnable.getPreparedResult and .run", async () => {
294
+ const givensArg = { target_code: "AA" };
295
+ const preparedResultStub = sinon
296
+ .stub()
297
+ .resolves({ resultExplore: { limit: 10 } });
298
+ const runStub = sinon
299
+ .stub()
300
+ .rejects(new MalloyError("stub-stop", []));
301
+ const cellRunnable = {
302
+ getPreparedResult: preparedResultStub,
303
+ run: runStub,
304
+ };
305
+ const runnableCells = [
306
+ {
307
+ type: "code" as const,
308
+ text: "run: orders -> by_code",
309
+ runnable: cellRunnable,
310
+ },
311
+ ];
312
+
313
+ const model = new Model(
314
+ packageName,
315
+ "test.malloynb",
316
+ {},
317
+ "notebook",
318
+ undefined,
319
+ undefined,
320
+ undefined,
321
+ undefined,
322
+ undefined,
323
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
324
+ runnableCells as any,
325
+ undefined,
326
+ );
327
+
328
+ await expect(
329
+ model.executeNotebookCell(0, undefined, undefined, givensArg),
330
+ ).rejects.toThrow(MalloyError);
331
+
332
+ expect(preparedResultStub.calledOnce).toBe(true);
333
+ expect(preparedResultStub.firstCall.args[0]).toEqual({
334
+ givens: givensArg,
335
+ });
336
+ expect(runStub.firstCall.args[0]).toMatchObject({
337
+ givens: givensArg,
338
+ });
339
+
340
+ sinon.restore();
341
+ });
237
342
  });
238
343
  });
239
344
 
@@ -3,6 +3,7 @@ import {
3
3
  API,
4
4
  Connection,
5
5
  FixedConnectionMap,
6
+ GivenValue,
6
7
  isSourceDef,
7
8
  MalloyConfig,
8
9
  MalloyError,
@@ -56,6 +57,7 @@ type ApiNotebookCell = components["schemas"]["NotebookCell"];
56
57
  type ApiRawNotebook = components["schemas"]["RawNotebook"];
57
58
  type ApiSource = components["schemas"]["Source"];
58
59
  type ApiFilter = components["schemas"]["Filter"];
60
+ type ApiGiven = components["schemas"]["Given"];
59
61
  type ApiView = components["schemas"]["View"];
60
62
  type ApiQuery = components["schemas"]["Query"];
61
63
  export type ApiConnection = components["schemas"]["Connection"];
@@ -73,6 +75,61 @@ const MALLOY_VERSION = (
73
75
  export type ModelType = "model" | "notebook";
74
76
  type ModelConnectionInput = MalloyConfig | Map<string, Connection>;
75
77
 
78
+ /**
79
+ * Structural type for a Malloy SDK `Given` instance (the value type of
80
+ * `Model.givens`). The `Given` class is declared in
81
+ * `@malloydata/malloy/dist/api/foundation/core.d.ts` but is not re-exported
82
+ * from the package root, so we duck-type against the surface we use rather
83
+ * than importing it.
84
+ */
85
+ interface MalloyGiven {
86
+ readonly name: string;
87
+ readonly type: { type: string; filterType?: string };
88
+ getTaglines(prefix?: RegExp): string[];
89
+ }
90
+
91
+ /**
92
+ * Convert a Malloy SDK `Given` (returned from `Model.givens`) to the wire
93
+ * shape declared in `api-doc.yaml`. Two fields are deliberately not surfaced:
94
+ *
95
+ * - `location` — Malloy's `DocumentLocation.url` is an absolute `file://`
96
+ * path on the publisher's filesystem. Surfacing it would leak the OS user,
97
+ * install directory, and internal layout. Existing `Filter` introspection
98
+ * does not expose location either; matching that floor. A future PR can
99
+ * add a sanitized package-relative path if a client needs it.
100
+ *
101
+ * - `default` / `defaultText` — Malloy's API only exposes the parsed
102
+ * `ConstantExpr` AST, not a rendered source string. Rendering it here
103
+ * would duplicate the Malloy printer. Add when Malloy surfaces a
104
+ * stringified accessor.
105
+ *
106
+ * `annotations` is restricted to `#(...)` declaration annotations (the
107
+ * caller-facing kind, e.g. `#(doc)`). `getTaglines()` with no prefix would
108
+ * also return `##` doc-comment lines and the model-level `##!` pragma,
109
+ * which aren't part of the given's surface contract.
110
+ *
111
+ * Type rendering: `GivenTypeDef` is typed as `AtomicTypeDef |
112
+ * FilterExpressionParamTypeDef`, but Malloy's grammar only emits the
113
+ * scalar parameter types (`string` | `number` | `boolean` | `date` |
114
+ * `timestamp` | `timestamptz` | `filter expression` | `error`) for
115
+ * given declarations today. If the grammar expands to allow array or
116
+ * record givens, the bare `type.type` discriminator (`'array'`,
117
+ * `'record'`) will land in the wire response with no element info —
118
+ * revisit when that happens.
119
+ */
120
+ function malloyGivenToApi(given: MalloyGiven): ApiGiven {
121
+ const type = given.type;
122
+ const renderedType =
123
+ type.type === "filter expression"
124
+ ? `filter<${type.filterType}>`
125
+ : type.type;
126
+ return {
127
+ name: given.name,
128
+ type: renderedType,
129
+ annotations: given.getTaglines(/^#\(/),
130
+ };
131
+ }
132
+
76
133
  interface RunnableNotebookCell {
77
134
  type: "code" | "markdown";
78
135
  text: string;
@@ -98,6 +155,10 @@ export class Model {
98
155
  private compilationError: MalloyError | Error | undefined;
99
156
  /** Parsed #(filter) definitions keyed by source name. */
100
157
  private filterMap: Map<string, FilterDefinition[]>;
158
+ /** Givens declared on the model, in declaration order. Malloy's
159
+ * `Model.givens` already collapses inheritance; we just stash the list
160
+ * for surfacing on the compiled-model response. */
161
+ private givens: ApiGiven[] | undefined;
101
162
  private meter = metrics.getMeter("publisher");
102
163
  private queryExecutionHistogram = this.meter.createHistogram(
103
164
  "malloy_model_query_duration",
@@ -121,6 +182,7 @@ export class Model {
121
182
  runnableNotebookCells: RunnableNotebookCell[] | undefined,
122
183
  compilationError: MalloyError | Error | undefined,
123
184
  filterMap?: Map<string, FilterDefinition[]>,
185
+ givens?: ApiGiven[],
124
186
  ) {
125
187
  this.packageName = packageName;
126
188
  this.modelPath = modelPath;
@@ -134,6 +196,7 @@ export class Model {
134
196
  this.runnableNotebookCells = runnableNotebookCells;
135
197
  this.compilationError = compilationError;
136
198
  this.filterMap = filterMap ?? new Map();
199
+ this.givens = givens;
137
200
  this.modelInfo = this.modelDef
138
201
  ? modelDefToModelInfo(this.modelDef)
139
202
  : undefined;
@@ -188,10 +251,19 @@ export class Model {
188
251
  let sources = undefined;
189
252
  let queries = undefined;
190
253
  let filterMap: Map<string, FilterDefinition[]> | undefined;
254
+ let givens: ApiGiven[] | undefined;
191
255
  const sourceInfos: Malloy.SourceInfo[] = [];
192
256
  if (modelMaterializer) {
193
- modelDef = (await modelMaterializer.getModel())._modelDef;
194
- const sourceResult = Model.getSources(modelPath, modelDef);
257
+ const compiledModel = await modelMaterializer.getModel();
258
+ modelDef = compiledModel._modelDef;
259
+ // Malloy's `Model.givens` already collapses inheritance from imports
260
+ // and applies any `finalizeGivens` runtime config. Just read it.
261
+ const malloyGivens = Array.from(compiledModel.givens.values());
262
+ givens =
263
+ malloyGivens.length > 0
264
+ ? malloyGivens.map(malloyGivenToApi)
265
+ : undefined;
266
+ const sourceResult = Model.getSources(modelPath, modelDef, givens);
195
267
  sources = sourceResult.sources;
196
268
  filterMap = sourceResult.filterMap;
197
269
  queries = Model.getQueries(modelPath, modelDef);
@@ -255,6 +327,7 @@ export class Model {
255
327
  runnableNotebookCells,
256
328
  undefined,
257
329
  filterMap,
330
+ givens,
258
331
  );
259
332
  } catch (error) {
260
333
  let computedError = error;
@@ -342,6 +415,7 @@ export class Model {
342
415
  query?: string,
343
416
  filterParams?: FilterParams,
344
417
  bypassFilters?: boolean,
418
+ givens?: Record<string, GivenValue>,
345
419
  ): Promise<{
346
420
  result: Malloy.Result;
347
421
  compactResult: QueryData;
@@ -436,13 +510,14 @@ export class Model {
436
510
  }
437
511
 
438
512
  const rowLimit =
439
- (await runnable.getPreparedResult()).resultExplore.limit || ROW_LIMIT;
513
+ (await runnable.getPreparedResult({ givens })).resultExplore.limit ||
514
+ ROW_LIMIT;
440
515
  const endTime = performance.now();
441
516
  const executionTime = endTime - startTime;
442
517
 
443
518
  let queryResults;
444
519
  try {
445
- queryResults = await runnable.run({ rowLimit });
520
+ queryResults = await runnable.run({ rowLimit, givens });
446
521
  } catch (error) {
447
522
  // Record error metrics
448
523
  const errorEndTime = performance.now();
@@ -509,6 +584,7 @@ export class Model {
509
584
  ),
510
585
  sources: this.sources,
511
586
  queries: this.queries,
587
+ givens: this.givens,
512
588
  } as ApiCompiledModel;
513
589
  }
514
590
 
@@ -568,6 +644,7 @@ export class Model {
568
644
  cellIndex: number,
569
645
  filterParams?: FilterParams,
570
646
  bypassFilters?: boolean,
647
+ givens?: Record<string, GivenValue>,
571
648
  ): Promise<{
572
649
  type: "code" | "markdown";
573
650
  text: string;
@@ -629,9 +706,9 @@ export class Model {
629
706
  }
630
707
 
631
708
  const rowLimit =
632
- (await runnableToExecute.getPreparedResult()).resultExplore
633
- .limit || ROW_LIMIT;
634
- const result = await runnableToExecute.run({ rowLimit });
709
+ (await runnableToExecute.getPreparedResult({ givens }))
710
+ .resultExplore.limit || ROW_LIMIT;
711
+ const result = await runnableToExecute.run({ rowLimit, givens });
635
712
  const query = (await runnableToExecute.getPreparedQuery())._query;
636
713
  queryName = (query as NamedQueryDef).as || query.name;
637
714
  queryResult =
@@ -758,6 +835,7 @@ export class Model {
758
835
  private static getSources(
759
836
  modelPath: string,
760
837
  modelDef: ModelDef,
838
+ givens?: ApiGiven[],
761
839
  ): {
762
840
  sources: ApiSource[];
763
841
  filterMap: Map<string, FilterDefinition[]>;
@@ -846,6 +924,12 @@ export class Model {
846
924
  annotations,
847
925
  views,
848
926
  filters,
927
+ // Malloy exposes givens at the model level, not per-source.
928
+ // First pass: surface the full model-level list on every source
929
+ // — matches how filter introspection already collapses
930
+ // inheritance into the per-source list. Refine to view-scoped
931
+ // filtering if a customer asks.
932
+ givens,
849
933
  } as ApiSource;
850
934
  });
851
935
 
@@ -1,5 +1,4 @@
1
1
  import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
- import { Stats } from "fs";
3
2
  import fs from "fs/promises";
4
3
  import { join, resolve } from "path";
5
4
  import sinon from "sinon";
@@ -336,12 +335,17 @@ describe("service/package", () => {
336
335
  });
337
336
  });
338
337
 
339
- describe("getDatabaseInfo", () => {
340
- it("should return the size of the database file", async () => {
341
- sinon.stub(fs, "stat").resolves({ size: 13 } as Stats);
342
-
343
- // @ts-expect-error Accessing private static method for testing
344
- const info = await Package.getDatabaseInfo(
338
+ describe("schema introspection (via worker pool)", () => {
339
+ it("returns columns and rowCount for a csv database", async () => {
340
+ // Schema introspection moved off the main thread into a
341
+ // worker pool to isolate DuckDB's native thread pool (see
342
+ // schema_worker_pool.ts). Hit the pool directly here so
343
+ // the test exercises the same code path prod uses.
344
+ const { getSchemaWorkerPool } = await import(
345
+ "./schema_worker_pool"
346
+ );
347
+ const pool = getSchemaWorkerPool();
348
+ const info = await pool.submit(
345
349
  testPackageDirectory,
346
350
  "database.csv",
347
351
  );
@@ -1,16 +1,11 @@
1
1
  import * as fs from "fs/promises";
2
2
  import * as path from "path";
3
3
 
4
- import { DuckDBConnection } from "@malloydata/db-duckdb";
5
- import "@malloydata/db-duckdb/native";
6
4
  import {
7
5
  Connection,
8
- ConnectionRuntime,
9
6
  contextOverlay,
10
- EmptyURLReader,
11
7
  FixedConnectionMap,
12
8
  MalloyConfig,
13
- SourceDef,
14
9
  } from "@malloydata/malloy";
15
10
  import { metrics } from "@opentelemetry/api";
16
11
  import recursive from "recursive-readdir";
@@ -24,14 +19,14 @@ import {
24
19
  import { PackageNotFoundError } from "../errors";
25
20
  import { formatDuration, logger } from "../logger";
26
21
  import { BuildManifest } from "../storage/DatabaseInterface";
22
+ import { ignoreDotfiles } from "../utils";
27
23
  import { Model } from "./model";
24
+ import { getSchemaWorkerPool } from "./schema_worker_pool";
28
25
 
29
26
  type ApiDatabase = components["schemas"]["Database"];
30
27
  type ApiModel = components["schemas"]["Model"];
31
28
  type ApiNotebook = components["schemas"]["Notebook"];
32
29
  export type ApiPackage = components["schemas"]["Package"];
33
- type ApiColumn = components["schemas"]["Column"];
34
- type ApiTableDescription = components["schemas"]["TableDescription"];
35
30
  // A thunk lets callers pass a live reference to the *current* environment
36
31
  // MalloyConfig so the package wrapper resolves environment connections against the
37
32
  // generation that's active at lookup time, not the one that was current when
@@ -42,6 +37,7 @@ type PackageConnectionInput =
42
37
  | (() => MalloyConfig);
43
38
 
44
39
  const ENABLE_LIST_MODEL_COMPILATION = true;
40
+
45
41
  export class Package {
46
42
  private environmentName: string;
47
43
  private packageName: string;
@@ -91,6 +87,8 @@ export class Package {
91
87
  duration: formatDuration(manifestValidationTime - startTime),
92
88
  });
93
89
 
90
+ let packageMalloyConfig: MalloyConfig | undefined;
91
+
94
92
  try {
95
93
  const packageConfig = await Package.readPackageConfig(packagePath);
96
94
  const packageConfigTime = performance.now();
@@ -179,6 +177,17 @@ export class Package {
179
177
  malloy_package_name: packageName,
180
178
  status: "error",
181
179
  });
180
+
181
+ if (packageMalloyConfig) {
182
+ try {
183
+ await packageMalloyConfig.shutdown("close");
184
+ } catch (releaseError) {
185
+ logger.warn(
186
+ `Failed to release package-local DuckDB for ${packageName}`,
187
+ { error: releaseError },
188
+ );
189
+ }
190
+ }
182
191
  // Clean up package directory on failure
183
192
  try {
184
193
  await fs.rm(packagePath, {
@@ -380,7 +389,7 @@ export class Package {
380
389
  private static async getModelPaths(packagePath: string): Promise<string[]> {
381
390
  let files = undefined;
382
391
  try {
383
- files = await recursive(packagePath);
392
+ files = await recursive(packagePath, [ignoreDotfiles]);
384
393
  } catch (error) {
385
394
  logger.error(error);
386
395
  throw new PackageNotFoundError(
@@ -428,29 +437,49 @@ export class Package {
428
437
  private static async readDatabases(
429
438
  packagePath: string,
430
439
  ): Promise<ApiDatabase[]> {
431
- return await Promise.all(
432
- (await Package.getDatabasePaths(packagePath)).map(
433
- async (databasePath) => {
434
- const databaseInfo = await Package.getDatabaseInfo(
435
- packagePath,
436
- databasePath,
437
- );
438
-
439
- return {
440
- path: databasePath,
441
- info: databaseInfo,
442
- type: "embedded",
443
- };
444
- },
440
+ const databasePaths = await Package.getDatabasePaths(packagePath);
441
+ if (databasePaths.length === 0) return [];
442
+
443
+ // Off-main-thread: schema introspection runs in the
444
+ // SchemaWorkerPool so DuckDB's native thread pool lives inside
445
+ // a worker we control. This is the leak class that OOM-killed
446
+ // prod (466 leaked Bun Pool threads on worker-76b49bdb89-8bsv4)
447
+ // — worker isolation puts a hard ceiling on per-package native
448
+ // thread usage and the worker's connection is reused across all
449
+ // schema queries for the life of the process.
450
+ const pool = getSchemaWorkerPool();
451
+ const settled = await Promise.allSettled(
452
+ databasePaths.map((databasePath) =>
453
+ pool.submit(packagePath, databasePath),
445
454
  ),
446
455
  );
456
+
457
+ const results: ApiDatabase[] = [];
458
+ for (let i = 0; i < settled.length; i++) {
459
+ const outcome = settled[i];
460
+ if (outcome.status === "fulfilled") {
461
+ results.push({
462
+ path: databasePaths[i],
463
+ info: outcome.value,
464
+ type: "embedded",
465
+ });
466
+ } else {
467
+ // A single bad parquet (corrupt footer, unsupported type)
468
+ // must not fail the whole package load. Log and skip.
469
+ logger.warn("Schema introspection failed for database", {
470
+ packagePath,
471
+ databasePath: databasePaths[i],
472
+ error: outcome.reason,
473
+ });
474
+ }
475
+ }
476
+ return results;
447
477
  }
448
478
 
449
479
  private static async getDatabasePaths(
450
480
  packagePath: string,
451
481
  ): Promise<string[]> {
452
- let files = undefined;
453
- files = await recursive(packagePath);
482
+ const files = await recursive(packagePath, [ignoreDotfiles]);
454
483
  return files
455
484
  .map((fullPath: string) => {
456
485
  return path.relative(packagePath, fullPath).replace(/\\/g, "/");
@@ -461,38 +490,6 @@ export class Package {
461
490
  );
462
491
  }
463
492
 
464
- private static async getDatabaseInfo(
465
- packagePath: string,
466
- databasePath: string,
467
- ): Promise<ApiTableDescription> {
468
- const fullPath = path.join(packagePath, databasePath);
469
-
470
- // Create a DuckDB source then:
471
- // 1. Load the model and get the table schema from model
472
- // 2. Run a query to get the row count from the table
473
- const runtime = new ConnectionRuntime({
474
- urlReader: new EmptyURLReader(),
475
- connections: [new DuckDBConnection("duckdb")],
476
- });
477
- // Normalize path to use forward slashes for cross-platform compatibility
478
- // DuckDB on Windows supports forward slashes, and this avoids escaping issues
479
- const normalizedPath = fullPath.replace(/\\/g, "/");
480
- const model = runtime.loadModel(
481
- `source: temp is duckdb.table('${normalizedPath}')`,
482
- );
483
- const modelDef = await model.getModel();
484
- const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
485
- const schema = fields.map((field): ApiColumn => {
486
- return { type: field.type, name: field.name };
487
- });
488
- const runner = model.loadQuery(
489
- "run: temp->{aggregate: row_count is count()}",
490
- );
491
- const result = await runner.run();
492
- const rowCount = result.data.value[0].row_count?.valueOf() as number;
493
- return { name: databasePath, rowCount, columns: schema };
494
- }
495
-
496
493
  public setName(name: string) {
497
494
  this.packageName = name;
498
495
  }