@malloy-publisher/server 0.0.165 → 0.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/.eslintrc.json +9 -1
  2. package/dist/app/api-doc.yaml +143 -1
  3. package/dist/app/assets/HomePage-D2tUw_9U.js +1 -0
  4. package/dist/app/assets/{MainPage-DAyUfYba.js → MainPage-DBQW76L7.js} +2 -2
  5. package/dist/app/assets/{ModelPage-CrMryV1s.js → ModelPage-BnfOKuhQ.js} +1 -1
  6. package/dist/app/assets/PackagePage-zPhE-rDg.js +1 -0
  7. package/dist/app/assets/ProjectPage-BpSTvuW6.js +1 -0
  8. package/dist/app/assets/RouteError-Cp9-yCK5.js +1 -0
  9. package/dist/app/assets/{WorkbookPage-DZEVYGW3.js → WorkbookPage-FD_gmxeE.js} +1 -1
  10. package/dist/app/assets/{index-BvVmB5sv.js → index-D5QBYuLK.js} +150 -150
  11. package/dist/app/assets/{index-CsC07BYd.js → index-DNCvL_5f.js} +1 -1
  12. package/dist/app/assets/{index-DWhjtyBB.js → index-x9S1fsYn.js} +1 -1
  13. package/dist/app/assets/{index.umd-DvM-lTQa.js → index.umd-CTYdFEHH.js} +1 -1
  14. package/dist/app/index.html +1 -1
  15. package/dist/instrumentation.js +85955 -88560
  16. package/dist/server.js +197441 -106276
  17. package/package.json +2 -1
  18. package/src/controller/compile.controller.ts +35 -0
  19. package/src/controller/connection.controller.ts +22 -2
  20. package/src/controller/model.controller.ts +20 -9
  21. package/src/health.ts +8 -0
  22. package/src/instrumentation.ts +123 -34
  23. package/src/server.ts +49 -3
  24. package/src/service/connection.spec.ts +1331 -0
  25. package/src/service/connection.ts +407 -29
  26. package/src/service/db_utils.ts +104 -45
  27. package/src/service/gcs_s3_utils.ts +115 -40
  28. package/src/service/model.ts +5 -5
  29. package/src/service/project.ts +140 -4
  30. package/src/service/project_compile.spec.ts +197 -0
  31. package/src/service/project_store.ts +49 -21
  32. package/src/storage/StorageManager.ts +4 -3
  33. package/src/storage/duckdb/schema.ts +6 -5
  34. package/tests/harness/e2e.ts +4 -0
  35. package/tests/harness/mcp_test_setup.ts +172 -28
  36. package/tests/unit/duckdb/attached_databases.test.ts +61 -3
  37. package/tests/unit/ducklake/ducklake.test.ts +950 -0
  38. package/dist/app/assets/HomePage-QekMXs8r.js +0 -1
  39. package/dist/app/assets/PackagePage-DDaABD2A.js +0 -1
  40. package/dist/app/assets/ProjectPage-FAYUFGhL.js +0 -1
  41. package/dist/app/assets/RouteError-BKYctANX.js +0 -1
@@ -7,9 +7,8 @@ import {
7
7
  CloudStorageCredentials,
8
8
  gcsConnectionToCredentials,
9
9
  getCloudTablesWithColumns,
10
- listAllDataFilesInBucket,
11
- listCloudBuckets,
12
- listFilesInCloudDirectory,
10
+ listCloudDirectorySchemas,
11
+ listDataFilesInDirectory,
13
12
  parseCloudUri,
14
13
  s3ConnectionToCredentials,
15
14
  } from "./gcs_s3_utils";
@@ -340,23 +339,10 @@ export async function getSchemasForConnection(
340
339
  : s3ConnectionToCredentials(attachedDb.s3Connection!);
341
340
 
342
341
  try {
343
- const buckets = await listCloudBuckets(credentials);
344
- const scheme = dbType === "gcs" ? "gs" : "s3";
345
-
346
- logger.info(
347
- `Listed ${buckets.length} ${dbType.toUpperCase()} buckets for attached database ${attachedDb.name}`,
348
- );
349
-
350
- // Just return bucket URIs as schemas - fast!
351
- // Files/directories will be listed when user selects a bucket
352
- return buckets.map((bucket) => ({
353
- name: `${scheme}://${bucket.name}`,
354
- isHidden: false,
355
- isDefault: false,
356
- }));
342
+ return await listCloudDirectorySchemas(credentials);
357
343
  } catch (cloudError) {
358
344
  logger.warn(
359
- `Failed to list ${dbType.toUpperCase()} buckets for ${attachedDb.name}`,
345
+ `Failed to list ${dbType.toUpperCase()} directory schemas for ${attachedDb.name}`,
360
346
  { error: cloudError },
361
347
  );
362
348
  return [];
@@ -411,6 +397,38 @@ export async function getSchemasForConnection(
411
397
  `Failed to get schemas for MotherDuck connection ${connection.name}: ${(error as Error).message}`,
412
398
  );
413
399
  }
400
+ } else if (connection.type === "ducklake") {
401
+ try {
402
+ // Filter by catalog_name to only get schemas from the attached DuckLake catalog
403
+ // The catalog is attached with the connection name (see attachDuckLake in connection.ts)
404
+ const catalogName = connection.name;
405
+ const result = await malloyConnection.runSQL(
406
+ `SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '${catalogName}' ORDER BY schema_name`,
407
+ { rowLimit: 1000 },
408
+ );
409
+ const rows = standardizeRunSQLResult(result);
410
+
411
+ return rows.map((row: unknown) => {
412
+ const typedRow = row as Record<string, unknown>;
413
+ const schemaName = typedRow.schema_name as string;
414
+
415
+ const shouldShow = schemaName === "main" || schemaName === "public";
416
+
417
+ return {
418
+ name: schemaName,
419
+ isHidden: !shouldShow,
420
+ isDefault: false,
421
+ };
422
+ });
423
+ } catch (error) {
424
+ logger.error(
425
+ `Error getting schemas for DuckLake connection ${connection.name}`,
426
+ { error },
427
+ );
428
+ throw new Error(
429
+ `Failed to get schemas for DuckLake connection ${connection.name}: ${(error as Error).message}`,
430
+ );
431
+ }
414
432
  } else {
415
433
  throw new Error(`Unsupported connection type: ${connection.type}`);
416
434
  }
@@ -444,19 +462,11 @@ export async function getTablesForSchema(
444
462
  );
445
463
  }
446
464
 
447
- let fileKeys: string[];
448
- if (directoryPath) {
449
- const fileNames = await listFilesInCloudDirectory(
450
- credentials,
451
- bucketName,
452
- directoryPath,
453
- );
454
- fileKeys = fileNames.map((fileName) => `${directoryPath}/${fileName}`);
455
- } else {
456
- fileKeys = await listAllDataFilesInBucket(credentials, bucketName);
457
- }
458
-
459
- console.log("File keys:", fileKeys);
465
+ const fileKeys = await listDataFilesInDirectory(
466
+ credentials,
467
+ bucketName,
468
+ directoryPath,
469
+ );
460
470
 
461
471
  return await getCloudTablesWithColumns(
462
472
  malloyConnection,
@@ -464,8 +474,13 @@ export async function getTablesForSchema(
464
474
  bucketName,
465
475
  fileKeys,
466
476
  );
477
+ } else if (connection.type === "ducklake") {
478
+ if (schemaName.split(".").length == 2) {
479
+ schemaName = `${connection.name}.${schemaName}`;
480
+ } else if (schemaName.split(".").length === 1) {
481
+ schemaName = `${connection.name}.${schemaName}`;
482
+ }
467
483
  }
468
-
469
484
  const tableNames = await listTablesForSchema(
470
485
  connection,
471
486
  schemaName,
@@ -476,7 +491,6 @@ export async function getTablesForSchema(
476
491
  const tableSourcePromises = tableNames.map(async (tableName) => {
477
492
  try {
478
493
  let tablePath: string;
479
-
480
494
  if (connection.type === "trino") {
481
495
  if (connection.trinoConnection?.catalog) {
482
496
  tablePath = `${connection.trinoConnection?.catalog}.${schemaName}.${tableName}`;
@@ -484,6 +498,10 @@ export async function getTablesForSchema(
484
498
  // Catalog name is included in the schema name
485
499
  tablePath = `${schemaName}.${tableName}`;
486
500
  }
501
+ } else if (connection.type === "ducklake") {
502
+ // For ducklake, schemaName already includes connection name prefix from above
503
+ // So tablePath should be schemaName.tableName (which is connectionName.schemaName.tableName)
504
+ tablePath = `${schemaName}.${tableName}`;
487
505
  } else {
488
506
  tablePath = `${schemaName}.${tableName}`;
489
507
  }
@@ -497,14 +515,13 @@ export async function getTablesForSchema(
497
515
  tableName,
498
516
  tablePath,
499
517
  );
500
-
501
518
  return {
502
519
  resource: tablePath,
503
520
  columns: tableSource.columns,
504
521
  };
505
522
  } catch (error) {
506
523
  logger.warn(`Failed to get schema for table ${tableName}`, {
507
- error,
524
+ error: extractErrorDataFromError(error),
508
525
  schemaName,
509
526
  tableName,
510
527
  });
@@ -575,7 +592,7 @@ export async function getConnectionTableSource(
575
592
  type: field.type,
576
593
  };
577
594
  });
578
- logger.info(`Successfully fetched schema for ${tablePath}`, {
595
+ logger.debug(`Successfully fetched schema for ${tablePath}`, {
579
596
  fieldCount: fields.length,
580
597
  });
581
598
  return {
@@ -748,15 +765,15 @@ export async function listTablesForSchema(
748
765
  }
749
766
 
750
767
  try {
751
- if (directoryPath) {
752
- return await listFilesInCloudDirectory(
753
- credentials,
754
- bucketName,
755
- directoryPath,
756
- );
757
- } else {
758
- return await listAllDataFilesInBucket(credentials, bucketName);
759
- }
768
+ const fileKeys = await listDataFilesInDirectory(
769
+ credentials,
770
+ bucketName,
771
+ directoryPath,
772
+ );
773
+ return fileKeys.map((key) => {
774
+ const lastSlash = key.lastIndexOf("/");
775
+ return lastSlash > 0 ? key.substring(lastSlash + 1) : key;
776
+ });
760
777
  } catch (error) {
761
778
  logger.error(
762
779
  `Error listing ${cloudType.toUpperCase()} objects in ${schemaName}`,
@@ -817,7 +834,49 @@ export async function listTablesForSchema(
817
834
  `Failed to get tables for MotherDuck schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
818
835
  );
819
836
  }
837
+ } else if (connection.type === "ducklake") {
838
+ const catalogName = schemaName.split(".")[0];
839
+ const actualSchemaName = schemaName.split(".")[1];
840
+ console.error("catalogName", catalogName);
841
+ console.error("actualSchemaName", actualSchemaName);
842
+ try {
843
+ const result = await malloyConnection.runSQL(
844
+ `SELECT table_name FROM information_schema.tables WHERE table_schema = '${actualSchemaName}' AND table_catalog = '${catalogName}' ORDER BY table_name`,
845
+ { rowLimit: 1000 },
846
+ );
847
+ const rows = standardizeRunSQLResult(result);
848
+ return rows.map((row: unknown) => {
849
+ const typedRow = row as Record<string, unknown>;
850
+ return typedRow.table_name as string;
851
+ });
852
+ } catch (error) {
853
+ logger.error(
854
+ `Error getting tables for DuckLake schema ${schemaName} in connection ${connection.name}`,
855
+ { error },
856
+ );
857
+ throw new Error(
858
+ `Failed to get tables for DuckLake schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
859
+ );
860
+ }
820
861
  } else {
821
862
  throw new Error(`Unsupported connection type: ${connection.type}`);
822
863
  }
823
864
  }
865
+
866
+ export function extractErrorDataFromError(error: unknown): {
867
+ error: string;
868
+ stack?: string;
869
+ task?: unknown;
870
+ } {
871
+ const errorMessage = error instanceof Error ? error.message : String(error);
872
+ const errorData: { error: string; stack?: string; task?: unknown } = {
873
+ error: errorMessage,
874
+ };
875
+ if (error instanceof Error && logger.level === "debug") {
876
+ errorData.stack = error.stack;
877
+ }
878
+ if (error && typeof error === "object" && "task" in error) {
879
+ errorData.task = (error as { task?: unknown }).task;
880
+ }
881
+ return errorData;
882
+ }
@@ -8,7 +8,6 @@ import { components } from "../api";
8
8
  import { logger } from "../logger";
9
9
 
10
10
  type ApiTable = components["schemas"]["Table"];
11
-
12
11
  type CloudStorageType = "gcs" | "s3";
13
12
 
14
13
  export interface CloudStorageCredentials {
@@ -29,7 +28,6 @@ interface CloudStorageObject {
29
28
  key: string;
30
29
  size?: number;
31
30
  lastModified?: Date;
32
- isFolder: boolean;
33
31
  }
34
32
 
35
33
  export function gcsConnectionToCredentials(gcsConnection: {
@@ -92,7 +90,7 @@ function createCloudStorageClient(
92
90
  return client;
93
91
  }
94
92
 
95
- export async function listCloudBuckets(
93
+ async function listCloudBuckets(
96
94
  credentials: CloudStorageCredentials,
97
95
  ): Promise<CloudStorageBucket[]> {
98
96
  const client = createCloudStorageClient(credentials);
@@ -143,7 +141,6 @@ async function listAllCloudFiles(
143
141
  key: content.Key,
144
142
  size: content.Size,
145
143
  lastModified: content.LastModified,
146
- isFolder: false,
147
144
  });
148
145
  }
149
146
  }
@@ -182,6 +179,15 @@ function isDataFile(key: string): boolean {
182
179
  );
183
180
  }
184
181
 
182
+ function buildCloudUri(
183
+ type: CloudStorageType,
184
+ bucket: string,
185
+ key: string,
186
+ ): string {
187
+ const scheme = type === "gcs" ? "gs" : "s3";
188
+ return `${scheme}://${bucket}/${key}`;
189
+ }
190
+
185
191
  function getFileType(key: string): string {
186
192
  const lowerKey = key.toLowerCase();
187
193
  if (lowerKey.endsWith(".csv")) return "csv";
@@ -192,23 +198,14 @@ function getFileType(key: string): string {
192
198
  return "unknown";
193
199
  }
194
200
 
195
- function buildCloudUri(
196
- type: CloudStorageType,
197
- bucket: string,
198
- key: string,
199
- ): string {
200
- const scheme = type === "gcs" ? "gs" : "s3";
201
- return `${scheme}://${bucket}/${key}`;
202
- }
203
-
204
201
  function standardizeRunSQLResult(result: unknown): unknown[] {
205
202
  return Array.isArray(result)
206
203
  ? result
207
204
  : (result as { rows?: unknown[] }).rows || [];
208
205
  }
209
206
 
210
- // Batch size for parallel schema fetching to avoid overwhelming the connection
211
207
  const SCHEMA_FETCH_BATCH_SIZE = 10;
208
+ const BUCKET_SCAN_BATCH_SIZE = 3;
212
209
 
213
210
  async function getTableSchema(
214
211
  malloyConnection: Connection,
@@ -268,11 +265,9 @@ export async function getCloudTablesWithColumns(
268
265
  ): Promise<ApiTable[]> {
269
266
  const allTables: ApiTable[] = [];
270
267
 
271
- // Process in batches to avoid overwhelming the connection
272
268
  for (let i = 0; i < fileKeys.length; i += SCHEMA_FETCH_BATCH_SIZE) {
273
269
  const batch = fileKeys.slice(i, i + SCHEMA_FETCH_BATCH_SIZE);
274
270
 
275
- // Process batch in parallel
276
271
  const batchResults = await Promise.all(
277
272
  batch.map((fileKey) =>
278
273
  getTableSchema(malloyConnection, credentials, bucketName, fileKey),
@@ -315,38 +310,118 @@ export function parseCloudUri(uri: string): {
315
310
  return null;
316
311
  }
317
312
 
318
- export async function listFilesInCloudDirectory(
313
+ export async function listDataFilesInDirectory(
319
314
  credentials: CloudStorageCredentials,
320
315
  bucketName: string,
321
316
  directoryPath: string,
322
317
  ): Promise<string[]> {
323
- const files = await listAllCloudFiles(credentials, bucketName);
324
-
325
- const filesInDirectory = files
326
- .filter((obj) => {
327
- if (!isDataFile(obj.key)) return false;
328
-
329
- const lastSlashIndex = obj.key.lastIndexOf("/");
330
- const fileDir =
331
- lastSlashIndex > 0 ? obj.key.substring(0, lastSlashIndex) : "";
332
-
333
- return fileDir === directoryPath;
334
- })
335
- .map((obj) => {
336
- const lastSlashIndex = obj.key.lastIndexOf("/");
337
- return lastSlashIndex > 0
338
- ? obj.key.substring(lastSlashIndex + 1)
339
- : obj.key;
340
- });
318
+ const prefix = directoryPath ? `${directoryPath}/` : "";
319
+ const client = createCloudStorageClient(credentials);
320
+ const storageType = credentials.type.toUpperCase();
321
+ const dataFiles: string[] = [];
322
+
323
+ try {
324
+ let continuationToken: string | undefined;
325
+
326
+ do {
327
+ const response = await client.send(
328
+ new ListObjectsV2Command({
329
+ Bucket: bucketName,
330
+ Prefix: prefix,
331
+ Delimiter: "/",
332
+ ContinuationToken: continuationToken,
333
+ }),
334
+ );
335
+
336
+ for (const content of response.Contents || []) {
337
+ if (content.Key && isDataFile(content.Key)) {
338
+ dataFiles.push(content.Key);
339
+ }
340
+ }
341
341
 
342
- return filesInDirectory;
342
+ continuationToken = response.IsTruncated
343
+ ? response.NextContinuationToken
344
+ : undefined;
345
+ } while (continuationToken);
346
+
347
+ logger.info(
348
+ `Listed ${dataFiles.length} data files in ${storageType} ${bucketName}/${directoryPath}`,
349
+ );
350
+ return dataFiles;
351
+ } catch (error) {
352
+ logger.error(
353
+ `Failed to list files in ${storageType} ${bucketName}/${directoryPath}`,
354
+ { error },
355
+ );
356
+ throw new Error(
357
+ `Failed to list files in ${storageType} ${bucketName}/${directoryPath}: ${error instanceof Error ? error.message : String(error)}`,
358
+ );
359
+ }
343
360
  }
344
361
 
345
- // List all data files in a bucket with their full relative paths
346
- export async function listAllDataFilesInBucket(
362
+ /**
363
+ * Scans an entire bucket and returns unique directory paths that contain data files.
364
+ * Uses flat listing for efficiency — O(total_files / 1000) API calls.
365
+ */
366
+ async function listDirectorySchemas(
347
367
  credentials: CloudStorageCredentials,
348
368
  bucketName: string,
349
369
  ): Promise<string[]> {
350
- const files = await listAllCloudFiles(credentials, bucketName);
351
- return files.filter((obj) => isDataFile(obj.key)).map((obj) => obj.key);
370
+ const allFiles = await listAllCloudFiles(credentials, bucketName);
371
+ const directories = new Set<string>();
372
+
373
+ for (const file of allFiles) {
374
+ if (!isDataFile(file.key)) continue;
375
+
376
+ const lastSlashIndex = file.key.lastIndexOf("/");
377
+ const dir =
378
+ lastSlashIndex > 0 ? file.key.substring(0, lastSlashIndex) : "";
379
+ directories.add(dir);
380
+ }
381
+
382
+ const scheme = credentials.type === "gcs" ? "gs" : "s3";
383
+ const sortedDirs = Array.from(directories).sort();
384
+
385
+ logger.info(
386
+ `Found ${sortedDirs.length} directories with data files in ${credentials.type.toUpperCase()} bucket ${bucketName}`,
387
+ );
388
+
389
+ return sortedDirs.map((dir) =>
390
+ dir ? `${scheme}://${bucketName}/${dir}` : `${scheme}://${bucketName}`,
391
+ );
392
+ }
393
+
394
+ export async function listCloudDirectorySchemas(
395
+ credentials: CloudStorageCredentials,
396
+ ): Promise<{ name: string; isHidden: boolean; isDefault: boolean }[]> {
397
+ const storageType = credentials.type.toUpperCase();
398
+ const buckets = await listCloudBuckets(credentials);
399
+
400
+ logger.info(
401
+ `Listed ${buckets.length} ${storageType} buckets, scanning for directories...`,
402
+ );
403
+
404
+ const allDirArrays: string[][] = [];
405
+
406
+ for (let i = 0; i < buckets.length; i += BUCKET_SCAN_BATCH_SIZE) {
407
+ const batch = buckets.slice(i, i + BUCKET_SCAN_BATCH_SIZE);
408
+ const batchResults = await Promise.all(
409
+ batch.map((bucket) =>
410
+ listDirectorySchemas(credentials, bucket.name).catch((err) => {
411
+ logger.warn(
412
+ `Failed to scan ${storageType} bucket ${bucket.name}`,
413
+ { error: err },
414
+ );
415
+ return [] as string[];
416
+ }),
417
+ ),
418
+ );
419
+ allDirArrays.push(...batchResults);
420
+ }
421
+
422
+ return allDirArrays.flat().map((dirUri) => ({
423
+ name: dirUri,
424
+ isHidden: false,
425
+ isDefault: false,
426
+ }));
352
427
  }
@@ -211,13 +211,13 @@ export class Model {
211
211
  } catch (error) {
212
212
  let computedError = error;
213
213
  if (error instanceof Error && error.stack) {
214
- console.error("Error stack", error.stack);
214
+ logger.error("Error stack", error.stack);
215
215
  }
216
216
 
217
217
  if (error instanceof MalloyError) {
218
218
  const problems = error.problems;
219
219
  for (const problem of problems) {
220
- console.error("Problem", problem);
220
+ logger.error("Problem", problem);
221
221
  }
222
222
  computedError = new ModelCompilationError(error);
223
223
  }
@@ -444,7 +444,7 @@ export class Model {
444
444
  const notebookCells: ApiNotebookCell[] = (
445
445
  this.runnableNotebookCells as RunnableNotebookCell[]
446
446
  ).map((cell) => {
447
- console.log("cell.queryInfo", cell.queryInfo);
447
+ logger.debug("cell.queryInfo", cell.queryInfo);
448
448
  return {
449
449
  type: cell.type,
450
450
  text: cell.text,
@@ -550,9 +550,9 @@ export class Model {
550
550
  text: cell.text,
551
551
  };
552
552
  } else {
553
- console.log("Error message: ", errorMessage);
553
+ logger.error("Error message: ", errorMessage);
554
554
  }
555
- console.log("Cell content: ", cellIndex, cell.type, cell.text);
555
+ logger.debug("Cell content: ", cellIndex, cell.type, cell.text);
556
556
  throw new BadRequestError(`Cell execution failed: ${errorMessage}`);
557
557
  }
558
558
  }