@malloy-publisher/server 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ import { ContainerClient } from "@azure/storage-blob";
  import { BigQuery } from "@google-cloud/bigquery";
  import { Connection, TableSourceDef } from "@malloydata/malloy";
  import { components } from "../api";
- import { ConnectionError } from "../errors";
  import { logger } from "../logger";
  import {
  CloudStorageCredentials,
@@ -18,9 +17,43 @@ import { ApiConnection } from "./model";

  type ApiSchema = components["schemas"]["Schema"];
  type ApiTable = components["schemas"]["Table"];
- type ApiTableSource = components["schemas"]["TableSource"];
  type ApiAzureConnection = components["schemas"]["AzureConnection"];

+ /**
+ * Build a SQL `AND column IN (...)` fragment for optional table-name filtering.
+ * Returns an empty string when `values` is undefined or empty.
+ */
+ export function sqlInFilter(columnName: string, values?: string[]): string {
+ if (!values || values.length === 0) return "";
+ const escaped = values.map((v) => `'${v.replace(/'/g, "''")}'`);
+ return `AND ${columnName} IN (${escaped.join(", ")})`;
+ }
+
+ /**
+ * Group INFORMATION_SCHEMA.COLUMNS rows into ApiTable objects.
+ * Handles both upper-case (Snowflake) and lower-case (Postgres/DuckDB) column names.
+ */
+ function groupColumnRowsIntoTables(
+ rows: unknown[],
+ buildResource: (tableName: string) => string,
+ ): ApiTable[] {
+ const tableMap = new Map<string, { name: string; type: string }[]>();
+ for (const row of rows) {
+ const r = row as Record<string, unknown>;
+ const tableName = String(r.TABLE_NAME ?? r.table_name ?? "");
+ const columnName = String(r.COLUMN_NAME ?? r.column_name ?? "");
+ const dataType = String(r.DATA_TYPE ?? r.data_type ?? "").toLowerCase();
+ if (!tableName) continue;
+ if (!tableMap.has(tableName)) tableMap.set(tableName, []);
+ tableMap.get(tableName)!.push({ name: columnName, type: dataType });
+ }
+ const tables: ApiTable[] = [];
+ for (const [tableName, columns] of tableMap) {
+ tables.push({ resource: buildResource(tableName), columns });
+ }
+ return tables;
+ }
+
  function createBigQueryClient(connection: ApiConnection): BigQuery {
  if (!connection.bigqueryConnection) {
  throw new Error("BigQuery connection is required");
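The two helpers added above do the heavy lifting for the new table-listing code later in this diff: each dialect's INFORMATION_SCHEMA.COLUMNS rows are funneled through them. A minimal usage sketch (table, column, and schema values are illustrative):

    // sqlInFilter doubles single quotes, so values are SQL-escaped:
    sqlInFilter("table_name", ["users", "o'brien"]);
    // => "AND table_name IN ('users', 'o''brien')"
    sqlInFilter("table_name", []); // => ""

    // groupColumnRowsIntoTables accepts either casing of the column keys:
    const rows = [
      { table_name: "users", column_name: "id", data_type: "BIGINT" },
      { table_name: "users", column_name: "email", data_type: "VARCHAR" },
    ];
    groupColumnRowsIntoTables(rows, (t) => `public.${t}`);
    // => [{ resource: "public.users",
    //       columns: [{ name: "id", type: "bigint" },
    //                 { name: "email", type: "varchar" }] }]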
@@ -36,26 +69,25 @@ function createBigQueryClient(connection: ApiConnection): BigQuery {

  // Add service account key if provided
  if (connection.bigqueryConnection.serviceAccountKeyJson) {
+ let credentials: Record<string, unknown>;
  try {
- const credentials = JSON.parse(
+ credentials = JSON.parse(
  connection.bigqueryConnection.serviceAccountKeyJson,
  );
- config.credentials = credentials;
+ } catch (parseError) {
+ throw new Error(
+ `Failed to parse BigQuery service account key JSON: ${(parseError as Error).message}`,
+ );
+ }
+ config.credentials = credentials;

- // Use project_id from credentials if defaultProjectId is not set
- if (!config.projectId && credentials.project_id) {
- config.projectId = credentials.project_id;
- }
+ if (!config.projectId && credentials.project_id) {
+ config.projectId = credentials.project_id as string;
+ }

- if (!config.projectId) {
- throw new Error(
- "BigQuery project ID is required. Either set the defaultProjectId in the connection configuration or the project_id in the service account key JSON.",
- );
- }
- } catch (error) {
- logger.warn(
- "Failed to parse service account key JSON, using default credentials",
- { error },
+ if (!config.projectId) {
+ throw new Error(
+ "BigQuery project ID is required. Either set the defaultProjectId in the connection configuration or the project_id in the service account key JSON.",
  );
  }
  } else if (
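With this hunk a malformed serviceAccountKeyJson now fails fast instead of being logged and silently falling back to default credentials, and the project_id fallback runs outside the parse try/catch. A condensed sketch of the new control flow (the input string is a deliberately broken example):

    let credentials: Record<string, unknown>;
    try {
      credentials = JSON.parse('{"project_id": "my-project"'); // missing brace
    } catch (parseError) {
      // Previously: logger.warn(...) and continue with default credentials.
      throw new Error(
        `Failed to parse BigQuery service account key JSON: ${(parseError as Error).message}`,
      );
    }
    // Only reached on valid JSON; project_id then fills in a missing projectId.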
@@ -103,358 +135,410 @@ function getCloudCredentialsFromAttachedDatabases(
  return null;
  }

- export async function getSchemasForConnection(
+ async function getSchemasForBigQuery(
+ connection: ApiConnection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.bigqueryConnection) {
+ throw new Error("BigQuery connection is required");
+ }
+ try {
+ const bigquery = createBigQueryClient(connection);
+ const [datasets] = await bigquery.getDatasets();
+
+ return await Promise.all(
+ datasets.map(async (dataset) => {
+ const [metadata] = await dataset.getMetadata();
+ return {
+ name: dataset.id,
+ isHidden: false,
+ isDefault: false,
+ description: (metadata as { description?: string })?.description,
+ };
+ }),
+ );
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for BigQuery connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for BigQuery connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function getSchemasForPostgres(
  connection: ApiConnection,
  malloyConnection: Connection,
  ): Promise<ApiSchema[]> {
- if (connection.type === "bigquery") {
- if (!connection.bigqueryConnection) {
- throw new Error("BigQuery connection is required");
- }
- try {
- const bigquery = createBigQueryClient(connection);
- const [datasets] = await bigquery.getDatasets();
-
- const schemas = await Promise.all(
- datasets.map(async (dataset) => {
- const [metadata] = await dataset.getMetadata();
- return {
- name: dataset.id,
- isHidden: false,
- isDefault: false,
- // Include description from dataset metadata if available
- description: (metadata as { description?: string })
- ?.description,
- };
- }),
- );
- return schemas;
- } catch (error) {
- console.error(
- `Error getting schemas for BigQuery connection ${connection.name}:`,
- error,
- );
- throw new Error(
- `Failed to get schemas for BigQuery connection ${connection.name}: ${(error as Error).message}`,
+ if (!connection.postgresConnection) {
+ throw new Error("Postgres connection is required");
+ }
+ try {
+ const result = await malloyConnection.runSQL(
+ "SELECT schema_name FROM information_schema.schemata ORDER BY schema_name",
+ );
+ const rows = standardizeRunSQLResult(result);
+ return rows.map((row: unknown) => {
+ const typedRow = row as Record<string, unknown>;
+ const schemaName = String(
+ typedRow.schema_name ?? typedRow.SCHEMA_NAME ?? "",
  );
+ return {
+ name: schemaName,
+ isHidden: ["information_schema", "pg_catalog", "pg_toast"].includes(
+ schemaName,
+ ),
+ isDefault: schemaName === "public",
+ };
+ });
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for Postgres connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for Postgres connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function getSchemasForMySQL(
+ connection: ApiConnection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.mysqlConnection) {
+ throw new Error("Mysql connection is required");
+ }
+ return [
+ {
+ name: connection.mysqlConnection.database || "mysql",
+ isHidden: false,
+ isDefault: true,
+ },
+ ];
+ }
+
+ async function getSchemasForSnowflake(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.snowflakeConnection) {
+ throw new Error("Snowflake connection is required");
+ }
+ try {
+ const database = connection.snowflakeConnection.database;
+ const schema = connection.snowflakeConnection.schema;
+
+ const filters: string[] = [];
+ if (database) {
+ filters.push(`CATALOG_NAME = '${database}'`);
  }
- } else if (connection.type === "postgres") {
- if (!connection.postgresConnection) {
- throw new Error("Postgres connection is required");
+ if (schema) {
+ filters.push(`SCHEMA_NAME = '${schema}'`);
  }
- try {
- // Use the connection's runSQL method to query schemas
- const result = await malloyConnection.runSQL(
- "SELECT schema_name as row FROM information_schema.schemata ORDER BY schema_name",
- );
+ const whereClause =
+ filters.length > 0 ? `WHERE ${filters.join(" AND ")}` : "";

- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const schemaName = row as string;
- return {
- name: schemaName,
- isHidden: [
- "information_schema",
- "pg_catalog",
- "pg_toast",
- ].includes(schemaName),
- isDefault: schemaName === "public",
- };
- });
- } catch (error) {
- console.error(
- `Error getting schemas for Postgres connection ${connection.name}:`,
- error,
+ const result = await malloyConnection.runSQL(
+ `SELECT CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER FROM ${database ? `${database}.` : ""}INFORMATION_SCHEMA.SCHEMATA ${whereClause} ORDER BY SCHEMA_NAME`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return rows.map((row: unknown) => {
+ const typedRow = row as Record<string, unknown>;
+ const catalogName = String(
+ typedRow.CATALOG_NAME ?? typedRow.catalog_name ?? "",
  );
- throw new Error(
- `Failed to get schemas for Postgres connection ${connection.name}: ${(error as Error).message}`,
+ const schemaName = String(
+ typedRow.SCHEMA_NAME ?? typedRow.schema_name ?? "",
  );
- }
- } else if (connection.type === "mysql") {
- if (!connection.mysqlConnection) {
- throw new Error("Mysql connection is required");
- }
- try {
- // For MySQL, return the database name as the schema
- return [
- {
- name: connection.mysqlConnection.database || "mysql",
- isHidden: false,
- isDefault: true,
- },
- ];
- } catch (error) {
- console.error(
- `Error getting schemas for MySQL connection ${connection.name}:`,
- error,
+ const owner = String(
+ typedRow.SCHEMA_OWNER ?? typedRow.schema_owner ?? "",
  );
- throw new Error(
- `Failed to get schemas for MySQL connection ${connection.name}: ${(error as Error).message}`,
+ return {
+ name: `${catalogName}.${schemaName}`,
+ isHidden:
+ ["SNOWFLAKE", ""].includes(owner) ||
+ schemaName === "INFORMATION_SCHEMA",
+ isDefault: schema ? schemaName === schema : false,
+ };
+ });
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for Snowflake connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for Snowflake connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
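getSchemasForSnowflake now queries INFORMATION_SCHEMA.SCHEMATA with optional catalog and schema filters instead of running SHOW SCHEMAS. With a connection configured as database "ANALYTICS" and schema "PUBLIC" (illustrative names), the interpolated query and row mapping look like:

    // SELECT CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER
    //   FROM ANALYTICS.INFORMATION_SCHEMA.SCHEMATA
    //   WHERE CATALOG_NAME = 'ANALYTICS' AND SCHEMA_NAME = 'PUBLIC'
    //   ORDER BY SCHEMA_NAME
    const row = { CATALOG_NAME: "ANALYTICS", SCHEMA_NAME: "PUBLIC", SCHEMA_OWNER: "SYSADMIN" };
    // maps to:
    // { name: "ANALYTICS.PUBLIC", isHidden: false, isDefault: true }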
+ async function getSchemasForTrino(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.trinoConnection) {
+ throw new Error("Trino connection is required");
+ }
+ try {
+ const configuredSchema = connection.trinoConnection.schema;
+ let allRows: { catalog: string; schema: string }[] = [];
+
+ if (connection.trinoConnection.catalog) {
+ const catalog = connection.trinoConnection.catalog;
+ const result = await malloyConnection.runSQL(
+ `SELECT schema_name FROM ${catalog}.information_schema.schemata ORDER BY schema_name`,
  );
- }
- } else if (connection.type === "snowflake") {
- if (!connection.snowflakeConnection) {
- throw new Error("Snowflake connection is required");
- }
- try {
- // Use the connection's runSQL method to query schemas
- const result = await malloyConnection.runSQL("SHOW SCHEMAS");
  const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- const databaseName = String(
- typedRow.database_name ?? typedRow.DATABASE_NAME ?? "",
- );
- const name = String(typedRow.name ?? typedRow.NAME ?? "");
- const owner = String(typedRow.owner ?? typedRow.OWNER ?? "");
- const isDefaultVal =
- typedRow.is_default ?? typedRow.isDefault ?? typedRow.IS_DEFAULT;
+ allRows = rows.map((row: unknown) => {
+ const r = row as Record<string, unknown>;
  return {
- name: `${databaseName}.${name}`,
- isHidden: ["SNOWFLAKE", ""].includes(owner),
- isDefault: isDefaultVal === "Y",
+ catalog,
+ schema: String(r.schema_name ?? r.Schema ?? ""),
  };
  });
- } catch (error) {
- console.error(
- `Error getting schemas for Snowflake connection ${connection.name}:`,
- error,
- );
- throw new Error(
- `Failed to get schemas for Snowflake connection ${connection.name}: ${(error as Error).message}`,
+ } else {
+ const catalogsResult = await malloyConnection.runSQL(`SHOW CATALOGS`);
+ const catalogNames = standardizeRunSQLResult(catalogsResult).map(
+ (row: unknown) => {
+ const r = row as Record<string, unknown>;
+ return String(r.Catalog ?? r.catalog ?? "");
+ },
  );
- }
- } else if (connection.type === "trino") {
- if (!connection.trinoConnection) {
- throw new Error("Trino connection is required");
- }
- try {
- let result: unknown;
- // Use the connection's runSQL method to query schemas
- if (connection.trinoConnection.catalog) {
- result = await malloyConnection.runSQL(
- `SHOW SCHEMAS FROM ${connection.trinoConnection.catalog}`,
- );
- } else {
- const catalogs = await malloyConnection.runSQL(`SHOW CATALOGS`);
- console.log("catalogs", catalogs);
- let catalogNames = standardizeRunSQLResult(catalogs);
- catalogNames = catalogNames.map((catalog: unknown) => {
- const typedCatalog = catalog as Record<string, unknown>;
- return typedCatalog.Catalog as string;
- });

- const schemas: unknown[] = [];
-
- console.log("catalogNames", catalogNames);
- for (const catalog of catalogNames) {
- const schemasResult = await malloyConnection.runSQL(
- `SHOW SCHEMAS FROM ${catalog}`,
+ for (const catalog of catalogNames) {
+ try {
+ const result = await malloyConnection.runSQL(
+ `SELECT schema_name FROM ${catalog}.information_schema.schemata ORDER BY schema_name`,
  );
- const schemasResultRows = standardizeRunSQLResult(schemasResult);
- console.log("schemasResultRows", schemasResultRows);
-
- // Concat catalog name to schema name for each schema row
- const schemasWithCatalog = schemasResultRows.map(
- (row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- // For display, use the convention "catalog.schema"
- return {
- ...typedRow,
- Schema: `${catalog}.${typedRow.Schema ?? typedRow.schema ?? ""}`,
- };
- },
+ const rows = standardizeRunSQLResult(result);
+ for (const row of rows) {
+ const r = row as Record<string, unknown>;
+ allRows.push({
+ catalog,
+ schema: String(r.schema_name ?? r.Schema ?? ""),
+ });
+ }
+ } catch (catalogError) {
+ logger.warn(
+ `Failed to list schemas for Trino catalog ${catalog}`,
+ { error: catalogError },
  );
- schemas.push(...schemasWithCatalog);
- console.log("schemas", schemas);
  }
- result = schemas;
  }
-
- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- return {
- name: typedRow.Schema as string,
- isHidden: ["information_schema", "performance_schema"].includes(
- typedRow.Schema as string,
- ),
- isDefault:
- typedRow.Schema === connection.trinoConnection?.schema,
- };
- });
- } catch (error) {
- console.error(
- `Error getting schemas for Trino connection ${connection.name}:`,
- error,
- );
- throw new Error(
- `Failed to get schemas for Trino connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "duckdb") {
- if (!connection.duckdbConnection) {
- throw new Error("DuckDB connection is required");
  }
- try {
- // Use DuckDB's INFORMATION_SCHEMA.SCHEMATA to list schemas
- // Use DISTINCT to avoid duplicates from attached databases
- const result = await malloyConnection.runSQL(
- "SELECT DISTINCT schema_name,catalog_name FROM information_schema.schemata ORDER BY catalog_name,schema_name",
- { rowLimit: 1000 },
- );

- const rows = standardizeRunSQLResult(result);
+ return allRows.map(({ catalog, schema }) => {
+ const name = connection.trinoConnection?.catalog
+ ? schema
+ : `${catalog}.${schema}`;
+ return {
+ name,
+ isHidden: ["information_schema", "performance_schema"].includes(
+ schema,
+ ),
+ isDefault: configuredSchema ? schema === configuredSchema : false,
+ };
+ });
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for Trino connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for Trino connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
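When no catalog is configured, getSchemasForTrino enumerates SHOW CATALOGS and queries each catalog's information_schema.schemata, skipping catalogs that fail instead of aborting the whole listing. Display names are then catalog-qualified (values illustrative):

    const allRows = [
      { catalog: "tpch", schema: "sf1" },
      { catalog: "tpch", schema: "information_schema" },
    ];
    allRows.map(({ catalog, schema }) => `${catalog}.${schema}`);
    // => ["tpch.sf1", "tpch.information_schema"]
    // The second entry is returned with isHidden: true.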

- const schemas: ApiSchema[] = rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- const schemaName = typedRow.schema_name as string;
- const catalogName = typedRow.catalog_name as string;
+ async function getSchemasForDuckDB(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.duckdbConnection) {
+ throw new Error("DuckDB connection is required");
+ }
+ try {
+ const result = await malloyConnection.runSQL(
+ "SELECT DISTINCT schema_name,catalog_name FROM information_schema.schemata ORDER BY catalog_name,schema_name",
+ { rowLimit: 1000 },
+ );

- return {
- name: `${catalogName}.${schemaName}`,
- isHidden:
- [
- "information_schema",
- "performance_schema",
- "",
- "SNOWFLAKE",
- "information_schema",
- "pg_catalog",
- "pg_toast",
- ].includes(schemaName as string) ||
- ["md_information_schema", "system"].includes(
- catalogName as string,
- ),
- isDefault: catalogName === "main",
- };
- });
+ const rows = standardizeRunSQLResult(result);

- const attachedDatabases =
- connection.duckdbConnection.attachedDatabases || [];
+ const schemas: ApiSchema[] = rows.map((row: unknown) => {
+ const typedRow = row as Record<string, unknown>;
+ const schemaName = String(typedRow.schema_name ?? "");
+ const catalogName = String(typedRow.catalog_name ?? "");

- // Process all cloud storage connections in parallel
- const cloudDatabases = attachedDatabases.filter(
- (attachedDb) =>
- (attachedDb.type === "gcs" || attachedDb.type === "s3") &&
- (attachedDb.gcsConnection || attachedDb.s3Connection),
- );
+ return {
+ name: `${catalogName}.${schemaName}`,
+ isHidden:
+ [
+ "information_schema",
+ "performance_schema",
+ "pg_catalog",
+ "pg_toast",
+ "",
+ ].includes(schemaName) ||
+ ["md_information_schema", "system"].includes(catalogName),
+ isDefault: catalogName === "main",
+ };
+ });

- const cloudDbPromises = cloudDatabases.map(async (attachedDb) => {
- const dbType = attachedDb.type as "gcs" | "s3";
- const credentials =
- dbType === "gcs"
- ? gcsConnectionToCredentials(attachedDb.gcsConnection!)
- : s3ConnectionToCredentials(attachedDb.s3Connection!);
+ const attachedDatabases =
+ connection.duckdbConnection.attachedDatabases || [];

- try {
- return await listCloudDirectorySchemas(credentials);
- } catch (cloudError) {
- logger.warn(
- `Failed to list ${dbType.toUpperCase()} directory schemas for ${attachedDb.name}`,
- { error: cloudError },
- );
- return [];
- }
- });
+ const cloudDatabases = attachedDatabases.filter(
+ (attachedDb) =>
+ (attachedDb.type === "gcs" || attachedDb.type === "s3") &&
+ (attachedDb.gcsConnection || attachedDb.s3Connection),
+ );

- const cloudSchemaArrays = await Promise.all(cloudDbPromises);
- for (const cloudSchemas of cloudSchemaArrays) {
- schemas.push(...cloudSchemas);
- }
+ const cloudDbPromises = cloudDatabases.map(async (attachedDb) => {
+ const dbType = attachedDb.type as "gcs" | "s3";
+ const credentials =
+ dbType === "gcs"
+ ? gcsConnectionToCredentials(attachedDb.gcsConnection!)
+ : s3ConnectionToCredentials(attachedDb.s3Connection!);

- // Add Azure ADLS attached databases as schemas (by name)
- const azureDatabases = attachedDatabases.filter(
- (attachedDb) =>
- attachedDb.type === "azure" && attachedDb.azureConnection,
- );
- for (const attachedDb of azureDatabases) {
- if (attachedDb.name) {
- schemas.push({
- name: attachedDb.name,
- isHidden: false,
- isDefault: false,
- });
- }
+ try {
+ return await listCloudDirectorySchemas(credentials);
+ } catch (cloudError) {
+ logger.warn(
+ `Failed to list ${dbType.toUpperCase()} directory schemas for ${attachedDb.name}`,
+ { error: cloudError },
+ );
+ return [];
  }
+ });

- return schemas;
- } catch (error) {
- console.error(
- `Error getting schemas for DuckDB connection ${connection.name}:`,
- error,
- );
- throw new Error(
- `Failed to get schemas for DuckDB connection ${connection.name}: ${(error as Error).message}`,
- );
+ const cloudSchemaArrays = await Promise.all(cloudDbPromises);
+ for (const cloudSchemas of cloudSchemaArrays) {
+ schemas.push(...cloudSchemas);
  }
- } else if (connection.type === "motherduck") {
- if (!connection.motherduckConnection) {
- throw new Error("MotherDuck connection is required");
- }
- try {
- // Use MotherDuck's INFORMATION_SCHEMA.SCHEMATA to list schemas
- const result = await malloyConnection.runSQL(
- "SELECT DISTINCT schema_name as row FROM information_schema.schemata ORDER BY schema_name",
- { rowLimit: 1000 },
- );
- const rows = standardizeRunSQLResult(result);
- console.log(rows);
- return rows.map((row: unknown) => {
- const typedRow = row as { row: string };
- return {
- name: typedRow.row,
- isHidden: [
- "information_schema",
- "performance_schema",
- "",
- ].includes(typedRow.row),
+
+ const azureDatabases = attachedDatabases.filter(
+ (attachedDb) =>
+ attachedDb.type === "azure" && attachedDb.azureConnection,
+ );
+ for (const attachedDb of azureDatabases) {
+ if (attachedDb.name) {
+ schemas.push({
+ name: attachedDb.name,
+ isHidden: false,
  isDefault: false,
- };
- });
- } catch (error) {
- console.error(
- `Error getting schemas for MotherDuck connection ${connection.name}:`,
- error,
- );
- throw new Error(
- `Failed to get schemas for MotherDuck connection ${connection.name}: ${(error as Error).message}`,
- );
+ });
+ }
  }
- } else if (connection.type === "ducklake") {
- try {
- // Filter by catalog_name to only get schemas from the attached DuckLake catalog
- // The catalog is attached with the connection name (see attachDuckLake in connection.ts)
- const catalogName = connection.name;
- const result = await malloyConnection.runSQL(
- `SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '${catalogName}' ORDER BY schema_name`,
- { rowLimit: 1000 },
+
+ return schemas;
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for DuckDB connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for DuckDB connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function getSchemasForMotherDuck(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ if (!connection.motherduckConnection) {
+ throw new Error("MotherDuck connection is required");
+ }
+ try {
+ const database = connection.motherduckConnection.database;
+ const whereClause = database ? `WHERE catalog_name = '${database}'` : "";
+ const result = await malloyConnection.runSQL(
+ `SELECT DISTINCT schema_name FROM information_schema.schemata ${whereClause} ORDER BY schema_name`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return rows.map((row: unknown) => {
+ const typedRow = row as Record<string, unknown>;
+ const schemaName = String(
+ typedRow.schema_name ?? typedRow.SCHEMA_NAME ?? "",
  );
- const rows = standardizeRunSQLResult(result);
+ return {
+ name: schemaName,
+ isHidden: ["information_schema", "performance_schema", ""].includes(
+ schemaName,
+ ),
+ isDefault: schemaName === "main",
+ };
+ });
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for MotherDuck connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for MotherDuck connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }

- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- const schemaName = typedRow.schema_name as string;
+ async function getSchemasForDuckLake(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ try {
+ // The catalog is attached with the connection name (see attachDuckLake in connection.ts)
+ const catalogName = connection.name;
+ const result = await malloyConnection.runSQL(
+ `SELECT schema_name FROM information_schema.schemata WHERE catalog_name = '${catalogName}' ORDER BY schema_name`,
+ { rowLimit: 1000 },
+ );
+ const rows = standardizeRunSQLResult(result);

- const shouldShow = schemaName === "main" || schemaName === "public";
+ return rows.map((row: unknown) => {
+ const typedRow = row as Record<string, unknown>;
+ const schemaName = typedRow.schema_name as string;
+ const shouldShow = schemaName === "main" || schemaName === "public";
+ return {
+ name: schemaName,
+ isHidden: !shouldShow,
+ isDefault: false,
+ };
+ });
+ } catch (error) {
+ logger.error(
+ `Error getting schemas for DuckLake connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get schemas for DuckLake connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }

- return {
- name: schemaName,
- isHidden: !shouldShow,
- isDefault: false,
- };
- });
- } catch (error) {
- logger.error(
- `Error getting schemas for DuckLake connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get schemas for DuckLake connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else {
- throw new Error(`Unsupported connection type: ${connection.type}`);
+ export async function getSchemasForConnection(
+ connection: ApiConnection,
+ malloyConnection: Connection,
+ ): Promise<ApiSchema[]> {
+ switch (connection.type) {
+ case "bigquery":
+ return getSchemasForBigQuery(connection);
+ case "postgres":
+ return getSchemasForPostgres(connection, malloyConnection);
+ case "mysql":
+ return getSchemasForMySQL(connection);
+ case "snowflake":
+ return getSchemasForSnowflake(connection, malloyConnection);
+ case "trino":
+ return getSchemasForTrino(connection, malloyConnection);
+ case "duckdb":
+ return getSchemasForDuckDB(connection, malloyConnection);
+ case "motherduck":
+ return getSchemasForMotherDuck(connection, malloyConnection);
+ case "ducklake":
+ return getSchemasForDuckLake(connection, malloyConnection);
+ default:
+ throw new Error(`Unsupported connection type: ${connection.type}`);
  }
  }
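The former monolithic if/else chain is now a switch that dispatches to one helper per dialect, so each database's quirks live in their own function. Call sites are unchanged (connection values illustrative):

    const schemas = await getSchemasForConnection(connection, malloyConnection);
    // For an unrecognized type the default branch still throws, e.g.:
    // Error: Unsupported connection type: sqlite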
@@ -695,481 +779,428 @@ async function describeAzureFile(
  }
  }

- export async function getTablesForSchema(
+ export async function listTablesForSchema(
  connection: ApiConnection,
  schemaName: string,
  malloyConnection: Connection,
- fetchTableSchema = true,
+ tableNames?: string[],
  ): Promise<ApiTable[]> {
- // Check if schemaName matches an Azure attached database name
- if (connection.type === "duckdb") {
- const attachedDbs = connection.duckdbConnection?.attachedDatabases || [];
- const azureDb = attachedDbs.find(
- (db) =>
- db.type === "azure" && db.name === schemaName && db.azureConnection,
- );
- if (azureDb) {
- const azureConn = azureDb.azureConnection!;
- const fileUrl =
- azureConn.authType === "sas_token"
- ? azureConn.sasUrl
- : azureConn.fileUrl;
- if (fileUrl) {
- return await describeAzureFile(
- malloyConnection,
- fileUrl,
- azureConn,
- );
- }
- }
+ switch (connection.type) {
+ case "bigquery":
+ return listTablesForBigQuery(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "mysql":
+ return listTablesForMySQL(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "postgres":
+ return listTablesForPostgres(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "snowflake":
+ return listTablesForSnowflake(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "trino":
+ return listTablesForTrino(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "duckdb":
+ return listTablesForDuckDB(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "motherduck":
+ return listTablesForMotherDuck(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ case "ducklake":
+ return listTablesForDuckLake(
+ connection,
+ schemaName,
+ malloyConnection,
+ tableNames,
+ );
+ default:
+ throw new Error(`Unsupported connection type: ${connection.type}`);
  }
+ }

- // Check if this is an Azure ADLS file path (abfss:// or HTTPS SAS URL)
+ /**
+ * BigQuery: list tables via API client, then fetch each table's schema
+ * individually since BigQuery's INFORMATION_SCHEMA is region-scoped.
+ */
+ async function listTablesForBigQuery(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ try {
+ const bigquery = createBigQueryClient(connection);
+ const dataset = bigquery.dataset(schemaName);
+ const [tables] = await dataset.getTables();
+
+ let names = tables
+ .map((table) => table.id)
+ .filter((id): id is string => id !== undefined);
+ if (tableNames) {
+ const allowed = new Set(tableNames);
+ names = names.filter((id) => allowed.has(id));
+ }
+
+ const results = await Promise.all(
+ names.map(async (tableName) => {
+ const tablePath = `${schemaName}.${tableName}`;
+ try {
+ const source = await (
+ malloyConnection as Connection & {
+ fetchTableSchema: (
+ tableKey: string,
+ tablePath: string,
+ ) => Promise<TableSourceDef | undefined>;
+ }
+ ).fetchTableSchema(tableName, tablePath);
+ const columns =
+ source?.fields?.map((field) => ({
+ name: field.name,
+ type: field.type,
+ })) || [];
+ return { resource: tablePath, columns };
+ } catch (error) {
+ logger.warn(`Failed to get schema for table ${tableName}`, {
+ error: extractErrorDataFromError(error),
+ schemaName,
+ tableName,
+ });
+ return { resource: tablePath, columns: [] };
+ }
+ }),
+ );
+ return results;
+ } catch (error) {
+ logger.error(
+ `Error getting tables for BigQuery schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for BigQuery schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
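listTablesForBigQuery reaches fetchTableSchema, which the public Connection type does not declare, by narrowing with an intersection cast. The same pattern in isolation (table names hypothetical):

    type WithFetchTableSchema = Connection & {
      fetchTableSchema: (
        tableKey: string,
        tablePath: string,
      ) => Promise<TableSourceDef | undefined>;
    };
    const source = await (malloyConnection as WithFetchTableSchema)
      .fetchTableSchema("flights", "my_dataset.flights");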
+ async function listTablesForMySQL(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.mysqlConnection) {
+ throw new Error("Mysql connection is required");
+ }
+ try {
+ const result = await malloyConnection.runSQL(
+ `SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM information_schema.columns WHERE table_schema = '${schemaName}' ${sqlInFilter("TABLE_NAME", tableNames)} ORDER BY TABLE_NAME, ORDINAL_POSITION`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${schemaName}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for MySQL schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for MySQL schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function listTablesForPostgres(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.postgresConnection) {
+ throw new Error("Postgres connection is required");
+ }
+ try {
+ const result = await malloyConnection.runSQL(
+ `SELECT table_name, column_name, data_type FROM information_schema.columns WHERE table_schema = '${schemaName}' ${sqlInFilter("table_name", tableNames)} ORDER BY table_name, ordinal_position`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${schemaName}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for Postgres schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for Postgres schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function listTablesForSnowflake(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.snowflakeConnection) {
+ throw new Error("Snowflake connection is required");
+ }
+ try {
+ const parts = schemaName.split(".");
+ let databaseName: string;
+ let schemaOnly: string;
+
+ if (parts.length >= 2) {
+ databaseName = parts[0];
+ schemaOnly = parts[1];
+ } else {
+ databaseName = connection.snowflakeConnection.database ?? "";
+ schemaOnly = parts[0];
+ }
+
+ if (!databaseName) {
+ throw new Error(
+ `Cannot resolve database for schema "${schemaName}": provide DATABASE.SCHEMA or configure a database on the connection`,
+ );
+ }
+
+ const qualifiedSchema = `${databaseName}.${schemaOnly}`;
+ const result = await malloyConnection.runSQL(
+ `SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM ${databaseName}.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '${schemaOnly}' ${sqlInFilter("TABLE_NAME", tableNames)} ORDER BY TABLE_NAME, ORDINAL_POSITION`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${qualifiedSchema}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for Snowflake schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for Snowflake schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
+ async function listTablesForTrino(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.trinoConnection) {
+ throw new Error("Trino connection is required");
+ }
+ try {
+ let catalogPrefix: string;
+ let schemaOnly: string;
+ let resourcePrefix: string;
+
+ if (connection.trinoConnection.catalog) {
+ catalogPrefix = `${connection.trinoConnection.catalog}.`;
+ schemaOnly = schemaName;
+ resourcePrefix = `${connection.trinoConnection.catalog}.${schemaName}`;
+ } else {
+ const dotIdx = schemaName.indexOf(".");
+ if (dotIdx > 0) {
+ catalogPrefix = `${schemaName.substring(0, dotIdx)}.`;
+ schemaOnly = schemaName.substring(dotIdx + 1);
+ } else {
+ catalogPrefix = "";
+ schemaOnly = schemaName;
+ }
+ resourcePrefix = schemaName;
+ }
+
+ const result = await malloyConnection.runSQL(
+ `SELECT table_name, column_name, data_type FROM ${catalogPrefix}information_schema.columns WHERE table_schema = '${schemaOnly}' ${sqlInFilter("table_name", tableNames)} ORDER BY table_name, ordinal_position`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${resourcePrefix}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for Trino schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for Trino schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
+ }
+
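listTablesForTrino derives both the information_schema prefix for the query and the resource prefix for display from either the configured catalog or a catalog-qualified schema name. Worked through for schemaName "tpch.sf1" with no configured catalog (names illustrative):

    const schemaName = "tpch.sf1";
    const dotIdx = schemaName.indexOf(".");                      // 4
    const catalogPrefix = `${schemaName.substring(0, dotIdx)}.`; // "tpch."
    const schemaOnly = schemaName.substring(dotIdx + 1);         // "sf1"
    // Query: SELECT ... FROM tpch.information_schema.columns
    //        WHERE table_schema = 'sf1' ...
    // Resources: "tpch.sf1.orders", "tpch.sf1.lineitem", ...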
+ async function listTablesForDuckDB(
+ connection: ApiConnection,
+ schemaName: string,
+ malloyConnection: Connection,
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.duckdbConnection) {
+ throw new Error("DuckDB connection is required");
+ }
+
+ const attachedDbs = connection.duckdbConnection.attachedDatabases || [];
+
+ // Azure attached database matched by name
+ const azureDb = attachedDbs.find(
+ (db) =>
+ db.type === "azure" && db.name === schemaName && db.azureConnection,
+ );
+ if (azureDb) {
+ const azureConn = azureDb.azureConnection!;
+ const fileUrl =
+ azureConn.authType === "sas_token"
+ ? azureConn.sasUrl
+ : azureConn.fileUrl;
+ if (fileUrl) {
+ return describeAzureFile(malloyConnection, fileUrl, azureConn);
+ }
+ }
+
+ // Azure ADLS file path (abfss://, https://, az://)
  if (
- connection.type === "duckdb" &&
- (schemaName.startsWith("abfss://") ||
- schemaName.startsWith("https://") ||
- schemaName.startsWith("az://"))
+ schemaName.startsWith("abfss://") ||
+ schemaName.startsWith("https://") ||
+ schemaName.startsWith("az://")
  ) {
- return await describeAzureFile(malloyConnection, schemaName);
+ return describeAzureFile(malloyConnection, schemaName);
  }

- // Check if this is a cloud storage file path (gs://bucket/path/file.ext or s3://bucket/path/file.ext)
+ // Cloud storage (GCS/S3)
  const parsedUri = parseCloudUri(schemaName);
-
- if (parsedUri && connection.type === "duckdb") {
+ if (parsedUri) {
  const {
  type: cloudType,
  bucket: bucketName,
  path: directoryPath,
  } = parsedUri;
-
- const attachedDatabases =
- connection.duckdbConnection?.attachedDatabases || [];
  const credentials = getCloudCredentialsFromAttachedDatabases(
- attachedDatabases,
+ attachedDbs,
  cloudType,
  );
-
  if (!credentials) {
  throw new Error(
  `${cloudType.toUpperCase()} credentials not found in attached databases`,
  );
  }
-
  const fileKeys = await listDataFilesInDirectory(
  credentials,
  bucketName,
  directoryPath,
  );
-
- return await getCloudTablesWithColumns(
+ return getCloudTablesWithColumns(
  malloyConnection,
  credentials,
  bucketName,
  fileKeys,
  );
- } else if (connection.type === "ducklake") {
- if (schemaName.split(".").length == 2) {
- schemaName = `${connection.name}.${schemaName}`;
- } else if (schemaName.split(".").length === 1) {
- schemaName = `${connection.name}.${schemaName}`;
- }
  }
- const tableNames = await listTablesForSchema(
- connection,
- schemaName,
- malloyConnection,
- );

- // Fetch all table sources in parallel
- const tableSourcePromises = tableNames.map(async (tableName) => {
- try {
- let tablePath: string;
- if (connection.type === "trino") {
- if (connection.trinoConnection?.catalog) {
- tablePath = `${connection.trinoConnection?.catalog}.${schemaName}.${tableName}`;
- } else {
- // Catalog name is included in the schema name
- tablePath = `${schemaName}.${tableName}`;
- }
- } else if (connection.type === "ducklake") {
- // For ducklake, schemaName already includes connection name prefix from above
- // So tablePath should be schemaName.tableName (which is connectionName.schemaName.tableName)
- tablePath = `${schemaName}.${tableName}`;
- } else {
- tablePath = `${schemaName}.${tableName}`;
- }
-
- logger.info(
- `Processing table: ${tableName} in schema: ${schemaName}`,
- { tablePath, connectionType: connection.type },
- );
- let tableSource: ApiTableSource | undefined;
- if (fetchTableSchema) {
- tableSource = await getConnectionTableSource(
- malloyConnection,
- tableName,
- tablePath,
- );
- }
- return {
- resource: tablePath,
- columns: tableSource?.columns || [],
- };
- } catch (error) {
- logger.warn(`Failed to get schema for table ${tableName}`, {
- error: extractErrorDataFromError(error),
- schemaName,
- tableName,
- });
- // Return table without columns if schema fetch fails
- return {
- resource: `${schemaName}.${tableName}`,
- columns: [],
- };
- }
- });
-
- // Wait for all table sources to be fetched
- const tableResults = await Promise.all(tableSourcePromises);
+ // Regular DuckDB schema query information_schema.columns
+ const dotIdx = schemaName.indexOf(".");
+ if (dotIdx < 0) {
+ throw new Error(
+ `DuckDB schema name must be qualified as "catalog.schema", got "${schemaName}"`,
+ );
+ }
+ const catalogName = schemaName.substring(0, dotIdx);
+ const actualSchemaName = schemaName.substring(dotIdx + 1);

- return tableResults;
+ try {
+ const result = await malloyConnection.runSQL(
+ `SELECT table_name, column_name, data_type FROM information_schema.columns WHERE table_schema = '${actualSchemaName}' AND table_catalog = '${catalogName}' ${sqlInFilter("table_name", tableNames)} ORDER BY table_name, ordinal_position`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${schemaName}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for DuckDB schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for DuckDB schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
+ }
  }

- export async function getConnectionTableSource(
+ async function listTablesForMotherDuck(
+ connection: ApiConnection,
+ schemaName: string,
  malloyConnection: Connection,
- tableKey: string,
- tablePath: string,
- ): Promise<ApiTableSource> {
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ if (!connection.motherduckConnection) {
+ throw new Error("MotherDuck connection is required");
+ }
  try {
- logger.info(`Attempting to fetch table schema for: ${tablePath}`, {
- tableKey,
- tablePath,
- });
- const source = await (
- malloyConnection as Connection & {
- fetchTableSchema: (
- tableKey: string,
- tablePath: string,
- ) => Promise<TableSourceDef | undefined>;
- }
- ).fetchTableSchema(tableKey, tablePath);
- if (source === undefined) {
- throw new ConnectionError(
- `Table ${tablePath} not found: ${JSON.stringify(source)}`,
- );
- }
-
- // Validate that source has the expected structure
- if (!source) {
- throw new ConnectionError(
- `Invalid table source returned for ${tablePath}`,
- );
- } else if (typeof source !== "object") {
- throw new ConnectionError(JSON.stringify(source));
- }
-
- const malloyFields = (source as TableSourceDef).fields;
- if (!malloyFields || !Array.isArray(malloyFields)) {
- throw new ConnectionError(
- `Table ${tablePath} has no fields or invalid field structure`,
- );
- }
-
- //This is for the Trino connection. The connection will not throw an error if the table is not found.
- // Instead it will return an empty fields array. So we need to check for that.
- // But it is fine to have it for all other connections as well.
- if (malloyFields.length === 0) {
- throw new ConnectionError(`Table ${tablePath} not found`);
- }
-
- const fields = malloyFields.map((field) => {
- return {
- name: field.name,
- type: field.type,
- };
- });
- logger.debug(`Successfully fetched schema for ${tablePath}`, {
- fieldCount: fields.length,
- });
- return {
- source: JSON.stringify(source),
- resource: tablePath,
- columns: fields,
- };
+ const result = await malloyConnection.runSQL(
+ `SELECT table_name, column_name, data_type FROM information_schema.columns WHERE table_schema = '${schemaName}' ${sqlInFilter("table_name", tableNames)} ORDER BY table_name, ordinal_position`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${schemaName}.${t}`);
  } catch (error) {
- const errorMessage =
- error instanceof Error
- ? error.message
- : typeof error === "string"
- ? error
- : JSON.stringify(error);
- logger.error("fetchTableSchema error", {
- error,
- tableKey,
- tablePath,
- });
- throw new ConnectionError(errorMessage);
+ logger.error(
+ `Error getting tables for MotherDuck schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for MotherDuck schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
  }
  }

- export async function listTablesForSchema(
+ async function listTablesForDuckLake(
  connection: ApiConnection,
  schemaName: string,
  malloyConnection: Connection,
- ): Promise<string[]> {
- if (connection.type === "bigquery") {
- try {
- // Use BigQuery client directly for efficient table listing
- // This is much faster than querying all regions
- const bigquery = createBigQueryClient(connection);
- const dataset = bigquery.dataset(schemaName);
- const [tables] = await dataset.getTables();
-
- // Return table names, filtering out any undefined values
- return tables
- .map((table) => table.id)
- .filter((id): id is string => id !== undefined);
- } catch (error) {
- logger.error(
- `Error getting tables for BigQuery schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for BigQuery schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "mysql") {
- if (!connection.mysqlConnection) {
- throw new Error("Mysql connection is required");
- }
- try {
- const result = await malloyConnection.runSQL(
- `SELECT TABLE_NAME FROM information_schema.tables WHERE table_schema = '${schemaName}' AND table_type = 'BASE TABLE'`,
- );
- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- return typedRow.TABLE_NAME as string;
- });
- } catch (error) {
- logger.error(
- `Error getting tables for MySQL schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for MySQL schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "postgres") {
- if (!connection.postgresConnection) {
- throw new Error("Postgres connection is required");
- }
- try {
- const result = await malloyConnection.runSQL(
- `SELECT table_name as row FROM information_schema.tables WHERE table_schema = '${schemaName}' ORDER BY table_name`,
- );
- const rows = standardizeRunSQLResult(result);
- return rows as string[];
- } catch (error) {
- logger.error(
- `Error getting tables for Postgres schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for Postgres schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "snowflake") {
- if (!connection.snowflakeConnection) {
- throw new Error("Snowflake connection is required");
- }
- try {
- // TODO: Switch to INFORMATION_SCHEMA.TABLES and INFORMATION_SCHEMA.VIEWS, with pagination support implemented in both backend and the frontend.
- // Note: LIMIT 1000 is a temporary workaround to avoid pagination.
- const tablesResult = await malloyConnection.runSQL(
- `SHOW TABLES IN SCHEMA ${schemaName} LIMIT 1000`,
- );
- const viewsResult = await malloyConnection.runSQL(
- `SHOW VIEWS IN SCHEMA ${schemaName} LIMIT 1000`,
- );
- const tableRows = standardizeRunSQLResult(tablesResult);
- const viewRows = standardizeRunSQLResult(viewsResult);
- logger.debug("Snowflake Tables Listed", { tableRows });
- logger.debug("Snowflake Views Listed", { viewRows });
- const rows = [...tableRows, ...viewRows];
- return rows
- .map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- const name = typedRow.name ?? typedRow.NAME;
- return typeof name === "string" ? name : String(name);
- })
- .filter((id) => id.length > 0);
- } catch (error) {
- logger.error(
- `Error getting tables for Snowflake schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for Snowflake schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "trino") {
- if (!connection.trinoConnection) {
- throw new Error("Trino connection is required");
- }
- try {
- let result: unknown;
-
- if (connection.trinoConnection?.catalog) {
- result = await malloyConnection.runSQL(
- `SHOW TABLES FROM ${connection.trinoConnection.catalog}.${schemaName}`,
- );
- } else {
- // Catalog name is included in the schema name
- result = await malloyConnection.runSQL(
- `SHOW TABLES FROM ${schemaName}`,
- );
- }
- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- return typedRow.Table as string;
- });
- } catch (error) {
- logger.error(
- `Error getting tables for Trino schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for Trino schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "duckdb") {
- if (!connection.duckdbConnection) {
- throw new Error("DuckDB connection is required");
- }
-
- const parsedUri = parseCloudUri(schemaName);
-
- if (parsedUri) {
- const {
- type: cloudType,
- bucket: bucketName,
- path: directoryPath,
- } = parsedUri;
-
- const attachedDatabases =
- connection.duckdbConnection.attachedDatabases || [];
-
- const credentials = getCloudCredentialsFromAttachedDatabases(
- attachedDatabases,
- cloudType,
- );
-
- if (!credentials) {
- throw new Error(
- `${cloudType.toUpperCase()} credentials not found in attached databases`,
- );
- }
-
- try {
- const fileKeys = await listDataFilesInDirectory(
- credentials,
- bucketName,
- directoryPath,
- );
- return fileKeys.map((key) => {
- const lastSlash = key.lastIndexOf("/");
- return lastSlash > 0 ? key.substring(lastSlash + 1) : key;
- });
- } catch (error) {
- logger.error(
- `Error listing ${cloudType.toUpperCase()} objects in ${schemaName}`,
- {
- error,
- },
- );
- throw new Error(
- `Failed to list files in ${schemaName}: ${(error as Error).message}`,
- );
- }
- }
-
- const catalogName = schemaName.split(".")[0];
- const actualSchemaName = schemaName.split(".")[1];
-
- // Regular DuckDB table listing
- try {
- const result = await malloyConnection.runSQL(
- `SELECT table_name FROM information_schema.tables WHERE table_schema = '${actualSchemaName}' and table_catalog = '${catalogName}' ORDER BY table_name`,
- { rowLimit: 1000 },
- );
+ tableNames?: string[],
+ ): Promise<ApiTable[]> {
+ // Prefix bare schema names with the catalog (connection) name.
+ // Two-part names like "catalog.schema" are already qualified.
+ if (!schemaName.includes(".")) {
+ schemaName = `${connection.name}.${schemaName}`;
+ }

- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- return typedRow.table_name as string;
- });
- } catch (error) {
- logger.error(
- `Error getting tables for DuckDB schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for DuckDB schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "motherduck") {
- if (!connection.motherduckConnection) {
- throw new Error("MotherDuck connection is required");
- }
- try {
- const result = await malloyConnection.runSQL(
- `SELECT table_name as row FROM information_schema.tables WHERE table_schema = '${schemaName}' ORDER BY table_name`,
- { rowLimit: 1000 },
- );
- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as { row: string };
- return typedRow.row;
- });
- } catch (error) {
- logger.error(
- `Error getting tables for MotherDuck schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for MotherDuck schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else if (connection.type === "ducklake") {
- const catalogName = schemaName.split(".")[0];
- const actualSchemaName = schemaName.split(".")[1];
- console.error("catalogName", catalogName);
- console.error("actualSchemaName", actualSchemaName);
- try {
- const result = await malloyConnection.runSQL(
- `SELECT table_name FROM information_schema.tables WHERE table_schema = '${actualSchemaName}' AND table_catalog = '${catalogName}' ORDER BY table_name`,
- { rowLimit: 1000 },
- );
- const rows = standardizeRunSQLResult(result);
- return rows.map((row: unknown) => {
- const typedRow = row as Record<string, unknown>;
- return typedRow.table_name as string;
- });
- } catch (error) {
- logger.error(
- `Error getting tables for DuckLake schema ${schemaName} in connection ${connection.name}`,
- { error },
- );
- throw new Error(
- `Failed to get tables for DuckLake schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
- );
- }
- } else {
- throw new Error(`Unsupported connection type: ${connection.type}`);
+ const catalogName = schemaName.split(".")[0];
+ const actualSchemaName = schemaName.split(".")[1];
+ try {
+ const result = await malloyConnection.runSQL(
+ `SELECT table_name, column_name, data_type FROM information_schema.columns WHERE table_schema = '${actualSchemaName}' AND table_catalog = '${catalogName}' ${sqlInFilter("table_name", tableNames)} ORDER BY table_name, ordinal_position`,
+ );
+ const rows = standardizeRunSQLResult(result);
+ return groupColumnRowsIntoTables(rows, (t) => `${schemaName}.${t}`);
+ } catch (error) {
+ logger.error(
+ `Error getting tables for DuckLake schema ${schemaName} in connection ${connection.name}`,
+ { error },
+ );
+ throw new Error(
+ `Failed to get tables for DuckLake schema ${schemaName} in connection ${connection.name}: ${(error as Error).message}`,
+ );
  }
  }
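Taken together, the new listTablesFor* helpers replace the old two-pass flow (list table names, then fetch each table's schema individually) with a single INFORMATION_SCHEMA.COLUMNS query per schema, BigQuery excepted, and the optional tableNames argument narrows that query via sqlInFilter. A call sketch (connection and names illustrative):

    const tables = await listTablesForSchema(
      connection,          // e.g. an ApiConnection of type "postgres"
      "public",
      malloyConnection,
      ["users", "orders"], // optional; becomes AND table_name IN (...)
    );
    // => [{ resource: "public.users", columns: [...] },
    //     { resource: "public.orders", columns: [...] }]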