@malloy-publisher/server 0.0.171 → 0.0.173

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,71 @@ type ApiTable = components["schemas"]["Table"];
20
20
  type ApiQueryData = components["schemas"]["QueryData"];
21
21
  type ApiTemporaryTable = components["schemas"]["TemporaryTable"];
22
22
  type ApiSchema = components["schemas"]["Schema"];
23
+ const AZURE_SUPPORTED_SCHEMES = ["https://", "http://", "abfss://", "az://"];
24
+ const AZURE_DATA_EXTENSIONS = [
25
+ ".parquet",
26
+ ".csv",
27
+ ".json",
28
+ ".jsonl",
29
+ ".ndjson",
30
+ ];
31
+
32
+ /**
33
+ * Validates an Azure URL against the three supported patterns:
34
+ * 1. Single file: path/file.parquet
35
+ * 2. Directory glob: path/*.ext (direct children only, no sub-dirs)
36
+ * 3. Recursive: path/** (all data files recursively)
37
+ */
38
+ function validateAzureUrl(url: string, fieldName: string): void {
39
+ if (!AZURE_SUPPORTED_SCHEMES.some((s) => url.startsWith(s))) {
40
+ throw new BadRequestError(
41
+ `Azure ${fieldName} must use one of: ${AZURE_SUPPORTED_SCHEMES.join(", ")}`,
42
+ );
43
+ }
44
+
45
+ const pathWithoutQuery = url.split("?")[0];
46
+ const stars = (pathWithoutQuery.match(/\*/g) || []).length;
47
+
48
+ if (stars === 0) {
49
+ // Single file — must end with a data extension
50
+ const lower = pathWithoutQuery.toLowerCase();
51
+ if (!AZURE_DATA_EXTENSIONS.some((ext) => lower.endsWith(ext))) {
52
+ throw new BadRequestError(
53
+ `Azure ${fieldName}: a single-file URL must end with a data file extension (${AZURE_DATA_EXTENSIONS.join(", ")})`,
54
+ );
55
+ }
56
+ } else if (pathWithoutQuery.endsWith("**")) {
57
+ // Recursive — valid, no further checks needed
58
+ } else {
59
+ // Must be exactly path/*.ext — one star, in the last path segment only
60
+ const lastSegment = pathWithoutQuery.split("/").pop() || "";
61
+ if (stars !== 1 || !lastSegment.startsWith("*")) {
62
+ throw new BadRequestError(
63
+ `Azure ${fieldName}: only three URL patterns are supported:\n` +
64
+ ` • Single file: path/file.parquet\n` +
65
+ ` • Directory glob: path/*.ext (direct children only)\n` +
66
+ ` • Recursive: path/** (all data files in subtree)\n` +
67
+ `Multi-level globs such as "sub_dir/*/*.parquet" are not supported.`,
68
+ );
69
+ }
70
+ }
71
+ }
72
+
73
+ function validateAzureAttachedDatabases(connectionConfig: ApiConnection): void {
74
+ if (connectionConfig.type !== "duckdb") return;
75
+ const attachedDbs =
76
+ connectionConfig.duckdbConnection?.attachedDatabases || [];
77
+ for (const db of attachedDbs) {
78
+ if (db.type !== "azure" || !db.azureConnection) continue;
79
+ const { authType, sasUrl, fileUrl } = db.azureConnection;
80
+ if (authType === "sas_token" && sasUrl) {
81
+ validateAzureUrl(sasUrl, `"${db.name}" sasUrl`);
82
+ } else if (authType === "service_principal" && fileUrl) {
83
+ validateAzureUrl(fileUrl, `"${db.name}" fileUrl`);
84
+ }
85
+ }
86
+ }
87
+
23
88
  export class ConnectionController {
24
89
  private projectStore: ProjectStore;
25
90
  private connectionService: ConnectionService;
@@ -162,7 +227,9 @@ export class ConnectionController {
162
227
  projectName,
163
228
  connectionName,
164
229
  );
165
- const connection = await this.getConnection(projectName, connectionName);
230
+ // Use getApiConnection to get the unwrapped ApiConnection config, consistent with listSchemas and listTables.
231
+ const project = await this.projectStore.getProject(projectName, false);
232
+ const connection = project.getApiConnection(connectionName);
166
233
 
167
234
  if (connection.type === "ducklake") {
168
235
  if (tablePath.split(".").length === 1) {
@@ -178,6 +245,53 @@ export class ConnectionController {
178
245
  // If tablePath already has 3+ parts or starts with connection name, use as-is
179
246
  }
180
247
 
248
+ // Check if this is an Azure attached database
249
+ if (connection.type === "duckdb") {
250
+ const attachedDbs =
251
+ connection.duckdbConnection?.attachedDatabases || [];
252
+ const azureDb = attachedDbs.find(
253
+ (db) =>
254
+ db.type === "azure" &&
255
+ db.name === schemaName &&
256
+ db.azureConnection,
257
+ );
258
+ if (azureDb && azureDb.azureConnection) {
259
+ // Reconstruct the full SAS URL for the specific file
260
+ const azureConn = azureDb.azureConnection;
261
+ const baseUrl =
262
+ azureConn.authType === "sas_token"
263
+ ? azureConn.sasUrl
264
+ : azureConn.fileUrl;
265
+ if (baseUrl) {
266
+ // Extract the file name from tablePath (e.g., "a.aircraft.parquet" -> "aircraft.parquet")
267
+ const fileName = tablePath.includes(".")
268
+ ? tablePath.split(".").slice(1).join(".")
269
+ : tablePath;
270
+ // Replace the file portion in the base URL with the specific file name
271
+ const urlParts = baseUrl.split("?");
272
+ const basePath = urlParts[0];
273
+ const queryString = urlParts[1] ? `?${urlParts[1]}` : "";
274
+ // Replace the last path segment (or glob) with the actual file name
275
+ const dirPath = basePath.substring(
276
+ 0,
277
+ basePath.lastIndexOf("/") + 1,
278
+ );
279
+ const fullFileUrl = `${dirPath}${fileName}${queryString}`;
280
+
281
+ const tableSource = await getConnectionTableSource(
282
+ malloyConnection,
283
+ fileName,
284
+ fullFileUrl,
285
+ );
286
+ return {
287
+ resource: tablePath,
288
+ columns: tableSource.columns,
289
+ source: tableSource.source,
290
+ };
291
+ }
292
+ }
293
+ }
294
+
181
295
  const tableKey = tablePath.split(".").pop();
182
296
  if (!tableKey) {
183
297
  throw new Error(`Invalid tablePath: ${tablePath}`);
@@ -254,6 +368,16 @@ export class ConnectionController {
254
368
  public async testConnectionConfiguration(
255
369
  connectionConfig: ApiConnection,
256
370
  ): Promise<ApiConnectionStatus> {
371
+ if (
372
+ connectionConfig &&
373
+ "config" in connectionConfig &&
374
+ typeof (connectionConfig as Record<string, unknown>).config ===
375
+ "object"
376
+ ) {
377
+ connectionConfig = (connectionConfig as Record<string, unknown>)
378
+ .config as ApiConnection;
379
+ }
380
+
257
381
  if (
258
382
  !connectionConfig ||
259
383
  typeof connectionConfig !== "object" ||
@@ -297,6 +421,8 @@ export class ConnectionController {
297
421
  throw new BadRequestError("Connection type is required");
298
422
  }
299
423
 
424
+ validateAzureAttachedDatabases(connectionConfig);
425
+
300
426
  logger.info(
301
427
  `Creating connection "${connectionName}" in project "${projectName}"`,
302
428
  );
@@ -321,6 +447,8 @@ export class ConnectionController {
321
447
  throw new BadRequestError("Connection payload is required");
322
448
  }
323
449
 
450
+ validateAzureAttachedDatabases(connection as ApiConnection);
451
+
324
452
  logger.info(
325
453
  `Updating connection "${connectionName}" in project "${projectName}"`,
326
454
  );
@@ -22,8 +22,8 @@ export class WatchModeController {
22
22
  public getWatchStatus: Handler<void, WatchStatusRes> = async (_req, res) => {
23
23
  return res.json({
24
24
  enabled: !!this.watchingPath,
25
- watchingPath: this.watchingPath,
26
- projectName: this.watchingProjectName ?? undefined,
25
+ watchingPath: this.watchingPath ?? "",
26
+ projectName: this.watchingProjectName ?? "",
27
27
  });
28
28
  };
29
29
 
@@ -589,6 +589,78 @@ async function attachCloudStorage(
589
589
  logger.info(`${storageType} connection configured for: ${attachedDb.name}`);
590
590
  }
591
591
 
592
/**
 * Configures an Azure attached database on a DuckDB connection.
 *
 * - service_principal auth: creates (or replaces) a DuckDB Azure secret
 *   holding the SPN credentials so the azure extension can authenticate.
 * - sas_token auth: creates nothing — the SAS token travels in the URL and
 *   is read directly via httpfs.
 *
 * @param connection - DuckDB connection to configure
 * @param attachedDb - attached-database config; must carry azureConnection
 * @throws Error when azureConnection is missing, required SPN fields are
 *   absent, the SAS URL is absent, or the auth type is unrecognized
 */
async function attachAzureStorage(
  connection: DuckDBConnection,
  attachedDb: AttachedDatabase,
): Promise<void> {
  if (!attachedDb.azureConnection) {
    throw new Error(
      `Azure connection configuration missing for: ${attachedDb.name}`,
    );
  }

  const config = attachedDb.azureConnection;

  // Extensions are loaded once in attachDatabasesToDuckDB before the loop
  const secretName = sanitizeSecretName(`azure_${attachedDb.name}`);

  let createSecretCommand: string;

  if (config.authType === "service_principal") {
    // All four SPN fields are mandatory; fail fast with a config error.
    if (
      !config.tenantId ||
      !config.clientId ||
      !config.clientSecret ||
      !config.accountName
    ) {
      throw new Error(
        `Azure SPN auth requires tenantId, clientId, clientSecret, and accountName for: ${attachedDb.name}`,
      );
    }

    // escapeSQL presumably escapes quotes for SQL string literals — the
    // values are interpolated into single-quoted strings below. TODO confirm.
    const escapedTenantId = escapeSQL(config.tenantId);
    const escapedClientId = escapeSQL(config.clientId);
    const escapedClientSecret = escapeSQL(config.clientSecret);
    const escapedAccountName = escapeSQL(config.accountName);

    createSecretCommand = `
      CREATE OR REPLACE SECRET ${secretName} (
        TYPE azure,
        PROVIDER service_principal,
        TENANT_ID '${escapedTenantId}',
        CLIENT_ID '${escapedClientId}',
        CLIENT_SECRET '${escapedClientSecret}',
        ACCOUNT_NAME '${escapedAccountName}'
      );
    `;
  } else if (config.authType === "sas_token") {
    if (!config.sasUrl) {
      throw new Error(
        `Azure SAS token auth requires sasUrl for: ${attachedDb.name}`,
      );
    }

    // For SAS token auth, DuckDB can read the HTTPS SAS URL directly via httpfs.
    // No Azure secret is needed — just ensure httpfs is loaded.
    logger.info(
      `Azure SAS token configured for: ${attachedDb.name} (no secret needed, using direct URL)`,
    );
    return;
  } else {
    throw new Error(
      `Unsupported Azure auth type: ${config.authType} for: ${attachedDb.name}`,
    );
  }

  // NOTE(review): this DETACHes the attached database when the secret
  // already exists, even though CREATE OR REPLACE SECRET below would
  // overwrite the secret on its own — looks copy-pasted from a database
  // attach path; confirm the DETACH (and its swallowed failure) is intended.
  if (await doesSecretExistInDuckDB(connection, secretName)) {
    await connection.runSQL(`DETACH ${attachedDb.name};`).catch(() => {});
  }
  await connection.runSQL(createSecretCommand);

  logger.info(`Created Azure secret: ${secretName}`);
  logger.info(`Azure ADLS connection configured for: ${attachedDb.name}`);
}
663
+
592
664
  async function doesSecretExistInDuckDB(
593
665
  connection: DuckDBConnection,
594
666
  secretName: string,
@@ -614,8 +686,16 @@ async function attachDatabasesToDuckDB(
614
686
  postgres: attachPostgres,
615
687
  gcs: attachCloudStorage,
616
688
  s3: attachCloudStorage,
689
+ azure: attachAzureStorage,
617
690
  };
618
691
 
692
+ // Pre-load extensions needed by any attached database type, once per connection
693
+ const hasAzure = attachedDatabases.some((db) => db.type === "azure");
694
+ if (hasAzure) {
695
+ await installAndLoadExtension(duckdbConnection, "azure");
696
+ await installAndLoadExtension(duckdbConnection, "httpfs");
697
+ }
698
+
619
699
  for (const attachedDb of attachedDatabases) {
620
700
  try {
621
701
  // Check if already attached
@@ -650,6 +730,106 @@ async function attachDatabasesToDuckDB(
650
730
  }
651
731
  }
652
732
 
733
+ type ApiAzureConnection = components["schemas"]["AzureConnection"];
734
+
735
+ /**
736
+ * Builds the actual Azure URL for a blob given an AzureConnection config.
737
+ * Strips any glob pattern from the base URL and appends the blob name.
738
+ */
739
+ function buildAzureFileUrl(
740
+ azureConn: ApiAzureConnection,
741
+ blobName: string,
742
+ ): string {
743
+ if (azureConn.authType === "sas_token" && azureConn.sasUrl) {
744
+ const qIdx = azureConn.sasUrl.indexOf("?");
745
+ const baseUrl =
746
+ qIdx >= 0 ? azureConn.sasUrl.substring(0, qIdx) : azureConn.sasUrl;
747
+ const token = qIdx >= 0 ? azureConn.sasUrl.substring(qIdx) : "";
748
+ // Single file URL — replace the filename portion so we don't double-append
749
+ if (/\.(parquet|csv|json|jsonl|ndjson)$/i.test(baseUrl)) {
750
+ const dir = baseUrl.replace(/\/[^/]+$/, "");
751
+ return `${dir}/${blobName}${token}`;
752
+ }
753
+ // Glob or directory — strip trailing glob/slash and append blobName
754
+ const cleanBase = baseUrl.replace(/\/\*[^/]*$/, "").replace(/\/+$/, "");
755
+ return `${cleanBase}/${blobName}${token}`;
756
+ } else if (azureConn.authType === "service_principal" && azureConn.fileUrl) {
757
+ const url = azureConn.fileUrl;
758
+ // Single file URL (ends with a data file extension) — replace the
759
+ // filename with blobName so we don't double-append
760
+ if (/\.(parquet|csv|json|jsonl|ndjson)$/i.test(url)) {
761
+ return url.replace(/\/[^/]+$/, `/${blobName}`);
762
+ }
763
+ // Glob or directory — strip glob pattern and trailing slash, append blobName
764
+ const base = url.replace(/\*[^/]*$/, "").replace(/\/+$/, "");
765
+ return `${base}/${blobName}`;
766
+ }
767
+ throw new Error(
768
+ `Cannot build Azure file URL: missing sasUrl or fileUrl in config`,
769
+ );
770
+ }
771
+
772
+ /**
773
+ * Extends DuckDBConnection to resolve Azure attached-database table paths.
774
+ * When Malloy compiles di.table('azure_schema.blob_name'), the path
775
+ * 'azure_schema.blob_name' is passed to fetchTableSchema. This override
776
+ * detects that prefix, constructs the real Azure URL, and forwards it to
777
+ * DuckDB so the azure/httpfs extension can read the file.
778
+ */
779
+ class AzureDuckDBConnection extends DuckDBConnection {
780
+ private azureDatabases: AttachedDatabase[];
781
+
782
+ constructor(
783
+ connectionName: string,
784
+ databasePath: string,
785
+ workingDirectory: string,
786
+ azureDatabases: AttachedDatabase[],
787
+ ) {
788
+ super(connectionName, databasePath, workingDirectory);
789
+ this.azureDatabases = azureDatabases;
790
+ }
791
+
792
+ async fetchTableSchema(
793
+ tableKey: string,
794
+ tablePath: string,
795
+ ): Promise<TableSourceDef> {
796
+ const dotIdx = tablePath.indexOf(".");
797
+ if (dotIdx > 0) {
798
+ const schemaName = tablePath.substring(0, dotIdx);
799
+ const blobName = tablePath.substring(dotIdx + 1);
800
+
801
+ const azureDb = this.azureDatabases.find(
802
+ (db) =>
803
+ db.type === "azure" &&
804
+ db.name === schemaName &&
805
+ db.azureConnection,
806
+ );
807
+
808
+ if (azureDb) {
809
+ const azureUrl = buildAzureFileUrl(
810
+ azureDb.azureConnection!,
811
+ blobName,
812
+ );
813
+ logger.debug("Resolved Azure table path", {
814
+ original: tablePath,
815
+ resolved: azureUrl,
816
+ });
817
+ const result = await super.fetchTableSchema(tableKey, azureUrl);
818
+ if (!result) {
819
+ throw new Error(`Azure file not found: ${azureUrl}`);
820
+ }
821
+ return result;
822
+ }
823
+ }
824
+
825
+ const result = await super.fetchTableSchema(tableKey, tablePath);
826
+ if (!result) {
827
+ throw new Error(`Table ${tablePath} not found`);
828
+ }
829
+ return result;
830
+ }
831
+ }
832
+
653
833
  class DuckLakeConnection extends DuckDBConnection {
654
834
  private connectionName: string;
655
835
 
@@ -968,21 +1148,29 @@ export async function createProjectConnections(
968
1148
  // Create DuckDB connection with project basePath as working directory
969
1149
  // This ensures relative paths in the project are resolved correctly
970
1150
  // Use unique memory database path to prevent sharing across connections
971
- const duckdbConnection = new DuckDBConnection(
972
- connection.name,
973
- path.join(projectPath, `${connection.name}.duckdb`),
974
- projectPath,
1151
+ const attachedDatabases =
1152
+ connection.duckdbConnection.attachedDatabases ?? [];
1153
+ const hasAzureAttached = attachedDatabases.some(
1154
+ (db) => db.type === "azure",
975
1155
  );
1156
+ const duckdbConnection = hasAzureAttached
1157
+ ? new AzureDuckDBConnection(
1158
+ connection.name,
1159
+ path.join(projectPath, `${connection.name}.duckdb`),
1160
+ projectPath,
1161
+ attachedDatabases,
1162
+ )
1163
+ : new DuckDBConnection(
1164
+ connection.name,
1165
+ path.join(projectPath, `${connection.name}.duckdb`),
1166
+ projectPath,
1167
+ );
976
1168
 
977
1169
  // Attach databases if configured
978
- if (
979
- connection.duckdbConnection.attachedDatabases &&
980
- Array.isArray(connection.duckdbConnection.attachedDatabases) &&
981
- connection.duckdbConnection.attachedDatabases.length > 0
982
- ) {
1170
+ if (attachedDatabases.length > 0) {
983
1171
  await attachDatabasesToDuckDB(
984
1172
  duckdbConnection,
985
- connection.duckdbConnection.attachedDatabases,
1173
+ attachedDatabases,
986
1174
  );
987
1175
  }
988
1176
 
@@ -1088,6 +1276,7 @@ function getConnectionAttributes(
1088
1276
  };
1089
1277
  }
1090
1278
 
1279
+ // TODO: Re-write these tests to validate based on credentials provided in the connection config
1091
1280
  async function testDuckDBConnection(
1092
1281
  duckdbConnection: DuckDBConnection,
1093
1282
  connectionConfig: InternalConnection,
@@ -1153,10 +1342,16 @@ async function testDuckDBConnection(
1153
1342
  );
1154
1343
  break;
1155
1344
  }
1156
- case "gcs":
1345
+ case "gcs": {
1346
+ await duckdbConnection.runSQL(
1347
+ `SELECT name FROM duckdb_secrets() WHERE name LIKE '%${attachedDb.name}%' LIMIT 1`,
1348
+ );
1349
+ logger.info(
1350
+ `Cloud storage credentials test passed: ${attachedDb.name}`,
1351
+ );
1352
+ break;
1353
+ }
1157
1354
  case "s3": {
1158
- // For cloud storage, verify the secret was created
1159
- // Cloud storage doesn't attach as a database, it uses secrets for auth
1160
1355
  await duckdbConnection.runSQL(
1161
1356
  `SELECT name FROM duckdb_secrets() WHERE name LIKE '%${attachedDb.name}%' LIMIT 1`,
1162
1357
  );
@@ -1165,6 +1360,28 @@ async function testDuckDBConnection(
1165
1360
  );
1166
1361
  break;
1167
1362
  }
1363
+ case "azure": {
1364
+ const azureConfig = attachedDb.azureConnection;
1365
+ if (azureConfig?.authType === "sas_token") {
1366
+ // SAS token is embedded in the URL — no DuckDB secret is created.
1367
+ if (!azureConfig.sasUrl) {
1368
+ throw new Error(
1369
+ `Azure SAS token URL is missing for: ${attachedDb.name}`,
1370
+ );
1371
+ }
1372
+ logger.info(
1373
+ `Azure SAS token URL present for: ${attachedDb.name}`,
1374
+ );
1375
+ } else {
1376
+ await duckdbConnection.runSQL(
1377
+ `SELECT name FROM duckdb_secrets() WHERE name LIKE '%${attachedDb.name}%' LIMIT 1`,
1378
+ );
1379
+ logger.info(
1380
+ `Azure SPN credentials test passed: ${attachedDb.name}`,
1381
+ );
1382
+ }
1383
+ break;
1384
+ }
1168
1385
  default: {
1169
1386
  logger.warn(
1170
1387
  `Unknown attached database type: ${attachedDb.type}`,