@malloy-publisher/server 0.0.170 → 0.0.172

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ import { ClientSecretCredential } from "@azure/identity";
2
+ import { ContainerClient } from "@azure/storage-blob";
1
3
  import { BigQuery } from "@google-cloud/bigquery";
2
4
  import { Connection, TableSourceDef } from "@malloydata/malloy";
3
5
  import { components } from "../api";
@@ -17,6 +19,7 @@ import { ApiConnection } from "./model";
17
19
  type ApiSchema = components["schemas"]["Schema"];
18
20
  type ApiTable = components["schemas"]["Table"];
19
21
  type ApiTableSource = components["schemas"]["TableSource"];
22
+ type ApiAzureConnection = components["schemas"]["AzureConnection"];
20
23
 
21
24
  function createBigQueryClient(connection: ApiConnection): BigQuery {
22
25
  if (!connection.bigqueryConnection) {
@@ -354,6 +357,21 @@ export async function getSchemasForConnection(
354
357
  schemas.push(...cloudSchemas);
355
358
  }
356
359
 
360
+ // Add Azure ADLS attached databases as schemas (by name)
361
+ const azureDatabases = attachedDatabases.filter(
362
+ (attachedDb) =>
363
+ attachedDb.type === "azure" && attachedDb.azureConnection,
364
+ );
365
+ for (const attachedDb of azureDatabases) {
366
+ if (attachedDb.name) {
367
+ schemas.push({
368
+ name: attachedDb.name,
369
+ isHidden: false,
370
+ isDefault: false,
371
+ });
372
+ }
373
+ }
374
+
357
375
  return schemas;
358
376
  } catch (error) {
359
377
  console.error(
@@ -434,11 +452,281 @@ export async function getSchemasForConnection(
434
452
  }
435
453
  }
436
454
 
455
/**
 * Maps a file name or path to the data-format label used when choosing a
 * DuckDB reader. Matching is case-insensitive and based purely on the suffix.
 *
 * @param key File name, path or object key (without a query string).
 * @returns "csv", "parquet", "json", "jsonl" (for .jsonl and .ndjson) or
 *   "unknown" when no known suffix matches.
 */
function getFileType(key: string): string {
  const suffixToType: ReadonlyArray<readonly [string, string]> = [
    [".csv", "csv"],
    [".parquet", "parquet"],
    [".json", "json"],
    [".jsonl", "jsonl"],
    [".ndjson", "jsonl"],
  ];
  const normalized = key.toLowerCase();
  const hit = suffixToType.find(([suffix]) => normalized.endsWith(suffix));
  return hit ? hit[1] : "unknown";
}
464
+
465
/**
 * Lists the data-file blobs in an Azure container that match a glob-like URL.
 *
 * `fileUrl` may be an HTTPS URL, optionally carrying a SAS token as its query
 * string, e.g.
 *   https://account.blob.core.windows.net/container/path/*.parquet?sasToken
 * or an `abfss://` or `az://` URL, either fully qualified
 * (scheme://account.suffix/container/path) or in short form
 * (scheme://container/path). The short form requires
 * `azureConnection.accountName`.
 *
 * Supported path shapes:
 *   path/*.ext  → blobs directly under path/ whose name ends with .ext
 *   path/**     → every data file under path/, nested levels included
 *   path        → used as a plain listing prefix (recursive)
 *
 * Only blobs accepted by `isDataFile` are returned. When
 * `azureConnection.authType` is "service_principal" each result URL is an
 * `abfss://` URL; otherwise it is an HTTPS URL with the original query string
 * (the SAS token) appended.
 *
 * @param fileUrl Container, directory or glob URL to list.
 * @param azureConnection Optional connection settings (auth type, account
 *   name, service-principal credentials).
 * @returns One `{ url, blobName }` entry per matching blob.
 * @throws Error when a short-form URL is used without `accountName`, or when
 *   no container name can be extracted from the URL.
 */
async function listAzureBlobs(
  fileUrl: string,
  azureConnection?: ApiAzureConnection,
): Promise<{ url: string; blobName: string }[]> {
  // Split on the first "?" by hand so the SAS signature is passed through
  // untouched (never parsed or re-encoded).
  const queryStart = fileUrl.indexOf("?");
  const baseUrl = queryStart >= 0 ? fileUrl.substring(0, queryStart) : fileUrl;
  const sasToken = queryStart >= 0 ? fileUrl.substring(queryStart) : "";

  // Parse the URL to extract account, container, and blob path
  let accountUrl: string;
  let container: string | undefined;
  let blobPath: string;

  // abfss:// and az:// share the same two layouts, so they are parsed alike.
  const azureScheme = ["abfss://", "az://"].find((scheme) =>
    baseUrl.startsWith(scheme),
  );
  if (azureScheme) {
    const parts = baseUrl
      .substring(azureScheme.length)
      .split("/")
      .filter(Boolean);
    const firstPart = parts[0] ?? "";
    if (firstPart.includes(".")) {
      // Fully qualified: scheme://account.<suffix>/container/path
      const accountName = firstPart.split(".")[0];
      accountUrl = `https://${accountName}.blob.core.windows.net`;
      container = parts[1];
      blobPath = parts.slice(2).join("/");
    } else {
      // Short form: scheme://container/path — need accountName from config
      if (!azureConnection?.accountName) {
        throw new Error(
          `accountName is required to list blobs with ${azureScheme} URLs`,
        );
      }
      accountUrl = `https://${azureConnection.accountName}.blob.core.windows.net`;
      container = parts[0];
      blobPath = parts.slice(1).join("/");
    }
  } else {
    // https://account.blob.core.windows.net/container/path
    const url = new URL(baseUrl);
    const pathParts = url.pathname.split("/").filter(Boolean);
    container = pathParts[0];
    blobPath = pathParts.slice(1).join("/");
    accountUrl = `${url.protocol}//${url.host}`;
  }

  // Without this guard a missing container would be interpolated as the
  // literal string "undefined" into the container URL below.
  if (!container) {
    throw new Error(`Azure URL is missing a container name: ${baseUrl}`);
  }

  // Three supported glob patterns:
  //   path/file.ext → single file (handled upstream by isAzureSingleFileUrl)
  //   path/*.ext    → files directly in path/ with that extension (no subdirs)
  //   path/**       → all valid data files in path/ and nested dirs (recursive)
  let prefix: string;
  let extensionFilter = ""; // suffix after the "*" in a path/*.ext pattern
  let recursive = true;

  if (blobPath.endsWith("**")) {
    // Recursive listing: everything under this prefix
    prefix = blobPath.slice(0, -2);
  } else if (blobPath.includes("*")) {
    // Single-level glob: path/*.ext — files directly in that dir only
    const starIndex = blobPath.indexOf("*");
    prefix = blobPath.substring(0, starIndex);
    extensionFilter = blobPath.substring(starIndex + 1); // e.g. ".parquet"
    recursive = false;
  } else {
    // No glob — use blobPath as prefix (container-level listing)
    prefix = blobPath;
  }

  // Create ContainerClient with appropriate authentication
  let containerClient: ContainerClient;
  if (
    azureConnection?.authType === "service_principal" &&
    azureConnection.tenantId &&
    azureConnection.clientId &&
    azureConnection.clientSecret
  ) {
    const credential = new ClientSecretCredential(
      azureConnection.tenantId,
      azureConnection.clientId,
      azureConnection.clientSecret,
    );
    containerClient = new ContainerClient(
      `${accountUrl}/${container}`,
      credential,
    );
  } else {
    // SAS token auth — append token to container URL
    const containerUrl = `${accountUrl}/${container}${sasToken}`;
    containerClient = new ContainerClient(containerUrl);
  }

  const matchingFiles: { url: string; blobName: string }[] = [];
  for await (const blob of containerClient.listBlobsFlat({
    prefix: prefix || undefined,
  })) {
    if (extensionFilter && !blob.name.endsWith(extensionFilter)) continue;
    // For *.ext (non-recursive): only allow files directly in prefix dir
    if (!recursive) {
      const nameAfterPrefix = blob.name.substring(prefix.length);
      if (nameAfterPrefix.includes("/")) continue;
    }
    if (!isDataFile(blob.name)) continue;
    // For SPN: use abfss:// URLs that DuckDB's azure extension can read
    // For SAS: use https:// URLs with token appended
    let url: string;
    if (azureConnection?.authType === "service_principal") {
      const account =
        azureConnection.accountName ||
        accountUrl.split("//")[1]?.split(".")[0];
      url = `abfss://${account}.dfs.core.windows.net/${container}/${blob.name}`;
    } else {
      url = `${accountUrl}/${container}/${blob.name}${sasToken}`;
    }
    matchingFiles.push({ url, blobName: blob.name });
  }

  logger.info(
    `Listed ${matchingFiles.length} matching blobs in Azure container ${container} with prefix "${prefix}"`,
  );
  return matchingFiles;
}
592
+
593
/**
 * Reports whether a file name or object key carries one of the data-file
 * suffixes this module can describe (.csv, .parquet, .json, .jsonl, .ndjson).
 * The comparison ignores case.
 */
function isDataFile(key: string): boolean {
  const normalized = key.toLowerCase();
  const dataSuffixes = [".csv", ".parquet", ".json", ".jsonl", ".ndjson"];
  return dataSuffixes.some((suffix) => normalized.endsWith(suffix));
}
603
+
604
/**
 * Describes the columns of a single remote data file by running a DuckDB
 * `DESCRIBE SELECT * FROM read_*(...)` statement through the given connection.
 *
 * The reader function is chosen from the file suffix (query string ignored).
 * Unsupported suffixes are logged and reported as a table with no columns.
 *
 * @param malloyConnection Connection used to run the DESCRIBE statement.
 * @param fileUri Full file URI, including any query string such as a SAS token.
 * @returns A table whose `resource` is the file's last path segment and whose
 *   `columns` carry the described names and types. For unsupported file types
 *   `resource` is the full `fileUri` and `columns` is empty.
 */
async function describeRemoteFile(
  malloyConnection: Connection,
  fileUri: string,
): Promise<ApiTable> {
  const pathWithoutQuery = fileUri.split("?")[0];
  const fileType = getFileType(pathWithoutQuery);

  // The URI is embedded in a SQL string literal. Blob names and URLs may
  // legally contain "'", so double it to keep the literal intact and to stop
  // a crafted name from terminating the string and injecting SQL.
  const uriLiteral = `'${fileUri.replace(/'/g, "''")}'`;

  let describeQuery: string;
  switch (fileType) {
    case "csv":
      describeQuery = `DESCRIBE SELECT * FROM read_csv(${uriLiteral}, auto_detect=true) LIMIT 1`;
      break;
    case "parquet":
      describeQuery = `DESCRIBE SELECT * FROM read_parquet(${uriLiteral}) LIMIT 1`;
      break;
    case "json":
      describeQuery = `DESCRIBE SELECT * FROM read_json(${uriLiteral}, auto_detect=true) LIMIT 1`;
      break;
    case "jsonl":
      describeQuery = `DESCRIBE SELECT * FROM read_json(${uriLiteral}, format='newline_delimited', auto_detect=true) LIMIT 1`;
      break;
    default:
      logger.warn(`Unsupported file type for file: ${fileUri}`);
      return { resource: fileUri, columns: [] };
  }

  const result = await malloyConnection.runSQL(describeQuery);
  const rows = standardizeRunSQLResult(result);
  const columns = rows.map((row: unknown) => {
    const typedRow = row as Record<string, unknown>;
    return {
      // Result rows may expose either column_name/column_type or name/type.
      name: (typedRow.column_name || typedRow.name) as string,
      type: (typedRow.column_type || typedRow.type) as string,
    };
  });

  const fileName = pathWithoutQuery.split("/").pop() || fileUri;
  return { resource: fileName, columns };
}
643
+
644
/**
 * Decides whether an Azure URI points at exactly one data file, as opposed
 * to a glob pattern or a container/directory listing.
 *
 * The query string (e.g. a SAS token) is ignored. A URI counts as a single
 * file only when its path has no "*", does not end in "/", and its last
 * segment passes `isDataFile`.
 */
function isAzureSingleFileUrl(fileUri: string): boolean {
  const pathOnly = fileUri.split("?")[0];

  // A wildcard or a trailing slash means "list something", never one file.
  const isListing = pathOnly.includes("*") || pathOnly.endsWith("/");
  if (isListing) {
    return false;
  }

  const finalSegment = pathOnly.split("/").pop() || "";
  return isDataFile(finalSegment);
}
654
+
655
/**
 * Describes the table(s) behind an Azure file URI.
 *
 * A single-file URI is described directly. Anything else (a glob, container
 * or directory URL) is expanded with `listAzureBlobs` and every listed blob
 * is described concurrently. A blob that fails to describe is logged and
 * returned with an empty column list rather than failing the whole call.
 *
 * @param malloyConnection Connection used to run the DESCRIBE statements.
 * @param fileUri File, glob or container URI.
 * @param azureConnection Optional Azure settings forwarded to the lister.
 * @returns One table per file. When the listing is empty, a single
 *   placeholder table named after `fileUri` with no columns.
 * @throws Error prefixed with "Failed to describe Azure file:" when the
 *   single-file describe or the listing itself fails.
 */
async function describeAzureFile(
  malloyConnection: Connection,
  fileUri: string,
  azureConnection?: ApiAzureConnection,
): Promise<ApiTable[]> {
  // Per-blob describe that never rejects: failures degrade to "no columns".
  const describeListedBlob = async (entry: {
    url: string;
    blobName: string;
  }): Promise<ApiTable> => {
    try {
      const described = await describeRemoteFile(malloyConnection, entry.url);
      return { ...described, resource: entry.blobName };
    } catch (error) {
      logger.warn(`Failed to describe Azure blob: ${entry.url}`, { error });
      return { resource: entry.blobName, columns: [] } as ApiTable;
    }
  };

  try {
    if (isAzureSingleFileUrl(fileUri)) {
      // Single file — describe directly via DuckDB
      const onlyTable = await describeRemoteFile(malloyConnection, fileUri);
      return [onlyTable];
    }

    // Glob pattern or container/directory URL — list blobs via Azure SDK
    const listed = await listAzureBlobs(fileUri, azureConnection);
    if (listed.length === 0) {
      return [{ resource: fileUri, columns: [] }];
    }

    return await Promise.all(
      listed.map((entry) => describeListedBlob(entry)),
    );
  } catch (error) {
    logger.error(`Failed to describe Azure file: ${fileUri}`, { error });
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to describe Azure file: ${reason}`);
  }
}
691
+
437
692
  export async function getTablesForSchema(
438
693
  connection: ApiConnection,
439
694
  schemaName: string,
440
695
  malloyConnection: Connection,
441
696
  ): Promise<ApiTable[]> {
697
+ // Check if schemaName matches an Azure attached database name
698
+ if (connection.type === "duckdb") {
699
+ const attachedDbs = connection.duckdbConnection?.attachedDatabases || [];
700
+ const azureDb = attachedDbs.find(
701
+ (db) =>
702
+ db.type === "azure" && db.name === schemaName && db.azureConnection,
703
+ );
704
+ if (azureDb) {
705
+ const azureConn = azureDb.azureConnection!;
706
+ const fileUrl =
707
+ azureConn.authType === "sas_token"
708
+ ? azureConn.sasUrl
709
+ : azureConn.fileUrl;
710
+ if (fileUrl) {
711
+ return await describeAzureFile(
712
+ malloyConnection,
713
+ fileUrl,
714
+ azureConn,
715
+ );
716
+ }
717
+ }
718
+ }
719
+
720
+ // Check if this is an Azure ADLS file path (abfss:// or HTTPS SAS URL)
721
+ if (
722
+ connection.type === "duckdb" &&
723
+ (schemaName.startsWith("abfss://") ||
724
+ schemaName.startsWith("https://") ||
725
+ schemaName.startsWith("az://"))
726
+ ) {
727
+ return await describeAzureFile(malloyConnection, schemaName);
728
+ }
729
+
442
730
  // Check if this is a cloud storage file path (gs://bucket/path/file.ext or s3://bucket/path/file.ext)
443
731
  const parsedUri = parseCloudUri(schemaName);
444
732
 
@@ -20,6 +20,7 @@ import {
20
20
  PUBLISHER_DATA_DIR,
21
21
  } from "../constants";
22
22
  import {
23
+ BadRequestError,
23
24
  FrozenConfigError,
24
25
  PackageNotFoundError,
25
26
  ProjectNotFoundError,
@@ -31,6 +32,63 @@ import { StorageConfig, StorageManager } from "../storage/StorageManager";
31
32
  import { PackageStatus, Project } from "./project";
32
33
  type ApiProject = components["schemas"]["Project"];
33
34
 
35
// URL schemes accepted for Azure attached-database URLs.
const AZURE_SUPPORTED_SCHEMES = ["https://", "http://", "abfss://", "az://"];
// File suffixes a single-file Azure URL must end with (compared lowercase).
const AZURE_DATA_EXTENSIONS = [
  ".parquet",
  ".csv",
  ".json",
  ".jsonl",
  ".ndjson",
];

/**
 * Validates the shape of an Azure data URL from a project configuration.
 *
 * The query string is ignored. Exactly three path shapes are accepted:
 *   - single file:    no "*", path ends with a known data extension
 *   - directory glob: exactly one "*", at the start of the last path segment
 *   - recursive:      path ends with "**" and contains no other "*"
 *
 * @param url URL to validate.
 * @param fieldName Label used in error messages to identify the field.
 * @throws BadRequestError when the scheme is unsupported or the path matches
 *   none of the three shapes.
 */
function validateAzureUrl(url: string, fieldName: string): void {
  if (!AZURE_SUPPORTED_SCHEMES.some((s) => url.startsWith(s))) {
    throw new BadRequestError(
      `Azure ${fieldName} must use one of: ${AZURE_SUPPORTED_SCHEMES.join(", ")}`,
    );
  }
  const pathWithoutQuery = url.split("?")[0];
  const stars = (pathWithoutQuery.match(/\*/g) || []).length;

  if (stars === 0) {
    const lower = pathWithoutQuery.toLowerCase();
    if (!AZURE_DATA_EXTENSIONS.some((ext) => lower.endsWith(ext))) {
      throw new BadRequestError(
        `Azure ${fieldName}: a single-file URL must end with a data file extension (${AZURE_DATA_EXTENSIONS.join(", ")})`,
      );
    }
    return;
  }

  // Recursive form: the trailing "**" must be the ONLY wildcard. Otherwise
  // something like "a/*/b/**" would pass here and later be listed with a
  // literal "*" in the prefix, silently matching nothing.
  if (stars === 2 && pathWithoutQuery.endsWith("**")) {
    return;
  }

  // Directory glob: a single "*" leading the last path segment.
  const lastSegment = pathWithoutQuery.split("/").pop() || "";
  if (stars === 1 && lastSegment.startsWith("*")) {
    return;
  }

  throw new BadRequestError(
    `Azure ${fieldName}: only three URL patterns are supported:\n` +
      `  • Single file: path/file.parquet\n` +
      `  • Directory glob: path/*.ext (direct children only)\n` +
      `  • Recursive: path/** (includes all data files in the container and all subdirectories)\n` +
      `Multi-level globs such as "sub_dir/*/*.parquet" are not supported.`,
  );
}
76
+
77
/**
 * Runs `validateAzureUrl` over every Azure attached database configured on
 * the project's DuckDB connections.
 *
 * For "sas_token" auth the `sasUrl` is checked; for "service_principal" auth
 * the `fileUrl` is checked. Entries without the relevant URL are skipped.
 * Whatever `validateAzureUrl` throws for the first invalid URL propagates.
 */
function validateProjectAzureUrls(project: ApiProject): void {
  const duckdbConnections = (project.connections || []).filter(
    (candidate) => candidate.type === "duckdb",
  );

  for (const connection of duckdbConnections) {
    const attached = connection.duckdbConnection?.attachedDatabases || [];
    for (const database of attached) {
      const azure = database.azureConnection;
      if (database.type !== "azure" || !azure) {
        continue;
      }

      switch (azure.authType) {
        case "sas_token":
          if (azure.sasUrl) {
            validateAzureUrl(azure.sasUrl, `"${database.name}" sasUrl`);
          }
          break;
        case "service_principal":
          if (azure.fileUrl) {
            validateAzureUrl(azure.fileUrl, `"${database.name}" fileUrl`);
          }
          break;
        default:
          break;
      }
    }
  }
}
91
+
34
92
  export class ProjectStore {
35
93
  public serverRootPath: string;
36
94
  private projects: Map<string, Project> = new Map();
@@ -762,6 +820,7 @@ export class ProjectStore {
762
820
  if (this.publisherConfigIsFrozen) {
763
821
  throw new FrozenConfigError();
764
822
  }
823
+ validateProjectAzureUrls(project);
765
824
  const projectName = project.name;
766
825
  if (!projectName) {
767
826
  throw new Error("Project name is required");
@@ -1112,7 +1171,12 @@ export class ProjectStore {
1112
1171
  logger.info(
1113
1172
  `Downloading S3 directory from "${location}" to "${targetPath}"`,
1114
1173
  );
1115
- await this.downloadS3Directory(location, projectName, targetPath);
1174
+ await this.downloadS3Directory(
1175
+ location,
1176
+ projectName,
1177
+ targetPath,
1178
+ isCompressedFile,
1179
+ );
1116
1180
  return;
1117
1181
  } catch (error) {
1118
1182
  const errorData = this.extractErrorDataFromError(error);
@@ -1242,10 +1306,43 @@ export class ProjectStore {
1242
1306
  s3Path: string,
1243
1307
  projectName: string,
1244
1308
  absoluteDirPath: string,
1309
+ isCompressedFile: boolean = false,
1245
1310
  ) {
1246
1311
  const trimmedPath = s3Path.slice(5);
1247
1312
  const [bucketName, ...prefixParts] = trimmedPath.split("/");
1248
1313
  const prefix = prefixParts.join("/");
1314
+
1315
+ if (isCompressedFile) {
1316
+ // Download the single zip file
1317
+ const zipFilePath = `${absoluteDirPath}.zip`;
1318
+ await fs.promises.mkdir(path.dirname(zipFilePath), {
1319
+ recursive: true,
1320
+ });
1321
+
1322
+ const command = new GetObjectCommand({
1323
+ Bucket: bucketName,
1324
+ Key: prefix,
1325
+ });
1326
+ const item = await this.s3Client.send(command);
1327
+ if (!item.Body) {
1328
+ throw new ProjectNotFoundError(
1329
+ `Project ${projectName} not found in ${s3Path}`,
1330
+ );
1331
+ }
1332
+ const file = fs.createWriteStream(zipFilePath);
1333
+ item.Body.transformToWebStream().pipeTo(Writable.toWeb(file));
1334
+ await new Promise<void>((resolve, reject) => {
1335
+ file.on("error", reject);
1336
+ file.on("finish", resolve);
1337
+ });
1338
+
1339
+ // Extract the zip file
1340
+ await this.unzipProject(zipFilePath);
1341
+ logger.info(`Downloaded S3 zip file ${s3Path} to ${absoluteDirPath}`);
1342
+ return;
1343
+ }
1344
+
1345
+ // Original behavior: download directory contents
1249
1346
  const objects = await this.s3Client.listObjectsV2({
1250
1347
  Bucket: bucketName,
1251
1348
  Prefix: prefix,
@@ -1291,6 +1388,7 @@ export class ProjectStore {
1291
1388
  });
1292
1389
  }),
1293
1390
  );
1391
+ logger.info(`Downloaded S3 directory ${s3Path} to ${absoluteDirPath}`);
1294
1392
  }
1295
1393
 
1296
1394
  private parseGitHubUrl(
@@ -1 +0,0 @@
1
import { l as o, j as r, r as a } from "./index-DUmIwFQ_.js";

// Default-exported route component: renders the bundle's `a` component and
// hands it the value returned by `o()` as its `onClickProject` prop.
function s() {
  const onClickProject = o();
  return r.jsx(a, { onClickProject });
}

export { s as default };
@@ -1 +0,0 @@
1
import { t as n, j as e, D as i, E as t, F as c } from "./index-DUmIwFQ_.js";

// Default-exported route component: picks a viewer for the model path held
// in the "*" route parameter, based on its file extension.
function o() {
  const params = n();
  const modelPath = params["*"];

  if (!params.projectName) {
    return e.jsx("div", {
      children: e.jsx("h2", { children: "Missing project name" }),
    });
  }
  if (!params.packageName) {
    return e.jsx("div", {
      children: e.jsx("h2", { children: "Missing package name" }),
    });
  }

  const resourceUri = i({
    projectName: params.projectName,
    packageName: params.packageName,
    modelPath: modelPath,
  });

  if (modelPath?.endsWith(".malloy")) {
    return e.jsx(t, {
      resourceUri: resourceUri,
      runOnDemand: true,
      maxResultSize: 512 * 1024,
    });
  }
  if (modelPath?.endsWith(".malloynb")) {
    return e.jsx(c, { resourceUri: resourceUri, maxResultSize: 1024 * 1024 });
  }
  return e.jsx("div", {
    children: e.jsxs("h2", {
      children: ["Unrecognized file type: ", modelPath],
    }),
  });
}

export { o as default };
@@ -1 +0,0 @@
1
import { l as a, t as n, j as e, D as c, H as o } from "./index-DUmIwFQ_.js";

// Default-exported route component: renders the bundle's `o` component for
// the project named in the route parameters, or a "missing" notice.
function j() {
  // Call order of a() then n() is kept exactly as in the original bundle.
  const onSelectPackage = a();
  const { projectName } = n();

  if (!projectName) {
    return e.jsx("div", {
      children: e.jsx("h2", { children: "Missing project name" }),
    });
  }

  const resourceUri = c({ projectName: projectName });
  return e.jsx(o, {
    onSelectPackage: onSelectPackage,
    resourceUri: resourceUri,
  });
}

export { j as default };
@@ -1 +0,0 @@
1
import { t as a, j as e, D as t, K as c } from "./index-DUmIwFQ_.js";

// Default-exported route component: renders the bundle's `c` component for a
// workbook once all four route parameters are present. Otherwise it reports
// the first missing one, checked in the order workspace, workbook path,
// project name, package name.
function d() {
  const { workspace, workbookPath, projectName, packageName } = a();

  const missing = (message) =>
    e.jsx("div", { children: e.jsx("h2", { children: message }) });

  if (!workspace) return missing("Missing workspace");
  if (!workbookPath) return missing("Missing workbook path");
  if (!projectName) return missing("Missing project name");
  if (!packageName) return missing("Missing package name");

  const resourceUri = t({ projectName: projectName, packageName: packageName });
  // The third argument is the element key, derived from the workbook path.
  return e.jsx(
    c,
    {
      workbookPath: { path: workbookPath, workspace: workspace },
      resourceUri: resourceUri,
    },
    `${workbookPath}`,
  );
}

export { d as default };