@malloy-publisher/server 0.0.171 → 0.0.173

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ import { ClientSecretCredential } from "@azure/identity";
2
+ import { ContainerClient } from "@azure/storage-blob";
1
3
  import { BigQuery } from "@google-cloud/bigquery";
2
4
  import { Connection, TableSourceDef } from "@malloydata/malloy";
3
5
  import { components } from "../api";
@@ -17,6 +19,7 @@ import { ApiConnection } from "./model";
17
19
  type ApiSchema = components["schemas"]["Schema"];
18
20
  type ApiTable = components["schemas"]["Table"];
19
21
  type ApiTableSource = components["schemas"]["TableSource"];
22
+ type ApiAzureConnection = components["schemas"]["AzureConnection"];
20
23
 
21
24
  function createBigQueryClient(connection: ApiConnection): BigQuery {
22
25
  if (!connection.bigqueryConnection) {
@@ -354,6 +357,21 @@ export async function getSchemasForConnection(
354
357
  schemas.push(...cloudSchemas);
355
358
  }
356
359
 
360
+ // Add Azure ADLS attached databases as schemas (by name)
361
+ const azureDatabases = attachedDatabases.filter(
362
+ (attachedDb) =>
363
+ attachedDb.type === "azure" && attachedDb.azureConnection,
364
+ );
365
+ for (const attachedDb of azureDatabases) {
366
+ if (attachedDb.name) {
367
+ schemas.push({
368
+ name: attachedDb.name,
369
+ isHidden: false,
370
+ isDefault: false,
371
+ });
372
+ }
373
+ }
374
+
357
375
  return schemas;
358
376
  } catch (error) {
359
377
  console.error(
@@ -434,11 +452,281 @@ export async function getSchemasForConnection(
434
452
  }
435
453
  }
436
454
 
455
/**
 * Maps a file name or object key to the data format implied by its extension.
 *
 * The comparison is case-insensitive. Both `.jsonl` and `.ndjson` map to
 * `"jsonl"`; anything without a recognised extension yields `"unknown"`.
 *
 * @param key - File name, path or blob key (without a query string).
 * @returns One of `"csv"`, `"parquet"`, `"json"`, `"jsonl"` or `"unknown"`.
 */
function getFileType(key: string): string {
  // Ordered [suffix, format] pairs; the first matching suffix wins.
  const formatsBySuffix: ReadonlyArray<readonly [string, string]> = [
    [".csv", "csv"],
    [".parquet", "parquet"],
    [".json", "json"],
    [".jsonl", "jsonl"],
    [".ndjson", "jsonl"],
  ];
  const normalized = key.toLowerCase();
  const match = formatsBySuffix.find(([suffix]) =>
    normalized.endsWith(suffix),
  );
  return match ? match[1] : "unknown";
}
464
+
465
/**
 * Splits an Azure storage URL (query string already removed) into the blob
 * service endpoint, the container name and the blob path inside the container.
 *
 * Recognised forms:
 *   https://account.blob.core.windows.net/container/path
 *   abfss://account.dfs.core.windows.net/container/path   (likewise az://)
 *   abfss://container/path                                 (likewise az://)
 *
 * The short form carries no account, so `accountName` must be supplied.
 *
 * @param baseUrl - URL without its query string.
 * @param accountName - Storage account name from the connection config, if any.
 * @returns The `https://` blob endpoint, the container and the blob path.
 * @throws Error if a short-form URL is used without `accountName`, or if the
 *   URL has no container segment.
 */
function parseAzureBlobLocation(
  baseUrl: string,
  accountName?: string,
): { accountUrl: string; container: string; blobPath: string } {
  // az:// is accepted by the callers' scheme checks, so it is parsed the same
  // way as abfss:// instead of being handed to `new URL()`, which would treat
  // the container as the host name.
  const scheme = ["abfss://", "az://"].find((candidate) =>
    baseUrl.startsWith(candidate),
  );

  let accountUrl: string;
  let segments: string[];
  if (scheme) {
    const parts = baseUrl.substring(scheme.length).split("/").filter(Boolean);
    const authority = parts[0] ?? "";
    if (authority.includes(".")) {
      // Fully qualified: the first label of the host is the account name.
      accountUrl = `https://${authority.split(".")[0]}.blob.core.windows.net`;
      segments = parts.slice(1);
    } else {
      // Short form: the first segment is the container itself.
      if (!accountName) {
        throw new Error(
          `accountName is required to list blobs with ${scheme} URLs`,
        );
      }
      accountUrl = `https://${accountName}.blob.core.windows.net`;
      segments = parts;
    }
  } else {
    // https://account.blob.core.windows.net/container/path
    const url = new URL(baseUrl);
    accountUrl = `${url.protocol}//${url.host}`;
    segments = url.pathname.split("/").filter(Boolean);
  }

  const [container, ...rest] = segments;
  if (!container) {
    throw new Error(`Azure URL "${baseUrl}" does not include a container name`);
  }
  return { accountUrl, container, blobPath: rest.join("/") };
}

/**
 * Lists the data files in an Azure container that match a glob-like
 * prefix/extension filter.
 *
 * Accepts an HTTPS URL (optionally with a SAS token as query string), e.g.
 *   https://account.blob.core.windows.net/container/path/*.parquet?sasToken
 * as well as `abfss://` and `az://` URLs in short or fully qualified form.
 *
 * Supported patterns in the blob path:
 *   path/*.ext → files directly in path/ whose name ends with `.ext`
 *   path/**    → every data file below path/, recursively
 *   path       → no glob; the path is used as a listing prefix
 * (Single-file URLs are expected to be handled by the caller.)
 *
 * @param fileUrl - URL describing what to list; any query string is treated
 *   as a SAS token and re-appended verbatim to generated HTTPS URLs.
 * @param azureConnection - Optional connection settings. When `authType` is
 *   `"service_principal"` and tenant/client/secret are all present, a
 *   `ClientSecretCredential` is used; otherwise the SAS token (if any) is.
 * @returns One entry per matching blob: `blobName` is the blob's name within
 *   the container, `url` is an `abfss://` URL for service-principal
 *   connections or an HTTPS URL with the SAS token otherwise.
 * @throws Error if the URL cannot be resolved to an account and container;
 *   listing errors from the Azure SDK propagate unchanged.
 */
async function listAzureBlobs(
  fileUrl: string,
  azureConnection?: ApiAzureConnection,
): Promise<{ url: string; blobName: string }[]> {
  // Split on the first "?" by hand so the SAS signature is never re-encoded.
  const queryStart = fileUrl.indexOf("?");
  const baseUrl = queryStart >= 0 ? fileUrl.substring(0, queryStart) : fileUrl;
  const sasToken = queryStart >= 0 ? fileUrl.substring(queryStart) : "";

  const { accountUrl, container, blobPath } = parseAzureBlobLocation(
    baseUrl,
    azureConnection?.accountName,
  );

  let prefix: string;
  let extensionFilter = ""; // suffix after "*" for the path/*.ext pattern
  let recursive = true;

  if (blobPath.endsWith("**")) {
    // Recursive listing: everything under this prefix
    prefix = blobPath.slice(0, -2);
  } else if (blobPath.includes("*")) {
    // Single-level glob: path/*.ext — files directly in that dir only
    const starIndex = blobPath.indexOf("*");
    prefix = blobPath.substring(0, starIndex);
    extensionFilter = blobPath.substring(starIndex + 1); // e.g. ".parquet"
    recursive = false;
  } else {
    // No glob — use blobPath as prefix (container-level listing)
    prefix = blobPath;
  }

  // Create ContainerClient with appropriate authentication
  let containerClient: ContainerClient;
  if (
    azureConnection?.authType === "service_principal" &&
    azureConnection.tenantId &&
    azureConnection.clientId &&
    azureConnection.clientSecret
  ) {
    const credential = new ClientSecretCredential(
      azureConnection.tenantId,
      azureConnection.clientId,
      azureConnection.clientSecret,
    );
    containerClient = new ContainerClient(
      `${accountUrl}/${container}`,
      credential,
    );
  } else {
    // SAS token auth — append token to container URL
    containerClient = new ContainerClient(
      `${accountUrl}/${container}${sasToken}`,
    );
  }

  const matchingFiles: { url: string; blobName: string }[] = [];
  for await (const blob of containerClient.listBlobsFlat({
    prefix: prefix || undefined,
  })) {
    if (extensionFilter && !blob.name.endsWith(extensionFilter)) continue;
    // For *.ext (non-recursive): only allow files directly in prefix dir
    if (!recursive && blob.name.substring(prefix.length).includes("/")) {
      continue;
    }
    if (!isDataFile(blob.name)) continue;

    // For SPN: use abfss:// URLs that DuckDB's azure extension can read
    // For SAS: use https:// URLs with token appended
    let url: string;
    if (azureConnection?.authType === "service_principal") {
      const account =
        azureConnection.accountName ||
        accountUrl.split("//")[1]?.split(".")[0];
      url = `abfss://${account}.dfs.core.windows.net/${container}/${blob.name}`;
    } else {
      url = `${accountUrl}/${container}/${blob.name}${sasToken}`;
    }
    matchingFiles.push({ url, blobName: blob.name });
  }

  logger.info(
    `Listed ${matchingFiles.length} matching blobs in Azure container ${container} with prefix "${prefix}"`,
  );
  return matchingFiles;
}
592
+
593
/**
 * Tells whether a file name or blob key has one of the recognised data file
 * extensions (`.csv`, `.parquet`, `.json`, `.jsonl`, `.ndjson`).
 *
 * @param key - File name, path or blob key; compared case-insensitively.
 * @returns `true` when the key ends with a recognised extension.
 */
function isDataFile(key: string): boolean {
  const normalized = key.toLowerCase();
  const dataExtensions = [".csv", ".parquet", ".json", ".jsonl", ".ndjson"];
  return dataExtensions.some((extension) => normalized.endsWith(extension));
}
603
+
604
/**
 * Describes the columns of a single remote data file by running a
 * `DESCRIBE SELECT * FROM read_*(…)` statement on the given connection.
 *
 * The reader (`read_csv`, `read_parquet`, `read_json`) is chosen from the
 * file extension, ignoring any query string on the URI.
 *
 * @param malloyConnection - Connection used to run the DESCRIBE statement.
 * @param fileUri - Full URI of the file; may carry a SAS token as query string.
 * @returns A table whose `resource` is the file's last path segment and whose
 *   `columns` are the described name/type pairs. For an unsupported extension
 *   the result has no columns and `resource` is the URI as given.
 * @throws Whatever `runSQL` rejects with.
 */
async function describeRemoteFile(
  malloyConnection: Connection,
  fileUri: string,
): Promise<ApiTable> {
  const pathWithoutQuery = fileUri.split("?")[0];
  const fileType = getFileType(pathWithoutQuery);

  // The URI is embedded in a SQL string literal, so single quotes (legal in
  // blob names) must be doubled; otherwise they end the literal early and
  // break, or inject into, the statement.
  const uriLiteral = `'${fileUri.replace(/'/g, "''")}'`;

  let describeQuery: string;
  switch (fileType) {
    case "csv":
      describeQuery = `DESCRIBE SELECT * FROM read_csv(${uriLiteral}, auto_detect=true) LIMIT 1`;
      break;
    case "parquet":
      describeQuery = `DESCRIBE SELECT * FROM read_parquet(${uriLiteral}) LIMIT 1`;
      break;
    case "json":
      describeQuery = `DESCRIBE SELECT * FROM read_json(${uriLiteral}, auto_detect=true) LIMIT 1`;
      break;
    case "jsonl":
      describeQuery = `DESCRIBE SELECT * FROM read_json(${uriLiteral}, format='newline_delimited', auto_detect=true) LIMIT 1`;
      break;
    default:
      // Log without the query string so a SAS token never reaches the logs.
      logger.warn(`Unsupported file type for file: ${pathWithoutQuery}`);
      return { resource: fileUri, columns: [] };
  }

  const result = await malloyConnection.runSQL(describeQuery);
  const rows = standardizeRunSQLResult(result);
  // Rows may expose either column_name/column_type or name/type keys.
  const columns = rows.map((row: unknown) => {
    const typedRow = row as Record<string, unknown>;
    return {
      name: (typedRow.column_name || typedRow.name) as string,
      type: (typedRow.column_type || typedRow.type) as string,
    };
  });

  const fileName = pathWithoutQuery.split("/").pop() || fileUri;
  return { resource: fileName, columns };
}
643
+
644
/**
 * Decides whether an Azure URI points at exactly one data file, as opposed
 * to a glob pattern or a directory/container listing.
 *
 * @param fileUri - URI to inspect; everything from the first `?` is ignored.
 * @returns `true` only when the path has no `*`, does not end in `/`, and its
 *   last segment has a data file extension.
 */
function isAzureSingleFileUrl(fileUri: string): boolean {
  const [path = ""] = fileUri.split("?");
  const isGlob = path.includes("*");
  const isDirectory = path.endsWith("/");
  if (isGlob || isDirectory) {
    return false;
  }
  const segments = path.split("/");
  const lastSegment = segments[segments.length - 1] || "";
  return isDataFile(lastSegment);
}
654
+
655
/**
 * Describes the table(s) behind an Azure URI.
 *
 * A single-file URI is described directly. Any other URI (glob pattern,
 * directory or container) is expanded into individual blobs first, and each
 * blob is described in parallel.
 *
 * @param malloyConnection - Connection used to run the DESCRIBE statements.
 * @param fileUri - Azure URI; may carry a SAS token as query string.
 * @param azureConnection - Optional connection settings used when listing blobs.
 * @returns One table per file. A blob that cannot be described is returned
 *   with an empty column list rather than failing the whole call. If nothing
 *   matches, a single entry with `resource` set to `fileUri` and no columns.
 * @throws Error wrapping the original message when the single-file describe
 *   or the blob listing fails.
 */
async function describeAzureFile(
  malloyConnection: Connection,
  fileUri: string,
  azureConnection?: ApiAzureConnection,
): Promise<ApiTable[]> {
  // The query string of a SAS URL is a credential; log only what precedes it.
  const withoutQuery = (uri: string): string => uri.split("?")[0] ?? uri;

  try {
    if (isAzureSingleFileUrl(fileUri)) {
      // Single file — describe directly via DuckDB
      return [await describeRemoteFile(malloyConnection, fileUri)];
    }

    // Glob pattern or container/directory URL — list blobs via Azure SDK
    const blobs = await listAzureBlobs(fileUri, azureConnection);
    if (blobs.length === 0) {
      return [{ resource: fileUri, columns: [] }];
    }

    const results = await Promise.all(
      blobs.map(async ({ url, blobName }) => {
        try {
          const table = await describeRemoteFile(malloyConnection, url);
          return { ...table, resource: blobName };
        } catch (error) {
          logger.warn(`Failed to describe Azure blob: ${withoutQuery(url)}`, {
            error,
          });
          return { resource: blobName, columns: [] } as ApiTable;
        }
      }),
    );
    return results;
  } catch (error) {
    logger.error(`Failed to describe Azure file: ${withoutQuery(fileUri)}`, {
      error,
    });
    throw new Error(
      `Failed to describe Azure file: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
}
691
+
437
692
  export async function getTablesForSchema(
438
693
  connection: ApiConnection,
439
694
  schemaName: string,
440
695
  malloyConnection: Connection,
441
696
  ): Promise<ApiTable[]> {
697
+ // Check if schemaName matches an Azure attached database name
698
+ if (connection.type === "duckdb") {
699
+ const attachedDbs = connection.duckdbConnection?.attachedDatabases || [];
700
+ const azureDb = attachedDbs.find(
701
+ (db) =>
702
+ db.type === "azure" && db.name === schemaName && db.azureConnection,
703
+ );
704
+ if (azureDb) {
705
+ const azureConn = azureDb.azureConnection!;
706
+ const fileUrl =
707
+ azureConn.authType === "sas_token"
708
+ ? azureConn.sasUrl
709
+ : azureConn.fileUrl;
710
+ if (fileUrl) {
711
+ return await describeAzureFile(
712
+ malloyConnection,
713
+ fileUrl,
714
+ azureConn,
715
+ );
716
+ }
717
+ }
718
+ }
719
+
720
+ // Check if this is an Azure ADLS file path (abfss:// or HTTPS SAS URL)
721
+ if (
722
+ connection.type === "duckdb" &&
723
+ (schemaName.startsWith("abfss://") ||
724
+ schemaName.startsWith("https://") ||
725
+ schemaName.startsWith("az://"))
726
+ ) {
727
+ return await describeAzureFile(malloyConnection, schemaName);
728
+ }
729
+
442
730
  // Check if this is a cloud storage file path (gs://bucket/path/file.ext or s3://bucket/path/file.ext)
443
731
  const parsedUri = parseCloudUri(schemaName);
444
732
 
@@ -180,11 +180,17 @@ export class Project {
180
180
  const virtualUri = `file://${path.join(modelDir, "__compile_check.malloy")}`;
181
181
  const virtualUrl = new URL(virtualUri);
182
182
 
183
- // Read the model file and extract its preamble (pragmas + imports) so that
184
- // the user's query inherits the model's import context.
183
+ // Read the full model file so the submitted source inherits the model's
184
+ // complete namespace imports, source definitions, queries, etc.
185
185
  const modelPath = path.join(this.projectPath, packageName, modelName);
186
- const preamble = await extractPreamble(modelPath);
187
- const fullSource = preamble ? `${preamble}\n${source}` : source;
186
+ let modelContent = "";
187
+ try {
188
+ modelContent = await fs.promises.readFile(modelPath, "utf8");
189
+ } catch {
190
+ // If the model file can't be read, proceed with empty content
191
+ // and let compilation surface any errors naturally.
192
+ }
193
+ const fullSource = modelContent ? `${modelContent}\n${source}` : source;
188
194
 
189
195
  // Create a URL Reader that serves the source string for the virtual file,
190
196
  // but falls back to the disk for everything else (imports).
@@ -20,6 +20,7 @@ import {
20
20
  PUBLISHER_DATA_DIR,
21
21
  } from "../constants";
22
22
  import {
23
+ BadRequestError,
23
24
  FrozenConfigError,
24
25
  PackageNotFoundError,
25
26
  ProjectNotFoundError,
@@ -31,6 +32,63 @@ import { StorageConfig, StorageManager } from "../storage/StorageManager";
31
32
  import { PackageStatus, Project } from "./project";
32
33
  type ApiProject = components["schemas"]["Project"];
33
34
 
35
/** URL schemes accepted for Azure attached-database URLs. */
const AZURE_SUPPORTED_SCHEMES = ["https://", "http://", "abfss://", "az://"];

/** File extensions (lower-case) that count as data files. */
const AZURE_DATA_EXTENSIONS = [
  ".parquet",
  ".csv",
  ".json",
  ".jsonl",
  ".ndjson",
];

/**
 * Validates that an Azure URL uses a supported scheme and one of the three
 * supported path shapes. Any query string is ignored.
 *
 *   • Single file:     path/file.parquet  (no `*`, data file extension)
 *   • Directory glob:  path/*.ext         (one `*`, at the start of the last segment)
 *   • Recursive:       path/**            (exactly two `*`, at the very end)
 *
 * @param url - URL to validate.
 * @param fieldName - Label for the offending field, used in error messages.
 * @throws BadRequestError when the scheme or the path shape is not supported.
 */
function validateAzureUrl(url: string, fieldName: string): void {
  if (!AZURE_SUPPORTED_SCHEMES.some((s) => url.startsWith(s))) {
    throw new BadRequestError(
      `Azure ${fieldName} must use one of: ${AZURE_SUPPORTED_SCHEMES.join(", ")}`,
    );
  }
  const pathWithoutQuery = url.split("?")[0];
  const stars = (pathWithoutQuery.match(/\*/g) || []).length;

  if (stars === 0) {
    const lower = pathWithoutQuery.toLowerCase();
    if (!AZURE_DATA_EXTENSIONS.some((ext) => lower.endsWith(ext))) {
      throw new BadRequestError(
        `Azure ${fieldName}: a single-file URL must end with a data file extension (${AZURE_DATA_EXTENSIONS.join(", ")})`,
      );
    }
    return;
  }

  // The recursive form must consist of the trailing "**" only. Extra stars
  // earlier in the path (e.g. "dir/*/**") would be taken literally as part of
  // the listing prefix and silently match nothing.
  const isRecursive = stars === 2 && pathWithoutQuery.endsWith("**");

  const lastSegment = pathWithoutQuery.split("/").pop() || "";
  const isDirectoryGlob = stars === 1 && lastSegment.startsWith("*");

  if (!isRecursive && !isDirectoryGlob) {
    throw new BadRequestError(
      `Azure ${fieldName}: only three URL patterns are supported:\n` +
        `  • Single file:     path/file.parquet\n` +
        `  • Directory glob:  path/*.ext  (direct children only)\n` +
        `  • Recursive:       path/**  (includes all data files in the container and all subdirectories)\n` +
        `Multi-level globs such as "sub_dir/*/*.parquet" are not supported.`,
    );
  }
}
76
+
77
/**
 * Validates the Azure URLs of every Azure database attached to a DuckDB
 * connection in the project.
 *
 * For `sas_token` connections the `sasUrl` is checked; for
 * `service_principal` connections the `fileUrl` is checked. Entries without
 * the relevant URL are skipped.
 *
 * @param project - Project definition whose connections are inspected.
 * @throws Whatever `validateAzureUrl` throws for the first invalid URL found.
 */
function validateProjectAzureUrls(project: ApiProject): void {
  const duckdbConnections = (project.connections || []).filter(
    (connection) => connection.type === "duckdb",
  );

  for (const connection of duckdbConnections) {
    const attached = connection.duckdbConnection?.attachedDatabases || [];
    for (const database of attached) {
      const azure = database.azureConnection;
      if (database.type !== "azure" || !azure) {
        continue;
      }

      if (azure.authType === "sas_token" && azure.sasUrl) {
        validateAzureUrl(azure.sasUrl, `"${database.name}" sasUrl`);
      } else if (azure.authType === "service_principal" && azure.fileUrl) {
        validateAzureUrl(azure.fileUrl, `"${database.name}" fileUrl`);
      }
    }
  }
}
91
+
34
92
  export class ProjectStore {
35
93
  public serverRootPath: string;
36
94
  private projects: Map<string, Project> = new Map();
@@ -762,6 +820,7 @@ export class ProjectStore {
762
820
  if (this.publisherConfigIsFrozen) {
763
821
  throw new FrozenConfigError();
764
822
  }
823
+ validateProjectAzureUrls(project);
765
824
  const projectName = project.name;
766
825
  if (!projectName) {
767
826
  throw new Error("Project name is required");
@@ -1 +0,0 @@
1
- import{l as o,j as r,r as a}from"./index-DUmIwFQ_.js";function s(){const t=o();return r.jsx(a,{onClickProject:t})}export{s as default};
@@ -1 +0,0 @@
1
- import{t as n,j as e,D as i,E as t,F as c}from"./index-DUmIwFQ_.js";function o(){const a=n(),r=a["*"];if(!a.projectName)return e.jsx("div",{children:e.jsx("h2",{children:"Missing project name"})});if(!a.packageName)return e.jsx("div",{children:e.jsx("h2",{children:"Missing package name"})});const s=i({projectName:a.projectName,packageName:a.packageName,modelPath:r});return r?.endsWith(".malloy")?e.jsx(t,{resourceUri:s,runOnDemand:!0,maxResultSize:512*1024}):r?.endsWith(".malloynb")?e.jsx(c,{resourceUri:s,maxResultSize:1024*1024}):e.jsx("div",{children:e.jsxs("h2",{children:["Unrecognized file type: ",r]})})}export{o as default};
@@ -1 +0,0 @@
1
- import{l as a,t as n,j as e,D as c,H as o}from"./index-DUmIwFQ_.js";function j(){const r=a(),{projectName:s}=n();if(s){const t=c({projectName:s});return e.jsx(o,{onSelectPackage:r,resourceUri:t})}else return e.jsx("div",{children:e.jsx("h2",{children:"Missing project name"})})}export{j as default};
@@ -1 +0,0 @@
1
- import{t as a,j as e,D as t,K as c}from"./index-DUmIwFQ_.js";function d(){const{workspace:r,workbookPath:s,projectName:i,packageName:n}=a();if(r)if(s)if(i)if(n){const o=t({projectName:i,packageName:n});return e.jsx(c,{workbookPath:{path:s,workspace:r},resourceUri:o},`${s}`)}else return e.jsx("div",{children:e.jsx("h2",{children:"Missing package name"})});else return e.jsx("div",{children:e.jsx("h2",{children:"Missing project name"})});else return e.jsx("div",{children:e.jsx("h2",{children:"Missing workbook path"})});else return e.jsx("div",{children:e.jsx("h2",{children:"Missing workspace"})})}export{d as default};