@malloy-publisher/server 0.0.151 → 0.0.152
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/api-doc.yaml +43 -2
- package/dist/app/assets/{HomePage-C1T4QFCi.js → HomePage-b8QeDnVg.js} +1 -1
- package/dist/app/assets/{MainPage-NHw0FBGk.js → MainPage-DB4TgCht.js} +1 -1
- package/dist/app/assets/{ModelPage-BQ2s25F2.js → ModelPage-D5uBRVZG.js} +1 -1
- package/dist/app/assets/{PackagePage-B-8ugkWc.js → PackagePage-Da02VPXi.js} +1 -1
- package/dist/app/assets/{ProjectPage-9YmacvrN.js → ProjectPage-C-b1ld7t.js} +1 -1
- package/dist/app/assets/{RouteError-CZ8PWEDH.js → RouteError-BihPY0CF.js} +1 -1
- package/dist/app/assets/{WorkbookPage-BJzhdY2E.js → WorkbookPage-DXFExeYg.js} +1 -1
- package/dist/app/assets/{index-BORdJk_c.js → index-C7CEd8eo.js} +1 -1
- package/dist/app/assets/{index-BZoYL91v.js → index-CRmTvqUQ.js} +45 -45
- package/dist/app/assets/{index-fiSlV3Cu.js → index-XR6f8p6F.js} +1 -1
- package/dist/app/assets/{index.umd-CQzt_QJa.js → index.umd-xDUYP0Tb.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/server.js +29865 -32910
- package/package.json +3 -2
- package/src/service/connection.ts +122 -2
- package/src/service/db_utils.ts +144 -4
- package/src/service/gcs_s3_utils.ts +304 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@malloy-publisher/server",
|
|
3
3
|
"description": "Malloy Publisher Server",
|
|
4
|
-
"version": "0.0.151",
|
|
4
|
+
"version": "0.0.152",
|
|
5
5
|
"main": "dist/server.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"malloy-publisher": "dist/server.js"
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
"generate-api-types": "bunx openapi-typescript ../../api-doc.yaml --output src/api.ts"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
+
"@aws-sdk/client-s3": "^3.958.0",
|
|
27
28
|
"@google-cloud/storage": "^7.16.0",
|
|
28
29
|
"@malloydata/db-bigquery": "^0.0.318",
|
|
29
30
|
"@malloydata/db-duckdb": "^0.0.318",
|
|
@@ -48,7 +49,7 @@
|
|
|
48
49
|
"class-transformer": "^0.5.1",
|
|
49
50
|
"class-validator": "^0.14.1",
|
|
50
51
|
"cors": "^2.8.5",
|
|
51
|
-
"duckdb": "
|
|
52
|
+
"duckdb": "1.3.4",
|
|
52
53
|
"express": "^4.21.0",
|
|
53
54
|
"globals": "^15.9.0",
|
|
54
55
|
"handlebars": "^4.7.8",
|
package/src/service/connection.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { v4 as uuidv4 } from "uuid";
|
|
|
13
13
|
import { components } from "../api";
|
|
14
14
|
import { TEMP_DIR_PATH } from "../constants";
|
|
15
15
|
import { logAxiosError, logger } from "../logger";
|
|
16
|
+
import { CloudStorageCredentials } from "./gcs_s3_utils";
|
|
16
17
|
|
|
17
18
|
type AttachedDatabase = components["schemas"]["AttachedDatabase"];
|
|
18
19
|
type ApiConnection = components["schemas"]["Connection"];
|
|
@@ -377,6 +378,121 @@ async function attachPostgres(
|
|
|
377
378
|
logger.info(`Successfully attached PostgreSQL database: ${attachedDb.name}`);
|
|
378
379
|
}
|
|
379
380
|
|
|
381
|
+
async function attachCloudStorage(
|
|
382
|
+
connection: DuckDBConnection,
|
|
383
|
+
attachedDb: AttachedDatabase,
|
|
384
|
+
): Promise<void> {
|
|
385
|
+
const isGCS = attachedDb.type === "gcs";
|
|
386
|
+
const isS3 = attachedDb.type === "s3";
|
|
387
|
+
|
|
388
|
+
if (!isGCS && !isS3) {
|
|
389
|
+
throw new Error(`Invalid cloud storage type: ${attachedDb.type}`);
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
const storageType = attachedDb.type?.toUpperCase() || "";
|
|
393
|
+
let credentials: CloudStorageCredentials;
|
|
394
|
+
|
|
395
|
+
if (isGCS) {
|
|
396
|
+
if (!attachedDb.gcsConnection) {
|
|
397
|
+
throw new Error(
|
|
398
|
+
`GCS connection configuration missing for: ${attachedDb.name}`,
|
|
399
|
+
);
|
|
400
|
+
}
|
|
401
|
+
if (!attachedDb.gcsConnection.keyId || !attachedDb.gcsConnection.secret) {
|
|
402
|
+
throw new Error(
|
|
403
|
+
`GCS keyId and secret are required for: ${attachedDb.name}`,
|
|
404
|
+
);
|
|
405
|
+
}
|
|
406
|
+
credentials = {
|
|
407
|
+
type: "gcs",
|
|
408
|
+
accessKeyId: attachedDb.gcsConnection.keyId,
|
|
409
|
+
secretAccessKey: attachedDb.gcsConnection.secret,
|
|
410
|
+
};
|
|
411
|
+
} else {
|
|
412
|
+
if (!attachedDb.s3Connection) {
|
|
413
|
+
throw new Error(
|
|
414
|
+
`S3 connection configuration missing for: ${attachedDb.name}`,
|
|
415
|
+
);
|
|
416
|
+
}
|
|
417
|
+
if (
|
|
418
|
+
!attachedDb.s3Connection.accessKeyId ||
|
|
419
|
+
!attachedDb.s3Connection.secretAccessKey
|
|
420
|
+
) {
|
|
421
|
+
throw new Error(
|
|
422
|
+
`S3 accessKeyId and secretAccessKey are required for: ${attachedDb.name}`,
|
|
423
|
+
);
|
|
424
|
+
}
|
|
425
|
+
credentials = {
|
|
426
|
+
type: "s3",
|
|
427
|
+
accessKeyId: attachedDb.s3Connection.accessKeyId,
|
|
428
|
+
secretAccessKey: attachedDb.s3Connection.secretAccessKey,
|
|
429
|
+
region: attachedDb.s3Connection.region,
|
|
430
|
+
endpoint: attachedDb.s3Connection.endpoint,
|
|
431
|
+
sessionToken: attachedDb.s3Connection.sessionToken,
|
|
432
|
+
};
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
await installAndLoadExtension(connection, "httpfs");
|
|
436
|
+
|
|
437
|
+
const secretName = sanitizeSecretName(
|
|
438
|
+
`${attachedDb.type}_${attachedDb.name}`,
|
|
439
|
+
);
|
|
440
|
+
const escapedKeyId = escapeSQL(credentials.accessKeyId);
|
|
441
|
+
const escapedSecret = escapeSQL(credentials.secretAccessKey);
|
|
442
|
+
|
|
443
|
+
let createSecretCommand: string;
|
|
444
|
+
|
|
445
|
+
if (isGCS) {
|
|
446
|
+
createSecretCommand = `
|
|
447
|
+
CREATE OR REPLACE SECRET ${secretName} (
|
|
448
|
+
TYPE gcs,
|
|
449
|
+
KEY_ID '${escapedKeyId}',
|
|
450
|
+
SECRET '${escapedSecret}'
|
|
451
|
+
);
|
|
452
|
+
`;
|
|
453
|
+
} else {
|
|
454
|
+
const region = credentials.region || "us-east-1";
|
|
455
|
+
|
|
456
|
+
if (credentials.endpoint) {
|
|
457
|
+
const escapedEndpoint = escapeSQL(credentials.endpoint);
|
|
458
|
+
createSecretCommand = `
|
|
459
|
+
CREATE OR REPLACE SECRET ${secretName} (
|
|
460
|
+
TYPE s3,
|
|
461
|
+
KEY_ID '${escapedKeyId}',
|
|
462
|
+
SECRET '${escapedSecret}',
|
|
463
|
+
REGION '${region}',
|
|
464
|
+
ENDPOINT '${escapedEndpoint}',
|
|
465
|
+
URL_STYLE 'path'
|
|
466
|
+
);
|
|
467
|
+
`;
|
|
468
|
+
} else if (credentials.sessionToken) {
|
|
469
|
+
const escapedToken = escapeSQL(credentials.sessionToken);
|
|
470
|
+
createSecretCommand = `
|
|
471
|
+
CREATE OR REPLACE SECRET ${secretName} (
|
|
472
|
+
TYPE s3,
|
|
473
|
+
KEY_ID '${escapedKeyId}',
|
|
474
|
+
SECRET '${escapedSecret}',
|
|
475
|
+
REGION '${region}',
|
|
476
|
+
SESSION_TOKEN '${escapedToken}'
|
|
477
|
+
);
|
|
478
|
+
`;
|
|
479
|
+
} else {
|
|
480
|
+
createSecretCommand = `
|
|
481
|
+
CREATE OR REPLACE SECRET ${secretName} (
|
|
482
|
+
TYPE s3,
|
|
483
|
+
KEY_ID '${escapedKeyId}',
|
|
484
|
+
SECRET '${escapedSecret}',
|
|
485
|
+
REGION '${region}'
|
|
486
|
+
);
|
|
487
|
+
`;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
await connection.runSQL(createSecretCommand);
|
|
492
|
+
logger.info(`Created ${storageType} secret: ${secretName}`);
|
|
493
|
+
logger.info(`${storageType} connection configured for: ${attachedDb.name}`);
|
|
494
|
+
}
|
|
495
|
+
|
|
380
496
|
// Main attachment function
|
|
381
497
|
async function attachDatabasesToDuckDB(
|
|
382
498
|
duckdbConnection: DuckDBConnection,
|
|
@@ -386,6 +502,8 @@ async function attachDatabasesToDuckDB(
|
|
|
386
502
|
bigquery: attachBigQuery,
|
|
387
503
|
snowflake: attachSnowflake,
|
|
388
504
|
postgres: attachPostgres,
|
|
505
|
+
gcs: attachCloudStorage,
|
|
506
|
+
s3: attachCloudStorage,
|
|
389
507
|
};
|
|
390
508
|
|
|
391
509
|
for (const attachedDb of attachedDatabases) {
|
|
@@ -633,9 +751,10 @@ export async function createProjectConnections(
|
|
|
633
751
|
|
|
634
752
|
// Create DuckDB connection with project basePath as working directory
|
|
635
753
|
// This ensures relative paths in the project are resolved correctly
|
|
754
|
+
// Use unique memory database path to prevent sharing across connections
|
|
636
755
|
const duckdbConnection = new DuckDBConnection(
|
|
637
756
|
connection.name,
|
|
638
|
-
|
|
757
|
+
path.join(projectPath, `${connection.name}.duckdb`),
|
|
639
758
|
projectPath,
|
|
640
759
|
);
|
|
641
760
|
|
|
@@ -747,9 +866,10 @@ export async function createPackageDuckDBConnections(
|
|
|
747
866
|
|
|
748
867
|
// Create DuckDB connection with project basePath as working directory
|
|
749
868
|
// This ensures relative paths in the project are resolved correctly
|
|
869
|
+
// Use unique memory database path to prevent sharing across connections
|
|
750
870
|
const duckdbConnection = new DuckDBConnection(
|
|
751
871
|
connection.name,
|
|
752
|
-
|
|
872
|
+
path.join(packagePath, `${connection.name}.duckdb`),
|
|
753
873
|
packagePath,
|
|
754
874
|
);
|
|
755
875
|
|
package/src/service/db_utils.ts
CHANGED
|
@@ -3,6 +3,15 @@ import { Connection, TableSourceDef } from "@malloydata/malloy";
|
|
|
3
3
|
import { components } from "../api";
|
|
4
4
|
import { ConnectionError } from "../errors";
|
|
5
5
|
import { logger } from "../logger";
|
|
6
|
+
import {
|
|
7
|
+
CloudStorageCredentials,
|
|
8
|
+
gcsConnectionToCredentials,
|
|
9
|
+
getCloudTablesWithColumns,
|
|
10
|
+
isDataFile,
|
|
11
|
+
listAllCloudFiles,
|
|
12
|
+
listCloudBuckets,
|
|
13
|
+
s3ConnectionToCredentials,
|
|
14
|
+
} from "./gcs_s3_utils";
|
|
6
15
|
import { ApiConnection } from "./model";
|
|
7
16
|
|
|
8
17
|
type ApiSchema = components["schemas"]["Schema"];
|
|
@@ -68,6 +77,29 @@ function standardizeRunSQLResult(result: unknown): unknown[] {
|
|
|
68
77
|
: (result as { rows?: unknown[] }).rows || [];
|
|
69
78
|
}
|
|
70
79
|
|
|
80
|
+
function getCloudCredentialsFromAttachedDatabases(
|
|
81
|
+
attachedDatabases: components["schemas"]["AttachedDatabase"][],
|
|
82
|
+
storageType: "gcs" | "s3",
|
|
83
|
+
): CloudStorageCredentials | null {
|
|
84
|
+
for (const attachedDb of attachedDatabases) {
|
|
85
|
+
if (
|
|
86
|
+
attachedDb.type === "gcs" &&
|
|
87
|
+
storageType === "gcs" &&
|
|
88
|
+
attachedDb.gcsConnection
|
|
89
|
+
) {
|
|
90
|
+
return gcsConnectionToCredentials(attachedDb.gcsConnection);
|
|
91
|
+
}
|
|
92
|
+
if (
|
|
93
|
+
attachedDb.type === "s3" &&
|
|
94
|
+
storageType === "s3" &&
|
|
95
|
+
attachedDb.s3Connection
|
|
96
|
+
) {
|
|
97
|
+
return s3ConnectionToCredentials(attachedDb.s3Connection);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return null;
|
|
101
|
+
}
|
|
102
|
+
|
|
71
103
|
export async function getSchemasForConnection(
|
|
72
104
|
connection: ApiConnection,
|
|
73
105
|
malloyConnection: Connection,
|
|
@@ -265,7 +297,7 @@ export async function getSchemasForConnection(
|
|
|
265
297
|
|
|
266
298
|
const rows = standardizeRunSQLResult(result);
|
|
267
299
|
|
|
268
|
-
|
|
300
|
+
const schemas: ApiSchema[] = rows.map((row: unknown) => {
|
|
269
301
|
const typedRow = row as Record<string, unknown>;
|
|
270
302
|
const schemaName = typedRow.schema_name as string;
|
|
271
303
|
const catalogName = typedRow.catalog_name as string;
|
|
@@ -288,6 +320,42 @@ export async function getSchemasForConnection(
|
|
|
288
320
|
isDefault: catalogName === "main",
|
|
289
321
|
};
|
|
290
322
|
});
|
|
323
|
+
|
|
324
|
+
const attachedDatabases =
|
|
325
|
+
connection.duckdbConnection.attachedDatabases || [];
|
|
326
|
+
|
|
327
|
+
for (const attachedDb of attachedDatabases) {
|
|
328
|
+
if (
|
|
329
|
+
(attachedDb.type === "gcs" || attachedDb.type === "s3") &&
|
|
330
|
+
(attachedDb.gcsConnection || attachedDb.s3Connection)
|
|
331
|
+
) {
|
|
332
|
+
const credentials =
|
|
333
|
+
attachedDb.type === "gcs"
|
|
334
|
+
? gcsConnectionToCredentials(attachedDb.gcsConnection!)
|
|
335
|
+
: s3ConnectionToCredentials(attachedDb.s3Connection!);
|
|
336
|
+
|
|
337
|
+
try {
|
|
338
|
+
const buckets = await listCloudBuckets(credentials);
|
|
339
|
+
for (const bucket of buckets) {
|
|
340
|
+
schemas.push({
|
|
341
|
+
name: `${attachedDb.type}.${bucket.name}`,
|
|
342
|
+
isHidden: false,
|
|
343
|
+
isDefault: false,
|
|
344
|
+
});
|
|
345
|
+
}
|
|
346
|
+
logger.info(
|
|
347
|
+
`Listed ${buckets.length} ${attachedDb.type.toUpperCase()} buckets for attached database ${attachedDb.name}`,
|
|
348
|
+
);
|
|
349
|
+
} catch (cloudError) {
|
|
350
|
+
logger.warn(
|
|
351
|
+
`Failed to list ${attachedDb.type.toUpperCase()} buckets for ${attachedDb.name}`,
|
|
352
|
+
{ error: cloudError },
|
|
353
|
+
);
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
return schemas;
|
|
291
359
|
} catch (error) {
|
|
292
360
|
console.error(
|
|
293
361
|
`Error getting schemas for DuckDB connection ${connection.name}:`,
|
|
@@ -347,6 +415,41 @@ export async function getTablesForSchema(
|
|
|
347
415
|
malloyConnection,
|
|
348
416
|
);
|
|
349
417
|
|
|
418
|
+
const catalogName = schemaName.split(".")[0];
|
|
419
|
+
|
|
420
|
+
if (
|
|
421
|
+
(catalogName === "gcs" || catalogName === "s3") &&
|
|
422
|
+
connection.type === "duckdb"
|
|
423
|
+
) {
|
|
424
|
+
console.log(
|
|
425
|
+
`Getting ${catalogName.toUpperCase()} tables for schema`,
|
|
426
|
+
schemaName,
|
|
427
|
+
);
|
|
428
|
+
console.log("tableNames", tableNames);
|
|
429
|
+
const bucketName = schemaName.split(".")[1];
|
|
430
|
+
console.log("bucketName", bucketName);
|
|
431
|
+
|
|
432
|
+
const attachedDatabases =
|
|
433
|
+
connection.duckdbConnection?.attachedDatabases || [];
|
|
434
|
+
const credentials = getCloudCredentialsFromAttachedDatabases(
|
|
435
|
+
attachedDatabases,
|
|
436
|
+
catalogName as "gcs" | "s3",
|
|
437
|
+
);
|
|
438
|
+
|
|
439
|
+
if (!credentials) {
|
|
440
|
+
throw new Error(
|
|
441
|
+
`${catalogName.toUpperCase()} credentials not found in attached databases`,
|
|
442
|
+
);
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
return await getCloudTablesWithColumns(
|
|
446
|
+
malloyConnection,
|
|
447
|
+
credentials,
|
|
448
|
+
bucketName,
|
|
449
|
+
tableNames,
|
|
450
|
+
);
|
|
451
|
+
}
|
|
452
|
+
|
|
350
453
|
// Fetch all table sources in parallel
|
|
351
454
|
const tableSourcePromises = tableNames.map(async (tableName) => {
|
|
352
455
|
try {
|
|
@@ -598,11 +701,48 @@ export async function listTablesForSchema(
|
|
|
598
701
|
if (!connection.duckdbConnection) {
|
|
599
702
|
throw new Error("DuckDB connection is required");
|
|
600
703
|
}
|
|
704
|
+
|
|
705
|
+
const catalogName = schemaName.split(".")[0];
|
|
706
|
+
const actualSchemaName = schemaName.split(".")[1];
|
|
707
|
+
|
|
708
|
+
if (catalogName === "gcs" || catalogName === "s3") {
|
|
709
|
+
const bucketName = actualSchemaName;
|
|
710
|
+
const attachedDatabases =
|
|
711
|
+
connection.duckdbConnection.attachedDatabases || [];
|
|
712
|
+
|
|
713
|
+
const credentials = getCloudCredentialsFromAttachedDatabases(
|
|
714
|
+
attachedDatabases,
|
|
715
|
+
catalogName as "gcs" | "s3",
|
|
716
|
+
);
|
|
717
|
+
|
|
718
|
+
if (!credentials) {
|
|
719
|
+
throw new Error(
|
|
720
|
+
`${catalogName.toUpperCase()} credentials not found in attached databases`,
|
|
721
|
+
);
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
try {
|
|
725
|
+
const objects = await listAllCloudFiles(credentials, bucketName);
|
|
726
|
+
return objects
|
|
727
|
+
.filter((obj) => isDataFile(obj.key))
|
|
728
|
+
.map((obj) => obj.key);
|
|
729
|
+
} catch (error) {
|
|
730
|
+
logger.error(
|
|
731
|
+
`Error listing ${catalogName.toUpperCase()} objects in bucket ${bucketName}`,
|
|
732
|
+
{
|
|
733
|
+
error,
|
|
734
|
+
},
|
|
735
|
+
);
|
|
736
|
+
throw new Error(
|
|
737
|
+
`Failed to list files in ${catalogName.toUpperCase()} bucket ${bucketName}: ${(error as Error).message}`,
|
|
738
|
+
);
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// Regular DuckDB table listing
|
|
601
743
|
try {
|
|
602
|
-
const catalogName = schemaName.split(".")[0];
|
|
603
|
-
schemaName = schemaName.split(".")[1];
|
|
604
744
|
const result = await malloyConnection.runSQL(
|
|
605
|
-
`SELECT table_name FROM information_schema.tables WHERE table_schema = '${schemaName}' and table_catalog = '${catalogName}' ORDER BY table_name`,
|
|
745
|
+
`SELECT table_name FROM information_schema.tables WHERE table_schema = '${actualSchemaName}' and table_catalog = '${catalogName}' ORDER BY table_name`,
|
|
606
746
|
{ rowLimit: 1000 },
|
|
607
747
|
);
|
|
608
748
|
|
package/src/service/gcs_s3_utils.ts
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ListBucketsCommand,
|
|
3
|
+
ListObjectsV2Command,
|
|
4
|
+
S3Client,
|
|
5
|
+
} from "@aws-sdk/client-s3";
|
|
6
|
+
import { Connection } from "@malloydata/malloy";
|
|
7
|
+
import { components } from "../api";
|
|
8
|
+
import { logger } from "../logger";
|
|
9
|
+
|
|
10
|
+
type ApiTable = components["schemas"]["Table"];
|
|
11
|
+
|
|
12
|
+
export type CloudStorageType = "gcs" | "s3";
|
|
13
|
+
|
|
14
|
+
export interface CloudStorageCredentials {
|
|
15
|
+
type: CloudStorageType;
|
|
16
|
+
accessKeyId: string;
|
|
17
|
+
secretAccessKey: string;
|
|
18
|
+
region?: string;
|
|
19
|
+
endpoint?: string;
|
|
20
|
+
sessionToken?: string;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export interface CloudStorageBucket {
|
|
24
|
+
name: string;
|
|
25
|
+
creationDate?: Date;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface CloudStorageObject {
|
|
29
|
+
key: string;
|
|
30
|
+
size?: number;
|
|
31
|
+
lastModified?: Date;
|
|
32
|
+
isFolder: boolean;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function gcsConnectionToCredentials(gcsConnection: {
|
|
36
|
+
keyId?: string;
|
|
37
|
+
secret?: string;
|
|
38
|
+
}): CloudStorageCredentials {
|
|
39
|
+
return {
|
|
40
|
+
type: "gcs",
|
|
41
|
+
accessKeyId: gcsConnection.keyId || "",
|
|
42
|
+
secretAccessKey: gcsConnection.secret || "",
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export function s3ConnectionToCredentials(s3Connection: {
|
|
47
|
+
accessKeyId?: string;
|
|
48
|
+
secretAccessKey?: string;
|
|
49
|
+
region?: string;
|
|
50
|
+
endpoint?: string;
|
|
51
|
+
sessionToken?: string;
|
|
52
|
+
}): CloudStorageCredentials {
|
|
53
|
+
return {
|
|
54
|
+
type: "s3",
|
|
55
|
+
accessKeyId: s3Connection.accessKeyId || "",
|
|
56
|
+
secretAccessKey: s3Connection.secretAccessKey || "",
|
|
57
|
+
region: s3Connection.region,
|
|
58
|
+
endpoint: s3Connection.endpoint,
|
|
59
|
+
sessionToken: s3Connection.sessionToken,
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function createCloudStorageClient(
|
|
64
|
+
credentials: CloudStorageCredentials,
|
|
65
|
+
): S3Client {
|
|
66
|
+
const isGCS = credentials.type === "gcs";
|
|
67
|
+
|
|
68
|
+
const client = new S3Client({
|
|
69
|
+
endpoint: isGCS ? "https://storage.googleapis.com" : credentials.endpoint,
|
|
70
|
+
region: isGCS ? "auto" : credentials.region || "us-east-1",
|
|
71
|
+
credentials: {
|
|
72
|
+
accessKeyId: credentials.accessKeyId,
|
|
73
|
+
secretAccessKey: credentials.secretAccessKey,
|
|
74
|
+
sessionToken: credentials.sessionToken,
|
|
75
|
+
},
|
|
76
|
+
forcePathStyle: isGCS || !!credentials.endpoint,
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
if (isGCS) {
|
|
80
|
+
client.middlewareStack.add(
|
|
81
|
+
(next) => async (args) => {
|
|
82
|
+
const request = args.request as { query?: Record<string, string> };
|
|
83
|
+
if (request.query) {
|
|
84
|
+
delete request.query["x-id"];
|
|
85
|
+
}
|
|
86
|
+
return next(args);
|
|
87
|
+
},
|
|
88
|
+
{ step: "build", name: "removeXIdParam" },
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return client;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
export async function listCloudBuckets(
|
|
96
|
+
credentials: CloudStorageCredentials,
|
|
97
|
+
): Promise<CloudStorageBucket[]> {
|
|
98
|
+
const client = createCloudStorageClient(credentials);
|
|
99
|
+
const storageType = credentials.type.toUpperCase();
|
|
100
|
+
|
|
101
|
+
try {
|
|
102
|
+
const response = await client.send(new ListBucketsCommand({}));
|
|
103
|
+
return (response.Buckets || []).map((bucket) => ({
|
|
104
|
+
name: bucket.Name || "",
|
|
105
|
+
creationDate: bucket.CreationDate,
|
|
106
|
+
}));
|
|
107
|
+
} catch (error) {
|
|
108
|
+
logger.error(`Failed to list ${storageType} buckets`, { error });
|
|
109
|
+
throw new Error(
|
|
110
|
+
`Failed to list ${storageType} buckets: ${error instanceof Error ? error.message : String(error)}`,
|
|
111
|
+
);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
async function listCloudObjectsInFolder(
|
|
116
|
+
credentials: CloudStorageCredentials,
|
|
117
|
+
bucket: string,
|
|
118
|
+
prefix: string = "",
|
|
119
|
+
): Promise<CloudStorageObject[]> {
|
|
120
|
+
const client = createCloudStorageClient(credentials);
|
|
121
|
+
const storageType = credentials.type.toUpperCase();
|
|
122
|
+
const uri = buildCloudUri(credentials.type, bucket, prefix);
|
|
123
|
+
|
|
124
|
+
try {
|
|
125
|
+
const response = await client.send(
|
|
126
|
+
new ListObjectsV2Command({
|
|
127
|
+
Bucket: bucket,
|
|
128
|
+
Prefix: prefix,
|
|
129
|
+
Delimiter: "/",
|
|
130
|
+
}),
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
const objects: CloudStorageObject[] = [];
|
|
134
|
+
|
|
135
|
+
for (const folderPrefix of response.CommonPrefixes || []) {
|
|
136
|
+
if (folderPrefix.Prefix) {
|
|
137
|
+
objects.push({
|
|
138
|
+
key: folderPrefix.Prefix,
|
|
139
|
+
isFolder: true,
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
for (const content of response.Contents || []) {
|
|
145
|
+
if (content.Key && content.Key !== prefix) {
|
|
146
|
+
objects.push({
|
|
147
|
+
key: content.Key,
|
|
148
|
+
size: content.Size,
|
|
149
|
+
lastModified: content.LastModified,
|
|
150
|
+
isFolder: false,
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
return objects;
|
|
156
|
+
} catch (error) {
|
|
157
|
+
logger.error(`Failed to list ${storageType} objects`, {
|
|
158
|
+
error,
|
|
159
|
+
bucket,
|
|
160
|
+
prefix,
|
|
161
|
+
});
|
|
162
|
+
throw new Error(
|
|
163
|
+
`Failed to list objects in ${uri}: ${error instanceof Error ? error.message : String(error)}`,
|
|
164
|
+
);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
export async function listAllCloudFiles(
|
|
169
|
+
credentials: CloudStorageCredentials,
|
|
170
|
+
bucket: string,
|
|
171
|
+
prefix: string = "",
|
|
172
|
+
): Promise<CloudStorageObject[]> {
|
|
173
|
+
const allFiles: CloudStorageObject[] = [];
|
|
174
|
+
|
|
175
|
+
async function traverse(currentPrefix: string): Promise<void> {
|
|
176
|
+
const objects = await listCloudObjectsInFolder(
|
|
177
|
+
credentials,
|
|
178
|
+
bucket,
|
|
179
|
+
currentPrefix,
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
for (const obj of objects) {
|
|
183
|
+
if (obj.isFolder) {
|
|
184
|
+
await traverse(obj.key);
|
|
185
|
+
} else {
|
|
186
|
+
allFiles.push(obj);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
await traverse(prefix);
|
|
192
|
+
return allFiles;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
export function isDataFile(key: string): boolean {
|
|
196
|
+
const lowerKey = key.toLowerCase();
|
|
197
|
+
return (
|
|
198
|
+
lowerKey.endsWith(".csv") ||
|
|
199
|
+
lowerKey.endsWith(".parquet") ||
|
|
200
|
+
lowerKey.endsWith(".json") ||
|
|
201
|
+
lowerKey.endsWith(".jsonl") ||
|
|
202
|
+
lowerKey.endsWith(".ndjson")
|
|
203
|
+
);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
export function getFileType(key: string): string {
|
|
207
|
+
const lowerKey = key.toLowerCase();
|
|
208
|
+
if (lowerKey.endsWith(".csv")) return "csv";
|
|
209
|
+
if (lowerKey.endsWith(".parquet")) return "parquet";
|
|
210
|
+
if (lowerKey.endsWith(".json")) return "json";
|
|
211
|
+
if (lowerKey.endsWith(".jsonl") || lowerKey.endsWith(".ndjson"))
|
|
212
|
+
return "jsonl";
|
|
213
|
+
return "unknown";
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
export function buildCloudUri(
|
|
217
|
+
type: CloudStorageType,
|
|
218
|
+
bucket: string,
|
|
219
|
+
key: string,
|
|
220
|
+
): string {
|
|
221
|
+
const scheme = type === "gcs" ? "gs" : "s3";
|
|
222
|
+
return `${scheme}://${bucket}/${key}`;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
function standardizeRunSQLResult(result: unknown): unknown[] {
|
|
226
|
+
return Array.isArray(result)
|
|
227
|
+
? result
|
|
228
|
+
: (result as { rows?: unknown[] }).rows || [];
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
export async function getCloudTablesWithColumns(
|
|
232
|
+
malloyConnection: Connection,
|
|
233
|
+
credentials: CloudStorageCredentials,
|
|
234
|
+
bucketName: string,
|
|
235
|
+
fileKeys: string[],
|
|
236
|
+
): Promise<ApiTable[]> {
|
|
237
|
+
const tables: ApiTable[] = [];
|
|
238
|
+
|
|
239
|
+
for (const fileKey of fileKeys) {
|
|
240
|
+
const uri = buildCloudUri(credentials.type, bucketName, fileKey);
|
|
241
|
+
const fileType = getFileType(fileKey);
|
|
242
|
+
|
|
243
|
+
try {
|
|
244
|
+
let describeQuery: string;
|
|
245
|
+
|
|
246
|
+
switch (fileType) {
|
|
247
|
+
case "csv":
|
|
248
|
+
describeQuery = `DESCRIBE SELECT * FROM read_csv('${uri}', auto_detect=true) LIMIT 1`;
|
|
249
|
+
break;
|
|
250
|
+
case "parquet":
|
|
251
|
+
describeQuery = `DESCRIBE SELECT * FROM read_parquet('${uri}') LIMIT 1`;
|
|
252
|
+
break;
|
|
253
|
+
case "json":
|
|
254
|
+
describeQuery = `DESCRIBE SELECT * FROM read_json('${uri}', auto_detect=true) LIMIT 1`;
|
|
255
|
+
break;
|
|
256
|
+
case "jsonl":
|
|
257
|
+
describeQuery = `DESCRIBE SELECT * FROM read_json('${uri}', format='newline_delimited', auto_detect=true) LIMIT 1`;
|
|
258
|
+
break;
|
|
259
|
+
default:
|
|
260
|
+
logger.warn(`Unsupported file type for ${fileKey}`);
|
|
261
|
+
tables.push({
|
|
262
|
+
resource: uri,
|
|
263
|
+
columns: [],
|
|
264
|
+
});
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
const result = await malloyConnection.runSQL(describeQuery);
|
|
269
|
+
const rows = standardizeRunSQLResult(result);
|
|
270
|
+
const columns = rows.map((row: unknown) => {
|
|
271
|
+
const typedRow = row as Record<string, unknown>;
|
|
272
|
+
return {
|
|
273
|
+
name: (typedRow.column_name || typedRow.name) as string,
|
|
274
|
+
type: (typedRow.column_type || typedRow.type) as string,
|
|
275
|
+
};
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
tables.push({
|
|
279
|
+
resource: uri,
|
|
280
|
+
columns,
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
logger.info(
|
|
284
|
+
`Got schema for ${credentials.type.toUpperCase()} file: ${uri}`,
|
|
285
|
+
{
|
|
286
|
+
columnCount: columns.length,
|
|
287
|
+
},
|
|
288
|
+
);
|
|
289
|
+
} catch (error) {
|
|
290
|
+
logger.warn(
|
|
291
|
+
`Failed to get schema for ${credentials.type.toUpperCase()} file: ${uri}`,
|
|
292
|
+
{
|
|
293
|
+
error,
|
|
294
|
+
},
|
|
295
|
+
);
|
|
296
|
+
tables.push({
|
|
297
|
+
resource: uri,
|
|
298
|
+
columns: [],
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return tables;
|
|
304
|
+
}
|