@malloy-publisher/server 0.0.151 → 0.0.153

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@malloy-publisher/server",
   "description": "Malloy Publisher Server",
-  "version": "0.0.151",
+  "version": "0.0.153",
   "main": "dist/server.js",
   "bin": {
     "malloy-publisher": "dist/server.js"
@@ -24,6 +24,7 @@
     "generate-api-types": "bunx openapi-typescript ../../api-doc.yaml --output src/api.ts"
   },
   "dependencies": {
+    "@aws-sdk/client-s3": "^3.958.0",
     "@google-cloud/storage": "^7.16.0",
     "@malloydata/db-bigquery": "^0.0.318",
     "@malloydata/db-duckdb": "^0.0.318",
@@ -48,7 +49,7 @@
     "class-transformer": "^0.5.1",
     "class-validator": "^0.14.1",
     "cors": "^2.8.5",
-    "duckdb": "^1.4.1",
+    "duckdb": "1.3.4",
     "express": "^4.21.0",
     "globals": "^15.9.0",
     "handlebars": "^4.7.8",
package/src/logger.ts CHANGED
@@ -9,6 +9,9 @@ export const logger = winston.createLogger({
       ? winston.format.combine(
           winston.format.uncolorize(),
           winston.format.timestamp(),
+          winston.format.metadata({
+            fillExcept: ["message", "level", "timestamp"],
+          }),
           winston.format.json(),
         )
       : winston.format.combine(
@@ -18,6 +21,33 @@ export const logger = winston.createLogger({
   transports: [new winston.transports.Console()],
 });
 
+/**
+ * Extracts the trace ID from a W3C traceparent header.
+ * Format: version-trace-id-parent-id-trace-flags
+ * Example: 00-81f2264f363f1b5596c84ab29e6be171-83ef39df12ab6bab-01
+ *
+ * @param traceparent The traceparent header value
+ * @returns The trace ID (32 hex characters) or undefined if invalid
+ */
+function extractTraceIdFromTraceparent(
+  traceparent: string | undefined,
+): string | undefined {
+  if (!traceparent) {
+    return undefined;
+  }
+
+  // The traceparent is either "version-traceId-parentId-traceFlags" or a bare traceId
+  const parts = traceparent.split("-");
+  const traceId =
+    parts.length >= 2 ? parts[1] : parts.length == 1 ? parts[0] : undefined;
+  // Validate that the traceId is 32 hex characters
+  if (traceId && traceId.length === 32 && /^[0-9a-fA-F]{32}$/.test(traceId)) {
+    return traceId;
+  }
+
+  return undefined;
+}
+
 export const loggerMiddleware: RequestHandler = (req, res, next) => {
   const startTime = performance.now();
   const resJson = res.json;
@@ -27,14 +57,26 @@ export const loggerMiddleware: RequestHandler = (req, res, next) => {
   };
   res.on("finish", () => {
     const endTime = performance.now();
-    logger.info(`${req.method} ${req.url}`, {
+
+    // Extract trace ID from traceparent header if present
+    const traceparent = req.headers["traceparent"] as string | undefined;
+    const traceId = extractTraceIdFromTraceparent(traceparent);
+
+    const logMetadata: Record<string, unknown> = {
       statusCode: res.statusCode,
       duration: endTime - startTime,
       payload: req.body,
       response: res.locals.body,
       params: req.params,
       query: req.query,
-    });
+    };
+
+    // Add traceId to log metadata if present
+    if (traceId) {
+      logMetadata.traceId = traceId;
+    }
+
+    logger.info(`${req.method} ${req.url}`, logMetadata);
   });
   next();
 };
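With winston.format.metadata({ fillExcept: ["message", "level", "timestamp"] }) added to the production format, every other logged field (including the new traceId) is nested under a metadata key in the JSON output. Below is a minimal sketch of the trace-ID extraction behavior, assuming the module-private extractTraceIdFromTraceparent were exported for testing; the header values are illustrative:

import assert from "node:assert";
// Hypothetical import: the package actually keeps this function module-private.
import { extractTraceIdFromTraceparent } from "./logger";

// Full W3C header: the trace ID is the second dash-separated segment.
assert.equal(
  extractTraceIdFromTraceparent(
    "00-81f2264f363f1b5596c84ab29e6be171-83ef39df12ab6bab-01",
  ),
  "81f2264f363f1b5596c84ab29e6be171",
);

// A bare 32-hex-character trace ID is accepted as well.
assert.equal(
  extractTraceIdFromTraceparent("81f2264f363f1b5596c84ab29e6be171"),
  "81f2264f363f1b5596c84ab29e6be171",
);

// Anything that does not validate as 32 hex characters yields undefined.
assert.equal(extractTraceIdFromTraceparent("not-a-traceparent"), undefined);
assert.equal(extractTraceIdFromTraceparent(undefined), undefined);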
@@ -13,6 +13,7 @@ import { v4 as uuidv4 } from "uuid";
 import { components } from "../api";
 import { TEMP_DIR_PATH } from "../constants";
 import { logAxiosError, logger } from "../logger";
+import { CloudStorageCredentials } from "./gcs_s3_utils";
 
 type AttachedDatabase = components["schemas"]["AttachedDatabase"];
 type ApiConnection = components["schemas"]["Connection"];
@@ -377,6 +378,121 @@ async function attachPostgres(
   logger.info(`Successfully attached PostgreSQL database: ${attachedDb.name}`);
 }
 
+async function attachCloudStorage(
+  connection: DuckDBConnection,
+  attachedDb: AttachedDatabase,
+): Promise<void> {
+  const isGCS = attachedDb.type === "gcs";
+  const isS3 = attachedDb.type === "s3";
+
+  if (!isGCS && !isS3) {
+    throw new Error(`Invalid cloud storage type: ${attachedDb.type}`);
+  }
+
+  const storageType = attachedDb.type?.toUpperCase() || "";
+  let credentials: CloudStorageCredentials;
+
+  if (isGCS) {
+    if (!attachedDb.gcsConnection) {
+      throw new Error(
+        `GCS connection configuration missing for: ${attachedDb.name}`,
+      );
+    }
+    if (!attachedDb.gcsConnection.keyId || !attachedDb.gcsConnection.secret) {
+      throw new Error(
+        `GCS keyId and secret are required for: ${attachedDb.name}`,
+      );
+    }
+    credentials = {
+      type: "gcs",
+      accessKeyId: attachedDb.gcsConnection.keyId,
+      secretAccessKey: attachedDb.gcsConnection.secret,
+    };
+  } else {
+    if (!attachedDb.s3Connection) {
+      throw new Error(
+        `S3 connection configuration missing for: ${attachedDb.name}`,
+      );
+    }
+    if (
+      !attachedDb.s3Connection.accessKeyId ||
+      !attachedDb.s3Connection.secretAccessKey
+    ) {
+      throw new Error(
+        `S3 accessKeyId and secretAccessKey are required for: ${attachedDb.name}`,
+      );
+    }
+    credentials = {
+      type: "s3",
+      accessKeyId: attachedDb.s3Connection.accessKeyId,
+      secretAccessKey: attachedDb.s3Connection.secretAccessKey,
+      region: attachedDb.s3Connection.region,
+      endpoint: attachedDb.s3Connection.endpoint,
+      sessionToken: attachedDb.s3Connection.sessionToken,
+    };
+  }
+
+  await installAndLoadExtension(connection, "httpfs");
+
+  const secretName = sanitizeSecretName(
+    `${attachedDb.type}_${attachedDb.name}`,
+  );
+  const escapedKeyId = escapeSQL(credentials.accessKeyId);
+  const escapedSecret = escapeSQL(credentials.secretAccessKey);
+
+  let createSecretCommand: string;
+
+  if (isGCS) {
+    createSecretCommand = `
+      CREATE OR REPLACE SECRET ${secretName} (
+        TYPE gcs,
+        KEY_ID '${escapedKeyId}',
+        SECRET '${escapedSecret}'
+      );
+    `;
+  } else {
+    const region = credentials.region || "us-east-1";
+
+    if (credentials.endpoint) {
+      const escapedEndpoint = escapeSQL(credentials.endpoint);
+      createSecretCommand = `
+        CREATE OR REPLACE SECRET ${secretName} (
+          TYPE s3,
+          KEY_ID '${escapedKeyId}',
+          SECRET '${escapedSecret}',
+          REGION '${region}',
+          ENDPOINT '${escapedEndpoint}',
+          URL_STYLE 'path'
+        );
+      `;
+    } else if (credentials.sessionToken) {
+      const escapedToken = escapeSQL(credentials.sessionToken);
+      createSecretCommand = `
+        CREATE OR REPLACE SECRET ${secretName} (
+          TYPE s3,
+          KEY_ID '${escapedKeyId}',
+          SECRET '${escapedSecret}',
+          REGION '${region}',
+          SESSION_TOKEN '${escapedToken}'
+        );
+      `;
+    } else {
+      createSecretCommand = `
+        CREATE OR REPLACE SECRET ${secretName} (
+          TYPE s3,
+          KEY_ID '${escapedKeyId}',
+          SECRET '${escapedSecret}',
+          REGION '${region}'
+        );
+      `;
+    }
+  }
+
+  await connection.runSQL(createSecretCommand);
+  logger.info(`Created ${storageType} secret: ${secretName}`);
+  logger.info(`${storageType} connection configured for: ${attachedDb.name}`);
+}
+
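The function above installs DuckDB's httpfs extension and registers a CREATE OR REPLACE SECRET, after which gs:// and s3:// URLs are resolved with those credentials. A hedged sketch of a query that would authenticate through such a secret (the bucket and object names are invented):

// Hypothetical: after attachCloudStorage() has run for an S3 attachment,
// DuckDB resolves s3:// reads via the registered secret.
const rows = await connection.runSQL(`
  SELECT COUNT(*) AS n
  FROM read_parquet('s3://example-bucket/events/*.parquet');
`);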
 // Main attachment function
 async function attachDatabasesToDuckDB(
   duckdbConnection: DuckDBConnection,
@@ -386,6 +502,8 @@ async function attachDatabasesToDuckDB(
     bigquery: attachBigQuery,
     snowflake: attachSnowflake,
     postgres: attachPostgres,
+    gcs: attachCloudStorage,
+    s3: attachCloudStorage,
   };
 
   for (const attachedDb of attachedDatabases) {
@@ -633,9 +751,10 @@ export async function createProjectConnections(
 
   // Create DuckDB connection with project basePath as working directory
   // This ensures relative paths in the project are resolved correctly
+  // Use a unique database file per connection to prevent sharing across connections
   const duckdbConnection = new DuckDBConnection(
     connection.name,
-    ":memory:",
+    path.join(projectPath, `${connection.name}.duckdb`),
     projectPath,
   );
 
@@ -747,9 +866,10 @@ export async function createPackageDuckDBConnections(
 
   // Create DuckDB connection with project basePath as working directory
   // This ensures relative paths in the project are resolved correctly
+  // Use a unique database file per connection to prevent sharing across connections
   const duckdbConnection = new DuckDBConnection(
     connection.name,
-    ":memory:",
+    path.join(packagePath, `${connection.name}.duckdb`),
     packagePath,
   );
 
@@ -3,6 +3,15 @@ import { Connection, TableSourceDef } from "@malloydata/malloy";
 import { components } from "../api";
 import { ConnectionError } from "../errors";
 import { logger } from "../logger";
+import {
+  CloudStorageCredentials,
+  gcsConnectionToCredentials,
+  getCloudTablesWithColumns,
+  isDataFile,
+  listAllCloudFiles,
+  listCloudBuckets,
+  s3ConnectionToCredentials,
+} from "./gcs_s3_utils";
 import { ApiConnection } from "./model";
 
 type ApiSchema = components["schemas"]["Schema"];
@@ -68,6 +77,29 @@ function standardizeRunSQLResult(result: unknown): unknown[] {
     : (result as { rows?: unknown[] }).rows || [];
 }
 
+function getCloudCredentialsFromAttachedDatabases(
+  attachedDatabases: components["schemas"]["AttachedDatabase"][],
+  storageType: "gcs" | "s3",
+): CloudStorageCredentials | null {
+  for (const attachedDb of attachedDatabases) {
+    if (
+      attachedDb.type === "gcs" &&
+      storageType === "gcs" &&
+      attachedDb.gcsConnection
+    ) {
+      return gcsConnectionToCredentials(attachedDb.gcsConnection);
+    }
+    if (
+      attachedDb.type === "s3" &&
+      storageType === "s3" &&
+      attachedDb.s3Connection
+    ) {
+      return s3ConnectionToCredentials(attachedDb.s3Connection);
+    }
+  }
+  return null;
+}
+
 export async function getSchemasForConnection(
   connection: ApiConnection,
   malloyConnection: Connection,
@@ -265,7 +297,7 @@ export async function getSchemasForConnection(
 
     const rows = standardizeRunSQLResult(result);
 
-    return rows.map((row: unknown) => {
+    const schemas: ApiSchema[] = rows.map((row: unknown) => {
       const typedRow = row as Record<string, unknown>;
       const schemaName = typedRow.schema_name as string;
       const catalogName = typedRow.catalog_name as string;
@@ -288,6 +320,42 @@ export async function getSchemasForConnection(
         isDefault: catalogName === "main",
       };
     });
+
+    const attachedDatabases =
+      connection.duckdbConnection.attachedDatabases || [];
+
+    for (const attachedDb of attachedDatabases) {
+      if (
+        (attachedDb.type === "gcs" || attachedDb.type === "s3") &&
+        (attachedDb.gcsConnection || attachedDb.s3Connection)
+      ) {
+        const credentials =
+          attachedDb.type === "gcs"
+            ? gcsConnectionToCredentials(attachedDb.gcsConnection!)
+            : s3ConnectionToCredentials(attachedDb.s3Connection!);
+
+        try {
+          const buckets = await listCloudBuckets(credentials);
+          for (const bucket of buckets) {
+            schemas.push({
+              name: `${attachedDb.type}.${bucket.name}`,
+              isHidden: false,
+              isDefault: false,
+            });
+          }
+          logger.info(
+            `Listed ${buckets.length} ${attachedDb.type.toUpperCase()} buckets for attached database ${attachedDb.name}`,
+          );
+        } catch (cloudError) {
+          logger.warn(
+            `Failed to list ${attachedDb.type.toUpperCase()} buckets for ${attachedDb.name}`,
+            { error: cloudError },
+          );
+        }
+      }
+    }
+
+    return schemas;
   } catch (error) {
     console.error(
       `Error getting schemas for DuckDB connection ${connection.name}:`,
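The upshot of this hunk: every bucket reachable with the attached credentials surfaces as a pseudo-schema named <type>.<bucket> alongside DuckDB's own schemas. A hedged sketch of the extra entries (the bucket names are invented):

// Hypothetical: an S3 attachment whose account exposes buckets
// "raw-events" and "curated" adds these entries to the schema list.
const extraSchemas = [
  { name: "s3.raw-events", isHidden: false, isDefault: false },
  { name: "s3.curated", isHidden: false, isDefault: false },
];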
@@ -347,6 +415,41 @@ export async function getTablesForSchema(
     malloyConnection,
   );
 
+  const catalogName = schemaName.split(".")[0];
+
+  if (
+    (catalogName === "gcs" || catalogName === "s3") &&
+    connection.type === "duckdb"
+  ) {
+    console.log(
+      `Getting ${catalogName.toUpperCase()} tables for schema`,
+      schemaName,
+    );
+    console.log("tableNames", tableNames);
+    const bucketName = schemaName.split(".")[1];
+    console.log("bucketName", bucketName);
+
+    const attachedDatabases =
+      connection.duckdbConnection?.attachedDatabases || [];
+    const credentials = getCloudCredentialsFromAttachedDatabases(
+      attachedDatabases,
+      catalogName as "gcs" | "s3",
+    );
+
+    if (!credentials) {
+      throw new Error(
+        `${catalogName.toUpperCase()} credentials not found in attached databases`,
+      );
+    }
+
+    return await getCloudTablesWithColumns(
+      malloyConnection,
+      credentials,
+      bucketName,
+      tableNames,
+    );
+  }
+
   // Fetch all table sources in parallel
   const tableSourcePromises = tableNames.map(async (tableName) => {
     try {
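Dispatch here is keyed on the first dot-separated segment of the schema name, and only gcs/s3 catalogs on a DuckDB connection take the cloud path. A hedged sketch of the routing (the schema names are invented):

// Hypothetical calls, following the branch above:
//   getTablesForSchema("s3.raw-events", ...)  -> getCloudTablesWithColumns(...)
//   getTablesForSchema("gcs.analytics", ...)  -> getCloudTablesWithColumns(...)
//   getTablesForSchema("mydb.main", ...)      -> per-table Malloy source fetch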
@@ -598,11 +701,48 @@ export async function listTablesForSchema(
   if (!connection.duckdbConnection) {
     throw new Error("DuckDB connection is required");
   }
+
+  const catalogName = schemaName.split(".")[0];
+  const actualSchemaName = schemaName.split(".")[1];
+
+  if (catalogName === "gcs" || catalogName === "s3") {
+    const bucketName = actualSchemaName;
+    const attachedDatabases =
+      connection.duckdbConnection.attachedDatabases || [];
+
+    const credentials = getCloudCredentialsFromAttachedDatabases(
+      attachedDatabases,
+      catalogName as "gcs" | "s3",
+    );
+
+    if (!credentials) {
+      throw new Error(
+        `${catalogName.toUpperCase()} credentials not found in attached databases`,
+      );
+    }
+
+    try {
+      const objects = await listAllCloudFiles(credentials, bucketName);
+      return objects
+        .filter((obj) => isDataFile(obj.key))
+        .map((obj) => obj.key);
+    } catch (error) {
+      logger.error(
+        `Error listing ${catalogName.toUpperCase()} objects in bucket ${bucketName}`,
+        {
+          error,
+        },
+      );
+      throw new Error(
+        `Failed to list files in ${catalogName.toUpperCase()} bucket ${bucketName}: ${(error as Error).message}`,
+      );
+    }
+  }
+
+  // Regular DuckDB table listing
   try {
-    const catalogName = schemaName.split(".")[0];
-    schemaName = schemaName.split(".")[1];
     const result = await malloyConnection.runSQL(
-      `SELECT table_name FROM information_schema.tables WHERE table_schema = '${schemaName}' and table_catalog = '${catalogName}' ORDER BY table_name`,
+      `SELECT table_name FROM information_schema.tables WHERE table_schema = '${actualSchemaName}' and table_catalog = '${catalogName}' ORDER BY table_name`,
       { rowLimit: 1000 },
     );
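For the cloud branch of listTablesForSchema, a hedged sketch of the end-to-end behavior (the object keys are invented, and the exact extensions isDataFile accepts are not shown in this diff):

// Hypothetical: listTablesForSchema("s3.raw-events", ...) lists bucket
// objects through the cloud SDK instead of information_schema:
//   listAllCloudFiles(credentials, "raw-events")
//     -> [{ key: "events/2024.parquet" }, { key: "README.md" }]
// keeping only data files and returning their keys:
//     -> ["events/2024.parquet"]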