npm - @malloy-publisher/server - Versions diffs - 0.0.198-dev1 → 0.0.198-dev3 - Mend

@malloy-publisher/server 0.0.198-dev1 → 0.0.198-dev3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/build.ts +12 -22
package/dist/instrumentation.mjs +57 -36
package/dist/server.mjs +2259 -3180
package/dist/service/schema_worker.mjs +61 -0
package/package.json +2 -3
package/src/health.ts +0 -13
package/src/instrumentation.ts +50 -0
package/src/server.ts +5 -0
package/src/service/environment_store.ts +33 -3
package/src/service/model.ts +3 -226
package/src/service/package.spec.ts +11 -7
package/src/service/package.ts +49 -53
package/src/service/process_stats_reporter.ts +169 -0
package/src/service/schema_worker.ts +123 -0
package/src/service/schema_worker_pool.ts +287 -0
package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +235 -0
package/dist/compile_worker.mjs +0 -628
package/src/compile/compile_pool.spec.ts +0 -227
package/src/compile/compile_pool.ts +0 -729
package/src/compile/compile_worker.ts +0 -683
package/src/compile/protocol.ts +0 -251
package/src/service/model_worker_path.spec.ts +0 -125

package/src/service/package.ts CHANGED Viewed

@@ -1,16 +1,11 @@
 import * as fs from "fs/promises";
 import * as path from "path";
-import { DuckDBConnection } from "@malloydata/db-duckdb";
-import "@malloydata/db-duckdb/native";
 import {
    Connection,
-   ConnectionRuntime,
    contextOverlay,
-   EmptyURLReader,
    FixedConnectionMap,
    MalloyConfig,
-   SourceDef,
 } from "@malloydata/malloy";
 import { metrics } from "@opentelemetry/api";
 import recursive from "recursive-readdir";
@@ -26,13 +21,12 @@ import { formatDuration, logger } from "../logger";
 import { BuildManifest } from "../storage/DatabaseInterface";
 import { ignoreDotfiles } from "../utils";
 import { Model } from "./model";
+import { getSchemaWorkerPool } from "./schema_worker_pool";
 type ApiDatabase = components["schemas"]["Database"];
 type ApiModel = components["schemas"]["Model"];
 type ApiNotebook = components["schemas"]["Notebook"];
 export type ApiPackage = components["schemas"]["Package"];
-type ApiColumn = components["schemas"]["Column"];
-type ApiTableDescription = components["schemas"]["TableDescription"];
 // A thunk lets callers pass a live reference to the *current* environment
 // MalloyConfig so the package wrapper resolves environment connections against the
 // generation that's active at lookup time, not the one that was current when
@@ -93,6 +87,8 @@ export class Package {
          duration: formatDuration(manifestValidationTime - startTime),
       });
+      let packageMalloyConfig: MalloyConfig | undefined;
       try {
          const packageConfig = await Package.readPackageConfig(packagePath);
          const packageConfigTime = performance.now();
@@ -181,6 +177,17 @@ export class Package {
             malloy_package_name: packageName,
             status: "error",
          });
+         if (packageMalloyConfig) {
+            try {
+               await packageMalloyConfig.shutdown("close");
+            } catch (releaseError) {
+               logger.warn(
+                  `Failed to release package-local DuckDB for ${packageName}`,
+                  { error: releaseError },
+               );
+            }
+         }
          // Clean up package directory on failure
          try {
             await fs.rm(packagePath, {
@@ -430,22 +437,43 @@ export class Package {
    private static async readDatabases(
       packagePath: string,
    ): Promise<ApiDatabase[]> {
-      return await Promise.all(
-         (await Package.getDatabasePaths(packagePath)).map(
-            async (databasePath) => {
-               const databaseInfo = await Package.getDatabaseInfo(
-                  packagePath,
-                  databasePath,
-               );
-               return {
-                  path: databasePath,
-                  info: databaseInfo,
-                  type: "embedded",
-               };
-            },
+      const databasePaths = await Package.getDatabasePaths(packagePath);
+      if (databasePaths.length === 0) return [];
+      // Off-main-thread: schema introspection runs in the
+      // SchemaWorkerPool so DuckDB's native thread pool lives inside
+      // a worker we control. This is the leak class that OOM-killed
+      // prod (466 leaked Bun Pool threads on worker-76b49bdb89-8bsv4)
+      // — worker isolation puts a hard ceiling on per-package native
+      // thread usage and the worker's connection is reused across all
+      // schema queries for the life of the process.
+      const pool = getSchemaWorkerPool();
+      const settled = await Promise.allSettled(
+         databasePaths.map((databasePath) =>
+            pool.submit(packagePath, databasePath),
          ),
       );
+      const results: ApiDatabase[] = [];
+      for (let i = 0; i < settled.length; i++) {
+         const outcome = settled[i];
+         if (outcome.status === "fulfilled") {
+            results.push({
+               path: databasePaths[i],
+               info: outcome.value,
+               type: "embedded",
+            });
+         } else {
+            // A single bad parquet (corrupt footer, unsupported type)
+            // must not fail the whole package load. Log and skip.
+            logger.warn("Schema introspection failed for database", {
+               packagePath,
+               databasePath: databasePaths[i],
+               error: outcome.reason,
+            });
+         }
+      }
+      return results;
    }
    private static async getDatabasePaths(
@@ -462,38 +490,6 @@ export class Package {
          );
    }
-   private static async getDatabaseInfo(
-      packagePath: string,
-      databasePath: string,
-   ): Promise<ApiTableDescription> {
-      const fullPath = path.join(packagePath, databasePath);
-      // Create a DuckDB source then:
-      // 1. Load the model and get the table schema from model
-      // 2. Run a query to get the row count from the table
-      const runtime = new ConnectionRuntime({
-         urlReader: new EmptyURLReader(),
-         connections: [new DuckDBConnection("duckdb")],
-      });
-      // Normalize path to use forward slashes for cross-platform compatibility
-      // DuckDB on Windows supports forward slashes, and this avoids escaping issues
-      const normalizedPath = fullPath.replace(/\\/g, "/");
-      const model = runtime.loadModel(
-         `source: temp is duckdb.table('${normalizedPath}')`,
-      );
-      const modelDef = await model.getModel();
-      const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
-      const schema = fields.map((field): ApiColumn => {
-         return { type: field.type, name: field.name };
-      });
-      const runner = model.loadQuery(
-         "run: temp->{aggregate: row_count is count()}",
-      );
-      const result = await runner.run();
-      const rowCount = result.data.value[0].row_count?.valueOf() as number;
-      return { name: databasePath, rowCount, columns: schema };
-   }
    public setName(name: string) {
       this.packageName = name;
    }

package/src/service/process_stats_reporter.ts ADDED Viewed

@@ -0,0 +1,169 @@
+import * as fs from "fs";
+import { logger } from "../logger";
+import type { PackageMemoryGovernor } from "./package_memory_governor";
+const DEFAULT_INTERVAL_MS = 30_000; // Default sampling period (30 s), used when the reporter is constructed without an explicit interval.
+interface LinuxProcStatus { // Subset of /proc/self/status; a field is undefined when its line was not seen.
+   threads?: number; // from the "Threads" line
+   vmRssBytes?: number; // from "VmRSS", converted from kB to bytes
+   vmSizeBytes?: number; // from "VmSize", converted from kB to bytes
+   vmPeakBytes?: number; // from "VmPeak", converted from kB to bytes
+   vmDataBytes?: number; // from "VmData", converted from kB to bytes
+   voluntaryCtxSwitches?: number; // from the "voluntary_ctxt_switches" line
+   nonvoluntaryCtxSwitches?: number; // from the "nonvoluntary_ctxt_switches" line
+}
+/**
+ * Snapshot the thread-count, virtual-memory and context-switch lines
+ * of `/proc/self/status`. The file is small, so it is read
+ * synchronously rather than through the promise-based fs API.
+ *
+ * Every line has the shape `Key:\t<value> [unit]`. Memory sizes are
+ * reported in kB and converted to bytes with `kBToBytes` so they line
+ * up with `process.memoryUsage()`; plain counters go through `Number`.
+ *
+ * @returns the parsed fields, or `null` if anything throws (for
+ *    example when the file cannot be read).
+ */
+function readLinuxProcStatus(): LinuxProcStatus | null {
+   try {
+      const status: LinuxProcStatus = {};
+      const contents = fs.readFileSync("/proc/self/status", "utf8");
+      contents.split("\n").forEach((entry) => {
+         const pieces = entry.split(":");
+         const label = pieces[0];
+         const rest = pieces[1];
+         // Blank lines and lines without a `key:value` shape carry nothing.
+         if (!label || !rest) return;
+         const field = label.trim();
+         const text = rest.trim();
+         if (field === "Threads") {
+            status.threads = Number(text);
+         } else if (field === "VmRSS") {
+            status.vmRssBytes = kBToBytes(text);
+         } else if (field === "VmSize") {
+            status.vmSizeBytes = kBToBytes(text);
+         } else if (field === "VmPeak") {
+            status.vmPeakBytes = kBToBytes(text);
+         } else if (field === "VmData") {
+            status.vmDataBytes = kBToBytes(text);
+         } else if (field === "voluntary_ctxt_switches") {
+            status.voluntaryCtxSwitches = Number(text);
+         } else if (field === "nonvoluntary_ctxt_switches") {
+            status.nonvoluntaryCtxSwitches = Number(text);
+         }
+      });
+      return status;
+   } catch {
+      return null;
+   }
+}
+/**
+ * Convert a `/proc/self/status` size field such as `"1234 kB"` to bytes.
+ *
+ * @param value the already-trimmed text after the colon; a trailing
+ *    `kB` unit is optional.
+ * @returns the size in bytes, or `undefined` when the field is empty
+ *    or not a finite number.
+ */
+function kBToBytes(value: string): number | undefined {
+   const digits = value.replace(/\s*kB$/, "").trim();
+   // `Number("")` evaluates to 0, so an empty (or unit-only) field
+   // would otherwise be reported as a real 0-byte measurement.
+   if (digits === "") return undefined;
+   const num = Number(digits);
+   if (!Number.isFinite(num)) return undefined;
+   return num * 1024;
+}
+/**
+ * Collect JavaScriptCore heap figures from Bun's `bun:jsc` builtin.
+ * Returns `null` when the `Bun` global is missing (e.g. plain Node),
+ * when the module exposes neither `heapStats` nor `memoryUsage`
+ * results, or when anything throws — the reporter must never take
+ * the process down.
+ */
+async function readBunJscStats(): Promise<Record<string, number> | null> {
+   const runtime = globalThis as { Bun?: unknown };
+   if (typeof runtime.Bun === "undefined") {
+      return null;
+   }
+   try {
+      // Imported lazily so Node builds never have to resolve `bun:jsc`.
+      const jscModule = (await import("bun:jsc")) as unknown as {
+         heapStats?: () => Record<string, number>;
+         memoryUsage?: () => Record<string, number>;
+      };
+      const heapFigures =
+         jscModule.heapStats == null ? undefined : jscModule.heapStats();
+      const memoryFigures =
+         jscModule.memoryUsage == null ? undefined : jscModule.memoryUsage();
+      if (heapFigures || memoryFigures) {
+         // Later sources win on key collisions: memory figures over heap.
+         return Object.assign({}, heapFigures, memoryFigures);
+      }
+      return null;
+   } catch {
+      return null;
+   }
+}
+/**
+ * Periodically logs process memory and thread counts to give ops a
+ * cheap, always-on signal for the leak classes that have OOM-killed
+ * prod (DuckDB connection thread pools, libuv worker pool, Malloy
+ * compile heap, etc.).
+ *
+ * Logs at `info` so it shows up without flipping `LOG_LEVEL`. Volume
+ * is low (~2 lines/minute by default). Pulls the memory governor's
+ * snapshot too so RSS/back-pressure state appears in the same line as
+ * Node/Bun heap.
+ */
+export class ProcessStatsReporter {
+   // Largest delay `setInterval` honours; Node treats anything above
+   // it (as well as NaN and values below 1) as a 1 ms delay.
+   private static readonly MAX_TIMER_DELAY_MS = 2_147_483_647;
+   private timer: ReturnType<typeof setInterval> | null = null;
+   private readonly intervalMs: number;
+   private readonly memoryGovernor: PackageMemoryGovernor | null;
+   /**
+    * @param memoryGovernor optional governor whose `getStatus()`
+    *    snapshot is included in every sample; pass `null` to omit it.
+    * @param intervalMs sampling period in milliseconds. Values that
+    *    `setInterval` would silently turn into a 1 ms delay (NaN,
+    *    below 1, above 2147483647) fall back to the default instead of
+    *    flooding the log.
+    */
+   constructor(
+      memoryGovernor: PackageMemoryGovernor | null,
+      intervalMs: number = DEFAULT_INTERVAL_MS,
+   ) {
+      this.memoryGovernor = memoryGovernor;
+      const usable =
+         Number.isFinite(intervalMs) &&
+         intervalMs >= 1 &&
+         intervalMs <= ProcessStatsReporter.MAX_TIMER_DELAY_MS;
+      if (!usable) {
+         logger.warn(
+            `ProcessStatsReporter: invalid intervalMs=${intervalMs}, using ${DEFAULT_INTERVAL_MS}`,
+         );
+      }
+      this.intervalMs = usable ? intervalMs : DEFAULT_INTERVAL_MS;
+   }
+   /** Start sampling. Calling it again while running is a no-op. */
+   public start(): void {
+      if (this.timer !== null) return;
+      // Immediate first sample so a freshly-started pod logs its
+      // baseline before the first interval has elapsed.
+      void this.tick();
+      this.timer = setInterval(() => void this.tick(), this.intervalMs);
+      // Don't keep the event loop alive on our account — if everything
+      // else has shut down, the reporter shouldn't block exit.
+      (
+         this.timer as ReturnType<typeof setInterval> & {
+            unref?: () => void;
+         }
+      ).unref?.();
+      logger.info(
+         `ProcessStatsReporter started (intervalMs=${this.intervalMs})`,
+      );
+   }
+   /** Stop sampling. Safe to call when the reporter is not running. */
+   public stop(): void {
+      if (this.timer !== null) {
+         clearInterval(this.timer);
+         this.timer = null;
+      }
+   }
+   /**
+    * Take one sample and log it. Every failure is caught and logged
+    * at `warn`, so the returned promise never rejects.
+    */
+   private async tick(): Promise<void> {
+      try {
+         const mem = process.memoryUsage();
+         // /proc/self/status is only read on Linux.
+         const proc =
+            process.platform === "linux" ? readLinuxProcStatus() : null;
+         const bun = await readBunJscStats();
+         const governor = this.memoryGovernor?.getStatus() ?? null;
+         logger.info("process stats", {
+            uptimeSeconds: Math.round(process.uptime()),
+            nodeMemory: {
+               rssBytes: mem.rss,
+               heapTotalBytes: mem.heapTotal,
+               heapUsedBytes: mem.heapUsed,
+               externalBytes: mem.external,
+               arrayBuffersBytes: mem.arrayBuffers,
+            },
+            linux: proc,
+            bunJsc: bun,
+            memoryGovernor: governor,
+         });
+      } catch (err) {
+         logger.warn("ProcessStatsReporter tick failed", { error: err });
+      }
+   }
+}

package/src/service/schema_worker.ts ADDED Viewed

@@ -0,0 +1,123 @@
+/**
+ * Worker thread that owns one capped DuckDB connection and answers
+ * schema-introspection requests for parquet/csv files. Running this
+ * off the main thread isolates the native DuckDB thread pool — when
+ * the worker exits, its threads die with it, which puts a hard
+ * ceiling on the leak class that OOM-killed prod
+ * (worker-76b49bdb89-8bsv4: 466 leaked Bun Pool threads).
+ *
+ * Protocol (parent ↔ worker):
+ *   parent → worker:  { id, packagePath, databasePath }
+ *   worker → parent:  { id, ok: true,  result: SchemaResult }
+ *                  |  { id, ok: false, error: { message, stack? } }
+ *
+ * One request at a time per worker — the pool in the parent
+ * (`schema_worker_pool.ts`) handles fan-out. Keeping the worker
+ * single-threaded from the JS side matches DuckDB's behavior on a
+ * single connection and avoids head-of-line blocking inside the
+ * worker itself.
+ */
+import { DuckDBConnection } from "@malloydata/db-duckdb";
+import "@malloydata/db-duckdb/native";
+import {
+   ConnectionRuntime,
+   EmptyURLReader,
+   SourceDef,
+} from "@malloydata/malloy";
+import * as path from "path";
+import { parentPort } from "worker_threads";
+export interface SchemaRequest { // parent → worker message
+   id: number; // echoed back unchanged in the matching SchemaResponse
+   packagePath: string; // directory that databasePath is joined onto
+   databasePath: string; // data file path relative to packagePath; also returned as the result name
+}
+export interface SchemaResponse { // worker → parent message
+   id: number; // id of the SchemaRequest being answered
+   ok: boolean; // true: `result` is set; false: `error` is set
+   result?: {
+      name: string; // the request's databasePath
+      rowCount: number; // value of the `count()` aggregate over the file
+      columns: Array<{ type: string; name: string }>; // one entry per field of the introspected source
+   };
+   error?: { message: string; stack?: string }; // plain-data copy of the thrown Error
+}
+if (!parentPort) {
+   // Defensive: schema_worker.ts must only be loaded as a worker. If
+   // someone accidentally imports it from the main thread the
+   // connection below would still allocate its native pool there,
+   // recreating the exact leak this file exists to fix. `parentPort`
+   // is null outside a worker thread, which is what this check keys on.
+   throw new Error("schema_worker.ts loaded outside a worker thread");
+}
+// One DuckDB connection per worker, capped tight. Schema introspection
+// reads parquet footers / csv headers — it does not need parallelism
+// or a large memory arena. The cap is what keeps the per-worker
+// native-thread cost bounded. The connection is created at module load
+// and shared by every request this worker handles.
+const connection = new DuckDBConnection({
+   name: "duckdb", // must match the `duckdb.` prefix used in the generated Malloy source
+   databasePath: ":memory:",
+   threads: 1, // presumably DuckDB's thread cap — confirm against the db-duckdb options
+   memoryLimit: "256MB", // presumably DuckDB's memory_limit — confirm against the db-duckdb options
+});
+/**
+ * Introspect one data file through the worker's shared DuckDB
+ * connection: load it as a Malloy source to read its field list, then
+ * run a `count()` aggregate to get the row count.
+ *
+ * @param req the request; `packagePath` and `databasePath` are joined
+ *    to locate the file.
+ * @returns a response carrying the request's `id`. Never throws — any
+ *    failure is converted into `{ ok: false, error }` so it can be
+ *    posted back to the parent as plain data.
+ */
+async function handleRequest(req: SchemaRequest): Promise<SchemaResponse> {
+   try {
+      const fullPath = path.join(req.packagePath, req.databasePath);
+      // DuckDB on Windows supports forward slashes, and this avoids
+      // backslash-escaping issues in the inline source below.
+      const normalizedPath = fullPath.replace(/\\/g, "/");
+      // Backslashes are gone after normalization, so the only character
+      // that can still end the single-quoted literal early is `'`.
+      // Escape it so a file such as `o'brien.parquet` cannot break (or
+      // inject into) the generated Malloy source.
+      // NOTE(review): confirm the DuckDB connection also quotes the
+      // path safely when it builds its own SQL from the table name.
+      const quotedPath = normalizedPath.replace(/'/g, "\\'");
+      const runtime = new ConnectionRuntime({
+         urlReader: new EmptyURLReader(),
+         connections: [connection],
+      });
+      const model = runtime.loadModel(
+         `source: temp is duckdb.table('${quotedPath}')`,
+      );
+      const modelDef = await model.getModel();
+      const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
+      const columns = fields.map((field) => ({
+         type: String(field.type),
+         name: field.name,
+      }));
+      const runner = model.loadQuery(
+         "run: temp->{aggregate: row_count is count()}",
+      );
+      const result = await runner.run();
+      const rowCount = result.data.value[0].row_count?.valueOf() as number;
+      return {
+         id: req.id,
+         ok: true,
+         result: { name: req.databasePath, rowCount, columns },
+      };
+   } catch (err) {
+      // Non-Error throwables are wrapped so `message` is always a string.
+      const error = err instanceof Error ? err : new Error(String(err));
+      return {
+         id: req.id,
+         ok: false,
+         error: { message: error.message, stack: error.stack },
+      };
+   }
+}
+parentPort.on("message", async (msg: SchemaRequest) => { // each message is handled as it arrives; nothing here queues or serializes requests
+   const response = await handleRequest(msg); // handleRequest converts failures into an error response instead of throwing
+   parentPort!.postMessage(response);
+});
+// When the parent port closes, close the connection so DuckDB releases
+// its native threads cleanly instead of leaking them past worker exit.
+// Only the port's "close" event is wired up below.
+const shutdown = async () => {
+   try {
+      await connection.close();
+   } catch {
+      // best effort: a failed close must not prevent the exit below
+   }
+   process.exit(0); // NOTE(review): in a Node worker thread this ends the worker, not the whole process — confirm the same holds under Bun
+};
+parentPort.on("close", () => void shutdown());