@malloy-publisher/server 0.0.198-dev2 → 0.0.198-dev3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@malloy-publisher/server",
3
3
  "description": "Malloy Publisher Server",
4
- "version": "0.0.198-dev2",
4
+ "version": "0.0.198-dev3",
5
5
  "main": "dist/server.mjs",
6
6
  "bin": {
7
7
  "malloy-publisher": "dist/server.mjs"
@@ -51,7 +51,6 @@
51
51
  "@opentelemetry/sdk-metrics": "^2.0.0",
52
52
  "@opentelemetry/sdk-node": "^0.200.0",
53
53
  "@opentelemetry/sdk-trace-node": "^2.0.0",
54
- "adm-zip": "^0.5.16",
55
54
  "async-mutex": "^0.5.0",
56
55
  "aws-sdk": "^2.1692.0",
57
56
  "body-parser": "^1.20.2",
@@ -61,6 +60,7 @@
61
60
  "cors": "^2.8.5",
62
61
  "duckdb": "1.4.4",
63
62
  "express": "^4.21.0",
63
+ "extract-zip": "^2.0.1",
64
64
  "globals": "^15.9.0",
65
65
  "handlebars": "^4.7.8",
66
66
  "http-proxy-middleware": "^3.0.5",
@@ -76,7 +76,6 @@
76
76
  "@eslint/eslintrc": "^3.3.1",
77
77
  "@eslint/js": "^9.23.0",
78
78
  "@faker-js/faker": "^9.4.0",
79
- "@types/adm-zip": "^0.5.7",
80
79
  "@types/bun": "^1.2.20",
81
80
  "@types/cors": "^2.8.12",
82
81
  "@types/express": "^4.17.14",
@@ -1,8 +1,8 @@
1
1
  import { GetObjectCommand, S3 } from "@aws-sdk/client-s3";
2
2
  import { Storage } from "@google-cloud/storage";
3
- import AdmZip from "adm-zip";
4
3
  import { Mutex } from "async-mutex";
5
4
  import crypto from "crypto";
5
+ import extract from "extract-zip";
6
6
  import * as fs from "fs";
7
7
  import * as path from "path";
8
8
  import simpleGit from "simple-git";
@@ -893,6 +893,7 @@ export class EnvironmentStore {
893
893
  }
894
894
 
895
895
  public async unzipEnvironment(absoluteEnvironmentPath: string) {
896
+ const startedAt = Date.now();
896
897
  logger.info(
897
898
  `Detected zip file at "${absoluteEnvironmentPath}". Unzipping...`,
898
899
  );
@@ -906,8 +907,28 @@ export class EnvironmentStore {
906
907
  });
907
908
  await fs.promises.mkdir(unzippedEnvironmentPath, { recursive: true });
908
909
 
909
- const zip = new AdmZip(absoluteEnvironmentPath);
910
- zip.extractAllTo(unzippedEnvironmentPath, true);
910
+ // Stream-extract via yauzl (wrapped by extract-zip). Each entry's
911
+ // inflate and write are dispatched to the libuv thread pool, so the
912
+ // main event loop stays responsive even for very large archives.
913
+ // The previous adm-zip path used fs.readFileSync + zlib.inflateRawSync
914
+ // on the main thread, which parked the loop long enough on multi-
915
+ // hundred-MB packages to fail Kubernetes liveness probes mid-extract.
916
+ let entryCount = 0;
917
+ let totalUncompressedBytes = 0;
918
+ await extract(absoluteEnvironmentPath, {
919
+ dir: path.resolve(unzippedEnvironmentPath),
920
+ onEntry: (entry) => {
921
+ entryCount += 1;
922
+ totalUncompressedBytes += entry.uncompressedSize ?? 0;
923
+ },
924
+ });
925
+
926
+ const mib = (totalUncompressedBytes / (1024 * 1024)).toFixed(1);
927
+ logger.info(
928
+ `Unzipped "${absoluteEnvironmentPath}" -> "${unzippedEnvironmentPath}" ` +
929
+ `(${entryCount} entries, ${mib} MiB uncompressed) in ` +
930
+ `${formatDuration(Date.now() - startedAt)}`,
931
+ );
911
932
 
912
933
  return unzippedEnvironmentPath;
913
934
  }
@@ -184,11 +184,14 @@ export class SchemaWorkerPool {
184
184
  },
185
185
  );
186
186
 
187
+ // Lifecycle: `error` fires first (if it fires) and reports the
188
+ // crash; `exit` always fires next and is the single point where
189
+ // we replace the slot. Splitting it this way avoids a class of
190
+ // bugs where `error` respawns the slot and then `exit` respawns
191
+ // it again, leaking the worker created in between (alive Worker
192
+ // instance with a DuckDB connection — exactly what this pool
193
+ // exists to prevent).
187
194
  worker.on("error", (err) => {
188
- // A native crash inside the worker — fail the in-flight request
189
- // attributed to this slot, then respawn the worker so the pool
190
- // self-heals. Without respawn, one crash silently shrinks
191
- // capacity and concurrent loads would queue forever.
192
195
  const inFlightId = this.workerCurrentId.get(index);
193
196
  if (inFlightId !== undefined) {
194
197
  const req = this.inFlight.get(inFlightId);
@@ -198,39 +201,45 @@ export class SchemaWorkerPool {
198
201
  }
199
202
  this.workerCurrentId.delete(index);
200
203
  }
201
- logger.error("SchemaWorkerPool: worker errored, respawning", {
204
+ logger.error("SchemaWorkerPool: worker errored", {
202
205
  workerIndex: index,
203
206
  error: err,
204
207
  });
205
- if (!this.stopped) {
206
- this.workers[index] = this.spawn(index);
207
- this.drain();
208
- }
208
+ // Don't respawn here — `exit` will, after the worker has
209
+ // fully torn down its native resources.
209
210
  });
210
211
 
211
212
  worker.on("exit", (code) => {
212
213
  if (this.stopped) return;
214
+ // If `error` already fired, workerCurrentId is empty and this
215
+ // is a no-op. If the worker exited without firing `error`
216
+ // (e.g. process.exit inside the worker, or a clean exit while
217
+ // mid-request), reject any in-flight request so the caller
218
+ // doesn't hang forever.
219
+ const inFlightId = this.workerCurrentId.get(index);
220
+ if (inFlightId !== undefined) {
221
+ const req = this.inFlight.get(inFlightId);
222
+ if (req) {
223
+ this.inFlight.delete(inFlightId);
224
+ req.reject(new Error(`SchemaWorker exited with code ${code}`));
225
+ }
226
+ this.workerCurrentId.delete(index);
227
+ }
213
228
  if (code !== 0) {
214
229
  logger.warn("SchemaWorkerPool: worker exited unexpectedly", {
215
230
  workerIndex: index,
216
231
  code,
217
232
  });
218
- // Treat unexpected exit like an error: respawn so the pool
219
- // doesn't silently lose capacity.
220
- const inFlightId = this.workerCurrentId.get(index);
221
- if (inFlightId !== undefined) {
222
- const req = this.inFlight.get(inFlightId);
223
- if (req) {
224
- this.inFlight.delete(inFlightId);
225
- req.reject(
226
- new Error(`SchemaWorker exited with code ${code}`),
227
- );
228
- }
229
- this.workerCurrentId.delete(index);
230
- }
231
- this.workers[index] = this.spawn(index);
232
- this.drain();
233
+ } else {
234
+ // A clean exit while the pool is still running is also
235
+ // unexpected — workers are supposed to live as long as
236
+ // the pool does. Respawn so capacity isn't silently lost.
237
+ logger.info("SchemaWorkerPool: worker exited cleanly, respawning", {
238
+ workerIndex: index,
239
+ });
233
240
  }
241
+ this.workers[index] = this.spawn(index);
242
+ this.drain();
234
243
  });
235
244
 
236
245
  return slot;