@malloy-publisher/server 0.0.198-dev → 0.0.198-dev2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +32 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/default-publisher.config.json +23 -0
  19. package/dist/instrumentation.mjs +22 -3
  20. package/dist/server.mjs +1522 -651
  21. package/dist/service/schema_worker.mjs +61 -0
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/config.spec.ts +306 -0
  27. package/src/config.ts +222 -2
  28. package/src/controller/compile.controller.ts +3 -1
  29. package/src/controller/connection.controller.ts +1 -1
  30. package/src/controller/model.controller.ts +8 -1
  31. package/src/controller/package.controller.ts +70 -29
  32. package/src/controller/query.controller.ts +3 -0
  33. package/src/default-publisher.config.json +23 -0
  34. package/src/errors.spec.ts +42 -0
  35. package/src/errors.ts +21 -0
  36. package/src/health.spec.ts +90 -0
  37. package/src/health.ts +73 -45
  38. package/src/instrumentation.ts +50 -0
  39. package/src/logger.ts +1 -3
  40. package/src/mcp/tools/discovery_tools.ts +6 -2
  41. package/src/mcp/tools/execute_query_tool.ts +12 -0
  42. package/src/path_safety.spec.ts +158 -0
  43. package/src/path_safety.ts +140 -0
  44. package/src/pg_helpers.spec.ts +226 -0
  45. package/src/pg_helpers.ts +129 -0
  46. package/src/server-old.ts +3 -23
  47. package/src/server.ts +54 -0
  48. package/src/service/connection.spec.ts +6 -4
  49. package/src/service/connection.ts +8 -3
  50. package/src/service/connection_config.ts +2 -2
  51. package/src/service/environment.ts +621 -176
  52. package/src/service/environment_admission.spec.ts +180 -0
  53. package/src/service/environment_store.ts +31 -0
  54. package/src/service/filter_integration.spec.ts +110 -0
  55. package/src/service/givens_integration.spec.ts +192 -0
  56. package/src/service/manifest_service.spec.ts +7 -2
  57. package/src/service/manifest_service.ts +8 -2
  58. package/src/service/materialization_service.ts +14 -3
  59. package/src/service/model.spec.ts +105 -0
  60. package/src/service/model.ts +91 -7
  61. package/src/service/package.spec.ts +11 -7
  62. package/src/service/package.ts +53 -56
  63. package/src/service/package_memory_governor.spec.ts +173 -0
  64. package/src/service/package_memory_governor.ts +233 -0
  65. package/src/service/package_race.spec.ts +208 -0
  66. package/src/service/process_stats_reporter.ts +169 -0
  67. package/src/service/schema_worker.ts +123 -0
  68. package/src/service/schema_worker_pool.ts +278 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +235 -0
  74. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  75. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  76. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  77. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  78. package/tests/unit/storage/StorageManager.test.ts +166 -0
  79. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  80. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  81. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  82. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  83. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  84. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  85. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  86. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  87. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -0,0 +1,90 @@
1
+ import { afterEach, beforeEach, describe, expect, it, spyOn } from "bun:test";
2
+ import { Server } from "http";
3
+ import { performGracefulShutdownAfterDrain } from "./health";
4
+ import { logger } from "./logger";
5
+
6
// Regression test for the graceful-shutdown ordering bug that caused
//   [winston] Attempt to write logs with no transports: {"message":"Waiting 50 seconds..."}
// to appear in production logs. logger.close() must run after every
// logger.* call, including the "Waiting ... seconds after server close
// before exit..." message.
//
// Tests call performGracefulShutdownAfterDrain directly rather than
// emitting SIGTERM, so module-level operationalState is not mutated.
// Everything stubbed in beforeEach (the logger spies and process.exit)
// is undone in afterEach so the stubs cannot leak into sibling specs
// running in the same process.
describe("performGracefulShutdownAfterDrain: shutdown ordering", () => {
   const originalExit = process.exit;
   // Ordered record of the observable shutdown steps: "info:<msg>",
   // "close" and "exit".
   let callOrder: string[];
   // Logger spies installed by beforeEach, kept so afterEach can restore
   // the real logger methods.
   let loggerSpies: Array<{ mockRestore(): void }> = [];

   beforeEach(() => {
      callOrder = [];

      const infoSpy = spyOn(logger, "info");
      loggerSpies.push(infoSpy);
      infoSpy.mockImplementation(((msg: string) => {
         callOrder.push(`info:${msg}`);
         return logger;
      }) as never);

      const closeSpy = spyOn(logger, "close");
      loggerSpies.push(closeSpy);
      closeSpy.mockImplementation((() => {
         callOrder.push("close");
         return logger;
      }) as never);

      // Silence warn/error calls so spec output stays clean. They are
      // not load-bearing for these assertions.
      const warnSpy = spyOn(logger, "warn");
      loggerSpies.push(warnSpy);
      warnSpy.mockImplementation((() => logger) as never);

      const errorSpy = spyOn(logger, "error");
      loggerSpies.push(errorSpy);
      errorSpy.mockImplementation((() => logger) as never);

      // Record the exit instead of terminating the test runner.
      process.exit = ((_code?: number) => {
         callOrder.push("exit");
      }) as never;
   });

   afterEach(() => {
      process.exit = originalExit;
      // Put the real logger methods back; without this the stubbed
      // logger stays in place for every spec that runs afterwards.
      for (const spy of loggerSpies) {
         spy.mockRestore();
      }
      loggerSpies = [];
   });

   // A server stub that reports "not listening", so the function under
   // test skips server.close() and goes straight to the later steps.
   const fakeServer = (): Server => ({ listening: false }) as unknown as Server;

   it("logs the 'Waiting ...' message before closing the logger", async () => {
      await performGracefulShutdownAfterDrain(fakeServer(), fakeServer(), 0.05);

      const waitingIdx = callOrder.findIndex((entry) =>
         entry.startsWith("info:Waiting"),
      );
      const closeIdx = callOrder.indexOf("close");
      const exitIdx = callOrder.indexOf("exit");

      expect(waitingIdx).toBeGreaterThanOrEqual(0);
      expect(closeIdx).toBeGreaterThanOrEqual(0);
      expect(exitIdx).toBeGreaterThanOrEqual(0);
      expect(waitingIdx).toBeLessThan(closeIdx);
      expect(closeIdx).toBeLessThan(exitIdx);
   });

   it("emits no logger.info calls after logger.close", async () => {
      await performGracefulShutdownAfterDrain(fakeServer(), fakeServer(), 0.05);

      const closeIdx = callOrder.indexOf("close");
      const lateInfoIdx = callOrder.findIndex(
         (entry, idx) => idx > closeIdx && entry.startsWith("info:"),
      );
      expect(closeIdx).toBeGreaterThanOrEqual(0);
      expect(lateInfoIdx).toBe(-1);
   });

   it("closes the logger exactly once", async () => {
      await performGracefulShutdownAfterDrain(fakeServer(), fakeServer(), 0.05);

      const closes = callOrder.filter((entry) => entry === "close").length;
      expect(closes).toBe(1);
   });

   it("skips the 'Waiting ...' message when gracefulCloseTimeoutSeconds is 0", async () => {
      await performGracefulShutdownAfterDrain(fakeServer(), fakeServer(), 0);

      const waitingCalls = callOrder.filter((entry) =>
         entry.startsWith("info:Waiting"),
      );
      expect(waitingCalls.length).toBe(0);
      expect(callOrder.indexOf("close")).toBeGreaterThanOrEqual(0);
      expect(callOrder.indexOf("exit")).toBeGreaterThanOrEqual(0);
   });
});
package/src/health.ts CHANGED
@@ -57,8 +57,8 @@ export function markNotReady(): void {
57
57
  * 2. Waits shutdownDrainDurationSeconds to allow in-flight requests to complete
58
58
  * 3. Sets preGracefulShutdownCompleted flag (enables drainingGuard middleware to reject new requests)
59
59
  * 4. Closes main server and MCP server (stops accepting new connections)
60
- * 5. Closes logger
61
- * 6. Waits shutdownGracefulCloseTimeoutSeconds (if > 0) for final cleanup
60
+ * 5. Waits shutdownGracefulCloseTimeoutSeconds (if > 0) for final cleanup
61
+ * 6. Closes logger (last, so any logs emitted during cleanup are flushed)
62
62
  * 7. Exits process
63
63
  *
64
64
  * Note: drainingGuard only rejects requests after step 3 completes. During step 2,
@@ -92,51 +92,79 @@ export function registerSignalHandlers(
92
92
  }, shutdownDrainDurationSeconds * 1000),
93
93
  );
94
94
 
95
- const closeServer = (server: Server, name: string) =>
96
- new Promise<void>((resolve) => {
97
- if (server && server.listening) {
98
- server.close((err) => {
99
- if (err) {
100
- logger.error(`${name} close error:`, err);
101
- } else {
102
- logger.info(`${name} closed`);
103
- }
104
- resolve();
105
- });
106
- } else {
107
- resolve();
108
- }
109
- });
110
-
111
- await Promise.all([
112
- closeServer(server, "Main server"),
113
- closeServer(mcpServer, "MCP server"),
114
- ]);
115
-
116
- try {
117
- await shutdownSDK();
118
- logger.info("OpenTelemetry SDK shut down");
119
- } catch (_error) {
120
- /* do nothing */
121
- }
122
-
123
- try {
124
- logger.close();
125
- } catch (_error) {
126
- /* do nothing */
127
- }
128
-
129
- if (shutdownGracefulCloseTimeoutSeconds > 0) {
130
- logger.info(
131
- `Waiting ${shutdownGracefulCloseTimeoutSeconds} seconds after server close before exit...`,
132
- );
133
- await new Promise((resolve) =>
134
- setTimeout(resolve, shutdownGracefulCloseTimeoutSeconds * 1000),
135
- );
136
- }
137
- process.exit(0);
95
+ await performGracefulShutdownAfterDrain(
96
+ server,
97
+ mcpServer,
98
+ shutdownGracefulCloseTimeoutSeconds,
99
+ );
138
100
  });
139
101
  }
102
+
103
/**
 * Performs the post-drain shutdown work, in this order:
 *
 * 1. Closes the main and MCP HTTP servers in parallel (servers that are
 *    missing or not listening are skipped).
 * 2. Shuts down the OpenTelemetry SDK (best-effort; a failure is logged
 *    and shutdown continues).
 * 3. If `shutdownGracefulCloseTimeoutSeconds` is greater than 0, logs a
 *    "Waiting ..." message and sleeps for that many seconds.
 * 4. Closes the winston logger.
 * 5. Calls `process.exit(0)`.
 *
 * The logger is closed last on purpose: calling any `logger.*` method
 * after `logger.close()` triggers winston's "Attempt to write logs with
 * no transports" warning.
 *
 * Exported so unit tests can exercise the close + log + exit ordering
 * without emitting SIGTERM (which would leave module-level
 * operationalState stuck in "draining" and leak into sibling specs).
 *
 * @param server - The main HTTP server.
 * @param mcpServer - The MCP HTTP server.
 * @param shutdownGracefulCloseTimeoutSeconds - Seconds to wait between
 *   closing the servers and closing the logger; values <= 0 skip the wait.
 * @returns A promise that settles after `process.exit(0)` has been
 *   called (only observable when `process.exit` is stubbed).
 */
export async function performGracefulShutdownAfterDrain(
   server: Server,
   mcpServer: Server,
   shutdownGracefulCloseTimeoutSeconds: number,
): Promise<void> {
   // Resolves once `target` has stopped listening. Never rejects: a close
   // error is logged and shutdown carries on.
   const closeServer = (target: Server, name: string) =>
      new Promise<void>((resolve) => {
         if (!target || !target.listening) {
            resolve();
            return;
         }
         target.close((err) => {
            if (err) {
               logger.error(`${name} close error:`, err);
            } else {
               logger.info(`${name} closed`);
            }
            resolve();
         });
      });

   await Promise.all([
      closeServer(server, "Main server"),
      closeServer(mcpServer, "MCP server"),
   ]);

   try {
      await shutdownSDK();
      logger.info("OpenTelemetry SDK shut down");
   } catch (error) {
      // Best-effort: a telemetry failure must not block process exit, but
      // it is still worth surfacing while the logger is open.
      logger.warn("OpenTelemetry SDK shutdown failed:", error);
   }

   if (shutdownGracefulCloseTimeoutSeconds > 0) {
      logger.info(
         `Waiting ${shutdownGracefulCloseTimeoutSeconds} seconds after server close before exit...`,
      );
      await new Promise((resolve) =>
         setTimeout(resolve, shutdownGracefulCloseTimeoutSeconds * 1000),
      );
   }

   // Close the logger last so anything emitted during the wait window
   // above (or by other shutdown paths still running) reaches its
   // transports. Closing earlier triggers winston's
   // "Attempt to write logs with no transports" warning on any
   // subsequent logger call.
   try {
      logger.close();
   } catch (_error) {
      // Deliberately ignored: the logger is the thing that failed, so
      // there is nowhere left to report to, and the process exits next.
   }

   process.exit(0);
}
140
168
  /**
141
169
  * Middleware that returns 503 for non-health and metrics requests when service is draining.
142
170
  * Must be registered before application routes.
@@ -1,3 +1,4 @@
1
+ import { monitorEventLoopDelay } from "node:perf_hooks";
1
2
  import { metrics } from "@opentelemetry/api";
2
3
  import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
3
4
  import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto";
@@ -116,6 +117,55 @@ const httpRequestCount = meter.createCounter("http_server_requests_total", {
116
117
  description: "Total number of HTTP requests",
117
118
  });
118
119
 
120
// Event-loop-delay metrics.
//
// /health/liveness is a pure synchronous 200 handler, so under K8s the
// only way it can fail is a blocked event loop. Publishing p50/p99/max
// lets an operator line up liveness restarts with sustained event-loop
// pressure (large Malloy compiles, GC, etc.).

// The histogram reports delays in nanoseconds; the gauges are in ms.
const EVENT_LOOP_NS_PER_MS = 1e6;

const eventLoopHistogram = monitorEventLoopDelay({ resolution: 20 });
eventLoopHistogram.enable();

// All three gauges share the same unit and differ only in name/description.
const createEventLoopLagGauge = (name: string, description: string) =>
   meter.createObservableGauge(name, { description, unit: "ms" });

const eventLoopLagP50 = createEventLoopLagGauge(
   "publisher_event_loop_lag_p50_ms",
   "Event loop delay p50 since the last scrape, in milliseconds",
);
const eventLoopLagP99 = createEventLoopLagGauge(
   "publisher_event_loop_lag_p99_ms",
   "Event loop delay p99 since the last scrape, in milliseconds",
);
const eventLoopLagMax = createEventLoopLagGauge(
   "publisher_event_loop_lag_max_ms",
   "Event loop delay max since the last scrape, in milliseconds",
);

// One batch callback reads all three values and only then resets the
// histogram, so the reset can never land between two of the reads.
meter.addBatchObservableCallback(
   (observableResult) => {
      const p50Ms = eventLoopHistogram.percentile(50) / EVENT_LOOP_NS_PER_MS;
      const p99Ms = eventLoopHistogram.percentile(99) / EVENT_LOOP_NS_PER_MS;
      const maxMs = eventLoopHistogram.max / EVENT_LOOP_NS_PER_MS;

      observableResult.observe(eventLoopLagP50, p50Ms);
      observableResult.observe(eventLoopLagP99, p99Ms);
      observableResult.observe(eventLoopLagMax, maxMs);

      // Start a fresh window so the next scrape only sees new samples.
      eventLoopHistogram.reset();
   },
   [eventLoopLagP50, eventLoopLagP99, eventLoopLagMax],
);
168
+
119
169
  const IGNORED_PATHS = new Set([
120
170
  "/health",
121
171
  "/health/liveness",
package/src/logger.ts CHANGED
@@ -28,9 +28,7 @@ export const logger = winston.createLogger({
28
28
  ? winston.format.combine(
29
29
  winston.format.uncolorize(),
30
30
  winston.format.timestamp(),
31
- winston.format.metadata({
32
- fillExcept: ["message", "level", "timestamp"],
33
- }),
31
+ winston.format.errors({ stack: true }),
34
32
  winston.format.json(),
35
33
  )
36
34
  : winston.format.combine(
@@ -222,8 +222,12 @@ export function registerTools(
222
222
  throw new Error(`Model not found: ${modelPath}`);
223
223
  }
224
224
 
225
- // Use the new getModelFileText method
226
- const fileText = await pkg.getModelFileText(modelPath);
225
+ // Route through the Environment so the disk read is serialized
226
+ // against installPackage / deletePackage.
227
+ const fileText = await environment.getModelFileText(
228
+ packageName,
229
+ modelPath,
230
+ );
227
231
 
228
232
  console.log(
229
233
  `[MCP LOG] Successfully retrieved model text for ${modelPath}`,
@@ -1,6 +1,7 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js";
3
3
  import { z } from "zod";
4
+ import type { GivenValue } from "@malloydata/malloy";
4
5
  import { logger } from "../../logger";
5
6
  import { EnvironmentStore } from "../../service/environment_store";
6
7
  import { getMalloyErrorDetails, type ErrorDetails } from "../error_messages";
@@ -30,6 +31,12 @@ const executeQueryShape = {
30
31
  .describe(
31
32
  "Filter parameter values keyed by filter name. Used with sources that declare #(filter) annotations.",
32
33
  ),
34
+ givens: z
35
+ .record(z.unknown())
36
+ .optional()
37
+ .describe(
38
+ "Per-query given values that override model defaults. Keys are given names declared in the model's given: block.",
39
+ ),
33
40
  };
34
41
 
35
42
  // Type inference is handled automatically by the MCP server based on the executeQueryShape
@@ -56,6 +63,7 @@ export function registerExecuteQueryTool(
56
63
  sourceName,
57
64
  queryName,
58
65
  filterParams,
66
+ givens,
59
67
  } = params;
60
68
 
61
69
  logger.info("[MCP Tool executeQuery] Received params:", { params });
@@ -128,6 +136,8 @@ export function registerExecuteQueryTool(
128
136
  undefined,
129
137
  query,
130
138
  filterParams,
139
+ undefined,
140
+ givens as Record<string, GivenValue> | undefined,
131
141
  );
132
142
  const { validateRenderTags } = await import(
133
143
  "@malloydata/render-validator"
@@ -174,6 +184,8 @@ export function registerExecuteQueryTool(
174
184
  queryName,
175
185
  undefined,
176
186
  filterParams,
187
+ undefined,
188
+ givens as Record<string, GivenValue> | undefined,
177
189
  );
178
190
  const { validateRenderTags } = await import(
179
191
  "@malloydata/render-validator"
@@ -0,0 +1,158 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import * as path from "path";
3
+
4
+ import { BadRequestError } from "./errors";
5
+ import {
6
+ assertSafeEnvironmentPath,
7
+ assertSafePackageName,
8
+ assertSafeRelativeModelPath,
9
+ safeJoinUnderRoot,
10
+ } from "./path_safety";
11
+
12
describe("assertSafePackageName", () => {
   // Names the allowlist must let through, including the 255-char maximum.
   const acceptedNames: string[] = [
      "pkg",
      "test_package",
      "test-package",
      "TestPackage1",
      "test.package.name",
      "a",
      "x".repeat(255),
   ];

   // [label, input] pairs for strings the allowlist must refuse.
   const rejectedNames: Array<[string, string]> = [
      ["empty", ""],
      ["dot", "."],
      ["dot-dot", ".."],
      ["leading dot", ".staging"],
      ["forward slash", "foo/bar"],
      ["backslash", "foo\\bar"],
      ["null byte", "foo\0bar"],
      ["traversal", "../etc/passwd"],
      ["abs", "/etc/passwd"],
      ["space", "my pkg"],
      ["unicode", "pkg\u202E"],
      ["too long", "x".repeat(256)],
   ];

   // [label, input] pairs for values that are not strings at all.
   const rejectedNonStrings: Array<[string, unknown]> = [
      ["number", 42],
      ["null", null],
      ["undefined", undefined],
      ["object", { name: "pkg" }],
   ];

   it.each(acceptedNames)("accepts %p", (name) => {
      const call = () => assertSafePackageName(name);
      expect(call).not.toThrow();
   });

   it.each(rejectedNames)("rejects %s (%p)", (_label, name) => {
      const call = () => assertSafePackageName(name);
      expect(call).toThrow(BadRequestError);
   });

   it.each(rejectedNonStrings)("rejects non-string %s (%p)", (_label, value) => {
      const call = () => assertSafePackageName(value);
      expect(call).toThrow(BadRequestError);
   });
});
51
+
52
describe("assertSafeRelativeModelPath", () => {
   // Relative model paths the validator must let through.
   const acceptedPaths: string[] = [
      "model.malloy",
      "models/foo.malloy",
      "a/b/c/d.malloynb",
      "deep/nested/file_name-1.malloy",
   ];

   // [label, input] pairs for paths the validator must refuse.
   const rejectedPaths: Array<[string, string]> = [
      ["empty", ""],
      ["leading slash (absolute)", "/etc/passwd"],
      ["traversal", "../etc/passwd"],
      ["embedded traversal", "models/../../../etc/passwd"],
      ["embedded dot segment", "models/./foo.malloy"],
      ["double slash", "models//foo.malloy"],
      ["trailing slash", "models/foo/"],
      ["backslash", "models\\foo.malloy"],
      ["null byte", "models/foo\0.malloy"],
      ["dotfile segment", ".staging/foo.malloy"],
      ["dotfile leaf", "models/.hidden.malloy"],
   ];

   it.each(acceptedPaths)("accepts %p", (modelPath) => {
      const call = () => assertSafeRelativeModelPath(modelPath);
      expect(call).not.toThrow();
   });

   it.each(rejectedPaths)("rejects %s (%p)", (_label, modelPath) => {
      const call = () => assertSafeRelativeModelPath(modelPath);
      expect(call).toThrow(BadRequestError);
   });

   it("rejects non-string inputs", () => {
      const nonStrings: unknown[] = [undefined, 123];
      for (const value of nonStrings) {
         expect(() => assertSafeRelativeModelPath(value)).toThrow(
            BadRequestError,
         );
      }
   });
});
87
+
88
describe("assertSafeEnvironmentPath", () => {
   // Absolute POSIX and Windows-style paths the validator must let through.
   const acceptedPaths: string[] = [
      "/etc/publisher",
      "/var/lib/publisher/env1",
      "/Users/me/data",
      "/a",
      "C:\\Users\\me\\publisher",
      "C:/Users/me/publisher",
   ];

   // [label, input] pairs for paths the validator must refuse.
   const rejectedPaths: Array<[string, string]> = [
      ["empty", ""],
      ["relative", "publisher/data"],
      ["traversal in middle", "/var/lib/../../etc/passwd"],
      ["traversal at end", "/var/lib/publisher/.."],
      ["null byte", "/var/lib/publisher\0"],
      ["bare dot-dot", ".."],
      ["bare dot", "."],
      ["too long", "/" + "a".repeat(5000)],
   ];

   it.each(acceptedPaths)("accepts %p", (p) => {
      const call = () => assertSafeEnvironmentPath(p);
      expect(call).not.toThrow();
   });

   it.each(rejectedPaths)("rejects %s (%p)", (_label, p) => {
      const call = () => assertSafeEnvironmentPath(p);
      expect(call).toThrow(BadRequestError);
   });

   it("rejects non-string inputs", () => {
      const nonStrings: unknown[] = [undefined, null, 42];
      for (const value of nonStrings) {
         expect(() => assertSafeEnvironmentPath(value)).toThrow(
            BadRequestError,
         );
      }
   });
});
121
+
122
describe("safeJoinUnderRoot", () => {
   const root = "/tmp/test-root";

   // Asserts that joining `segments` onto the shared root is refused.
   const expectRejected = (...segments: string[]): void => {
      expect(() => safeJoinUnderRoot(root, ...segments)).toThrow(
         BadRequestError,
      );
   };

   it("returns the resolved root when joined with no segments", () => {
      const expected = path.resolve(root);
      expect(safeJoinUnderRoot(root)).toBe(expected);
   });

   it("joins safe segments into a path under root", () => {
      const expected = path.resolve(root, "pkg", "model.malloy");
      expect(safeJoinUnderRoot(root, "pkg", "model.malloy")).toBe(expected);
   });

   it("throws when traversal escapes the root", () => {
      expectRejected("..");
      expectRejected("..", "etc", "passwd");
      expectRejected("pkg", "..", "..", "etc");
   });

   it("throws when an absolute segment overrides the root", () => {
      expectRejected("/etc/passwd");
   });

   it("does NOT match a sibling directory with the same prefix", () => {
      // Resolving root + ".." + "test-root-bad" yields "/tmp/test-root-bad".
      // That string begins with "/tmp/test-root" but is a sibling of the
      // root, not a child, so it must still be refused.
      expectRejected("..", "test-root-bad");
   });
});
@@ -0,0 +1,140 @@
1
+ import * as path from "path";
2
+
3
+ import { BadRequestError } from "./errors";
4
+
5
+ /**
6
+ * Path-safety helpers used by `Environment` (and any other service that
7
+ * builds an on-disk path from request data) to defend against directory
8
+ * traversal. The intent is two-fold:
9
+ *
10
+ * 1. **Source-side allowlist**: `assertSafePackageName` /
11
+ * `assertSafeRelativeModelPath` reject hostile inputs (`..`, leading
12
+ * `/`, `\`, NUL, dotfiles) at the entry of every public service
13
+ * method before any path-construction happens. These throw
14
+ * `BadRequestError` so the controller layer's error mapper returns
15
+ * HTTP 400.
16
+ *
17
+ * 2. **Sink-side containment**: `safeJoinUnderRoot` joins, resolves,
18
+ * and verifies the result is strictly within the supplied root.
19
+ * Even if a future caller forgets the source-side check, the sink
20
+ * refuses to hand back an escaping path. This is the standard
21
+ * "resolve-and-contain" pattern that CodeQL's `js/path-injection`
22
+ * query recognises as a sanitizer.
23
+ */
24
+
25
// Single path segment: 1-255 characters drawn from ASCII letters,
// digits, `-`, `_` and `.`. The one lookahead forbids a leading `.`,
// which rules out `.` and `..` as well as internal sibling dirs
// (`.staging`, `.retired`) and editor / VCS dirs, so none of them can be
// addressed by name from outside.
const SAFE_NAME_RE = /^(?!\.)[A-Za-z0-9._-]{1,255}$/;

// Longest relative model path accepted, in characters.
const MAX_MODEL_PATH_LEN = 1024;

// An environment path is server-controlled (config / disk-derived), but
// CodeQL conservatively treats it as tainted because Express handlers on
// the same class touch user input. The combined regex test +
// `..` / NUL / length check at the constructor gate is the sanitizer
// barrier the `js/path-injection` query recognises.
//
// This pattern only enforces "absolute POSIX-or-Windows path made of
// printable ASCII" (which also excludes NUL); the `..` and length checks
// are performed separately by the caller of this regex.
const SAFE_ENVIRONMENT_PATH_RE = /^(?:\/|[A-Za-z]:[\\/])[\x20-\x7E]*$/;
// Longest environment path accepted, in characters.
const MAX_ENVIRONMENT_PATH_LEN = 4096;
40
+
41
/**
 * Validates that `packageName` is a string matching `SAFE_NAME_RE`,
 * i.e. a plausible single-segment package name.
 *
 * The allowlist is intentionally strict; callers get a
 * `BadRequestError` for anything else, including non-string values.
 *
 * @param packageName - Untrusted value to validate.
 * @throws BadRequestError when the value is not a string or does not
 *   match the allowlist.
 */
export function assertSafePackageName(packageName: unknown): void {
   const isAllowed =
      typeof packageName === "string" && SAFE_NAME_RE.test(packageName);
   if (isAllowed) {
      return;
   }
   throw new BadRequestError(
      `Invalid package name: must be 1-255 characters of letters, digits, "-", "_", or "." and must not start with "."`,
   );
}
54
+
55
/**
 * Validates that `modelPath` is a plausible *relative* path to a model
 * file inside a package directory.
 *
 * Forward slashes are permitted so models can live in subdirectories
 * (e.g. `models/foo.malloy`). Rejected: non-strings, empty or over-long
 * values, NUL bytes, backslashes, absolute paths, empty segments
 * (leading, trailing or doubled `/`), and any segment that starts with
 * `.` (which covers `.`, `..` and dotfiles).
 *
 * @param modelPath - Untrusted value to validate.
 * @throws BadRequestError with the message "Invalid model path" for
 *   every rejected input.
 */
export function assertSafeRelativeModelPath(modelPath: unknown): void {
   if (typeof modelPath !== "string") {
      throw new BadRequestError(`Invalid model path`);
   }

   // Whole-string checks first: length bounds, forbidden characters and
   // anything that looks absolute.
   const badLength =
      modelPath.length === 0 || modelPath.length > MAX_MODEL_PATH_LEN;
   const hasForbiddenChar =
      modelPath.includes("\0") || modelPath.includes("\\");
   const looksAbsolute =
      path.isAbsolute(modelPath) || modelPath.startsWith("/");
   if (badLength || hasForbiddenChar || looksAbsolute) {
      throw new BadRequestError(`Invalid model path`);
   }

   // Then each `/`-separated segment: no empty, `.`, `..` or dot-prefixed
   // segments.
   const isUnsafeSegment = (segment: string): boolean =>
      segment === "" ||
      segment === "." ||
      segment === ".." ||
      segment.startsWith(".");
   if (modelPath.split("/").some(isUnsafeSegment)) {
      throw new BadRequestError(`Invalid model path`);
   }
}
84
+
85
/**
 * Validates that `environmentPath` looks like a server-controlled
 * absolute filesystem path.
 *
 * Checks run in a fixed order and each has its own error message:
 * string type, length (1 to `MAX_ENVIRONMENT_PATH_LEN`), no NUL byte,
 * no `..` anywhere in the string, and finally a match against
 * `SAFE_ENVIRONMENT_PATH_RE`.
 *
 * Intended to be applied to `environmentPath` at the constructor gate
 * so downstream `path.join(this.environmentPath, …)` sites only see a
 * value that has passed an allowlist — the sanitizer-barrier pattern
 * CodeQL's `js/path-injection` query recognises. The guards are kept as
 * plain `if (...) throw` statements for that reason.
 *
 * @param environmentPath - Value to validate.
 * @throws BadRequestError describing the first check that failed.
 */
export function assertSafeEnvironmentPath(environmentPath: unknown): void {
   if (typeof environmentPath !== "string") {
      throw new BadRequestError(`Invalid environment path: must be a string`);
   }

   const pathLength = environmentPath.length;
   if (pathLength === 0 || pathLength > MAX_ENVIRONMENT_PATH_LEN) {
      throw new BadRequestError(`Invalid environment path: bad length`);
   }

   if (environmentPath.indexOf("\0") !== -1) {
      throw new BadRequestError(`Invalid environment path: contains NUL byte`);
   }

   // Traversal guard written as `x.indexOf("..") !== -1`, the shape the
   // CodeQL `js/path-injection` query recognises as a sanitizer barrier.
   if (environmentPath.indexOf("..") !== -1) {
      throw new BadRequestError(
         `Invalid environment path: contains ".." traversal segment`,
      );
   }

   const matchesAllowlist = SAFE_ENVIRONMENT_PATH_RE.test(environmentPath);
   if (!matchesAllowlist) {
      throw new BadRequestError(
         `Invalid environment path: must be an absolute path of printable ASCII characters`,
      );
   }
}
119
+
120
/**
 * Resolves `segments` against `root` with `path.resolve` and returns
 * the result only if it is `root` itself or lies inside `root`.
 *
 * Because `path.resolve` is used (not `path.join`), an absolute segment
 * replaces everything before it; such a result is rejected unless it
 * still lands under `root`. `..` segments that climb out of `root` are
 * rejected the same way.
 *
 * The containment check is purely lexical: nothing is read from disk
 * and symlinks are NOT resolved. A symlink that lives inside `root` but
 * points elsewhere will pass this check; callers that need protection
 * against that must compare real paths (e.g. via `fs.realpath`)
 * themselves.
 *
 * Callers should still run `assertSafePackageName` / similar on
 * user-controlled segments first — this helper is the second line of
 * defense, not the first.
 *
 * @param root - Directory the result must stay within.
 * @param segments - Path segments to resolve beneath `root`.
 * @returns The absolute, normalized path.
 * @throws BadRequestError when the resolved path is outside `root`.
 */
export function safeJoinUnderRoot(root: string, ...segments: string[]): string {
   const resolvedRoot = path.resolve(root);
   const joined = path.resolve(resolvedRoot, ...segments);
   // Compare against "root + separator" rather than the bare root so a
   // sibling such as "/tmp/root-bad" is not mistaken for a child of
   // "/tmp/root". A root that already ends in a separator (e.g. "/") is
   // used as-is.
   const rootWithSep = resolvedRoot.endsWith(path.sep)
      ? resolvedRoot
      : resolvedRoot + path.sep;
   if (joined !== resolvedRoot && !joined.startsWith(rootWithSep)) {
      throw new BadRequestError(`Resolved path is outside of root`);
   }
   return joined;
}