@malloy-publisher/server 0.0.199 → 0.0.201

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/app/api-doc.yaml +110 -118
  2. package/dist/app/assets/{EnvironmentPage-Dpee_Kn6.js → EnvironmentPage-KoP4wt8H.js} +1 -1
  3. package/dist/app/assets/HomePage-HbPwKL84.js +1 -0
  4. package/dist/app/assets/MainPage-DfK4zDYO.js +2 -0
  5. package/dist/app/assets/{ModelPage-AwAugZ37.js → ModelPage-CUgSwGXg.js} +1 -1
  6. package/dist/app/assets/{PackagePage-XQ-EWGTC.js → PackagePage-CUDQNL5k.js} +1 -1
  7. package/dist/app/assets/{RouteError-3Mv8JQw7.js → RouteError-sgmtBdg8.js} +1 -1
  8. package/dist/app/assets/{WorkbookPage-DHYYpcYc.js → WorkbookPage-tnWmLcrW.js} +1 -1
  9. package/dist/app/assets/{core-DfcpQGVP.es-DQggNOdX.js → core-B3IQNPBD.es-foBNuT8L.js} +10 -10
  10. package/dist/app/assets/{index-D1pdwrUW.js → index-B5We8x8r.js} +1 -1
  11. package/dist/app/assets/{index-BUp81Qdm.js → index-KIvi9k3F.js} +1 -1
  12. package/dist/app/assets/index-PNYovl3E.js +452 -0
  13. package/dist/app/assets/{index.umd-CQH4LZU8.js → index.umd-BXcsl2XW.js} +1 -1
  14. package/dist/app/index.html +1 -1
  15. package/dist/package_load_worker.mjs +1 -1
  16. package/dist/server.mjs +1556 -1018
  17. package/package.json +1 -1
  18. package/publisher.config.json +4 -0
  19. package/src/config.spec.ts +246 -0
  20. package/src/config.ts +121 -1
  21. package/src/constants.ts +84 -1
  22. package/src/controller/connection.controller.spec.ts +803 -0
  23. package/src/controller/connection.controller.ts +207 -20
  24. package/src/controller/model.controller.ts +16 -5
  25. package/src/controller/query.controller.ts +20 -7
  26. package/src/controller/watch-mode.controller.ts +11 -2
  27. package/src/errors.spec.ts +44 -0
  28. package/src/errors.ts +34 -0
  29. package/src/filter_deprecation.spec.ts +64 -0
  30. package/src/filter_deprecation.ts +42 -0
  31. package/src/heap_check.spec.ts +144 -0
  32. package/src/heap_check.ts +144 -0
  33. package/src/mcp/handler_utils.ts +14 -0
  34. package/src/mcp/tools/execute_query_tool.ts +44 -14
  35. package/src/oom_guards.integration.spec.ts +261 -0
  36. package/src/path_safety.ts +9 -3
  37. package/src/query_cap_metrics.spec.ts +89 -0
  38. package/src/query_cap_metrics.ts +115 -0
  39. package/src/query_concurrency.spec.ts +247 -0
  40. package/src/query_concurrency.ts +236 -0
  41. package/src/query_timeout.spec.ts +224 -0
  42. package/src/query_timeout.ts +178 -0
  43. package/src/server-old.ts +20 -0
  44. package/src/server.ts +57 -72
  45. package/src/service/connection.spec.ts +244 -0
  46. package/src/service/connection.ts +14 -4
  47. package/src/service/environment.ts +124 -4
  48. package/src/service/environment_admission.spec.ts +165 -1
  49. package/src/service/environment_store.spec.ts +103 -0
  50. package/src/service/environment_store.ts +74 -23
  51. package/src/service/filter_integration.spec.ts +69 -0
  52. package/src/service/model.spec.ts +193 -3
  53. package/src/service/model.ts +95 -14
  54. package/src/service/model_limits.spec.ts +181 -0
  55. package/src/service/model_limits.ts +110 -0
  56. package/src/service/package.spec.ts +2 -6
  57. package/src/service/package.ts +6 -1
  58. package/src/service/path_injection.spec.ts +39 -0
  59. package/src/stream_helpers.spec.ts +280 -0
  60. package/src/stream_helpers.ts +162 -0
  61. package/src/test_helpers/metrics_harness.ts +126 -0
  62. package/dist/app/assets/HomePage-DLRWTNoL.js +0 -1
  63. package/dist/app/assets/MainPage-DsVt5QGM.js +0 -2
  64. package/dist/app/assets/index-Dv5bF4Ii.js +0 -451
package/src/server-old.ts CHANGED
@@ -41,6 +41,7 @@ import {
41
41
  NotImplementedError,
42
42
  } from "./errors";
43
43
  import { logger } from "./logger";
44
+ import { queryConcurrency } from "./query_concurrency";
44
45
  import { normalizeQueryArray } from "./query_param_utils";
45
46
  import { EnvironmentStore } from "./service/environment_store";
46
47
 
@@ -459,8 +460,18 @@ export function registerLegacyRoutes(
459
460
 
460
461
  // queryData (deprecated GET) + sqlQuery (supported POST), per-project +
461
462
  // per-package
463
+ // Legacy `/projects/...` query routes keep the GET `queryData`
464
+ // endpoints (unlike the modern `/environments/...` surface, which
465
+ // removed them) so existing SDK clients are not broken. The
466
+ // missing protection is concurrency: without `queryConcurrency()`
467
+ // a flood of legacy clients can saturate the pod even while the
468
+ // modern routes are properly gated. Apply the same per-pod cap
469
+ // here so the legacy surface respects PUBLISHER_MAX_CONCURRENT_QUERIES.
470
+ // Admission, timeout, and row/byte caps are already enforced by
471
+ // the shared controllers downstream.
462
472
  app.get(
463
473
  `${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/queryData`,
474
+ queryConcurrency(),
464
475
  async (req, res) => {
465
476
  try {
466
477
  res.status(200).json(
@@ -481,6 +492,7 @@ export function registerLegacyRoutes(
481
492
 
482
493
  app.get(
483
494
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/queryData`,
495
+ queryConcurrency(),
484
496
  async (req, res) => {
485
497
  try {
486
498
  res.status(200).json(
@@ -502,6 +514,7 @@ export function registerLegacyRoutes(
502
514
 
503
515
  app.post(
504
516
  `${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/sqlQuery`,
517
+ queryConcurrency(),
505
518
  async (req, res) => {
506
519
  try {
507
520
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -528,6 +541,7 @@ export function registerLegacyRoutes(
528
541
 
529
542
  app.post(
530
543
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/sqlQuery`,
544
+ queryConcurrency(),
531
545
  async (req, res) => {
532
546
  try {
533
547
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -556,6 +570,7 @@ export function registerLegacyRoutes(
556
570
  // temporaryTable (deprecated GET) + sqlTemporaryTable (supported POST)
557
571
  app.get(
558
572
  `${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/temporaryTable`,
573
+ queryConcurrency(),
559
574
  async (req, res) => {
560
575
  try {
561
576
  res.status(200).json(
@@ -575,6 +590,7 @@ export function registerLegacyRoutes(
575
590
 
576
591
  app.get(
577
592
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/temporaryTable`,
593
+ queryConcurrency(),
578
594
  async (req, res) => {
579
595
  try {
580
596
  res.status(200).json(
@@ -595,6 +611,7 @@ export function registerLegacyRoutes(
595
611
 
596
612
  app.post(
597
613
  `${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/sqlTemporaryTable`,
614
+ queryConcurrency(),
598
615
  async (req, res) => {
599
616
  try {
600
617
  res.status(200).json(
@@ -614,6 +631,7 @@ export function registerLegacyRoutes(
614
631
 
615
632
  app.post(
616
633
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/sqlTemporaryTable`,
634
+ queryConcurrency(),
617
635
  async (req, res) => {
618
636
  try {
619
637
  res.status(200).json(
@@ -780,6 +798,7 @@ export function registerLegacyRoutes(
780
798
 
781
799
  app.post(
782
800
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/models/*?/query`,
801
+ queryConcurrency(),
783
802
  async (req, res) => {
784
803
  if (req.body.versionId) {
785
804
  setVersionIdError(res);
@@ -856,6 +875,7 @@ export function registerLegacyRoutes(
856
875
  // Cell execution route comes BEFORE the general getNotebook wildcard
857
876
  app.get(
858
877
  `${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/notebooks/*/cells/:cellIndex`,
878
+ queryConcurrency(),
859
879
  async (req, res) => {
860
880
  if (req.query.versionId) {
861
881
  setVersionIdError(res);
package/src/server.ts CHANGED
@@ -35,6 +35,9 @@ import {
35
35
  import { logger, loggerMiddleware } from "./logger";
36
36
 
37
37
  import { getMemoryGovernorConfig } from "./config";
38
+ import { setFilterDeprecationHeaders } from "./filter_deprecation";
39
+ import { checkHeapConfiguration } from "./heap_check";
40
+ import { queryConcurrency } from "./query_concurrency";
38
41
  import { ManifestController } from "./controller/manifest.controller";
39
42
  import { MaterializationController } from "./controller/materialization.controller";
40
43
  import { initializeMcpServer } from "./mcp/server";
@@ -154,6 +157,12 @@ const isDevelopment = process.env["NODE_ENV"] === "development";
154
157
  export const app = express();
155
158
  app.use(loggerMiddleware);
156
159
  app.use(httpMetricsMiddleware);
160
+ // Probe the V8 heap ceiling once at startup and warn if it's below
161
+ // the recommended floor. The row/byte caps from Steps 1–3 still
162
+ // bound per-request memory; this is a "your --max-old-space-size
163
+ // looks low for the default caps" advisory so operators don't
164
+ // chase OOMKills before checking the obvious config.
165
+ checkHeapConfiguration();
157
166
  const environmentStore = new EnvironmentStore(SERVER_ROOT);
158
167
  const manifestService = new ManifestService(environmentStore);
159
168
  const watchModeController = new WatchModeController(environmentStore);
@@ -713,55 +722,18 @@ app.post(
713
722
  },
714
723
  );
715
724
 
716
- /**
717
- * @deprecated Use /environments/:environmentName/connections/:connectionName/sqlQuery POST method instead
718
- */
719
- app.get(
720
- `${API_PREFIX}/environments/:environmentName/connections/:connectionName/queryData`,
721
- async (req, res) => {
722
- try {
723
- res.status(200).json(
724
- await connectionController.getConnectionQueryData(
725
- req.params.environmentName,
726
- req.params.connectionName,
727
- req.query.sqlStatement as string,
728
- req.query.options as string,
729
- ),
730
- );
731
- } catch (error) {
732
- logger.error(error);
733
- const { json, status } = internalErrorToHttpError(error as Error);
734
- res.status(status).json(json);
735
- }
736
- },
737
- );
738
-
739
- /**
740
- * @deprecated Use /environments/:environmentName/packages/:packageName/connections/:connectionName/sqlQuery
741
- */
742
- app.get(
743
- `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/queryData`,
744
- async (req, res) => {
745
- try {
746
- res.status(200).json(
747
- await connectionController.getConnectionQueryData(
748
- req.params.environmentName,
749
- req.params.connectionName,
750
- req.query.sqlStatement as string,
751
- req.query.options as string,
752
- req.params.packageName,
753
- ),
754
- );
755
- } catch (error) {
756
- logger.error(error);
757
- const { json, status } = internalErrorToHttpError(error as Error);
758
- res.status(status).json(json);
759
- }
760
- },
761
- );
762
-
725
+ // NOTE: The deprecated `GET …/connections/:connectionName/queryData`
726
+ // and `GET …/packages/:packageName/connections/:connectionName/queryData`
727
+ // routes were removed in the operational-guards changeset.
728
+ // They had been marked `@deprecated` for several releases; clients
729
+ // must now use the POST `…/sqlQuery` endpoints below, which take the
730
+ // SQL in the request body so the row/byte caps and query-timeout
731
+ // signals introduced in the OOM-mitigation work apply uniformly.
732
+ // The legacy `GET /projects/…/queryData` twins under `server-old.ts`
733
+ // remain in place for now.
763
734
  app.post(
764
735
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/sqlQuery`,
736
+ queryConcurrency(),
765
737
  async (req, res) => {
766
738
  try {
767
739
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -791,6 +763,7 @@ app.post(
791
763
 
792
764
  app.post(
793
765
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/sqlQuery`,
766
+ queryConcurrency(),
794
767
  async (req, res) => {
795
768
  try {
796
769
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -821,6 +794,7 @@ app.post(
821
794
  */
822
795
  app.get(
823
796
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/temporaryTable`,
797
+ queryConcurrency(),
824
798
  async (req, res) => {
825
799
  try {
826
800
  res.status(200).json(
@@ -843,6 +817,7 @@ app.get(
843
817
  */
844
818
  app.get(
845
819
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/temporaryTable`,
820
+ queryConcurrency(),
846
821
  async (req, res) => {
847
822
  try {
848
823
  res.status(200).json(
@@ -863,6 +838,7 @@ app.get(
863
838
 
864
839
  app.post(
865
840
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/sqlTemporaryTable`,
841
+ queryConcurrency(),
866
842
  async (req, res) => {
867
843
  try {
868
844
  res.status(200).json(
@@ -882,6 +858,7 @@ app.post(
882
858
 
883
859
  app.post(
884
860
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/sqlTemporaryTable`,
861
+ queryConcurrency(),
885
862
  async (req, res) => {
886
863
  try {
887
864
  res.status(200).json(
@@ -1076,6 +1053,7 @@ app.get(
1076
1053
  // to avoid the wildcard matching incorrectly
1077
1054
  app.get(
1078
1055
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/notebooks/*/cells/:cellIndex`,
1056
+ queryConcurrency(),
1079
1057
  async (req, res) => {
1080
1058
  if (req.query.versionId) {
1081
1059
  setVersionIdError(res);
@@ -1121,17 +1099,20 @@ app.get(
1121
1099
  }
1122
1100
  }
1123
1101
 
1124
- res.status(200).json(
1125
- await modelController.executeNotebookCell(
1126
- req.params.environmentName,
1127
- req.params.packageName,
1128
- notebookPath,
1129
- cellIndex,
1130
- filterParams,
1131
- bypassFilters,
1132
- givens,
1133
- ),
1102
+ const result = await modelController.executeNotebookCell(
1103
+ req.params.environmentName,
1104
+ req.params.packageName,
1105
+ notebookPath,
1106
+ cellIndex,
1107
+ filterParams,
1108
+ bypassFilters,
1109
+ givens,
1134
1110
  );
1111
+ setFilterDeprecationHeaders(res, {
1112
+ filterParams,
1113
+ bypassFilters,
1114
+ });
1115
+ res.status(200).json(result);
1135
1116
  } catch (error) {
1136
1117
  logger.error(error);
1137
1118
  const { json, status } = internalErrorToHttpError(error as Error);
@@ -1168,6 +1149,7 @@ app.get(
1168
1149
 
1169
1150
  app.post(
1170
1151
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/models/*?/query`,
1152
+ queryConcurrency(),
1171
1153
  async (req, res) => {
1172
1154
  if (req.body.versionId) {
1173
1155
  setVersionIdError(res);
@@ -1177,22 +1159,25 @@ app.post(
1177
1159
  try {
1178
1160
  // Express stores wildcard matches in params['0']
1179
1161
  const modelPath = (req.params as Record<string, string>)["0"];
1180
- res.status(200).json(
1181
- await queryController.getQuery(
1182
- req.params.environmentName,
1183
- req.params.packageName,
1184
- modelPath,
1185
- req.body.sourceName as string,
1186
- req.body.queryName as string,
1187
- req.body.query as string,
1188
- req.body.compactJson === true,
1189
- (req.body.filterParams ?? req.body.sourceFilters) as
1190
- | Record<string, string | string[]>
1191
- | undefined,
1192
- req.body.bypassFilters === true ? true : undefined,
1193
- req.body.givens as Record<string, GivenValue> | undefined,
1194
- ),
1162
+ const result = await queryController.getQuery(
1163
+ req.params.environmentName,
1164
+ req.params.packageName,
1165
+ modelPath,
1166
+ req.body.sourceName as string,
1167
+ req.body.queryName as string,
1168
+ req.body.query as string,
1169
+ req.body.compactJson === true,
1170
+ (req.body.filterParams ?? req.body.sourceFilters) as
1171
+ | Record<string, string | string[]>
1172
+ | undefined,
1173
+ req.body.bypassFilters === true ? true : undefined,
1174
+ req.body.givens as Record<string, GivenValue> | undefined,
1195
1175
  );
1176
+ setFilterDeprecationHeaders(res, {
1177
+ filterParams: req.body.filterParams ?? req.body.sourceFilters,
1178
+ bypassFilters: req.body.bypassFilters === true ? true : undefined,
1179
+ });
1180
+ res.status(200).json(result);
1196
1181
  } catch (error) {
1197
1182
  logger.error(error);
1198
1183
  const { json, status } = internalErrorToHttpError(error as Error);
package/src/service/connection.spec.ts CHANGED
@@ -9,6 +9,7 @@ import {
9
9
  testConnectionConfig,
10
10
  } from "./connection";
11
11
  import { assembleEnvironmentConnections } from "./connection_config";
12
+ import { EnvironmentStore } from "./environment_store";
12
13
 
13
14
  type ApiConnection = components["schemas"]["Connection"];
14
15
  type AttachedDatabase = components["schemas"]["AttachedDatabase"];
@@ -45,6 +46,21 @@ const hasGCSCredentials = () =>
45
46
  const readBigQueryServiceAccountJson = async (): Promise<string> =>
46
47
  fs.readFile(process.env.GOOGLE_APPLICATION_CREDENTIALS!, "utf-8");
47
48
 
49
+ // `BIGQUERY_PUBLIC_DATA_*` env vars are populated by the
50
+ // `Setup BigQuery public-data credentials` step in
51
+ // `.github/workflows/connection-integration-tests.yml`. Kept separate
52
+ // from the existing `BIGQUERY_TEST_*` vars (which point at the
53
+ // org-scoped BQ_PRESTO_TRINO_KEY service account) so the two SAs can
54
+ // coexist without one overwriting the other.
55
+ const hasPublicDataBigQueryCredentials = () =>
56
+ !!(
57
+ process.env.BIGQUERY_PUBLIC_DATA_CREDENTIALS &&
58
+ process.env.BIGQUERY_PUBLIC_DATA_PROJECT_ID
59
+ );
60
+
61
+ const readPublicDataBigQueryServiceAccountJson = async (): Promise<string> =>
62
+ fs.readFile(process.env.BIGQUERY_PUBLIC_DATA_CREDENTIALS!, "utf-8");
63
+
48
64
  describe("connection integration tests", () => {
49
65
  const testEnvironmentPath = path.join(
50
66
  process.cwd(),
@@ -672,6 +688,234 @@ describe("connection integration tests", () => {
672
688
  );
673
689
  });
674
690
 
691
+ describe("BigQuery direct connection (public-data)", () => {
692
+ // Single end-to-end check that a real `bigquery`-typed
693
+ // connection authenticated by the BIGQUERY_PUBLIC_DATA_SA
694
+ // service account can actually round-trip a query against
695
+ // bigquery-public-data. Skips locally if the creds aren't
696
+ // set; runs in CI under .github/workflows/connection-integration-tests.yml.
697
+ //
698
+ // Picked `samples.shakespeare` because (a) it's tiny (~6 MB
699
+ // / ~165k rows) so the BigQuery byte-scanned cost is well
700
+ // under the 1 TB/month free tier even across many PR runs,
701
+ // (b) it's a long-stable public table so the assertion stays
702
+ // valid across years of reruns. If this assertion ever fails
703
+ // the cause is almost certainly auth, not data drift.
704
+ it(
705
+ "should query bigquery-public-data via real BigQuery connection",
706
+ async () => {
707
+ if (!hasPublicDataBigQueryCredentials()) {
708
+ console.log(
709
+ "Skipping: BIGQUERY_PUBLIC_DATA_CREDENTIALS or BIGQUERY_PUBLIC_DATA_PROJECT_ID not configured",
710
+ );
711
+ return;
712
+ }
713
+
714
+ const serviceAccountJson =
715
+ await readPublicDataBigQueryServiceAccountJson();
716
+
717
+ const bqConnection: ApiConnection = {
718
+ name: "bq_public_data",
719
+ type: "bigquery",
720
+ bigqueryConnection: {
721
+ // Billing/auth project (the SA's own project_id);
722
+ // the queried tables live in bigquery-public-data
723
+ // and are referenced explicitly in the SQL below.
724
+ defaultProjectId:
725
+ process.env.BIGQUERY_PUBLIC_DATA_PROJECT_ID!,
726
+ serviceAccountKeyJson: serviceAccountJson,
727
+ },
728
+ };
729
+
730
+ const { malloyConnections } = await createEnvironmentConnections(
731
+ [bqConnection],
732
+ testEnvironmentPath,
733
+ );
734
+
735
+ const connection = malloyConnections.get("bq_public_data");
736
+ expect(connection).toBeDefined();
737
+
738
+ try {
739
+ const result = await connection!.runSQL(
740
+ "SELECT COUNT(*) AS row_count FROM `bigquery-public-data.samples.shakespeare`",
741
+ );
742
+ expect(result.rows.length).toBe(1);
743
+ // Shakespeare has ~165k rows; bound on both sides so
744
+ // we catch both "auth succeeded but query returned
745
+ // nothing" and "got a confusingly large value" failure
746
+ // modes, while staying tolerant of any minor row-count
747
+ // jitter Google might introduce.
748
+ const row = result.rows[0] as Record<string, unknown>;
749
+ const rowCount = Number(row.row_count);
750
+ expect(rowCount).toBeGreaterThan(100_000);
751
+ expect(rowCount).toBeLessThan(200_000);
752
+ } finally {
753
+ // BigQuery driver holds an HTTP/2 client + auth refresh
754
+ // state; close it explicitly so we don't leak across
755
+ // the rest of the test run. (createdConnections is
756
+ // typed for DuckDBConnection, so we can't use the
757
+ // shared cleanup array here.)
758
+ await connection?.close();
759
+ }
760
+ },
761
+ { timeout: 60000 },
762
+ );
763
+ });
764
+
765
+ describe("BigQuery package end-to-end (bq-hackernews)", () => {
766
+ // Step 2 of Sagar's bun-setup-fixes ask: not just "does the
767
+ // BigQuery driver round-trip a SQL query" (the previous
768
+ // describe block covers that), but "does the publisher's
769
+ // package-loading path successfully use the BigQuery
770
+ // connection on behalf of a Malloy package."
771
+ //
772
+ // Mechanism: stand up a real EnvironmentStore against a
773
+ // temp publisher.config.json that declares both the BQ
774
+ // connection (with the BIGQUERY_PUBLIC_DATA_SA injected via
775
+ // serviceAccountKeyJson) AND the bq-hackernews package
776
+ // (loaded from credibledata/malloy-samples on GitHub).
777
+ // EnvironmentStore.initialize then: clones the malloy-samples
778
+ // repo, extracts the bigquery-hackernews subdirectory, parses
779
+ // the package's Malloy models, and — crucially — introspects
780
+ // their BigQuery table schemas during model compilation. A
781
+ // successful Package.listModels() call therefore proves the
782
+ // entire package-uses-connection path, not just the bare
783
+ // driver auth.
784
+ //
785
+ // Cost: ~50 MB git clone (one time per test run since
786
+ // publisher_data lives under testEnvironmentPath which the
787
+ // afterEach cleans up) + ~6 MB BQ schema scan. Both well
788
+ // under any meaningful free-tier limit. Test budget: 3 min
789
+ // (clone is ~30-60s on GH runners; compile is ~10s).
790
+ it(
791
+ "should load bq-hackernews package and compile its models via real BQ",
792
+ async () => {
793
+ if (!hasPublicDataBigQueryCredentials()) {
794
+ console.log(
795
+ "Skipping: BIGQUERY_PUBLIC_DATA_CREDENTIALS or BIGQUERY_PUBLIC_DATA_PROJECT_ID not configured",
796
+ );
797
+ return;
798
+ }
799
+
800
+ const serviceAccountJson =
801
+ await readPublicDataBigQueryServiceAccountJson();
802
+
803
+ // Each test gets its own serverRoot so publisher_data
804
+ // doesn't leak across tests; afterEach removes
805
+ // testEnvironmentPath which carries the whole tree
806
+ // away.
807
+ const tempServerRoot = path.join(
808
+ testEnvironmentPath,
809
+ "bq-hackernews-pkg-test",
810
+ );
811
+ await fs.mkdir(tempServerRoot, { recursive: true });
812
+
813
+ const config = {
814
+ frozenConfig: false,
815
+ environments: [
816
+ {
817
+ name: "malloy-samples",
818
+ packages: [
819
+ {
820
+ name: "bigquery-hackernews",
821
+ location:
822
+ "https://github.com/credibledata/malloy-samples/tree/main/bigquery-hackernews",
823
+ },
824
+ ],
825
+ connections: [
826
+ {
827
+ name: "bigquery",
828
+ type: "bigquery",
829
+ bigqueryConnection: {
830
+ defaultProjectId:
831
+ process.env
832
+ .BIGQUERY_PUBLIC_DATA_PROJECT_ID!,
833
+ serviceAccountKeyJson: serviceAccountJson,
834
+ },
835
+ },
836
+ ],
837
+ },
838
+ ],
839
+ };
840
+ await fs.writeFile(
841
+ path.join(tempServerRoot, "publisher.config.json"),
842
+ JSON.stringify(config),
843
+ );
844
+
845
+ // Force the load-from-config init path. Without this,
846
+ // EnvironmentStore.initialize() takes the load-from-DB
847
+ // branch and only falls back to config when the DB is
848
+ // empty (which it is here, by construction, but relying
849
+ // on that is brittle if the test pattern is reused).
850
+ //
851
+ // SAFETY PRECONDITION: this flag is the SAME one users
852
+ // opt into via `--init` / `start:init` to wipe persisted
853
+ // storage and re-initialize from config (see CLAUDE.md
854
+ // and `environment_store.ts:158`). It is destructive on
855
+ // a real serverRoot. We only set it here because
856
+ // `tempServerRoot` is a freshly-created empty directory
857
+ // (path.join(testEnvironmentPath, "bq-hackernews-pkg-test")
858
+ // mkdir'd ~20 lines above), so there is nothing to wipe.
859
+ // DO NOT copy this pattern into a test that points
860
+ // EnvironmentStore at a non-empty or shared serverRoot
861
+ // — it will delete state.
862
+ const previousInitializeStorage = process.env.INITIALIZE_STORAGE;
863
+ process.env.INITIALIZE_STORAGE = "true";
864
+
865
+ try {
866
+ const envStore = new EnvironmentStore(tempServerRoot);
867
+ await envStore.finishedInitialization;
868
+
869
+ // operationalState=serving is the only signal that
870
+ // initialize() actually succeeded. initialize swallows
871
+ // top-level errors and just calls markNotReady() (see
872
+ // environment_store.ts:297-301), so
873
+ // finishedInitialization always resolves regardless of
874
+ // success. By construction (env_store:288-292), serving
875
+ // also implies all configured environments loaded.
876
+ const status = await envStore.getStatus();
877
+ expect(status.operationalState).toBe("serving");
878
+
879
+ const env = await envStore.getEnvironment("malloy-samples");
880
+ const apiPackages = await env.listPackages();
881
+ expect(apiPackages.map((p) => p.name)).toContain(
882
+ "bigquery-hackernews",
883
+ );
884
+
885
+ const apiConnections = env.listApiConnections();
886
+ expect(apiConnections.map((c) => c.name)).toContain(
887
+ "bigquery",
888
+ );
889
+
890
+ // The actual integration assertion. Package.listModels()
891
+ // collects compile errors per-model and returns ALL
892
+ // models in the result (with `error` populated on
893
+ // failures) — so models.length>0 alone would pass even
894
+ // if every BQ schema introspection failed. Filter for
895
+ // models that compiled cleanly: at least one is the
896
+ // real proof that BQ-on-behalf-of-a-package works.
897
+ const pkg = await env.getPackage("bigquery-hackernews");
898
+ const models = await pkg.listModels();
899
+ const okModels = models.filter((m) => !m.error);
900
+ if (okModels.length === 0) {
901
+ console.error(
902
+ "All bq-hackernews model compilations failed:",
903
+ models.map((m) => `${m.path}: ${m.error}`),
904
+ );
905
+ }
906
+ expect(okModels.length).toBeGreaterThan(0);
907
+ } finally {
908
+ if (previousInitializeStorage === undefined) {
909
+ delete process.env.INITIALIZE_STORAGE;
910
+ } else {
911
+ process.env.INITIALIZE_STORAGE = previousInitializeStorage;
912
+ }
913
+ }
914
+ },
915
+ { timeout: 180000 },
916
+ );
917
+ });
918
+
675
919
  describe("DuckDB with Snowflake attachment", () => {
676
920
  it(
677
921
  "should create DuckDB connection with attached Snowflake database",
package/src/service/connection.ts CHANGED
@@ -22,10 +22,14 @@ import {
22
22
  import type { LookupConnection } from "@malloydata/malloy/connection";
23
23
  import { AxiosError } from "axios";
24
24
  import fs from "fs/promises";
25
- import path from "path";
26
25
  import { components } from "../api";
27
26
  import { logAxiosError, logger } from "../logger";
28
27
  import { redactPgSecrets } from "../pg_helpers";
28
+ import {
29
+ assertSafeEnvironmentPath,
30
+ assertSafePackageName,
31
+ safeJoinUnderRoot,
32
+ } from "../path_safety";
29
33
  import {
30
34
  assembleEnvironmentConnections,
31
35
  CoreConnectionEntry,
@@ -91,7 +95,9 @@ async function isDatabaseAttached(
91
95
  ? existingDatabases
92
96
  : existingDatabases.rows || [];
93
97
 
94
- logger.debug(`Existing databases:`, rows);
98
+ logger.debug("connection.duckdb.databases.queried", {
99
+ count: rows.length,
100
+ });
95
101
 
96
102
  return rows.some((row: Record<string, unknown>) =>
97
103
  Object.values(row).some(
@@ -814,7 +820,9 @@ export async function deleteDuckLakeConnectionFile(
814
820
  connectionName: string,
815
821
  environmentPath: string,
816
822
  ): Promise<void> {
817
- const ducklakePath = path.join(
823
+ assertSafePackageName(connectionName);
824
+ assertSafeEnvironmentPath(environmentPath);
825
+ const ducklakePath = safeJoinUnderRoot(
818
826
  environmentPath,
819
827
  `${connectionName}_ducklake.duckdb`,
820
828
  );
@@ -1126,7 +1134,9 @@ export async function createEnvironmentConnections(
1126
1134
 
1127
1135
  for (const connection of environmentConfig.apiConnections) {
1128
1136
  if (!connection.name) continue;
1129
- logger.info(`Adding connection ${connection.name}`, { connection });
1137
+ logger.info(`Adding connection ${connection.name}`, {
1138
+ type: connection.type,
1139
+ });
1130
1140
  const malloyConnection =
1131
1141
  await environmentConfig.malloyConfig.connections.lookupConnection(
1132
1142
  connection.name,