@rocicorp/zero 0.26.0 → 0.26.1-canary.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/out/zero/package.json.js +1 -1
  2. package/out/zero-cache/src/observability/events.d.ts.map +1 -1
  3. package/out/zero-cache/src/observability/events.js +28 -9
  4. package/out/zero-cache/src/observability/events.js.map +1 -1
  5. package/out/zero-cache/src/services/change-source/pg/backfill-stream.d.ts.map +1 -1
  6. package/out/zero-cache/src/services/change-source/pg/backfill-stream.js +26 -13
  7. package/out/zero-cache/src/services/change-source/pg/backfill-stream.js.map +1 -1
  8. package/out/zero-cache/src/services/change-source/pg/initial-sync.d.ts +6 -1
  9. package/out/zero-cache/src/services/change-source/pg/initial-sync.d.ts.map +1 -1
  10. package/out/zero-cache/src/services/change-source/pg/initial-sync.js +64 -18
  11. package/out/zero-cache/src/services/change-source/pg/initial-sync.js.map +1 -1
  12. package/out/zero-cache/src/services/change-source/protocol/current/data.d.ts +26 -0
  13. package/out/zero-cache/src/services/change-source/protocol/current/data.d.ts.map +1 -1
  14. package/out/zero-cache/src/services/change-source/protocol/current/data.js +15 -3
  15. package/out/zero-cache/src/services/change-source/protocol/current/data.js.map +1 -1
  16. package/out/zero-cache/src/services/change-source/protocol/current/downstream.d.ts +30 -0
  17. package/out/zero-cache/src/services/change-source/protocol/current/downstream.d.ts.map +1 -1
  18. package/out/zero-cache/src/services/change-source/protocol/current.js +2 -1
  19. package/out/zero-cache/src/services/change-streamer/change-streamer.d.ts +10 -0
  20. package/out/zero-cache/src/services/change-streamer/change-streamer.d.ts.map +1 -1
  21. package/out/zero-cache/src/services/replicator/change-processor.d.ts +2 -0
  22. package/out/zero-cache/src/services/replicator/change-processor.d.ts.map +1 -1
  23. package/out/zero-cache/src/services/replicator/change-processor.js +8 -6
  24. package/out/zero-cache/src/services/replicator/change-processor.js.map +1 -1
  25. package/out/zero-cache/src/services/replicator/incremental-sync.d.ts.map +1 -1
  26. package/out/zero-cache/src/services/replicator/incremental-sync.js +39 -1
  27. package/out/zero-cache/src/services/replicator/incremental-sync.js.map +1 -1
  28. package/out/zero-cache/src/services/replicator/replication-status.d.ts +4 -3
  29. package/out/zero-cache/src/services/replicator/replication-status.d.ts.map +1 -1
  30. package/out/zero-cache/src/services/replicator/replication-status.js +24 -9
  31. package/out/zero-cache/src/services/replicator/replication-status.js.map +1 -1
  32. package/out/zero-client/src/client/version.js +1 -1
  33. package/out/zero-events/src/status.d.ts +8 -0
  34. package/out/zero-events/src/status.d.ts.map +1 -1
  35. package/package.json +1 -1
@@ -1,4 +1,4 @@
- const version = "0.26.0";
+ const version = "0.26.1-canary.2";
  const packageJson = {
    version
  };
@@ -1 +1 @@
- {"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../../../../zero-cache/src/observability/events.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAAc,KAAK,UAAU,EAAC,MAAM,6BAA6B,CAAC;AAKzE,OAAO,EAAC,KAAK,SAAS,EAAC,MAAM,mCAAmC,CAAC;AACjE,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,wBAAwB,CAAC;AAyBjE;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,EAAE,EAAE,UAAU,EACd,EAAC,MAAM,EAAE,UAAU,EAAC,EAAE,IAAI,CAAC,oBAAoB,EAAE,QAAQ,GAAG,YAAY,CAAC,QAqD1E;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,GAAG,OAAa,QAO1E;AAED,wBAAgB,YAAY,CAAC,CAAC,SAAS,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,QAEzE;AAED,wBAAsB,oBAAoB,CAAC,CAAC,SAAS,SAAS,EAC5D,EAAE,EAAE,UAAU,EACd,KAAK,EAAE,CAAC,iBAGT;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,OAAO,GAAG,UAAU,CAevD"}
+ {"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../../../../zero-cache/src/observability/events.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAMjD,OAAO,EAAc,KAAK,UAAU,EAAC,MAAM,6BAA6B,CAAC;AAKzE,OAAO,EAAC,KAAK,SAAS,EAAC,MAAM,mCAAmC,CAAC;AACjE,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,wBAAwB,CAAC;AA+BjE;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,EAAE,EAAE,UAAU,EACd,EAAC,MAAM,EAAE,UAAU,EAAC,EAAE,IAAI,CAAC,oBAAoB,EAAE,QAAQ,GAAG,YAAY,CAAC,QAsE1E;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,GAAG,OAAa,QAO1E;AAED,wBAAgB,YAAY,CAAC,CAAC,SAAS,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,QAEzE;AAED,wBAAsB,oBAAoB,CAAC,CAAC,SAAS,SAAS,EAC5D,EAAE,EAAE,UAAU,EACd,KAAK,EAAE,CAAC,iBAGT;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,OAAO,GAAG,UAAU,CAevD"}
@@ -1,5 +1,8 @@
+ import { resolver } from "@rocicorp/resolver";
  import { emitterFor, httpTransport, CloudEvent } from "cloudevents";
  import { nanoid } from "nanoid";
+ import { gzip } from "node:zlib";
+ import { stringify } from "../../../shared/src/bigint-json.js";
  import { isJSONValue } from "../../../shared/src/json.js";
  import { must } from "../../../shared/src/must.js";
  import { promiseVoid } from "../../../shared/src/resolved-promises.js";
@@ -17,6 +20,11 @@ let publishFn = (lc, { type }) => {
  const attributeValueSchema = union(string(), number(), boolean());
  const eventSchema = record(attributeValueSchema);
  const extensionsObjectSchema = object({ extensions: eventSchema });
+ async function base64gzip(str) {
+ const { promise: gzipped, resolve, reject } = resolver();
+ gzip(Buffer.from(str), (err, buf) => err ? reject(err) : resolve(buf));
+ return (await gzipped).toString("base64");
+ }
  function initEventSink(lc, { taskID, cloudEvent }) {
  if (!cloudEvent.sinkEnv) {
  publishFn = (lc2, event) => {
@@ -31,13 +39,21 @@ function initEventSink(lc, { taskID, cloudEvent }) {
  const { extensions } = parse(JSON.parse(strVal), extensionsObjectSchema);
  overrides = extensions;
  }
- function createCloudEvent(data) {
- const { type, time } = data;
+ async function createCloudEvent(event) {
+ const { type, time } = event;
+ const json = stringify(event);
+ const data = await base64gzip(json);
  return new CloudEvent({
  id: nanoid(),
  source: taskID,
  type,
  time,
+ // Pass `data` as text/plain to prevent intermediaries from
+ // base64-decoding it. It is the responsibility of the final processor
+ // to recognize that datacontentencoding === "gzip" and unpack the
+ // `data` accordingly before parsing it.
+ datacontenttype: "text/plain",
+ datacontentencoding: "gzip",
  data,
  ...overrides
  });
@@ -46,7 +62,13 @@ function initEventSink(lc, { taskID, cloudEvent }) {
  const emit = emitterFor(httpTransport(sinkURI));
  lc.debug?.(`Publishing ZeroEvents to ${sinkURI}`);
  publishFn = async (lc2, event) => {
- const cloudEvent2 = createCloudEvent(event);
+ let cloudEvent2;
+ try {
+ cloudEvent2 = await createCloudEvent(event);
+ } catch (e) {
+ lc2.error?.(`Error creating CloudEvent ${event.type}`, e);
+ return;
+ }
  lc2.debug?.(`Publishing CloudEvent: ${cloudEvent2.type}`);
  for (let i = 0; i < MAX_PUBLISH_ATTEMPTS; i++) {
  if (i > 0) {
@@ -54,7 +76,8 @@ function initEventSink(lc, { taskID, cloudEvent }) {
  }
  try {
  await emit(cloudEvent2);
- lc2.info?.(`Published CloudEvent: ${cloudEvent2.type}`, cloudEvent2);
+ const { data: _, ...event2 } = cloudEvent2;
+ lc2.info?.(`Published CloudEvent: ${cloudEvent2.type}`, { event: event2 });
  return;
  } catch (e) {
  lc2.warn?.(`Error publishing ${cloudEvent2.type} (attempt ${i + 1})`, e);
@@ -62,9 +85,6 @@ function initEventSink(lc, { taskID, cloudEvent }) {
  }
  };
  }
- function publishEvent(lc, event) {
- void publishFn(lc, event);
- }
  async function publishCriticalEvent(lc, event) {
  await publishFn(lc, event);
  }
@@ -86,7 +106,6 @@ function makeErrorDetails(e) {
  export {
  initEventSink,
  makeErrorDetails,
- publishCriticalEvent,
- publishEvent
+ publishCriticalEvent
  };
  //# sourceMappingURL=events.js.map
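The events.js change above gzips and base64-encodes the stringified event before attaching it as `data`, and marks the payload with `datacontenttype: "text/plain"` and `datacontentencoding: "gzip"`. A minimal sketch of what a consumer might do to unpack such a payload; the helper name and event shape here are illustrative and not part of this package, and since the payload is produced with a bigint-aware stringify, a matching parser may be needed instead of plain JSON.parse:

```ts
// Hypothetical consumer-side decoder for the gzip/base64 CloudEvent payload
// introduced above. Uses Node's zlib; not part of @rocicorp/zero.
import {gunzip} from 'node:zlib';
import {promisify} from 'node:util';

const gunzipAsync = promisify(gunzip);

async function decodeEventData(event: {
  datacontentencoding?: string;
  data?: string;
}): Promise<unknown> {
  if (event.datacontentencoding !== 'gzip' || event.data === undefined) {
    return event.data; // not compressed (or no payload): pass through as-is
  }
  // `data` is text/plain carrying base64 of the gzipped JSON string.
  const unzipped = await gunzipAsync(Buffer.from(event.data, 'base64'));
  return JSON.parse(unzipped.toString('utf8'));
}
```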
@@ -1 +1 @@
- {"version":3,"file":"events.js","sources":["../../../../../zero-cache/src/observability/events.ts"],"sourcesContent":["import type {LogContext} from '@rocicorp/logger';\nimport {CloudEvent, emitterFor, httpTransport} from 'cloudevents';\nimport {nanoid} from 'nanoid';\nimport {isJSONValue, type JSONObject} from '../../../shared/src/json.ts';\nimport {must} from '../../../shared/src/must.ts';\nimport {promiseVoid} from '../../../shared/src/resolved-promises.ts';\nimport {sleep} from '../../../shared/src/sleep.ts';\nimport * as v from '../../../shared/src/valita.ts';\nimport {type ZeroEvent} from '../../../zero-events/src/index.ts';\nimport type {NormalizedZeroConfig} from '../config/normalize.ts';\n\nconst MAX_PUBLISH_ATTEMPTS = 6;\nconst INITIAL_PUBLISH_BACKOFF_MS = 500;\n\ntype PublisherFn = (lc: LogContext, event: ZeroEvent) => Promise<void>;\n\nlet publishFn: PublisherFn = (lc, {type}) => {\n lc.warn?.(\n `Cannot publish \"${type}\" event before initEventSink(). ` +\n `This is only expected in unit tests.`,\n );\n return promiseVoid;\n};\n\nconst attributeValueSchema = v.union(v.string(), v.number(), v.boolean());\n\nconst eventSchema = v.record(attributeValueSchema);\n\ntype PartialEvent = v.Infer<typeof eventSchema>;\n\n// Note: This conforms to the format of the knative K_CE_OVERRIDES binding:\n// https://github.com/knative/eventing/blob/main/docs/spec/sources.md#sinkbinding\nconst extensionsObjectSchema = v.object({extensions: eventSchema});\n\n/**\n * Initializes a per-process event sink according to the cloud event\n * parameters in the ZeroConfig. This must be called at the beginning\n * of the process, before any ZeroEvents are generated / published.\n */\nexport function initEventSink(\n lc: LogContext,\n {taskID, cloudEvent}: Pick<NormalizedZeroConfig, 'taskID' | 'cloudEvent'>,\n) {\n if (!cloudEvent.sinkEnv) {\n // The default implementation just outputs the events to logs.\n publishFn = (lc, event) => {\n lc.info?.(`ZeroEvent: ${event.type}`, event);\n return promiseVoid;\n };\n return;\n }\n\n let overrides: PartialEvent = {};\n\n if (cloudEvent.extensionOverridesEnv) {\n const strVal = must(process.env[cloudEvent.extensionOverridesEnv]);\n const {extensions} = v.parse(JSON.parse(strVal), extensionsObjectSchema);\n overrides = extensions;\n }\n\n function createCloudEvent(data: ZeroEvent) {\n const {type, time} = data;\n return new CloudEvent({\n id: nanoid(),\n source: taskID,\n type,\n time,\n data,\n ...overrides,\n });\n }\n\n const sinkURI = must(process.env[cloudEvent.sinkEnv]);\n const emit = emitterFor(httpTransport(sinkURI));\n lc.debug?.(`Publishing ZeroEvents to ${sinkURI}`);\n\n publishFn = async (lc, event) => {\n const cloudEvent = createCloudEvent(event);\n lc.debug?.(`Publishing CloudEvent: ${cloudEvent.type}`);\n\n for (let i = 0; i < MAX_PUBLISH_ATTEMPTS; i++) {\n if (i > 0) {\n // exponential backoff on retries\n await sleep(INITIAL_PUBLISH_BACKOFF_MS * 2 ** (i - 1));\n }\n try {\n await emit(cloudEvent);\n lc.info?.(`Published CloudEvent: ${cloudEvent.type}`, cloudEvent);\n return;\n } catch (e) {\n lc.warn?.(`Error publishing ${cloudEvent.type} (attempt ${i + 1})`, e);\n }\n }\n };\n}\n\nexport function initEventSinkForTesting(sink: ZeroEvent[], now = new Date()) {\n publishFn = (lc, event) => {\n lc.info?.(`Testing event sink received ${event.type} event`, event);\n // Replace the default Date.now() with the test instance for determinism.\n sink.push({...event, time: now.toISOString()});\n return promiseVoid;\n };\n}\n\nexport function publishEvent<E 
extends ZeroEvent>(lc: LogContext, event: E) {\n void publishFn(lc, event);\n}\n\nexport async function publishCriticalEvent<E extends ZeroEvent>(\n lc: LogContext,\n event: E,\n) {\n await publishFn(lc, event);\n}\n\nexport function makeErrorDetails(e: unknown): JSONObject {\n const err = e instanceof Error ? e : new Error(String(e));\n const errorDetails: JSONObject = {\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause: err.cause ? makeErrorDetails(err.cause) : undefined,\n };\n // Include any enumerable properties (e.g. of Error subtypes).\n for (const [field, value] of Object.entries(err)) {\n if (isJSONValue(value, [])) {\n errorDetails[field] = value;\n }\n }\n return errorDetails;\n}\n"],"names":["v.union","v.string","v.number","v.boolean","v.record","v.object","lc","v.parse","cloudEvent"],"mappings":";;;;;;;;AAWA,MAAM,uBAAuB;AAC7B,MAAM,6BAA6B;AAInC,IAAI,YAAyB,CAAC,IAAI,EAAC,WAAU;AAC3C,KAAG;AAAA,IACD,mBAAmB,IAAI;AAAA,EAAA;AAGzB,SAAO;AACT;AAEA,MAAM,uBAAuBA,MAAQC,OAAE,GAAUC,OAAE,GAAUC,QAAE,CAAS;AAExE,MAAM,cAAcC,OAAS,oBAAoB;AAMjD,MAAM,yBAAyBC,OAAS,EAAC,YAAY,aAAY;AAO1D,SAAS,cACd,IACA,EAAC,QAAQ,cACT;AACA,MAAI,CAAC,WAAW,SAAS;AAEvB,gBAAY,CAACC,KAAI,UAAU;AACzBA,UAAG,OAAO,cAAc,MAAM,IAAI,IAAI,KAAK;AAC3C,aAAO;AAAA,IACT;AACA;AAAA,EACF;AAEA,MAAI,YAA0B,CAAA;AAE9B,MAAI,WAAW,uBAAuB;AACpC,UAAM,SAAS,KAAK,QAAQ,IAAI,WAAW,qBAAqB,CAAC;AACjE,UAAM,EAAC,eAAcC,MAAQ,KAAK,MAAM,MAAM,GAAG,sBAAsB;AACvE,gBAAY;AAAA,EACd;AAEA,WAAS,iBAAiB,MAAiB;AACzC,UAAM,EAAC,MAAM,KAAA,IAAQ;AACrB,WAAO,IAAI,WAAW;AAAA,MACpB,IAAI,OAAA;AAAA,MACJ,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,IAAA,CACJ;AAAA,EACH;AAEA,QAAM,UAAU,KAAK,QAAQ,IAAI,WAAW,OAAO,CAAC;AACpD,QAAM,OAAO,WAAW,cAAc,OAAO,CAAC;AAC9C,KAAG,QAAQ,4BAA4B,OAAO,EAAE;AAEhD,cAAY,OAAOD,KAAI,UAAU;AAC/B,UAAME,cAAa,iBAAiB,KAAK;AACzCF,QAAG,QAAQ,0BAA0BE,YAAW,IAAI,EAAE;AAEtD,aAAS,IAAI,GAAG,IAAI,sBAAsB,KAAK;AAC7C,UAAI,IAAI,GAAG;AAET,cAAM,MAAM,6BAA6B,MAAM,IAAI,EAAE;AAAA,MACvD;AACA,UAAI;AACF,cAAM,KAAKA,WAAU;AACrBF,YAAG,OAAO,yBAAyBE,YAAW,IAAI,IAAIA,WAAU;AAChE;AAAA,MACF,SAAS,GAAG;AACVF,YAAG,OAAO,oBAAoBE,YAAW,IAAI,aAAa,IAAI,CAAC,KAAK,CAAC;AAAA,MACvE;AAAA,IACF;AAAA,EACF;AACF;AAWO,SAAS,aAAkC,IAAgB,OAAU;AAC1E,OAAK,UAAU,IAAI,KAAK;AAC1B;AAEA,eAAsB,qBACpB,IACA,OACA;AACA,QAAM,UAAU,IAAI,KAAK;AAC3B;AAEO,SAAS,iBAAiB,GAAwB;AACvD,QAAM,MAAM,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AACxD,QAAM,eAA2B;AAAA,IAC/B,MAAM,IAAI;AAAA,IACV,SAAS,IAAI;AAAA,IACb,OAAO,IAAI;AAAA,IACX,OAAO,IAAI,QAAQ,iBAAiB,IAAI,KAAK,IAAI;AAAA,EAAA;AAGnD,aAAW,CAAC,OAAO,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAChD,QAAI,YAAY,OAAO,CAAA,CAAE,GAAG;AAC1B,mBAAa,KAAK,IAAI;AAAA,IACxB;AAAA,EACF;AACA,SAAO;AACT;"}
+ {"version":3,"file":"events.js","sources":["../../../../../zero-cache/src/observability/events.ts"],"sourcesContent":["import type {LogContext} from '@rocicorp/logger';\nimport {resolver} from '@rocicorp/resolver';\nimport {CloudEvent, emitterFor, httpTransport} from 'cloudevents';\nimport {nanoid} from 'nanoid';\nimport {gzip} from 'node:zlib';\nimport {stringify} from '../../../shared/src/bigint-json.ts';\nimport {isJSONValue, type JSONObject} from '../../../shared/src/json.ts';\nimport {must} from '../../../shared/src/must.ts';\nimport {promiseVoid} from '../../../shared/src/resolved-promises.ts';\nimport {sleep} from '../../../shared/src/sleep.ts';\nimport * as v from '../../../shared/src/valita.ts';\nimport {type ZeroEvent} from '../../../zero-events/src/index.ts';\nimport type {NormalizedZeroConfig} from '../config/normalize.ts';\n\nconst MAX_PUBLISH_ATTEMPTS = 6;\nconst INITIAL_PUBLISH_BACKOFF_MS = 500;\n\ntype PublisherFn = (lc: LogContext, event: ZeroEvent) => Promise<void>;\n\nlet publishFn: PublisherFn = (lc, {type}) => {\n lc.warn?.(\n `Cannot publish \"${type}\" event before initEventSink(). ` +\n `This is only expected in unit tests.`,\n );\n return promiseVoid;\n};\n\nconst attributeValueSchema = v.union(v.string(), v.number(), v.boolean());\n\nconst eventSchema = v.record(attributeValueSchema);\n\ntype PartialEvent = v.Infer<typeof eventSchema>;\n\n// Note: This conforms to the format of the knative K_CE_OVERRIDES binding:\n// https://github.com/knative/eventing/blob/main/docs/spec/sources.md#sinkbinding\nconst extensionsObjectSchema = v.object({extensions: eventSchema});\n\nasync function base64gzip(str: string): Promise<string> {\n const {promise: gzipped, resolve, reject} = resolver<Buffer>();\n gzip(Buffer.from(str), (err, buf) => (err ? reject(err) : resolve(buf)));\n return (await gzipped).toString('base64');\n}\n\n/**\n * Initializes a per-process event sink according to the cloud event\n * parameters in the ZeroConfig. This must be called at the beginning\n * of the process, before any ZeroEvents are generated / published.\n */\nexport function initEventSink(\n lc: LogContext,\n {taskID, cloudEvent}: Pick<NormalizedZeroConfig, 'taskID' | 'cloudEvent'>,\n) {\n if (!cloudEvent.sinkEnv) {\n // The default implementation just outputs the events to logs.\n publishFn = (lc, event) => {\n lc.info?.(`ZeroEvent: ${event.type}`, event);\n return promiseVoid;\n };\n return;\n }\n\n let overrides: PartialEvent = {};\n\n if (cloudEvent.extensionOverridesEnv) {\n const strVal = must(process.env[cloudEvent.extensionOverridesEnv]);\n const {extensions} = v.parse(JSON.parse(strVal), extensionsObjectSchema);\n overrides = extensions;\n }\n\n async function createCloudEvent(event: ZeroEvent) {\n const {type, time} = event;\n const json = stringify(event);\n const data = await base64gzip(json);\n\n return new CloudEvent({\n id: nanoid(),\n source: taskID,\n type,\n time,\n // Pass `data` as text/plain to prevent intermediaries from\n // base64-decoding it. 
It is the responsibility of the final processor\n // to recognize that datacontentencoding === \"gzip\" and unpack the\n // `data` accordingly before parsing it.\n datacontenttype: 'text/plain',\n datacontentencoding: 'gzip',\n data,\n ...overrides,\n });\n }\n\n const sinkURI = must(process.env[cloudEvent.sinkEnv]);\n const emit = emitterFor(httpTransport(sinkURI));\n lc.debug?.(`Publishing ZeroEvents to ${sinkURI}`);\n\n publishFn = async (lc, event) => {\n let cloudEvent: CloudEvent<string>;\n try {\n cloudEvent = await createCloudEvent(event);\n } catch (e) {\n lc.error?.(`Error creating CloudEvent ${event.type}`, e);\n return;\n }\n lc.debug?.(`Publishing CloudEvent: ${cloudEvent.type}`);\n\n for (let i = 0; i < MAX_PUBLISH_ATTEMPTS; i++) {\n if (i > 0) {\n // exponential backoff on retries\n await sleep(INITIAL_PUBLISH_BACKOFF_MS * 2 ** (i - 1));\n }\n try {\n await emit(cloudEvent);\n // Avoid logging the (possibly large and) unreadable data field.\n const {data: _, ...event} = cloudEvent;\n lc.info?.(`Published CloudEvent: ${cloudEvent.type}`, {event});\n return;\n } catch (e) {\n lc.warn?.(`Error publishing ${cloudEvent.type} (attempt ${i + 1})`, e);\n }\n }\n };\n}\n\nexport function initEventSinkForTesting(sink: ZeroEvent[], now = new Date()) {\n publishFn = (lc, event) => {\n lc.info?.(`Testing event sink received ${event.type} event`, event);\n // Replace the default Date.now() with the test instance for determinism.\n sink.push({...event, time: now.toISOString()});\n return promiseVoid;\n };\n}\n\nexport function publishEvent<E extends ZeroEvent>(lc: LogContext, event: E) {\n void publishFn(lc, event);\n}\n\nexport async function publishCriticalEvent<E extends ZeroEvent>(\n lc: LogContext,\n event: E,\n) {\n await publishFn(lc, event);\n}\n\nexport function makeErrorDetails(e: unknown): JSONObject {\n const err = e instanceof Error ? e : new Error(String(e));\n const errorDetails: JSONObject = {\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause: err.cause ? makeErrorDetails(err.cause) : undefined,\n };\n // Include any enumerable properties (e.g. 
of Error subtypes).\n for (const [field, value] of Object.entries(err)) {\n if (isJSONValue(value, [])) {\n errorDetails[field] = value;\n }\n }\n return errorDetails;\n}\n"],"names":["v.union","v.string","v.number","v.boolean","v.record","v.object","lc","v.parse","cloudEvent","event"],"mappings":";;;;;;;;;;;AAcA,MAAM,uBAAuB;AAC7B,MAAM,6BAA6B;AAInC,IAAI,YAAyB,CAAC,IAAI,EAAC,WAAU;AAC3C,KAAG;AAAA,IACD,mBAAmB,IAAI;AAAA,EAAA;AAGzB,SAAO;AACT;AAEA,MAAM,uBAAuBA,MAAQC,OAAE,GAAUC,OAAE,GAAUC,QAAE,CAAS;AAExE,MAAM,cAAcC,OAAS,oBAAoB;AAMjD,MAAM,yBAAyBC,OAAS,EAAC,YAAY,aAAY;AAEjE,eAAe,WAAW,KAA8B;AACtD,QAAM,EAAC,SAAS,SAAS,SAAS,OAAA,IAAU,SAAA;AAC5C,OAAK,OAAO,KAAK,GAAG,GAAG,CAAC,KAAK,QAAS,MAAM,OAAO,GAAG,IAAI,QAAQ,GAAG,CAAE;AACvE,UAAQ,MAAM,SAAS,SAAS,QAAQ;AAC1C;AAOO,SAAS,cACd,IACA,EAAC,QAAQ,cACT;AACA,MAAI,CAAC,WAAW,SAAS;AAEvB,gBAAY,CAACC,KAAI,UAAU;AACzBA,UAAG,OAAO,cAAc,MAAM,IAAI,IAAI,KAAK;AAC3C,aAAO;AAAA,IACT;AACA;AAAA,EACF;AAEA,MAAI,YAA0B,CAAA;AAE9B,MAAI,WAAW,uBAAuB;AACpC,UAAM,SAAS,KAAK,QAAQ,IAAI,WAAW,qBAAqB,CAAC;AACjE,UAAM,EAAC,eAAcC,MAAQ,KAAK,MAAM,MAAM,GAAG,sBAAsB;AACvE,gBAAY;AAAA,EACd;AAEA,iBAAe,iBAAiB,OAAkB;AAChD,UAAM,EAAC,MAAM,KAAA,IAAQ;AACrB,UAAM,OAAO,UAAU,KAAK;AAC5B,UAAM,OAAO,MAAM,WAAW,IAAI;AAElC,WAAO,IAAI,WAAW;AAAA,MACpB,IAAI,OAAA;AAAA,MACJ,QAAQ;AAAA,MACR;AAAA,MACA;AAAA;AAAA;AAAA;AAAA;AAAA,MAKA,iBAAiB;AAAA,MACjB,qBAAqB;AAAA,MACrB;AAAA,MACA,GAAG;AAAA,IAAA,CACJ;AAAA,EACH;AAEA,QAAM,UAAU,KAAK,QAAQ,IAAI,WAAW,OAAO,CAAC;AACpD,QAAM,OAAO,WAAW,cAAc,OAAO,CAAC;AAC9C,KAAG,QAAQ,4BAA4B,OAAO,EAAE;AAEhD,cAAY,OAAOD,KAAI,UAAU;AAC/B,QAAIE;AACJ,QAAI;AACFA,oBAAa,MAAM,iBAAiB,KAAK;AAAA,IAC3C,SAAS,GAAG;AACVF,UAAG,QAAQ,6BAA6B,MAAM,IAAI,IAAI,CAAC;AACvD;AAAA,IACF;AACAA,QAAG,QAAQ,0BAA0BE,YAAW,IAAI,EAAE;AAEtD,aAAS,IAAI,GAAG,IAAI,sBAAsB,KAAK;AAC7C,UAAI,IAAI,GAAG;AAET,cAAM,MAAM,6BAA6B,MAAM,IAAI,EAAE;AAAA,MACvD;AACA,UAAI;AACF,cAAM,KAAKA,WAAU;AAErB,cAAM,EAAC,MAAM,GAAG,GAAGC,WAASD;AAC5BF,YAAG,OAAO,yBAAyBE,YAAW,IAAI,IAAI,EAAC,OAAAC,QAAM;AAC7D;AAAA,MACF,SAAS,GAAG;AACVH,YAAG,OAAO,oBAAoBE,YAAW,IAAI,aAAa,IAAI,CAAC,KAAK,CAAC;AAAA,MACvE;AAAA,IACF;AAAA,EACF;AACF;AAeA,eAAsB,qBACpB,IACA,OACA;AACA,QAAM,UAAU,IAAI,KAAK;AAC3B;AAEO,SAAS,iBAAiB,GAAwB;AACvD,QAAM,MAAM,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AACxD,QAAM,eAA2B;AAAA,IAC/B,MAAM,IAAI;AAAA,IACV,SAAS,IAAI;AAAA,IACb,OAAO,IAAI;AAAA,IACX,OAAO,IAAI,QAAQ,iBAAiB,IAAI,KAAK,IAAI;AAAA,EAAA;AAGnD,aAAW,CAAC,OAAO,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAChD,QAAI,YAAY,OAAO,CAAA,CAAE,GAAG;AAC1B,mBAAa,KAAK,IAAI;AAAA,IACxB;AAAA,EACF;AACA,SAAO;AACT;"}
@@ -1 +1 @@
- {"version":3,"file":"backfill-stream.d.ts","sourceRoot":"","sources":["../../../../../../../zero-cache/src/services/change-source/pg/backfill-stream.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAWjD,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EAEf,eAAe,EAChB,MAAM,wBAAwB,CAAC;AAWhC,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,mBAAmB,CAAC;AAI/C,KAAK,aAAa,GAAG;IACnB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B,CAAC;AAOF;;;;;GAKG;AACH,wBAAuB,cAAc,CACnC,EAAE,EAAE,UAAU,EACd,WAAW,EAAE,MAAM,EACnB,EAAC,IAAI,EAAE,YAAY,EAAC,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,cAAc,CAAC,EAC5D,EAAE,EAAE,eAAe,EACnB,IAAI,GAAE,aAAkB,GACvB,cAAc,CAAC,eAAe,GAAG,iBAAiB,CAAC,CAwDrD"}
+ {"version":3,"file":"backfill-stream.d.ts","sourceRoot":"","sources":["../../../../../../../zero-cache/src/services/change-source/pg/backfill-stream.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAWjD,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EAGf,eAAe,EAChB,MAAM,wBAAwB,CAAC;AAYhC,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,mBAAmB,CAAC;AAI/C,KAAK,aAAa,GAAG;IACnB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B,CAAC;AAOF;;;;;GAKG;AACH,wBAAuB,cAAc,CACnC,EAAE,EAAE,UAAU,EACd,WAAW,EAAE,MAAM,EACnB,EAAC,IAAI,EAAE,YAAY,EAAC,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,cAAc,CAAC,EAC5D,EAAE,EAAE,eAAe,EACnB,IAAI,GAAE,aAAkB,GACvB,cAAc,CAAC,eAAe,GAAG,iBAAiB,CAAC,CAwDrD"}
@@ -9,7 +9,7 @@ import { TransactionPool } from "../../../db/transaction-pool.js";
  import { pgClient } from "../../../types/pg.js";
  import { SchemaIncompatibilityError } from "../common/backfill-manager.js";
  import { tableMetadataSchema, columnMetadataSchema } from "./backfill-metadata.js";
- import { makeSelectPublishedStmt, createReplicationSlot } from "./initial-sync.js";
+ import { makeDownloadStatements, createReplicationSlot } from "./initial-sync.js";
  import { toStateVersionString } from "./lsn.js";
  import { getPublicationInfo } from "./schema/published.js";
  const POSTGRES_COPY_CHUNK_SIZE = 64 * 1024;
@@ -35,7 +35,7 @@ async function* streamBackfill(lc, upstreamURI, { slot, publications }, bf, opts
  lc,
  tx,
  backfill,
- makeSelectPublishedStmt(tableSpec, cols),
+ makeDownloadStatements(tableSpec, cols),
  cols.map((col) => types.getTypeParser(tableSpec.columns[col].typeOID)),
  flushThresholdBytes
  );
@@ -51,21 +51,34 @@ async function* streamBackfill(lc, upstreamURI, { slot, publications }, bf, opts
  void db.end().catch((e) => lc.warn?.(`error closing backfill connection`, e));
  }
  }
- async function* stream(lc, tx, backfill, selectStmt, colParsers, flushThresholdBytes) {
+ async function* stream(lc, tx, backfill, { select, getTotalRows, getTotalBytes }, colParsers, flushThresholdBytes) {
  const start = performance.now();
- lc.info?.(`Starting backfill copy stream:`, selectStmt);
+ const [rows, bytes] = await tx.processReadTask(
+ (sql) => Promise.all([
+ sql.unsafe(getTotalRows),
+ sql.unsafe(getTotalBytes)
+ ])
+ );
+ const status = {
+ rows: 0,
+ totalRows: Number(rows[0].totalRows),
+ totalBytes: Number(bytes[0].totalBytes)
+ };
+ let elapsed = (performance.now() - start).toFixed(3);
+ lc.info?.(`Computed total rows and bytes for: ${select} (${elapsed} ms)`, {
+ status
+ });
  const copyStream = await tx.processReadTask(
- (sql) => sql.unsafe(`COPY (${selectStmt}) TO STDOUT`).readable()
+ (sql) => sql.unsafe(`COPY (${select}) TO STDOUT`).readable()
  );
  const tsvParser = new TsvParser();
- let totalRows = 0;
  let totalBytes = 0;
  let totalMsgs = 0;
  let rowValues = [];
  let bufferedBytes = 0;
  const logFlushed = () => {
  lc.debug?.(
- `Flushed ${rowValues.length} rows, ${bufferedBytes} bytes (total: rows=${totalRows}, msgs=${totalMsgs}, bytes=${totalBytes})`
+ `Flushed ${rowValues.length} rows, ${bufferedBytes} bytes (total: rows=${status.rows}, msgs=${totalMsgs}, bytes=${totalBytes})`
  );
  };
  let row = Array.from({ length: colParsers.length });
@@ -76,7 +89,7 @@ async function* stream(lc, tx, backfill, selectStmt, colParsers, flushThresholdB
  row[col] = text === null ? null : colParsers[col](text);
  if (++col === colParsers.length) {
  rowValues.push(row);
- totalRows++;
+ status.rows++;
  row = Array.from({ length: colParsers.length });
  col = 0;
  }
@@ -84,7 +97,7 @@ async function* stream(lc, tx, backfill, selectStmt, colParsers, flushThresholdB
  bufferedBytes += chunk.byteLength;
  totalBytes += chunk.byteLength;
  if (bufferedBytes >= flushThresholdBytes) {
- yield { tag: "backfill", ...backfill, rowValues };
+ yield { tag: "backfill", ...backfill, rowValues, status };
  totalMsgs++;
  logFlushed();
  rowValues = [];
@@ -92,14 +105,14 @@ async function* stream(lc, tx, backfill, selectStmt, colParsers, flushThresholdB
  }
  }
  if (rowValues.length > 0) {
- yield { tag: "backfill", ...backfill, rowValues };
+ yield { tag: "backfill", ...backfill, rowValues, status };
  totalMsgs++;
  logFlushed();
  }
- yield { tag: "backfill-completed", ...backfill };
- const elapsed = performance.now() - start;
+ yield { tag: "backfill-completed", ...backfill, status };
+ elapsed = (performance.now() - start).toFixed(3);
  lc.info?.(
- `Finished streaming ${totalRows} rows, ${totalMsgs} msgs, ${totalBytes} bytes (${elapsed.toFixed(3)} ms)`
+ `Finished streaming ${status.rows} rows, ${totalMsgs} msgs, ${totalBytes} bytes (${elapsed} ms)`
  );
  }
  async function setSnapshot(lc, upstreamURI, tx, slotNamePrefix) {
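The stream() change above computes the expected table size up front (via the getTotalRows/getTotalBytes statements returned by makeDownloadStatements) and carries a running status of `{rows, totalRows, totalBytes}` on every `backfill` and `backfill-completed` message. A small sketch of how a receiver of those messages could turn that field into a progress readout; the formatting helper is illustrative only:

```ts
// Shape carried in the `status` field of backfill messages, per the diff above.
type DownloadStatus = {
  rows: number;       // rows streamed so far
  totalRows: number;  // total rows computed before the COPY started
  totalBytes: number; // total bytes computed before the COPY started
};

// Hypothetical helper: formats a progress line from a backfill status.
function formatBackfillProgress({rows, totalRows}: DownloadStatus): string {
  // Guard against a zero estimate (e.g. an empty table).
  const pct = totalRows > 0 ? Math.min(100, (100 * rows) / totalRows) : 100;
  return `${rows}/${totalRows} rows (${pct.toFixed(1)}%)`;
}
```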
@@ -1 +1 @@
- {"version":3,"file":"backfill-stream.js","sources":["../../../../../../../zero-cache/src/services/change-source/pg/backfill-stream.ts"],"sourcesContent":["import {\n PG_UNDEFINED_COLUMN,\n PG_UNDEFINED_TABLE,\n} from '@drdgvhbh/postgres-error-codes';\nimport type {LogContext} from '@rocicorp/logger';\nimport postgres from 'postgres';\nimport {equals} from '../../../../../shared/src/set-utils.ts';\nimport * as v from '../../../../../shared/src/valita.ts';\nimport {READONLY} from '../../../db/mode-enum.ts';\nimport {TsvParser} from '../../../db/pg-copy.ts';\nimport {getTypeParsers, type TypeParser} from '../../../db/pg-type-parser.ts';\nimport type {PublishedTableSpec} from '../../../db/specs.ts';\nimport {TransactionPool} from '../../../db/transaction-pool.ts';\nimport {pgClient} from '../../../types/pg.ts';\nimport {SchemaIncompatibilityError} from '../common/backfill-manager.ts';\nimport type {\n BackfillCompleted,\n BackfillRequest,\n JSONValue,\n MessageBackfill,\n} from '../protocol/current.ts';\nimport {\n columnMetadataSchema,\n tableMetadataSchema,\n} from './backfill-metadata.ts';\nimport {\n createReplicationSlot,\n makeSelectPublishedStmt,\n} from './initial-sync.ts';\nimport {toStateVersionString} from './lsn.ts';\nimport {getPublicationInfo} from './schema/published.ts';\nimport type {Replica} from './schema/shard.ts';\n\ntype BackfillParams = Omit<BackfillCompleted, 'tag'>;\n\ntype StreamOptions = {\n /**\n * The number of bytes at which to flush a batch of rows in a\n * backfill message. Defaults to Node's getDefaultHighWatermark().\n */\n flushThresholdBytes?: number;\n};\n\n// The size of chunks that Postgres sends on COPY stream.\n// This happens to match NodeJS's getDefaultHighWatermark()\n// (for Node v20+).\nconst POSTGRES_COPY_CHUNK_SIZE = 64 * 1024;\n\n/**\n * Streams a series of `backfill` messages (ending with `backfill-complete`)\n * at a set watermark (i.e. LSN). 
The data is retrieved via a COPY stream\n * made at a transaction snapshot corresponding to specific LSN, obtained by\n * creating a short-lived replication slot.\n */\nexport async function* streamBackfill(\n lc: LogContext,\n upstreamURI: string,\n {slot, publications}: Pick<Replica, 'slot' | 'publications'>,\n bf: BackfillRequest,\n opts: StreamOptions = {},\n): AsyncGenerator<MessageBackfill | BackfillCompleted> {\n lc = lc\n .withContext('component', 'backfill')\n .withContext('table', bf.table.name);\n\n const {flushThresholdBytes = POSTGRES_COPY_CHUNK_SIZE} = opts;\n const db = pgClient(lc, upstreamURI, {\n connection: {['application_name']: 'backfill-stream'},\n });\n const tx = new TransactionPool(lc, READONLY).run(db);\n try {\n const watermark = await setSnapshot(lc, upstreamURI, tx, slot);\n const {tableSpec, backfill} = await validateSchema(\n tx,\n publications,\n bf,\n watermark,\n );\n const types = await getTypeParsers(db, {returnJsonAsString: true});\n\n // Note: validateSchema ensures that the rowKey and columns are disjoint\n const {relation, columns} = backfill;\n const cols = [...relation.rowKey.columns, ...columns];\n\n yield* stream(\n lc,\n tx,\n backfill,\n makeSelectPublishedStmt(tableSpec, cols),\n cols.map(col => types.getTypeParser(tableSpec.columns[col].typeOID)),\n flushThresholdBytes,\n );\n } catch (e) {\n // Although we make the best effort to validate the schema at the\n // transaction snapshot, certain forms of `ALTER TABLE` are not\n // MVCC safe and not \"frozen\" in the snapshot:\n //\n // https://www.postgresql.org/docs/current/mvcc-caveats.html\n //\n // Handle these errors as schema incompatibility errors rather than\n // unknown runtime errors.\n if (\n e instanceof postgres.PostgresError &&\n (e.code === PG_UNDEFINED_TABLE || e.code === PG_UNDEFINED_COLUMN)\n ) {\n throw new SchemaIncompatibilityError(bf, String(e), {cause: e});\n }\n throw e;\n } finally {\n tx.setDone();\n // errors are already thrown and handled from processReadTask()\n void tx.done().catch(() => {});\n // Workaround postgres.js hanging at the end of some COPY commands:\n // https://github.com/porsager/postgres/issues/499\n void db.end().catch(e => lc.warn?.(`error closing backfill connection`, e));\n }\n}\n\nasync function* stream(\n lc: LogContext,\n tx: TransactionPool,\n backfill: BackfillParams,\n selectStmt: string,\n colParsers: TypeParser[],\n flushThresholdBytes: number,\n): AsyncGenerator<MessageBackfill | BackfillCompleted> {\n const start = performance.now();\n lc.info?.(`Starting backfill copy stream:`, selectStmt);\n const copyStream = await tx.processReadTask(sql =>\n sql.unsafe(`COPY (${selectStmt}) TO STDOUT`).readable(),\n );\n\n const tsvParser = new TsvParser();\n let totalRows = 0;\n let totalBytes = 0;\n let totalMsgs = 0;\n let rowValues: JSONValue[][] = [];\n let bufferedBytes = 0;\n\n const logFlushed = () => {\n lc.debug?.(\n `Flushed ${rowValues.length} rows, ${bufferedBytes} bytes ` +\n `(total: rows=${totalRows}, msgs=${totalMsgs}, bytes=${totalBytes})`,\n );\n };\n\n // Tracks the row being parsed.\n let row: JSONValue[] = Array.from({length: colParsers.length});\n let col = 0;\n\n for await (const data of copyStream) {\n const chunk = data as Buffer;\n for (const text of tsvParser.parse(chunk)) {\n row[col] = text === null ? 
null : (colParsers[col](text) as JSONValue);\n\n if (++col === colParsers.length) {\n rowValues.push(row);\n totalRows++;\n row = Array.from({length: colParsers.length});\n col = 0;\n }\n }\n bufferedBytes += chunk.byteLength;\n totalBytes += chunk.byteLength;\n\n if (bufferedBytes >= flushThresholdBytes) {\n yield {tag: 'backfill', ...backfill, rowValues};\n totalMsgs++;\n logFlushed();\n rowValues = [];\n bufferedBytes = 0;\n }\n }\n\n // Flush the last batch of rows.\n if (rowValues.length > 0) {\n yield {tag: 'backfill', ...backfill, rowValues};\n totalMsgs++;\n logFlushed();\n }\n\n yield {tag: 'backfill-completed', ...backfill};\n const elapsed = performance.now() - start;\n lc.info?.(\n `Finished streaming ${totalRows} rows, ${totalMsgs} msgs, ${totalBytes} bytes ` +\n `(${elapsed.toFixed(3)} ms)`,\n );\n}\n\n/**\n * Creates (and drops) a replication slot in order to obtain a snapshot\n * that corresponds with a specific LSN. Sets the snapshot on the\n * TransactionPool and returns the watermark corresponding to the LSN.\n *\n * (Note that PG's other LSN-related functions are not scoped to a\n * transaction; this is the only way to get set a transaction at a specific\n * LSN.)\n */\nasync function setSnapshot(\n lc: LogContext,\n upstreamURI: string,\n tx: TransactionPool,\n slotNamePrefix: string,\n) {\n const replicationSession = pgClient(lc, upstreamURI, {\n ['fetch_types']: false, // Necessary for the streaming protocol\n connection: {replication: 'database'}, // https://www.postgresql.org/docs/current/protocol-replication.html\n });\n const tempSlot = `${slotNamePrefix}_bf_${Date.now()}`;\n try {\n const {snapshot_name: snapshot, consistent_point: lsn} =\n await createReplicationSlot(lc, replicationSession, tempSlot);\n\n await tx.processReadTask(sql =>\n sql.unsafe(`SET TRANSACTION SNAPSHOT '${snapshot}'`),\n );\n // Once the snapshot has been set, the replication session and slot can\n // be closed / dropped.\n await replicationSession.unsafe(`DROP_REPLICATION_SLOT \"${tempSlot}\"`);\n\n const watermark = toStateVersionString(lsn);\n lc.info?.(`Opened snapshot transaction at LSN ${lsn} (${watermark})`);\n return watermark;\n } catch (e) {\n // In the event of a failure, clean up the replication slot if created.\n await replicationSession.unsafe(\n /*sql*/\n `SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots\n WHERE slot_name = '${tempSlot}'`,\n );\n lc.error?.(`Failed to create backfill snapshot`, e);\n throw e;\n } finally {\n await replicationSession.end();\n }\n}\n\nfunction validateSchema(\n tx: TransactionPool,\n publications: string[],\n bf: BackfillRequest,\n watermark: string,\n): Promise<{\n tableSpec: PublishedTableSpec;\n backfill: BackfillParams;\n}> {\n return tx.processReadTask(async sql => {\n const {tables} = await getPublicationInfo(sql, publications);\n const spec = tables.find(\n spec => spec.schema === bf.table.schema && spec.name === bf.table.name,\n );\n if (!spec) {\n throw new SchemaIncompatibilityError(\n bf,\n `Table has been renamed or dropped`,\n );\n }\n const tableMeta = v.parse(bf.table.metadata, tableMetadataSchema);\n if (spec.schemaOID !== tableMeta.schemaOID) {\n throw new SchemaIncompatibilityError(\n bf,\n `Schema no longer corresponds to the original schema`,\n );\n }\n if (spec.oid !== tableMeta.relationOID) {\n throw new SchemaIncompatibilityError(\n bf,\n `Table no longer corresponds to the original table`,\n );\n }\n if (\n !equals(\n new Set(Object.keys(tableMeta.rowKey)),\n new Set(spec.replicaIdentityColumns),\n 
)\n ) {\n throw new SchemaIncompatibilityError(\n bf,\n 'Row key (e.g. PRIMARY KEY or INDEX) has changed',\n );\n }\n const allCols = [\n ...Object.entries(tableMeta.rowKey),\n ...Object.entries(bf.columns),\n ];\n for (const [col, val] of allCols) {\n const colSpec = spec.columns[col];\n if (!colSpec) {\n throw new SchemaIncompatibilityError(\n bf,\n `Column ${col} has been renamed or dropped`,\n );\n }\n const colMeta = v.parse(val, columnMetadataSchema);\n if (colMeta.attNum !== colSpec.pos) {\n throw new SchemaIncompatibilityError(\n bf,\n `Column ${col} no longer corresponds to the original column`,\n );\n }\n }\n const backfill: BackfillParams = {\n relation: {\n schema: bf.table.schema,\n name: bf.table.name,\n rowKey: {columns: Object.keys(tableMeta.rowKey)},\n },\n columns: Object.keys(bf.columns).filter(\n col => !(col in tableMeta.rowKey),\n ),\n watermark,\n };\n return {tableSpec: spec, backfill};\n });\n}\n"],"names":["spec","v.parse"],"mappings":";;;;;;;;;;;;;;AA8CA,MAAM,2BAA2B,KAAK;AAQtC,gBAAuB,eACrB,IACA,aACA,EAAC,MAAM,gBACP,IACA,OAAsB,IAC+B;AACrD,OAAK,GACF,YAAY,aAAa,UAAU,EACnC,YAAY,SAAS,GAAG,MAAM,IAAI;AAErC,QAAM,EAAC,sBAAsB,yBAAA,IAA4B;AACzD,QAAM,KAAK,SAAS,IAAI,aAAa;AAAA,IACnC,YAAY,EAAC,CAAC,kBAAkB,GAAG,kBAAA;AAAA,EAAiB,CACrD;AACD,QAAM,KAAK,IAAI,gBAAgB,IAAI,QAAQ,EAAE,IAAI,EAAE;AACnD,MAAI;AACF,UAAM,YAAY,MAAM,YAAY,IAAI,aAAa,IAAI,IAAI;AAC7D,UAAM,EAAC,WAAW,SAAA,IAAY,MAAM;AAAA,MAClC;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IAAA;AAEF,UAAM,QAAQ,MAAM,eAAe,IAAI,EAAC,oBAAoB,MAAK;AAGjE,UAAM,EAAC,UAAU,QAAA,IAAW;AAC5B,UAAM,OAAO,CAAC,GAAG,SAAS,OAAO,SAAS,GAAG,OAAO;AAEpD,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,wBAAwB,WAAW,IAAI;AAAA,MACvC,KAAK,IAAI,CAAA,QAAO,MAAM,cAAc,UAAU,QAAQ,GAAG,EAAE,OAAO,CAAC;AAAA,MACnE;AAAA,IAAA;AAAA,EAEJ,SAAS,GAAG;AASV,QACE,aAAa,SAAS,kBACrB,EAAE,SAAS,sBAAsB,EAAE,SAAS,sBAC7C;AACA,YAAM,IAAI,2BAA2B,IAAI,OAAO,CAAC,GAAG,EAAC,OAAO,GAAE;AAAA,IAChE;AACA,UAAM;AAAA,EACR,UAAA;AACE,OAAG,QAAA;AAEH,SAAK,GAAG,OAAO,MAAM,MAAM;AAAA,IAAC,CAAC;AAG7B,SAAK,GAAG,MAAM,MAAM,OAAK,GAAG,OAAO,qCAAqC,CAAC,CAAC;AAAA,EAC5E;AACF;AAEA,gBAAgB,OACd,IACA,IACA,UACA,YACA,YACA,qBACqD;AACrD,QAAM,QAAQ,YAAY,IAAA;AAC1B,KAAG,OAAO,kCAAkC,UAAU;AACtD,QAAM,aAAa,MAAM,GAAG;AAAA,IAAgB,SAC1C,IAAI,OAAO,SAAS,UAAU,aAAa,EAAE,SAAA;AAAA,EAAS;AAGxD,QAAM,YAAY,IAAI,UAAA;AACtB,MAAI,YAAY;AAChB,MAAI,aAAa;AACjB,MAAI,YAAY;AAChB,MAAI,YAA2B,CAAA;AAC/B,MAAI,gBAAgB;AAEpB,QAAM,aAAa,MAAM;AACvB,OAAG;AAAA,MACD,WAAW,UAAU,MAAM,UAAU,aAAa,uBAChC,SAAS,UAAU,SAAS,WAAW,UAAU;AAAA,IAAA;AAAA,EAEvE;AAGA,MAAI,MAAmB,MAAM,KAAK,EAAC,QAAQ,WAAW,QAAO;AAC7D,MAAI,MAAM;AAEV,mBAAiB,QAAQ,YAAY;AACnC,UAAM,QAAQ;AACd,eAAW,QAAQ,UAAU,MAAM,KAAK,GAAG;AACzC,UAAI,GAAG,IAAI,SAAS,OAAO,OAAQ,WAAW,GAAG,EAAE,IAAI;AAEvD,UAAI,EAAE,QAAQ,WAAW,QAAQ;AAC/B,kBAAU,KAAK,GAAG;AAClB;AACA,cAAM,MAAM,KAAK,EAAC,QAAQ,WAAW,QAAO;AAC5C,cAAM;AAAA,MACR;AAAA,IACF;AACA,qBAAiB,MAAM;AACvB,kBAAc,MAAM;AAEpB,QAAI,iBAAiB,qBAAqB;AACxC,YAAM,EAAC,KAAK,YAAY,GAAG,UAAU,UAAA;AACrC;AACA,iBAAA;AACA,kBAAY,CAAA;AACZ,sBAAgB;AAAA,IAClB;AAAA,EACF;AAGA,MAAI,UAAU,SAAS,GAAG;AACxB,UAAM,EAAC,KAAK,YAAY,GAAG,UAAU,UAAA;AACrC;AACA,eAAA;AAAA,EACF;AAEA,QAAM,EAAC,KAAK,sBAAsB,GAAG,SAAA;AACrC,QAAM,UAAU,YAAY,IAAA,IAAQ;AACpC,KAAG;AAAA,IACD,sBAAsB,SAAS,UAAU,SAAS,UAAU,UAAU,WAChE,QAAQ,QAAQ,CAAC,CAAC;AAAA,EAAA;AAE5B;AAWA,eAAe,YACb,IACA,aACA,IACA,gBACA;AACA,QAAM,qBAAqB,SAAS,IAAI,aAAa;AAAA,IACnD,CAAC,aAAa,GAAG;AAAA;AAAA,IACjB,YAAY,EAAC,aAAa,WAAA;AAAA;AAAA,EAAU,CACrC;AACD,QAAM,WAAW,GAAG,cAAc,OAAO,KAAK,KAAK;AACnD,MAAI;AACF,UAAM,EAAC,eAAe,UAAU,kBAAkB,QAChD,MAAM,sBAAsB,IAAI,oBAAoB,QAAQ;AAE9D,UAAM,GAAG;AAAA,MAAgB,CAAA,QACvB,IAAI,OAAO,6BAA6B,QAAQ,GAA
G;AAAA,IAAA;AAIrD,UAAM,mBAAmB,OAAO,0BAA0B,QAAQ,GAAG;AAErE,UAAM,YAAY,qBAAqB,GAAG;AAC1C,OAAG,OAAO,sCAAsC,GAAG,KAAK,SAAS,GAAG;AACpE,WAAO;AAAA,EACT,SAAS,GAAG;AAEV,UAAM,mBAAmB;AAAA;AAAA,MAEvB;AAAA,8BACwB,QAAQ;AAAA,IAAA;AAElC,OAAG,QAAQ,sCAAsC,CAAC;AAClD,UAAM;AAAA,EACR,UAAA;AACE,UAAM,mBAAmB,IAAA;AAAA,EAC3B;AACF;AAEA,SAAS,eACP,IACA,cACA,IACA,WAIC;AACD,SAAO,GAAG,gBAAgB,OAAM,QAAO;AACrC,UAAM,EAAC,OAAA,IAAU,MAAM,mBAAmB,KAAK,YAAY;AAC3D,UAAM,OAAO,OAAO;AAAA,MAClB,CAAAA,UAAQA,MAAK,WAAW,GAAG,MAAM,UAAUA,MAAK,SAAS,GAAG,MAAM;AAAA,IAAA;AAEpE,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,YAAYC,MAAQ,GAAG,MAAM,UAAU,mBAAmB;AAChE,QAAI,KAAK,cAAc,UAAU,WAAW;AAC1C,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,QAAI,KAAK,QAAQ,UAAU,aAAa;AACtC,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,QACE,CAAC;AAAA,MACC,IAAI,IAAI,OAAO,KAAK,UAAU,MAAM,CAAC;AAAA,MACrC,IAAI,IAAI,KAAK,sBAAsB;AAAA,IAAA,GAErC;AACA,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,UAAU;AAAA,MACd,GAAG,OAAO,QAAQ,UAAU,MAAM;AAAA,MAClC,GAAG,OAAO,QAAQ,GAAG,OAAO;AAAA,IAAA;AAE9B,eAAW,CAAC,KAAK,GAAG,KAAK,SAAS;AAChC,YAAM,UAAU,KAAK,QAAQ,GAAG;AAChC,UAAI,CAAC,SAAS;AACZ,cAAM,IAAI;AAAA,UACR;AAAA,UACA,UAAU,GAAG;AAAA,QAAA;AAAA,MAEjB;AACA,YAAM,UAAUA,MAAQ,KAAK,oBAAoB;AACjD,UAAI,QAAQ,WAAW,QAAQ,KAAK;AAClC,cAAM,IAAI;AAAA,UACR;AAAA,UACA,UAAU,GAAG;AAAA,QAAA;AAAA,MAEjB;AAAA,IACF;AACA,UAAM,WAA2B;AAAA,MAC/B,UAAU;AAAA,QACR,QAAQ,GAAG,MAAM;AAAA,QACjB,MAAM,GAAG,MAAM;AAAA,QACf,QAAQ,EAAC,SAAS,OAAO,KAAK,UAAU,MAAM,EAAA;AAAA,MAAC;AAAA,MAEjD,SAAS,OAAO,KAAK,GAAG,OAAO,EAAE;AAAA,QAC/B,CAAA,QAAO,EAAE,OAAO,UAAU;AAAA,MAAA;AAAA,MAE5B;AAAA,IAAA;AAEF,WAAO,EAAC,WAAW,MAAM,SAAA;AAAA,EAC3B,CAAC;AACH;"}
+ {"version":3,"file":"backfill-stream.js","sources":["../../../../../../../zero-cache/src/services/change-source/pg/backfill-stream.ts"],"sourcesContent":["import {\n PG_UNDEFINED_COLUMN,\n PG_UNDEFINED_TABLE,\n} from '@drdgvhbh/postgres-error-codes';\nimport type {LogContext} from '@rocicorp/logger';\nimport postgres from 'postgres';\nimport {equals} from '../../../../../shared/src/set-utils.ts';\nimport * as v from '../../../../../shared/src/valita.ts';\nimport {READONLY} from '../../../db/mode-enum.ts';\nimport {TsvParser} from '../../../db/pg-copy.ts';\nimport {getTypeParsers, type TypeParser} from '../../../db/pg-type-parser.ts';\nimport type {PublishedTableSpec} from '../../../db/specs.ts';\nimport {TransactionPool} from '../../../db/transaction-pool.ts';\nimport {pgClient} from '../../../types/pg.ts';\nimport {SchemaIncompatibilityError} from '../common/backfill-manager.ts';\nimport type {\n BackfillCompleted,\n BackfillRequest,\n DownloadStatus,\n JSONValue,\n MessageBackfill,\n} from '../protocol/current.ts';\nimport {\n columnMetadataSchema,\n tableMetadataSchema,\n} from './backfill-metadata.ts';\nimport {\n createReplicationSlot,\n makeDownloadStatements,\n type DownloadStatements,\n} from './initial-sync.ts';\nimport {toStateVersionString} from './lsn.ts';\nimport {getPublicationInfo} from './schema/published.ts';\nimport type {Replica} from './schema/shard.ts';\n\ntype BackfillParams = Omit<BackfillCompleted, 'tag'>;\n\ntype StreamOptions = {\n /**\n * The number of bytes at which to flush a batch of rows in a\n * backfill message. Defaults to Node's getDefaultHighWatermark().\n */\n flushThresholdBytes?: number;\n};\n\n// The size of chunks that Postgres sends on COPY stream.\n// This happens to match NodeJS's getDefaultHighWatermark()\n// (for Node v20+).\nconst POSTGRES_COPY_CHUNK_SIZE = 64 * 1024;\n\n/**\n * Streams a series of `backfill` messages (ending with `backfill-complete`)\n * at a set watermark (i.e. LSN). 
The data is retrieved via a COPY stream\n * made at a transaction snapshot corresponding to specific LSN, obtained by\n * creating a short-lived replication slot.\n */\nexport async function* streamBackfill(\n lc: LogContext,\n upstreamURI: string,\n {slot, publications}: Pick<Replica, 'slot' | 'publications'>,\n bf: BackfillRequest,\n opts: StreamOptions = {},\n): AsyncGenerator<MessageBackfill | BackfillCompleted> {\n lc = lc\n .withContext('component', 'backfill')\n .withContext('table', bf.table.name);\n\n const {flushThresholdBytes = POSTGRES_COPY_CHUNK_SIZE} = opts;\n const db = pgClient(lc, upstreamURI, {\n connection: {['application_name']: 'backfill-stream'},\n });\n const tx = new TransactionPool(lc, READONLY).run(db);\n try {\n const watermark = await setSnapshot(lc, upstreamURI, tx, slot);\n const {tableSpec, backfill} = await validateSchema(\n tx,\n publications,\n bf,\n watermark,\n );\n const types = await getTypeParsers(db, {returnJsonAsString: true});\n\n // Note: validateSchema ensures that the rowKey and columns are disjoint\n const {relation, columns} = backfill;\n const cols = [...relation.rowKey.columns, ...columns];\n\n yield* stream(\n lc,\n tx,\n backfill,\n makeDownloadStatements(tableSpec, cols),\n cols.map(col => types.getTypeParser(tableSpec.columns[col].typeOID)),\n flushThresholdBytes,\n );\n } catch (e) {\n // Although we make the best effort to validate the schema at the\n // transaction snapshot, certain forms of `ALTER TABLE` are not\n // MVCC safe and not \"frozen\" in the snapshot:\n //\n // https://www.postgresql.org/docs/current/mvcc-caveats.html\n //\n // Handle these errors as schema incompatibility errors rather than\n // unknown runtime errors.\n if (\n e instanceof postgres.PostgresError &&\n (e.code === PG_UNDEFINED_TABLE || e.code === PG_UNDEFINED_COLUMN)\n ) {\n throw new SchemaIncompatibilityError(bf, String(e), {cause: e});\n }\n throw e;\n } finally {\n tx.setDone();\n // errors are already thrown and handled from processReadTask()\n void tx.done().catch(() => {});\n // Workaround postgres.js hanging at the end of some COPY commands:\n // https://github.com/porsager/postgres/issues/499\n void db.end().catch(e => lc.warn?.(`error closing backfill connection`, e));\n }\n}\n\nasync function* stream(\n lc: LogContext,\n tx: TransactionPool,\n backfill: BackfillParams,\n {select, getTotalRows, getTotalBytes}: DownloadStatements,\n colParsers: TypeParser[],\n flushThresholdBytes: number,\n): AsyncGenerator<MessageBackfill | BackfillCompleted> {\n const start = performance.now();\n const [rows, bytes] = await tx.processReadTask(sql =>\n Promise.all([\n sql.unsafe<{totalRows: bigint}[]>(getTotalRows),\n sql.unsafe<{totalBytes: bigint}[]>(getTotalBytes),\n ]),\n );\n const status: DownloadStatus = {\n rows: 0,\n totalRows: Number(rows[0].totalRows),\n totalBytes: Number(bytes[0].totalBytes),\n };\n\n let elapsed = (performance.now() - start).toFixed(3);\n lc.info?.(`Computed total rows and bytes for: ${select} (${elapsed} ms)`, {\n status,\n });\n const copyStream = await tx.processReadTask(sql =>\n sql.unsafe(`COPY (${select}) TO STDOUT`).readable(),\n );\n\n const tsvParser = new TsvParser();\n let totalBytes = 0;\n let totalMsgs = 0;\n let rowValues: JSONValue[][] = [];\n let bufferedBytes = 0;\n\n const logFlushed = () => {\n lc.debug?.(\n `Flushed ${rowValues.length} rows, ${bufferedBytes} bytes ` +\n `(total: rows=${status.rows}, msgs=${totalMsgs}, bytes=${totalBytes})`,\n );\n };\n\n // Tracks the row being parsed.\n let row: JSONValue[] = 
Array.from({length: colParsers.length});\n let col = 0;\n\n for await (const data of copyStream) {\n const chunk = data as Buffer;\n for (const text of tsvParser.parse(chunk)) {\n row[col] = text === null ? null : (colParsers[col](text) as JSONValue);\n\n if (++col === colParsers.length) {\n rowValues.push(row);\n status.rows++;\n row = Array.from({length: colParsers.length});\n col = 0;\n }\n }\n bufferedBytes += chunk.byteLength;\n totalBytes += chunk.byteLength;\n\n if (bufferedBytes >= flushThresholdBytes) {\n yield {tag: 'backfill', ...backfill, rowValues, status};\n totalMsgs++;\n logFlushed();\n rowValues = [];\n bufferedBytes = 0;\n }\n }\n\n // Flush the last batch of rows.\n if (rowValues.length > 0) {\n yield {tag: 'backfill', ...backfill, rowValues, status};\n totalMsgs++;\n logFlushed();\n }\n\n yield {tag: 'backfill-completed', ...backfill, status};\n elapsed = (performance.now() - start).toFixed(3);\n lc.info?.(\n `Finished streaming ${status.rows} rows, ${totalMsgs} msgs, ${totalBytes} bytes ` +\n `(${elapsed} ms)`,\n );\n}\n\n/**\n * Creates (and drops) a replication slot in order to obtain a snapshot\n * that corresponds with a specific LSN. Sets the snapshot on the\n * TransactionPool and returns the watermark corresponding to the LSN.\n *\n * (Note that PG's other LSN-related functions are not scoped to a\n * transaction; this is the only way to get set a transaction at a specific\n * LSN.)\n */\nasync function setSnapshot(\n lc: LogContext,\n upstreamURI: string,\n tx: TransactionPool,\n slotNamePrefix: string,\n) {\n const replicationSession = pgClient(lc, upstreamURI, {\n ['fetch_types']: false, // Necessary for the streaming protocol\n connection: {replication: 'database'}, // https://www.postgresql.org/docs/current/protocol-replication.html\n });\n const tempSlot = `${slotNamePrefix}_bf_${Date.now()}`;\n try {\n const {snapshot_name: snapshot, consistent_point: lsn} =\n await createReplicationSlot(lc, replicationSession, tempSlot);\n\n await tx.processReadTask(sql =>\n sql.unsafe(`SET TRANSACTION SNAPSHOT '${snapshot}'`),\n );\n // Once the snapshot has been set, the replication session and slot can\n // be closed / dropped.\n await replicationSession.unsafe(`DROP_REPLICATION_SLOT \"${tempSlot}\"`);\n\n const watermark = toStateVersionString(lsn);\n lc.info?.(`Opened snapshot transaction at LSN ${lsn} (${watermark})`);\n return watermark;\n } catch (e) {\n // In the event of a failure, clean up the replication slot if created.\n await replicationSession.unsafe(\n /*sql*/\n `SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots\n WHERE slot_name = '${tempSlot}'`,\n );\n lc.error?.(`Failed to create backfill snapshot`, e);\n throw e;\n } finally {\n await replicationSession.end();\n }\n}\n\nfunction validateSchema(\n tx: TransactionPool,\n publications: string[],\n bf: BackfillRequest,\n watermark: string,\n): Promise<{\n tableSpec: PublishedTableSpec;\n backfill: BackfillParams;\n}> {\n return tx.processReadTask(async sql => {\n const {tables} = await getPublicationInfo(sql, publications);\n const spec = tables.find(\n spec => spec.schema === bf.table.schema && spec.name === bf.table.name,\n );\n if (!spec) {\n throw new SchemaIncompatibilityError(\n bf,\n `Table has been renamed or dropped`,\n );\n }\n const tableMeta = v.parse(bf.table.metadata, tableMetadataSchema);\n if (spec.schemaOID !== tableMeta.schemaOID) {\n throw new SchemaIncompatibilityError(\n bf,\n `Schema no longer corresponds to the original schema`,\n );\n }\n if (spec.oid !== 
tableMeta.relationOID) {\n throw new SchemaIncompatibilityError(\n bf,\n `Table no longer corresponds to the original table`,\n );\n }\n if (\n !equals(\n new Set(Object.keys(tableMeta.rowKey)),\n new Set(spec.replicaIdentityColumns),\n )\n ) {\n throw new SchemaIncompatibilityError(\n bf,\n 'Row key (e.g. PRIMARY KEY or INDEX) has changed',\n );\n }\n const allCols = [\n ...Object.entries(tableMeta.rowKey),\n ...Object.entries(bf.columns),\n ];\n for (const [col, val] of allCols) {\n const colSpec = spec.columns[col];\n if (!colSpec) {\n throw new SchemaIncompatibilityError(\n bf,\n `Column ${col} has been renamed or dropped`,\n );\n }\n const colMeta = v.parse(val, columnMetadataSchema);\n if (colMeta.attNum !== colSpec.pos) {\n throw new SchemaIncompatibilityError(\n bf,\n `Column ${col} no longer corresponds to the original column`,\n );\n }\n }\n const backfill: BackfillParams = {\n relation: {\n schema: bf.table.schema,\n name: bf.table.name,\n rowKey: {columns: Object.keys(tableMeta.rowKey)},\n },\n columns: Object.keys(bf.columns).filter(\n col => !(col in tableMeta.rowKey),\n ),\n watermark,\n };\n return {tableSpec: spec, backfill};\n });\n}\n"],"names":["spec","v.parse"],"mappings":";;;;;;;;;;;;;;AAgDA,MAAM,2BAA2B,KAAK;AAQtC,gBAAuB,eACrB,IACA,aACA,EAAC,MAAM,gBACP,IACA,OAAsB,IAC+B;AACrD,OAAK,GACF,YAAY,aAAa,UAAU,EACnC,YAAY,SAAS,GAAG,MAAM,IAAI;AAErC,QAAM,EAAC,sBAAsB,yBAAA,IAA4B;AACzD,QAAM,KAAK,SAAS,IAAI,aAAa;AAAA,IACnC,YAAY,EAAC,CAAC,kBAAkB,GAAG,kBAAA;AAAA,EAAiB,CACrD;AACD,QAAM,KAAK,IAAI,gBAAgB,IAAI,QAAQ,EAAE,IAAI,EAAE;AACnD,MAAI;AACF,UAAM,YAAY,MAAM,YAAY,IAAI,aAAa,IAAI,IAAI;AAC7D,UAAM,EAAC,WAAW,SAAA,IAAY,MAAM;AAAA,MAClC;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IAAA;AAEF,UAAM,QAAQ,MAAM,eAAe,IAAI,EAAC,oBAAoB,MAAK;AAGjE,UAAM,EAAC,UAAU,QAAA,IAAW;AAC5B,UAAM,OAAO,CAAC,GAAG,SAAS,OAAO,SAAS,GAAG,OAAO;AAEpD,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,uBAAuB,WAAW,IAAI;AAAA,MACtC,KAAK,IAAI,CAAA,QAAO,MAAM,cAAc,UAAU,QAAQ,GAAG,EAAE,OAAO,CAAC;AAAA,MACnE;AAAA,IAAA;AAAA,EAEJ,SAAS,GAAG;AASV,QACE,aAAa,SAAS,kBACrB,EAAE,SAAS,sBAAsB,EAAE,SAAS,sBAC7C;AACA,YAAM,IAAI,2BAA2B,IAAI,OAAO,CAAC,GAAG,EAAC,OAAO,GAAE;AAAA,IAChE;AACA,UAAM;AAAA,EACR,UAAA;AACE,OAAG,QAAA;AAEH,SAAK,GAAG,OAAO,MAAM,MAAM;AAAA,IAAC,CAAC;AAG7B,SAAK,GAAG,MAAM,MAAM,OAAK,GAAG,OAAO,qCAAqC,CAAC,CAAC;AAAA,EAC5E;AACF;AAEA,gBAAgB,OACd,IACA,IACA,UACA,EAAC,QAAQ,cAAc,cAAA,GACvB,YACA,qBACqD;AACrD,QAAM,QAAQ,YAAY,IAAA;AAC1B,QAAM,CAAC,MAAM,KAAK,IAAI,MAAM,GAAG;AAAA,IAAgB,CAAA,QAC7C,QAAQ,IAAI;AAAA,MACV,IAAI,OAA8B,YAAY;AAAA,MAC9C,IAAI,OAA+B,aAAa;AAAA,IAAA,CACjD;AAAA,EAAA;AAEH,QAAM,SAAyB;AAAA,IAC7B,MAAM;AAAA,IACN,WAAW,OAAO,KAAK,CAAC,EAAE,SAAS;AAAA,IACnC,YAAY,OAAO,MAAM,CAAC,EAAE,UAAU;AAAA,EAAA;AAGxC,MAAI,WAAW,YAAY,IAAA,IAAQ,OAAO,QAAQ,CAAC;AACnD,KAAG,OAAO,sCAAsC,MAAM,KAAK,OAAO,QAAQ;AAAA,IACxE;AAAA,EAAA,CACD;AACD,QAAM,aAAa,MAAM,GAAG;AAAA,IAAgB,SAC1C,IAAI,OAAO,SAAS,MAAM,aAAa,EAAE,SAAA;AAAA,EAAS;AAGpD,QAAM,YAAY,IAAI,UAAA;AACtB,MAAI,aAAa;AACjB,MAAI,YAAY;AAChB,MAAI,YAA2B,CAAA;AAC/B,MAAI,gBAAgB;AAEpB,QAAM,aAAa,MAAM;AACvB,OAAG;AAAA,MACD,WAAW,UAAU,MAAM,UAAU,aAAa,uBAChC,OAAO,IAAI,UAAU,SAAS,WAAW,UAAU;AAAA,IAAA;AAAA,EAEzE;AAGA,MAAI,MAAmB,MAAM,KAAK,EAAC,QAAQ,WAAW,QAAO;AAC7D,MAAI,MAAM;AAEV,mBAAiB,QAAQ,YAAY;AACnC,UAAM,QAAQ;AACd,eAAW,QAAQ,UAAU,MAAM,KAAK,GAAG;AACzC,UAAI,GAAG,IAAI,SAAS,OAAO,OAAQ,WAAW,GAAG,EAAE,IAAI;AAEvD,UAAI,EAAE,QAAQ,WAAW,QAAQ;AAC/B,kBAAU,KAAK,GAAG;AAClB,eAAO;AACP,cAAM,MAAM,KAAK,EAAC,QAAQ,WAAW,QAAO;AAC5C,cAAM;AAAA,MACR;AAAA,IACF;AACA,qBAAiB,MAAM;AACvB,kBAAc,MAAM;AAEpB,QAAI,iBAAiB,qBAAqB;AACxC,YAAM,EAAC,KAAK,YAAY,GAAG,UAAU,WAAW,OAAA;AAChD;AACA,iBAAA;AACA,kBAAY,CAAA;AA
CZ,sBAAgB;AAAA,IAClB;AAAA,EACF;AAGA,MAAI,UAAU,SAAS,GAAG;AACxB,UAAM,EAAC,KAAK,YAAY,GAAG,UAAU,WAAW,OAAA;AAChD;AACA,eAAA;AAAA,EACF;AAEA,QAAM,EAAC,KAAK,sBAAsB,GAAG,UAAU,OAAA;AAC/C,aAAW,YAAY,IAAA,IAAQ,OAAO,QAAQ,CAAC;AAC/C,KAAG;AAAA,IACD,sBAAsB,OAAO,IAAI,UAAU,SAAS,UAAU,UAAU,WAClE,OAAO;AAAA,EAAA;AAEjB;AAWA,eAAe,YACb,IACA,aACA,IACA,gBACA;AACA,QAAM,qBAAqB,SAAS,IAAI,aAAa;AAAA,IACnD,CAAC,aAAa,GAAG;AAAA;AAAA,IACjB,YAAY,EAAC,aAAa,WAAA;AAAA;AAAA,EAAU,CACrC;AACD,QAAM,WAAW,GAAG,cAAc,OAAO,KAAK,KAAK;AACnD,MAAI;AACF,UAAM,EAAC,eAAe,UAAU,kBAAkB,QAChD,MAAM,sBAAsB,IAAI,oBAAoB,QAAQ;AAE9D,UAAM,GAAG;AAAA,MAAgB,CAAA,QACvB,IAAI,OAAO,6BAA6B,QAAQ,GAAG;AAAA,IAAA;AAIrD,UAAM,mBAAmB,OAAO,0BAA0B,QAAQ,GAAG;AAErE,UAAM,YAAY,qBAAqB,GAAG;AAC1C,OAAG,OAAO,sCAAsC,GAAG,KAAK,SAAS,GAAG;AACpE,WAAO;AAAA,EACT,SAAS,GAAG;AAEV,UAAM,mBAAmB;AAAA;AAAA,MAEvB;AAAA,8BACwB,QAAQ;AAAA,IAAA;AAElC,OAAG,QAAQ,sCAAsC,CAAC;AAClD,UAAM;AAAA,EACR,UAAA;AACE,UAAM,mBAAmB,IAAA;AAAA,EAC3B;AACF;AAEA,SAAS,eACP,IACA,cACA,IACA,WAIC;AACD,SAAO,GAAG,gBAAgB,OAAM,QAAO;AACrC,UAAM,EAAC,OAAA,IAAU,MAAM,mBAAmB,KAAK,YAAY;AAC3D,UAAM,OAAO,OAAO;AAAA,MAClB,CAAAA,UAAQA,MAAK,WAAW,GAAG,MAAM,UAAUA,MAAK,SAAS,GAAG,MAAM;AAAA,IAAA;AAEpE,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,YAAYC,MAAQ,GAAG,MAAM,UAAU,mBAAmB;AAChE,QAAI,KAAK,cAAc,UAAU,WAAW;AAC1C,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,QAAI,KAAK,QAAQ,UAAU,aAAa;AACtC,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,QACE,CAAC;AAAA,MACC,IAAI,IAAI,OAAO,KAAK,UAAU,MAAM,CAAC;AAAA,MACrC,IAAI,IAAI,KAAK,sBAAsB;AAAA,IAAA,GAErC;AACA,YAAM,IAAI;AAAA,QACR;AAAA,QACA;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,UAAU;AAAA,MACd,GAAG,OAAO,QAAQ,UAAU,MAAM;AAAA,MAClC,GAAG,OAAO,QAAQ,GAAG,OAAO;AAAA,IAAA;AAE9B,eAAW,CAAC,KAAK,GAAG,KAAK,SAAS;AAChC,YAAM,UAAU,KAAK,QAAQ,GAAG;AAChC,UAAI,CAAC,SAAS;AACZ,cAAM,IAAI;AAAA,UACR;AAAA,UACA,UAAU,GAAG;AAAA,QAAA;AAAA,MAEjB;AACA,YAAM,UAAUA,MAAQ,KAAK,oBAAoB;AACjD,UAAI,QAAQ,WAAW,QAAQ,KAAK;AAClC,cAAM,IAAI;AAAA,UACR;AAAA,UACA,UAAU,GAAG;AAAA,QAAA;AAAA,MAEjB;AAAA,IACF;AACA,UAAM,WAA2B;AAAA,MAC/B,UAAU;AAAA,QACR,QAAQ,GAAG,MAAM;AAAA,QACjB,MAAM,GAAG,MAAM;AAAA,QACf,QAAQ,EAAC,SAAS,OAAO,KAAK,UAAU,MAAM,EAAA;AAAA,MAAC;AAAA,MAEjD,SAAS,OAAO,KAAK,GAAG,OAAO,EAAE;AAAA,QAC/B,CAAA,QAAO,EAAE,OAAO,UAAU;AAAA,MAAA;AAAA,MAE5B;AAAA,IAAA;AAEF,WAAO,EAAC,WAAW,MAAM,SAAA;AAAA,EAC3B,CAAC;AACH;"}
@@ -19,6 +19,11 @@ type ReplicationSlot = {
  };
  export declare function createReplicationSlot(lc: LogContext, session: postgres.Sql, slotName: string): Promise<ReplicationSlot>;
  export declare const INSERT_BATCH_SIZE = 50;
- export declare function makeSelectPublishedStmt(table: PublishedTableSpec, columns: string[]): string;
+ export type DownloadStatements = {
+ select: string;
+ getTotalRows: string;
+ getTotalBytes: string;
+ };
+ export declare function makeDownloadStatements(table: PublishedTableSpec, cols: string[]): DownloadStatements;
  export {};
  //# sourceMappingURL=initial-sync.d.ts.map
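Note: the declaration change above replaces the single select-statement helper with `makeDownloadStatements`, which returns three SQL strings per table. A minimal TypeScript sketch of that shape, using a hypothetical `public.issue` table with made-up columns and row filter (not the package's actual implementation):

```ts
// Sketch only: mirrors the DownloadStatements shape for a hypothetical
// "public"."issue" table. Identifier quoting and the row filter are made up.
type DownloadStatements = {
  select: string;
  getTotalRows: string;
  getTotalBytes: string;
};

function sketchDownloadStatements(
  schema: string,
  table: string,
  cols: string[],
  rowFilter?: string,
): DownloadStatements {
  const id = (name: string) => `"${name}"`; // naive quoting, enough for a sketch
  const where = rowFilter ? `WHERE ${rowFilter}` : '';
  const fromTable = `FROM ${id(schema)}.${id(table)} ${where}`;
  const totalBytes = `(${cols
    .map(col => `SUM(COALESCE(pg_column_size(${id(col)}), 0))`)
    .join(' + ')})`;
  return {
    select: `SELECT ${cols.map(id).join(',')} ${fromTable}`,
    getTotalRows: `SELECT COUNT(*) AS "totalRows" ${fromTable}`,
    getTotalBytes: `SELECT ${totalBytes} AS "totalBytes" ${fromTable}`,
  };
}

// Example: the three statements that would be generated for one table.
console.log(sketchDownloadStatements('public', 'issue', ['id', 'title'], 'archived = false'));
```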
@@ -1 +1 @@
- {"version":3,"file":"initial-sync.d.ts","sourceRoot":"","sources":["../../../../../../../zero-cache/src/services/change-source/pg/initial-sync.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAIjD,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,0CAA0C,CAAC;AAGzE,OAAO,KAAK,EAAC,QAAQ,EAAC,MAAM,iCAAiC,CAAC;AAa9D,OAAO,KAAK,EAAY,kBAAkB,EAAC,MAAM,sBAAsB,CAAC;AAexE,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,0BAA0B,CAAC;AAkB1D,MAAM,MAAM,kBAAkB,GAAG;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACnC,CAAC;AAEF,4EAA4E;AAC5E,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC;AAEvC,wBAAsB,WAAW,CAC/B,EAAE,EAAE,UAAU,EACd,KAAK,EAAE,WAAW,EAClB,EAAE,EAAE,QAAQ,EACZ,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,kBAAkB,EAC/B,OAAO,EAAE,aAAa,iBAyLvB;AAkGD,KAAK,eAAe,GAAG;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAKF,wBAAsB,qBAAqB,CACzC,EAAE,EAAE,UAAU,EACd,OAAO,EAAE,QAAQ,CAAC,GAAG,EACrB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,eAAe,CAAC,CAQ1B;AA6BD,eAAO,MAAM,iBAAiB,KAAK,CAAC;AAMpC,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE,MAAM,EAAE,UAYlB"}
+ {"version":3,"file":"initial-sync.d.ts","sourceRoot":"","sources":["../../../../../../../zero-cache/src/services/change-source/pg/initial-sync.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAIjD,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,0CAA0C,CAAC;AAIzE,OAAO,KAAK,EAAC,QAAQ,EAAC,MAAM,iCAAiC,CAAC;AAa9D,OAAO,KAAK,EAAY,kBAAkB,EAAC,MAAM,sBAAsB,CAAC;AAexE,OAAO,KAAK,EAAC,WAAW,EAAC,MAAM,0BAA0B,CAAC;AAkB1D,MAAM,MAAM,kBAAkB,GAAG;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACnC,CAAC;AAEF,4EAA4E;AAC5E,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC;AAEvC,wBAAsB,WAAW,CAC/B,EAAE,EAAE,UAAU,EACd,KAAK,EAAE,WAAW,EAClB,EAAE,EAAE,QAAQ,EACZ,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,kBAAkB,EAC/B,OAAO,EAAE,aAAa,iBAiMvB;AAkGD,KAAK,eAAe,GAAG;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAKF,wBAAsB,qBAAqB,CACzC,EAAE,EAAE,UAAU,EACd,OAAO,EAAE,QAAQ,CAAC,GAAG,EACrB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,eAAe,CAAC,CAQ1B;AA6BD,eAAO,MAAM,iBAAiB,KAAK,CAAC;AAMpC,MAAM,MAAM,kBAAkB,GAAG;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,kBAAkB,EACzB,IAAI,EAAE,MAAM,EAAE,GACb,kBAAkB,CAgBpB"}
@@ -119,15 +119,23 @@ async function initialSync(lc, shard, tx, upstreamURI, syncOptions, context) {
  );
  try {
  createLiteTables(tx, tables, initialVersion);
+ const downloads = await Promise.all(
+ tables.map(
+ (spec) => copiers.processReadTask(
+ (db, lc2) => getInitialDownloadState(lc2, db, spec)
+ )
+ )
+ );
  statusPublisher.publish(
  lc,
  "Initializing",
  `Copying ${numTables} upstream tables at version ${initialVersion}`,
- 5e3
+ 5e3,
+ () => ({ downloadStatus: downloads.map(({ status }) => status) })
  );
  void copyProfiler?.start();
  const rowCounts = await Promise.all(
- tables.map(
+ downloads.map(
  (table) => copiers.processReadTask(
  (db, lc2) => copy(lc2, table, copyPool, db, tx)
  )
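Note: `initialSync` now computes a per-table download state up front and hands `statusPublisher.publish` a callback that snapshots those statuses, so progress can be reported while tables are copied. A rough sketch of that pattern with made-up types; `getTotalRows`, `copyTable`, and `publishProgress` are stand-ins, not the package's API:

```ts
// Rough sketch of the orchestration above with made-up types. getTotalRows,
// copyTable, and publishProgress are stand-ins for the package's internals.
type DownloadStatus = {table: string; rows: number; totalRows: number};
type Download = {table: string; status: DownloadStatus};

async function sketchInitialSync(
  tables: string[],
  getTotalRows: (table: string) => Promise<number>,
  copyTable: (download: Download) => Promise<void>,
  publishProgress: (snapshot: () => DownloadStatus[]) => void,
): Promise<void> {
  // 1. Compute the initial download state for every table in parallel.
  const downloads: Download[] = await Promise.all(
    tables.map(async table => ({
      table,
      status: {table, rows: 0, totalRows: await getTotalRows(table)},
    })),
  );
  // 2. Register a callback that snapshots the (mutable) statuses on demand.
  publishProgress(() => downloads.map(({status}) => ({...status})));
  // 3. Copy each table; copyTable is expected to bump status.rows as it flushes.
  await Promise.all(downloads.map(copyTable));
}
```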
@@ -285,20 +293,58 @@ const INSERT_BATCH_SIZE = 50;
  const MB = 1024 * 1024;
  const MAX_BUFFERED_ROWS = 1e4;
  const BUFFERED_SIZE_THRESHOLD = 8 * MB;
- function makeSelectPublishedStmt(table, columns) {
+ function makeDownloadStatements(table, cols) {
  const filterConditions = Object.values(table.publications).map(({ rowFilter }) => rowFilter).filter((f) => !!f);
- return (
+ const where = filterConditions.length === 0 ? "" : (
  /*sql*/
- `
- SELECT ${columns.map(id).join(",")} FROM ${id(table.schema)}.${id(table.name)}` + (filterConditions.length === 0 ? "" : (
- /*sql*/
- ` WHERE ${filterConditions.join(" OR ")}`
- ))
+ `WHERE ${filterConditions.join(" OR ")}`
  );
+ const fromTable = (
+ /*sql*/
+ `FROM ${id(table.schema)}.${id(table.name)} ${where}`
+ );
+ const totalBytes = `(${cols.map((col) => `SUM(COALESCE(pg_column_size(${id(col)}), 0))`).join(" + ")})`;
+ const stmts = {
+ select: (
+ /*sql*/
+ `SELECT ${cols.map(id).join(",")} ${fromTable}`
+ ),
+ getTotalRows: (
+ /*sql*/
+ `SELECT COUNT(*) AS "totalRows" ${fromTable}`
+ ),
+ getTotalBytes: (
+ /*sql*/
+ `SELECT ${totalBytes} AS "totalBytes" ${fromTable}`
+ )
+ };
+ return stmts;
+ }
+ async function getInitialDownloadState(lc, sql, spec) {
+ const start = performance.now();
+ const table = liteTableName(spec);
+ const columns = Object.keys(spec.columns);
+ const stmts = makeDownloadStatements(spec, columns);
+ const rowsResult = sql.unsafe(stmts.getTotalRows).execute();
+ const bytesResult = sql.unsafe(stmts.getTotalBytes).execute();
+ const state = {
+ spec,
+ status: {
+ table,
+ columns,
+ rows: 0,
+ totalRows: Number((await rowsResult)[0].totalRows),
+ totalBytes: Number((await bytesResult)[0].totalBytes)
+ }
+ };
+ const elapsed = (performance.now() - start).toFixed(3);
+ lc.info?.(`Computed initial download state for ${table} (${elapsed} ms)`, {
+ state: state.status
+ });
+ return state;
  }
- async function copy(lc, table, dbClient, from, to) {
+ async function copy(lc, { spec: table, status }, dbClient, from, to) {
  const start = performance.now();
- let rows = 0;
  let flushTime = 0;
  const tableName = liteTableName(table);
  const orderedColumns = Object.entries(table.columns);
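Note: `getInitialDownloadState` issues the row-count and byte-count queries before awaiting either result, and derives `totalBytes` from `pg_column_size` sums over the published columns. A standalone sketch of those two queries with the postgres.js client, against a hypothetical `public.issue` table (the package builds these strings via `makeDownloadStatements`):

```ts
// Standalone sketch with the postgres.js client: kick off both counting
// queries, then await their results. "public"."issue" and its columns are
// hypothetical.
import postgres from 'postgres';

async function sketchTotals(sql: postgres.Sql): Promise<{totalRows: number; totalBytes: number}> {
  const rowsResult = sql
    .unsafe(`SELECT COUNT(*) AS "totalRows" FROM "public"."issue"`)
    .execute(); // issued now, awaited later
  const bytesResult = sql
    .unsafe(
      `SELECT (SUM(COALESCE(pg_column_size("id"), 0)) +
               SUM(COALESCE(pg_column_size("title"), 0))) AS "totalBytes"
         FROM "public"."issue"`,
    )
    .execute();
  return {
    totalRows: Number((await rowsResult)[0].totalRows),
    totalBytes: Number((await bytesResult)[0].totalBytes),
  };
}
```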
@@ -315,7 +361,7 @@ async function copy(lc, table, dbClient, from, to) {
  const insertBatchStmt = to.prepare(
  insertSql + `,${valuesSql}`.repeat(INSERT_BATCH_SIZE - 1)
  );
- const selectStmt = makeSelectPublishedStmt(table, columnNames);
+ const { select } = makeDownloadStatements(table, columnNames);
  const valuesPerRow = columnSpecs.length;
  const valuesPerBatch = valuesPerRow * INSERT_BATCH_SIZE;
  const pendingValues = Array.from({
@@ -338,14 +384,14 @@ async function copy(lc, table, dbClient, from, to) {
  pendingValues[i] = void 0;
  }
  pendingSize = 0;
- rows += flushedRows;
+ status.rows += flushedRows;
  const elapsed2 = performance.now() - start2;
  flushTime += elapsed2;
  lc.debug?.(
  `flushed ${flushedRows} ${tableName} rows (${flushedSize} bytes) in ${elapsed2.toFixed(3)} ms`
  );
  }
- lc.info?.(`Starting copy stream of ${tableName}:`, selectStmt);
+ lc.info?.(`Starting copy stream of ${tableName}:`, select);
  const pgParsers = await getTypeParsers(dbClient, { returnJsonAsString: true });
  const parsers = columnSpecs.map((c) => {
  const pgParse = pgParsers.getTypeParser(c.typeOID);
@@ -358,7 +404,7 @@ async function copy(lc, table, dbClient, from, to) {
  const tsvParser = new TsvParser();
  let col = 0;
  await pipeline(
- await from.unsafe(`COPY (${selectStmt}) TO STDOUT`).readable(),
+ await from.unsafe(`COPY (${select}) TO STDOUT`).readable(),
  new Writable({
  highWaterMark: BUFFERED_SIZE_THRESHOLD,
  write(chunk, _encoding, callback) {
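Note: the copy path still streams `COPY (select) TO STDOUT` into a `Writable` sink via `pipeline`; only the source of the select statement changed in this hunk. A pared-down sketch of that streaming setup (hypothetical table; the sink body is a placeholder for the TSV parsing and batched inserts the real `copy()` performs):

```ts
// Pared-down sketch of streaming COPY output with postgres.js and Node
// streams. The table is hypothetical and the Writable body is a placeholder.
import {Writable} from 'node:stream';
import {pipeline} from 'node:stream/promises';
import postgres from 'postgres';

async function sketchCopyStream(from: postgres.Sql): Promise<void> {
  const select = `SELECT "id","title" FROM "public"."issue"`;
  await pipeline(
    await from.unsafe(`COPY (${select}) TO STDOUT`).readable(),
    new Writable({
      write(chunk, _encoding, callback) {
        // Parse rows out of chunk and buffer them for batched inserts here.
        void chunk;
        callback();
      },
    }),
  );
}
```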
@@ -390,14 +436,14 @@ async function copy(lc, table, dbClient, from, to) {
  );
  const elapsed = performance.now() - start;
  lc.info?.(
- `Finished copying ${rows} rows into ${tableName} (flush: ${flushTime.toFixed(3)} ms) (total: ${elapsed.toFixed(3)} ms) `
+ `Finished copying ${status.rows} rows into ${tableName} (flush: ${flushTime.toFixed(3)} ms) (total: ${elapsed.toFixed(3)} ms) `
  );
- return { rows, flushTime };
+ return { rows: status.rows, flushTime };
  }
  export {
  INSERT_BATCH_SIZE,
  createReplicationSlot,
  initialSync,
- makeSelectPublishedStmt
+ makeDownloadStatements
  };
  //# sourceMappingURL=initial-sync.js.map
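Note: with `status.rows` updated on every flush and `totalRows`/`totalBytes` computed up front, each published `downloadStatus` entry carries enough to estimate copy progress. An illustrative aggregation (field names follow the diff above; the percentage math is this note's own, not part of the package):

```ts
// Illustrative aggregation of the published per-table statuses into a single
// percentage; not part of the package.
type DownloadStatus = {table: string; rows: number; totalRows: number};

function percentComplete(statuses: DownloadStatus[]): number {
  const total = statuses.reduce((sum, s) => sum + s.totalRows, 0);
  const done = statuses.reduce((sum, s) => sum + Math.min(s.rows, s.totalRows), 0);
  return total === 0 ? 100 : Math.round((100 * done) / total);
}
```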