@firebaseextensions/firestore-bigquery-change-tracker 1.1.16 → 1.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.viewRequiresUpdate = exports.tableRequiresUpdate = void 0;
4
+ const partitioning_1 = require("./partitioning");
5
/**
 * Decides whether the metadata of an existing table has to be written back.
 *
 * Checks are ordered from cheapest to most expensive; the partitioning check
 * is asynchronous and is therefore evaluated last.
 *
 * @param {object} args
 * @param {object} args.table - Table handle; `table.metadata` is read here and
 *   the handle is passed on to `Partitioning`.
 * @param {object} args.config - Tracker configuration. Reads `clustering` and
 *   `wildcardIds`; the whole object is passed on to `Partitioning`.
 * @param {*} args.documentIdColExists - Truthy when the document id column is
 *   already present (a boolean or the matching column object).
 * @param {*} args.pathParamsColExists - Truthy when the path params column is
 *   already present (a boolean or the matching column object).
 * @returns {Promise<boolean>} `true` when an update is required.
 */
async function tableRequiresUpdate({ table, config, documentIdColExists, pathParamsColExists, }) {
    const { metadata } = table;
    /** Check clustering */
    // An unset clustering config is treated as "no clustering fields" so that it
    // compares equal to a table without clustering instead of always differing.
    const configCluster = JSON.stringify(config.clustering || []);
    const tableCluster = JSON.stringify(metadata.clustering?.fields || []);
    if (configCluster !== tableCluster) {
        return true;
    }
    /** Check wildcards */
    // Both sides are coerced to booleans: callers may pass the column object
    // returned by Array.prototype.find (or undefined) rather than a boolean, and
    // a strict comparison of boolean against object is always unequal.
    if (Boolean(config.wildcardIds) !== Boolean(pathParamsColExists)) {
        return true;
    }
    /** Check document id column */
    if (!documentIdColExists) {
        return true;
    }
    /** Check partitioning */
    const partitioning = new partitioning_1.Partitioning(config, table);
    const isValidPartition = await partitioning.isValidPartitionForExistingTable();
    if (isValidPartition) {
        return true;
    }
    // No updates have occurred.
    return false;
}
27
+ exports.tableRequiresUpdate = tableRequiresUpdate;
28
/**
 * Decides whether the definition of an existing view has to be rewritten.
 *
 * @param {object} args
 * @param {object} [args.metadata] - View metadata; `metadata.view.query` is
 *   inspected when present.
 * @param {object} args.config - Tracker configuration. Reads `wildcardIds` and
 *   `useNewSnapshotQuerySyntax`.
 * @param {*} args.documentIdColExists - Truthy when the document id column is
 *   already present (a boolean or the matching column object).
 * @param {*} args.pathParamsColExists - Truthy when the path params column is
 *   already present (a boolean or the matching column object).
 * @returns {boolean} `true` when the view must be updated.
 */
function viewRequiresUpdate({ metadata, config, documentIdColExists, pathParamsColExists, }) {
    /** Check if documentId column exists */
    if (!documentIdColExists) {
        return true;
    }
    /** Check wildcards */
    // Both sides are coerced to booleans: callers may pass the column object
    // returned by Array.prototype.find (or undefined) rather than a boolean, and
    // a strict comparison of boolean against object is always unequal.
    if (Boolean(config.wildcardIds) !== Boolean(pathParamsColExists)) {
        return true;
    }
    // Without metadata there is no query to compare against.
    if (!metadata) {
        return false;
    }
    /* Using the new query syntax for snapshots */
    const query = metadata.view?.query || "";
    // A query containing FIRST_VALUE is taken to be the legacy snapshot query.
    const hasLegacyQuery = query.includes("FIRST_VALUE");
    const { useNewSnapshotQuerySyntax } = config;
    // An update is needed when the stored query does not match the selected
    // syntax: new syntax enabled but legacy query stored, or new syntax
    // disabled but a non-legacy query stored.
    return Boolean(useNewSnapshotQuerySyntax) === hasLegacyQuery;
}
53
+ exports.viewRequiresUpdate = viewRequiresUpdate;
@@ -27,6 +27,7 @@ const tracker_1 = require("../tracker");
27
27
  const logs = require("../logs");
28
28
  const partitioning_1 = require("./partitioning");
29
29
  const clustering_1 = require("./clustering");
30
+ const checkUpdates_1 = require("./checkUpdates");
30
31
  var schema_2 = require("./schema");
31
32
  Object.defineProperty(exports, "RawChangelogSchema", { enumerable: true, get: function () { return schema_2.RawChangelogSchema; } });
32
33
  Object.defineProperty(exports, "RawChangelogViewSchema", { enumerable: true, get: function () { return schema_2.RawChangelogViewSchema; } });
@@ -64,6 +65,9 @@ class FirestoreBigQueryEventHistoryTracker {
64
65
  document_id: event.documentId,
65
66
  operation: tracker_1.ChangeType[event.operation],
66
67
  data: JSON.stringify(this.serializeData(event.data)),
68
+ old_data: event.oldData
69
+ ? JSON.stringify(this.serializeData(event.oldData))
70
+ : null,
67
71
  ...partitionValue,
68
72
  ...(this.config.wildcardIds &&
69
73
  event.pathParams && { path_params: JSON.stringify(pathParams) }),
@@ -258,7 +262,14 @@ class FirestoreBigQueryEventHistoryTracker {
258
262
  logs.addNewColumn(this.rawChangeLogTableName(), schema_1.documentPathParams.name);
259
263
  }
260
264
  await partitioning.addPartitioningToSchema(metadata.schema.fields);
261
- if (!documentIdColExists || !pathParamsColExists) {
265
+ /** Update table metadata if required */
266
+ const shouldUpdate = await checkUpdates_1.tableRequiresUpdate({
267
+ table,
268
+ config: this.config,
269
+ documentIdColExists,
270
+ pathParamsColExists,
271
+ });
272
+ if (shouldUpdate) {
262
273
  await table.setMetadata(metadata);
263
274
  }
264
275
  }
@@ -293,7 +304,6 @@ class FirestoreBigQueryEventHistoryTracker {
293
304
  const view = dataset.table(this.rawLatestView());
294
305
  const [viewExists] = await view.exists();
295
306
  const schema = schema_1.RawChangelogViewSchema;
296
- const partitioning = new partitioning_1.Partitioning(this.config, view);
297
307
  if (viewExists) {
298
308
  logs.bigQueryViewAlreadyExists(view.id, dataset.id);
299
309
  const [metadata] = await view.getMetadata();
@@ -303,45 +313,41 @@ class FirestoreBigQueryEventHistoryTracker {
303
313
  }
304
314
  const documentIdColExists = fields.find((column) => column.name === "document_id");
305
315
  const pathParamsColExists = fields.find((column) => column.name === "path_params");
306
- if (!documentIdColExists) {
307
- metadata.view = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema);
316
+ /** If new view or opt-in to new query syntax **/
317
+ const updateView = checkUpdates_1.viewRequiresUpdate({
318
+ metadata,
319
+ config: this.config,
320
+ documentIdColExists,
321
+ pathParamsColExists,
322
+ });
323
+ if (updateView) {
324
+ metadata.view = snapshot_1.latestConsistentSnapshotView({
325
+ datasetId: this.config.datasetId,
326
+ tableName: this.rawChangeLogTableName(),
327
+ schema,
328
+ useLegacyQuery: !this.config.useNewSnapshotQuerySyntax,
329
+ });
308
330
  logs.addNewColumn(this.rawLatestView(), schema_1.documentIdField.name);
331
+ await view.setMetadata(metadata);
309
332
  }
310
- if (!pathParamsColExists && this.config.wildcardIds) {
311
- metadata.view = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema);
312
- logs.addNewColumn(this.rawLatestView(), schema_1.documentPathParams.name);
313
- }
314
- //Add partitioning
315
- await partitioning.addPartitioningToSchema(schema.fields);
316
- //TODO: Tidy up and format / add test cases?
317
- // if (
318
- // !documentIdColExists ||
319
- // (!pathParamsColExists && this.config.wildcardIds) ||
320
- // partition.isValidPartitionForExistingTable(partitionColExists)
321
- // ) {
322
- await view.setMetadata(metadata);
323
- // }
324
333
  }
325
334
  else {
326
335
  const schema = { fields: [...schema_1.RawChangelogViewSchema.fields] };
327
- //Add partitioning field
328
- await partitioning.addPartitioningToSchema(schema.fields);
329
- //TODO Create notification for a user that View cannot be Time Partitioned by the field.
330
- // await partitioning.updateTableMetadata(options);
331
336
  if (this.config.wildcardIds) {
332
337
  schema.fields.push(schema_1.documentPathParams);
333
338
  }
334
- const latestSnapshot = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema, this.bq.projectId);
339
+ const latestSnapshot = snapshot_1.latestConsistentSnapshotView({
340
+ datasetId: this.config.datasetId,
341
+ tableName: this.rawChangeLogTableName(),
342
+ schema,
343
+ bqProjectId: this.bq.projectId,
344
+ useLegacyQuery: !this.config.useNewSnapshotQuerySyntax,
345
+ });
335
346
  logs.bigQueryViewCreating(this.rawLatestView(), latestSnapshot.query);
336
347
  const options = {
337
348
  friendlyName: this.rawLatestView(),
338
349
  view: latestSnapshot,
339
350
  };
340
- if (this.config.timePartitioning) {
341
- options.timePartitioning = {
342
- type: this.config.timePartitioning,
343
- };
344
- }
345
351
  try {
346
352
  await view.create(options);
347
353
  await view.setMetadata({ schema: schema_1.RawChangelogViewSchema });
@@ -56,7 +56,7 @@ class Partitioning {
56
56
  const hasNoCustomOptions = !timePartitioningField &&
57
57
  !timePartitioningFieldType &&
58
58
  !timePartitioningFirestoreField;
59
- /* No custom congig has been set, use partition value option only */
59
+ /* No custom config has been set, use partition value option only */
60
60
  if (hasNoCustomOptions)
61
61
  return true;
62
62
  /* check if all options have been provided to be */
@@ -107,8 +107,9 @@ class Partitioning {
107
107
  return schemaFields.some((column) => column.name === this.config.timePartitioningField);
108
108
  }
109
109
  async isValidPartitionForExistingTable() {
110
- if (this.isTablePartitioned())
111
- return false;
110
+ const isPartitioned = await this.isTablePartitioned();
111
+ if (isPartitioned)
112
+ return Promise.resolve(false);
112
113
  return this.hasValidCustomPartitionConfig();
113
114
  }
114
115
  isValidPartitionForNewTable() {
@@ -79,6 +79,12 @@ exports.RawChangelogViewSchema = {
79
79
  type: "STRING",
80
80
  description: "The full JSON representation of the current document state.",
81
81
  },
82
+ {
83
+ name: "old_data",
84
+ mode: "NULLABLE",
85
+ type: "STRING",
86
+ description: "The full JSON representation of the document state before the indicated operation is applied.",
87
+ },
82
88
  exports.documentIdField,
83
89
  ],
84
90
  };
@@ -114,6 +120,12 @@ exports.RawChangelogSchema = {
114
120
  type: "STRING",
115
121
  description: "The full JSON representation of the document state after the indicated operation is applied. This field will be null for DELETE operations.",
116
122
  },
123
+ {
124
+ name: "old_data",
125
+ mode: "NULLABLE",
126
+ type: "STRING",
127
+ description: "The full JSON representation of the document state before the indicated operation is applied. This field will be null for CREATE operations.",
128
+ },
117
129
  exports.documentIdField,
118
130
  ],
119
131
  };
@@ -19,49 +19,86 @@ exports.buildLatestSnapshotViewQuery = exports.latestConsistentSnapshotView = vo
19
19
  const sqlFormatter = require("sql-formatter");
20
20
  const schema_1 = require("./schema");
21
21
  const excludeFields = ["document_name", "document_id"];
22
- exports.latestConsistentSnapshotView = (datasetId, tableName, schema, bqProjectId) => ({
23
- query: buildLatestSnapshotViewQuery(datasetId, tableName, schema_1.timestampField.name, schema["fields"]
24
- .map((field) => field.name)
25
- .filter((name) => excludeFields.indexOf(name) === -1), bqProjectId),
22
+ exports.latestConsistentSnapshotView = ({ datasetId, tableName, schema, bqProjectId, useLegacyQuery = false, }) => ({
23
+ query: buildLatestSnapshotViewQuery({
24
+ datasetId,
25
+ tableName,
26
+ timestampColumnName: schema_1.timestampField.name,
27
+ groupByColumns: schema["fields"]
28
+ .map((field) => field.name)
29
+ .filter((name) => excludeFields.indexOf(name) === -1),
30
+ bqProjectId,
31
+ useLegacyQuery,
32
+ }),
26
33
  useLegacySql: false,
27
34
  });
28
- function buildLatestSnapshotViewQuery(datasetId, tableName, timestampColumnName, groupByColumns, bqProjectId) {
35
+ function buildLatestSnapshotViewQuery({ datasetId, tableName, timestampColumnName, groupByColumns, bqProjectId, useLegacyQuery = true, }) {
29
36
  if (datasetId === "" || tableName === "" || timestampColumnName === "") {
30
37
  throw Error(`Missing some query parameters!`);
31
38
  }
32
- for (let columnName in groupByColumns) {
39
+ for (let columnName of groupByColumns) {
33
40
  if (columnName === "") {
34
41
  throw Error(`Found empty group by column!`);
35
42
  }
36
43
  }
44
+ const legacyQuery = sqlFormatter.format(` -- Retrieves the latest document change events for all live documents.
45
+ -- timestamp: The Firestore timestamp at which the event took place.
46
+ -- operation: One of INSERT, UPDATE, DELETE, IMPORT.
47
+ -- event_id: The id of the event that triggered the cloud function mirrored the event.
48
+ -- data: A raw JSON payload of the current state of the document.
49
+ -- document_id: The document id as defined in the Firestore database
50
+ SELECT
51
+ document_name,
52
+ document_id${groupByColumns.length > 0 ? `,` : ``}
53
+ ${groupByColumns.join(",")}
54
+ FROM (
55
+ SELECT
56
+ document_name,
57
+ document_id,
58
+ ${groupByColumns
59
+ .map((columnName) => `FIRST_VALUE(${columnName})
60
+ OVER(PARTITION BY document_name ORDER BY ${timestampColumnName} DESC)
61
+ AS ${columnName}`)
62
+ .join(",")}${groupByColumns.length > 0 ? `,` : ``}
63
+ FIRST_VALUE(operation)
64
+ OVER(PARTITION BY document_name ORDER BY ${timestampColumnName} DESC) = "DELETE"
65
+ AS is_deleted
66
+ FROM \`${bqProjectId || process.env.PROJECT_ID}.${datasetId}.${tableName}\`
67
+ ORDER BY document_name, ${timestampColumnName} DESC
68
+ )
69
+ WHERE NOT is_deleted
70
+ GROUP BY document_name, document_id${groupByColumns.length > 0 ? `, ` : ``}${groupByColumns.join(",")}`);
71
+ const nonGroupFields = ["event_id", "data", "old_data"];
72
+ const joinFields = ["document_name"];
73
+ const addSelectField = (field) => {
74
+ if (joinFields.includes(field))
75
+ return `t.${field}`;
76
+ return nonGroupFields.includes(field)
77
+ ? `ANY_VALUE(${field}) as ${field}`
78
+ : `${field} as ${field}`;
79
+ };
80
+ const filterGroupField = (field) => {
81
+ return nonGroupFields.includes(field);
82
+ };
37
83
  const query = sqlFormatter.format(` -- Retrieves the latest document change events for all live documents.
38
84
  -- timestamp: The Firestore timestamp at which the event took place.
39
85
  -- operation: One of INSERT, UPDATE, DELETE, IMPORT.
40
86
  -- event_id: The id of the event that triggered the cloud function mirrored the event.
41
87
  -- data: A raw JSON payload of the current state of the document.
42
88
  -- document_id: The document id as defined in the Firestore database
89
+ WITH latest AS (
90
+ SELECT max(${timestampColumnName}) as latest_timestamp, document_name
91
+ FROM \`${bqProjectId || process.env.PROJECT_ID}.${datasetId}.${tableName}\`
92
+ GROUP BY document_name
93
+ )
43
94
  SELECT
44
- document_name,
95
+ t.document_name,
45
96
  document_id${groupByColumns.length > 0 ? `,` : ``}
46
- ${groupByColumns.join(",")}
47
- FROM (
48
- SELECT
49
- document_name,
50
- document_id,
51
- ${groupByColumns
52
- .map((columnName) => `FIRST_VALUE(${columnName})
53
- OVER(PARTITION BY document_name ORDER BY ${timestampColumnName} DESC)
54
- AS ${columnName}`)
55
- .join(",")}${groupByColumns.length > 0 ? `,` : ``}
56
- FIRST_VALUE(operation)
57
- OVER(PARTITION BY document_name ORDER BY ${timestampColumnName} DESC) = "DELETE"
58
- AS is_deleted
59
- FROM \`${bqProjectId ||
60
- process.env.PROJECT_ID}.${datasetId}.${tableName}\`
61
- ORDER BY document_name, ${timestampColumnName} DESC
62
- )
63
- WHERE NOT is_deleted
64
- GROUP BY document_name, document_id${groupByColumns.length > 0 ? `, ` : ``}${groupByColumns.join(",")}`);
65
- return query;
97
+ ${groupByColumns.map((f) => addSelectField(f)).join(",")}
98
+ FROM \`${bqProjectId || process.env.PROJECT_ID}.${datasetId}.${tableName}\` AS t
99
+ JOIN latest ON (t.document_name = latest.document_name AND (IFNULL(t.${timestampColumnName}, timestamp("1970-01-01 00:00:00+00"))) = (IFNULL(latest.latest_timestamp, timestamp("1970-01-01 00:00:00+00"))))
100
+ WHERE operation != "DELETE"
101
+ GROUP BY document_name, document_id${groupByColumns.length > 0 ? `, ` : ``}${groupByColumns.filter((c) => !filterGroupField(c)).join(",")}`);
102
+ return useLegacyQuery ? legacyQuery : query;
66
103
  }
67
104
  exports.buildLatestSnapshotViewQuery = buildLatestSnapshotViewQuery;
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "url": "github.com/firebase/extensions.git",
6
6
  "directory": "firestore-bigquery-export/firestore-bigquery-change-tracker"
7
7
  },
8
- "version": "1.1.16",
8
+ "version": "1.1.17",
9
9
  "description": "Core change-tracker library for Cloud Firestore Collection BigQuery Exports",
10
10
  "main": "./lib/index.js",
11
11
  "scripts": {
@@ -13,7 +13,8 @@
13
13
  "clean": "rimraf lib",
14
14
  "compile": "tsc",
15
15
  "test:local": "firebase ext:dev:emulators:exec ./node_modules/.bin/jest --test-params=./src/__tests__/emulator-params.env --project=extensions-testing --config=./src/__tests__/firebase.json",
16
- "prepare": "npm run build"
16
+ "prepare": "npm run build",
17
+ "generate-stresstest-table": "bq query --project_id=extensions-testing --use_legacy_sql=false < ./src/__tests__/fixtures/sql/generateSnapshotStresstestTable.sql"
17
18
  },
18
19
  "files": [
19
20
  "lib/*.js",