@firebaseextensions/firestore-bigquery-change-tracker 1.1.13 → 1.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/bigquery/clustering.js +49 -0
- package/lib/bigquery/handleFailedTransactions.js +31 -0
- package/lib/bigquery/index.js +129 -30
- package/lib/bigquery/partitioning.js +207 -0
- package/lib/bigquery/schema.js +17 -1
- package/lib/bigquery/snapshot.js +6 -5
- package/lib/bigquery/validateProject.js +18 -0
- package/lib/logs.js +54 -3
- package/package.json +4 -2
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Clustering = void 0;
|
|
4
|
+
const logs = require("../logs");
|
|
5
|
+
class Clustering {
|
|
6
|
+
constructor(config, table, schema) {
|
|
7
|
+
this.updateCluster = async (metaData) => {
|
|
8
|
+
const clustering = { fields: this.config.clustering };
|
|
9
|
+
metaData.clustering = clustering;
|
|
10
|
+
logs.updatedClustering(this.config.clustering.join(","));
|
|
11
|
+
return Promise.resolve();
|
|
12
|
+
};
|
|
13
|
+
this.removeCluster = async (metaData) => {
|
|
14
|
+
metaData.clustering = null;
|
|
15
|
+
logs.removedClustering(this.table.id);
|
|
16
|
+
return Promise.resolve();
|
|
17
|
+
};
|
|
18
|
+
this.updateClustering = async (metaData) => {
|
|
19
|
+
/** Return if invalid config */
|
|
20
|
+
if (await this.hasInvalidFields(metaData))
|
|
21
|
+
return Promise.resolve();
|
|
22
|
+
return !!this.config.clustering && !!this.config.clustering.length
|
|
23
|
+
? this.updateCluster(metaData)
|
|
24
|
+
: this.removeCluster(metaData);
|
|
25
|
+
};
|
|
26
|
+
this.config = config;
|
|
27
|
+
this.table = table;
|
|
28
|
+
this.schema = schema;
|
|
29
|
+
}
|
|
30
|
+
hasValidTableReference() {
|
|
31
|
+
logs.invalidTableReference();
|
|
32
|
+
return !!this.table;
|
|
33
|
+
}
|
|
34
|
+
async hasInvalidFields(metaData) {
|
|
35
|
+
const { clustering = [] } = this.config;
|
|
36
|
+
if (!clustering)
|
|
37
|
+
return Promise.resolve(false);
|
|
38
|
+
const fieldNames = metaData
|
|
39
|
+
? metaData.schema.fields.map(($) => $.name)
|
|
40
|
+
: [];
|
|
41
|
+
const invalidFields = clustering.filter(($) => !fieldNames.includes($));
|
|
42
|
+
if (invalidFields.length) {
|
|
43
|
+
logs.invalidClustering(invalidFields.join(","));
|
|
44
|
+
return Promise.resolve(true);
|
|
45
|
+
}
|
|
46
|
+
return Promise.resolve(false);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
exports.Clustering = Clustering;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const firebase = require("firebase-admin");
|
|
4
|
+
if (!firebase.apps.length) {
|
|
5
|
+
firebase.initializeApp();
|
|
6
|
+
firebase.firestore().settings({ ignoreUndefinedProperties: true });
|
|
7
|
+
}
|
|
8
|
+
exports.default = async (rows, config, e) => {
|
|
9
|
+
const db = firebase.firestore();
|
|
10
|
+
const batchArray = [db.batch()];
|
|
11
|
+
let operationCounter = 0;
|
|
12
|
+
let batchIndex = 0;
|
|
13
|
+
rows.forEach((row) => {
|
|
14
|
+
var ref = db.collection(config.backupTableId).doc(row.insertId);
|
|
15
|
+
batchArray[batchIndex].set(ref, {
|
|
16
|
+
...row,
|
|
17
|
+
error_details: e.message,
|
|
18
|
+
});
|
|
19
|
+
operationCounter++;
|
|
20
|
+
// Check if max limit for batch has been met.
|
|
21
|
+
if (operationCounter === 499) {
|
|
22
|
+
batchArray.push(db.batch());
|
|
23
|
+
batchIndex++;
|
|
24
|
+
operationCounter = 0;
|
|
25
|
+
}
|
|
26
|
+
});
|
|
27
|
+
for (let batch of batchArray) {
|
|
28
|
+
await batch.commit();
|
|
29
|
+
}
|
|
30
|
+
return Promise.resolve();
|
|
31
|
+
};
|
package/lib/bigquery/index.js
CHANGED
|
@@ -19,10 +19,14 @@ exports.FirestoreBigQueryEventHistoryTracker = void 0;
|
|
|
19
19
|
const bigquery = require("@google-cloud/bigquery");
|
|
20
20
|
const firebase = require("firebase-admin");
|
|
21
21
|
const traverse = require("traverse");
|
|
22
|
+
const node_fetch_1 = require("node-fetch");
|
|
22
23
|
const schema_1 = require("./schema");
|
|
23
24
|
const snapshot_1 = require("./snapshot");
|
|
25
|
+
const handleFailedTransactions_1 = require("./handleFailedTransactions");
|
|
24
26
|
const tracker_1 = require("../tracker");
|
|
25
27
|
const logs = require("../logs");
|
|
28
|
+
const partitioning_1 = require("./partitioning");
|
|
29
|
+
const clustering_1 = require("./clustering");
|
|
26
30
|
var schema_2 = require("./schema");
|
|
27
31
|
Object.defineProperty(exports, "RawChangelogSchema", { enumerable: true, get: function () { return schema_2.RawChangelogSchema; } });
|
|
28
32
|
Object.defineProperty(exports, "RawChangelogViewSchema", { enumerable: true, get: function () { return schema_2.RawChangelogViewSchema; } });
|
|
@@ -40,13 +44,17 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
40
44
|
this.config = config;
|
|
41
45
|
this.initialized = false;
|
|
42
46
|
this.bq = new bigquery.BigQuery();
|
|
47
|
+
this.bq.projectId = config.bqProjectId || process.env.PROJECT_ID;
|
|
43
48
|
if (!this.config.datasetLocation) {
|
|
44
49
|
this.config.datasetLocation = "us";
|
|
45
50
|
}
|
|
46
51
|
}
|
|
47
52
|
async record(events) {
|
|
48
53
|
await this.initialize();
|
|
54
|
+
const partitionHandler = new partitioning_1.Partitioning(this.config);
|
|
49
55
|
const rows = events.map((event) => {
|
|
56
|
+
const partitionValue = partitionHandler.getPartitionValue(event);
|
|
57
|
+
const { documentId, ...pathParams } = event.pathParams || {};
|
|
50
58
|
return {
|
|
51
59
|
insertId: event.eventId,
|
|
52
60
|
json: {
|
|
@@ -56,10 +64,26 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
56
64
|
document_id: event.documentId,
|
|
57
65
|
operation: tracker_1.ChangeType[event.operation],
|
|
58
66
|
data: JSON.stringify(this.serializeData(event.data)),
|
|
67
|
+
...partitionValue,
|
|
68
|
+
...(this.config.wildcardIds &&
|
|
69
|
+
event.pathParams && { path_params: JSON.stringify(pathParams) }),
|
|
59
70
|
},
|
|
60
71
|
};
|
|
61
72
|
});
|
|
62
|
-
await this.
|
|
73
|
+
const transformedRows = await this.transformRows(rows);
|
|
74
|
+
await this.insertData(transformedRows);
|
|
75
|
+
}
|
|
76
|
+
async transformRows(rows) {
|
|
77
|
+
if (this.config.transformFunction && this.config.transformFunction !== "") {
|
|
78
|
+
const response = await node_fetch_1.default(this.config.transformFunction, {
|
|
79
|
+
method: "post",
|
|
80
|
+
body: JSON.stringify({ data: rows }),
|
|
81
|
+
headers: { "Content-Type": "application/json" },
|
|
82
|
+
});
|
|
83
|
+
const responseJson = await response.json();
|
|
84
|
+
return responseJson.data;
|
|
85
|
+
}
|
|
86
|
+
return rows;
|
|
63
87
|
}
|
|
64
88
|
serializeData(eventData) {
|
|
65
89
|
if (typeof eventData === "undefined") {
|
|
@@ -87,10 +111,8 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
87
111
|
async isRetryableInsertionError(e) {
|
|
88
112
|
let isRetryable = true;
|
|
89
113
|
const expectedErrors = [
|
|
90
|
-
{
|
|
91
|
-
|
|
92
|
-
location: "document_id",
|
|
93
|
-
},
|
|
114
|
+
{ message: "no such field.", location: schema_1.documentIdField.name },
|
|
115
|
+
{ message: "no such field.", location: schema_1.documentPathParams.name },
|
|
94
116
|
];
|
|
95
117
|
if (e.response &&
|
|
96
118
|
e.response.insertErrors &&
|
|
@@ -111,6 +133,23 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
111
133
|
}
|
|
112
134
|
return isRetryable;
|
|
113
135
|
}
|
|
136
|
+
/**
|
|
137
|
+
* Tables can often take time to create and propagate.
|
|
138
|
+
* A half a second delay is added per check while the function
|
|
139
|
+
* continually re-checks until the referenced dataset and table become available.
|
|
140
|
+
*/
|
|
141
|
+
async waitForInitialization(dataset, table) {
|
|
142
|
+
return new Promise((resolve) => {
|
|
143
|
+
let handle = setInterval(async () => {
|
|
144
|
+
const [datasetExists] = await dataset.exists();
|
|
145
|
+
const [tableExists] = await table.exists();
|
|
146
|
+
if (datasetExists && tableExists) {
|
|
147
|
+
clearInterval(handle);
|
|
148
|
+
return resolve(table);
|
|
149
|
+
}
|
|
150
|
+
}, 500);
|
|
151
|
+
});
|
|
152
|
+
}
|
|
114
153
|
/**
|
|
115
154
|
* Inserts rows of data into the BigQuery raw change log table.
|
|
116
155
|
*/
|
|
@@ -124,6 +163,7 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
124
163
|
try {
|
|
125
164
|
const dataset = this.bigqueryDataset();
|
|
126
165
|
const table = dataset.table(this.rawChangeLogTableName());
|
|
166
|
+
await this.waitForInitialization(dataset, table);
|
|
127
167
|
logs.dataInserting(rows.length);
|
|
128
168
|
await table.insert(rows, options);
|
|
129
169
|
logs.dataInserted(rows.length);
|
|
@@ -134,8 +174,13 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
134
174
|
logs.dataInsertRetried(rows.length);
|
|
135
175
|
return this.insertData(rows, { ...overrideOptions, ignoreUnknownValues: true }, retry);
|
|
136
176
|
}
|
|
177
|
+
// Exceeded number of retires, save in failed collection
|
|
178
|
+
if (!retry && this.config.backupTableId) {
|
|
179
|
+
await handleFailedTransactions_1.default(rows, this.config, e);
|
|
180
|
+
}
|
|
137
181
|
// Reinitializing in case the destintation table is modified.
|
|
138
182
|
this.initialized = false;
|
|
183
|
+
logs.bigQueryTableInsertErrors(e.errors);
|
|
139
184
|
throw e;
|
|
140
185
|
}
|
|
141
186
|
}
|
|
@@ -162,9 +207,14 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
162
207
|
logs.bigQueryDatasetExists(this.config.datasetId);
|
|
163
208
|
}
|
|
164
209
|
else {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
210
|
+
try {
|
|
211
|
+
logs.bigQueryDatasetCreating(this.config.datasetId);
|
|
212
|
+
await dataset.create();
|
|
213
|
+
logs.bigQueryDatasetCreated(this.config.datasetId);
|
|
214
|
+
}
|
|
215
|
+
catch (ex) {
|
|
216
|
+
logs.tableCreationError(this.config.datasetId, ex.message);
|
|
217
|
+
}
|
|
168
218
|
}
|
|
169
219
|
return dataset;
|
|
170
220
|
}
|
|
@@ -176,30 +226,47 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
176
226
|
const dataset = this.bigqueryDataset();
|
|
177
227
|
const table = dataset.table(changelogName);
|
|
178
228
|
const [tableExists] = await table.exists();
|
|
229
|
+
const partitioning = new partitioning_1.Partitioning(this.config, table);
|
|
230
|
+
const clustering = new clustering_1.Clustering(this.config, table);
|
|
179
231
|
if (tableExists) {
|
|
180
232
|
logs.bigQueryTableAlreadyExists(table.id, dataset.id);
|
|
181
233
|
const [metadata] = await table.getMetadata();
|
|
182
|
-
const fields = metadata.schema.fields;
|
|
234
|
+
const fields = metadata.schema ? metadata.schema.fields : [];
|
|
235
|
+
await clustering.updateClustering(metadata);
|
|
183
236
|
const documentIdColExists = fields.find((column) => column.name === "document_id");
|
|
237
|
+
const pathParamsColExists = fields.find((column) => column.name === "path_params");
|
|
184
238
|
if (!documentIdColExists) {
|
|
185
239
|
fields.push(schema_1.documentIdField);
|
|
240
|
+
logs.addNewColumn(this.rawChangeLogTableName(), schema_1.documentIdField.name);
|
|
241
|
+
}
|
|
242
|
+
if (!pathParamsColExists && this.config.wildcardIds) {
|
|
243
|
+
fields.push(schema_1.documentPathParams);
|
|
244
|
+
logs.addNewColumn(this.rawChangeLogTableName(), schema_1.documentPathParams.name);
|
|
245
|
+
}
|
|
246
|
+
await partitioning.addPartitioningToSchema(metadata.schema.fields);
|
|
247
|
+
if (!documentIdColExists || !pathParamsColExists) {
|
|
186
248
|
await table.setMetadata(metadata);
|
|
187
|
-
logs.addDocumentIdColumn(this.rawChangeLogTableName());
|
|
188
249
|
}
|
|
189
250
|
}
|
|
190
251
|
else {
|
|
191
252
|
logs.bigQueryTableCreating(changelogName);
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
schema
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
253
|
+
const schema = { fields: [...schema_1.RawChangelogSchema.fields] };
|
|
254
|
+
if (this.config.wildcardIds) {
|
|
255
|
+
schema.fields.push(schema_1.documentPathParams);
|
|
256
|
+
}
|
|
257
|
+
const options = { friendlyName: changelogName, schema };
|
|
258
|
+
//Add partitioning
|
|
259
|
+
await partitioning.addPartitioningToSchema(schema.fields);
|
|
260
|
+
await partitioning.updateTableMetadata(options);
|
|
261
|
+
// Add clustering
|
|
262
|
+
await clustering.updateClustering(options);
|
|
263
|
+
try {
|
|
264
|
+
await table.create(options);
|
|
265
|
+
logs.bigQueryTableCreated(changelogName);
|
|
266
|
+
}
|
|
267
|
+
catch (ex) {
|
|
268
|
+
logs.tableCreationError(changelogName, ex.message);
|
|
200
269
|
}
|
|
201
|
-
await table.create(options);
|
|
202
|
-
logs.bigQueryTableCreated(changelogName);
|
|
203
270
|
}
|
|
204
271
|
return table;
|
|
205
272
|
}
|
|
@@ -211,32 +278,64 @@ class FirestoreBigQueryEventHistoryTracker {
|
|
|
211
278
|
const dataset = this.bigqueryDataset();
|
|
212
279
|
const view = dataset.table(this.rawLatestView());
|
|
213
280
|
const [viewExists] = await view.exists();
|
|
281
|
+
const schema = schema_1.RawChangelogViewSchema;
|
|
282
|
+
const partitioning = new partitioning_1.Partitioning(this.config, view);
|
|
214
283
|
if (viewExists) {
|
|
215
284
|
logs.bigQueryViewAlreadyExists(view.id, dataset.id);
|
|
216
285
|
const [metadata] = await view.getMetadata();
|
|
217
|
-
const fields = metadata.schema.fields;
|
|
286
|
+
const fields = metadata.schema ? metadata.schema.fields : [];
|
|
287
|
+
if (this.config.wildcardIds) {
|
|
288
|
+
schema.fields.push(schema_1.documentPathParams);
|
|
289
|
+
}
|
|
218
290
|
const documentIdColExists = fields.find((column) => column.name === "document_id");
|
|
291
|
+
const pathParamsColExists = fields.find((column) => column.name === "path_params");
|
|
219
292
|
if (!documentIdColExists) {
|
|
220
|
-
metadata.view = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName());
|
|
221
|
-
|
|
222
|
-
|
|
293
|
+
metadata.view = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema);
|
|
294
|
+
logs.addNewColumn(this.rawLatestView(), schema_1.documentIdField.name);
|
|
295
|
+
}
|
|
296
|
+
if (!pathParamsColExists && this.config.wildcardIds) {
|
|
297
|
+
metadata.view = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema);
|
|
298
|
+
logs.addNewColumn(this.rawLatestView(), schema_1.documentPathParams.name);
|
|
223
299
|
}
|
|
300
|
+
//Add partitioning
|
|
301
|
+
await partitioning.addPartitioningToSchema(schema.fields);
|
|
302
|
+
//TODO: Tidy up and format / add test cases?
|
|
303
|
+
// if (
|
|
304
|
+
// !documentIdColExists ||
|
|
305
|
+
// (!pathParamsColExists && this.config.wildcardIds) ||
|
|
306
|
+
// partition.isValidPartitionForExistingTable(partitionColExists)
|
|
307
|
+
// ) {
|
|
308
|
+
await view.setMetadata(metadata);
|
|
309
|
+
// }
|
|
224
310
|
}
|
|
225
311
|
else {
|
|
226
|
-
const
|
|
312
|
+
const schema = { fields: [...schema_1.RawChangelogViewSchema.fields] };
|
|
313
|
+
//Add partitioning field
|
|
314
|
+
await partitioning.addPartitioningToSchema(schema.fields);
|
|
315
|
+
//TODO Create notification for a user that View cannot be Time Partitioned by the field.
|
|
316
|
+
// await partitioning.updateTableMetadata(options);
|
|
317
|
+
if (this.config.wildcardIds) {
|
|
318
|
+
schema.fields.push(schema_1.documentPathParams);
|
|
319
|
+
}
|
|
320
|
+
const latestSnapshot = snapshot_1.latestConsistentSnapshotView(this.config.datasetId, this.rawChangeLogTableName(), schema, this.bq.projectId);
|
|
227
321
|
logs.bigQueryViewCreating(this.rawLatestView(), latestSnapshot.query);
|
|
228
322
|
const options = {
|
|
229
323
|
friendlyName: this.rawLatestView(),
|
|
230
324
|
view: latestSnapshot,
|
|
231
325
|
};
|
|
232
|
-
if (this.config.
|
|
326
|
+
if (this.config.timePartitioning) {
|
|
233
327
|
options.timePartitioning = {
|
|
234
|
-
type: this.config.
|
|
328
|
+
type: this.config.timePartitioning,
|
|
235
329
|
};
|
|
236
330
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
331
|
+
try {
|
|
332
|
+
await view.create(options);
|
|
333
|
+
await view.setMetadata({ schema: schema_1.RawChangelogViewSchema });
|
|
334
|
+
logs.bigQueryViewCreated(this.rawLatestView());
|
|
335
|
+
}
|
|
336
|
+
catch (ex) {
|
|
337
|
+
logs.tableCreationError(this.rawLatestView(), ex.message);
|
|
338
|
+
}
|
|
240
339
|
}
|
|
241
340
|
return view;
|
|
242
341
|
}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Partitioning = void 0;
|
|
4
|
+
const firebase = require("firebase-admin");
|
|
5
|
+
const logs = require("../logs");
|
|
6
|
+
const schema_1 = require("./schema");
|
|
7
|
+
class Partitioning {
|
|
8
|
+
constructor(config, table, schema) {
|
|
9
|
+
this.config = config;
|
|
10
|
+
this.table = table;
|
|
11
|
+
this.schema = schema;
|
|
12
|
+
}
|
|
13
|
+
isPartitioningEnabled() {
|
|
14
|
+
const { timePartitioning } = this.config;
|
|
15
|
+
return !!timePartitioning;
|
|
16
|
+
}
|
|
17
|
+
isValidPartitionTypeString(value) {
|
|
18
|
+
return typeof value === "string";
|
|
19
|
+
}
|
|
20
|
+
async metaDataSchemaFields() {
|
|
21
|
+
let metadata;
|
|
22
|
+
try {
|
|
23
|
+
[metadata] = await this.table.getMetadata();
|
|
24
|
+
}
|
|
25
|
+
catch {
|
|
26
|
+
console.log("No metadata found");
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
/** Return null if no valid schema on table **/
|
|
30
|
+
if (!metadata.schema)
|
|
31
|
+
return null;
|
|
32
|
+
return metadata.schema.fields;
|
|
33
|
+
}
|
|
34
|
+
isValidPartitionTypeDate(value) {
|
|
35
|
+
return value instanceof firebase.firestore.Timestamp;
|
|
36
|
+
}
|
|
37
|
+
hasHourAndDatePartitionConfig() {
|
|
38
|
+
if (this.config.timePartitioning === "HOUR" &&
|
|
39
|
+
this.config.timePartitioningFieldType === "DATE") {
|
|
40
|
+
logs.hourAndDatePartitioningWarning();
|
|
41
|
+
return true;
|
|
42
|
+
}
|
|
43
|
+
return false;
|
|
44
|
+
}
|
|
45
|
+
hasValidCustomPartitionConfig() {
|
|
46
|
+
/* Return false if partition type option has not been set*/
|
|
47
|
+
if (!this.isPartitioningEnabled())
|
|
48
|
+
return false;
|
|
49
|
+
const { timePartitioningField, timePartitioningFieldType, timePartitioningFirestoreField, } = this.config;
|
|
50
|
+
const hasNoCustomOptions = !timePartitioningField &&
|
|
51
|
+
!timePartitioningFieldType &&
|
|
52
|
+
!timePartitioningFirestoreField;
|
|
53
|
+
/* No custom congig has been set, use partition value option only */
|
|
54
|
+
if (hasNoCustomOptions)
|
|
55
|
+
return true;
|
|
56
|
+
/* check if all options have been provided to be */
|
|
57
|
+
return (!!timePartitioningField &&
|
|
58
|
+
!!timePartitioningFieldType &&
|
|
59
|
+
!!timePartitioningFirestoreField);
|
|
60
|
+
}
|
|
61
|
+
hasValidTimePartitionOption() {
|
|
62
|
+
const { timePartitioning } = this.config;
|
|
63
|
+
return ["HOUR", "DAY", "MONTH", "YEAR"].includes(timePartitioning);
|
|
64
|
+
}
|
|
65
|
+
hasValidTimePartitionType() {
|
|
66
|
+
const { timePartitioningFieldType } = this.config;
|
|
67
|
+
if (!timePartitioningFieldType || timePartitioningFieldType === undefined)
|
|
68
|
+
return true;
|
|
69
|
+
return ["TIMESTAMP", "DATE", "DATETIME"].includes(timePartitioningFieldType);
|
|
70
|
+
}
|
|
71
|
+
async hasExistingSchema() {
|
|
72
|
+
const [metadata] = await this.table.getMetadata();
|
|
73
|
+
return !!metadata.schema;
|
|
74
|
+
}
|
|
75
|
+
hasValidTableReference() {
|
|
76
|
+
logs.invalidTableReference();
|
|
77
|
+
return !!this.table;
|
|
78
|
+
}
|
|
79
|
+
async isTablePartitioned() {
|
|
80
|
+
if (!this.table)
|
|
81
|
+
return Promise.resolve(false);
|
|
82
|
+
// No table provided, cannot evaluate
|
|
83
|
+
if (this.table.exists()) {
|
|
84
|
+
logs.cannotPartitionExistingTable(this.table);
|
|
85
|
+
return Promise.resolve(false);
|
|
86
|
+
}
|
|
87
|
+
/*** No table exists, return */
|
|
88
|
+
const [tableExists] = await this.table.exists();
|
|
89
|
+
if (!tableExists)
|
|
90
|
+
return Promise.resolve(false);
|
|
91
|
+
/* Check if partition metadata already exists */
|
|
92
|
+
const [metadata] = await this.table.getMetadata();
|
|
93
|
+
if (!!metadata.timePartitioning)
|
|
94
|
+
return Promise.resolve(true);
|
|
95
|
+
/** Find schema fields **/
|
|
96
|
+
const schemaFields = await this.metaDataSchemaFields();
|
|
97
|
+
/** No Schema exists, return */
|
|
98
|
+
if (!schemaFields)
|
|
99
|
+
return Promise.resolve(false);
|
|
100
|
+
/* Return false if time partition field not found */
|
|
101
|
+
return schemaFields.some((column) => column.name === this.config.timePartitioningField);
|
|
102
|
+
}
|
|
103
|
+
async isValidPartitionForExistingTable() {
|
|
104
|
+
if (this.isTablePartitioned())
|
|
105
|
+
return false;
|
|
106
|
+
return this.hasValidCustomPartitionConfig();
|
|
107
|
+
}
|
|
108
|
+
isValidPartitionForNewTable() {
|
|
109
|
+
if (!this.isPartitioningEnabled())
|
|
110
|
+
return false;
|
|
111
|
+
return this.hasValidCustomPartitionConfig();
|
|
112
|
+
}
|
|
113
|
+
/*
|
|
114
|
+
Extracts a valid Partition field from the Document Change Event.
|
|
115
|
+
Matches result based on a pre-defined Firestore field matching the event data object.
|
|
116
|
+
Return an empty object if no field name or value provided.
|
|
117
|
+
Returns empty object if not a string or timestamp
|
|
118
|
+
Logs warning if not a valid datatype
|
|
119
|
+
Delete changes events have no data, return early as cannot partition on empty data.
|
|
120
|
+
**/
|
|
121
|
+
getPartitionValue(event) {
|
|
122
|
+
if (!event.data)
|
|
123
|
+
return {};
|
|
124
|
+
const firestoreFieldName = this.config.timePartitioningFirestoreField;
|
|
125
|
+
const fieldName = this.config.timePartitioningField;
|
|
126
|
+
const fieldValue = event.data[firestoreFieldName];
|
|
127
|
+
if (!fieldName || !fieldValue) {
|
|
128
|
+
return {};
|
|
129
|
+
}
|
|
130
|
+
if (this.isValidPartitionTypeString(fieldValue)) {
|
|
131
|
+
return { [fieldName]: fieldValue };
|
|
132
|
+
}
|
|
133
|
+
if (this.isValidPartitionTypeDate(fieldValue))
|
|
134
|
+
return { [fieldName]: fieldValue.toDate() };
|
|
135
|
+
logs.firestoreTimePartitionFieldError(event.documentName, fieldName, firestoreFieldName, fieldValue);
|
|
136
|
+
return {};
|
|
137
|
+
}
|
|
138
|
+
customFieldExists(fields = []) {
|
|
139
|
+
if (!fields.length)
|
|
140
|
+
return false;
|
|
141
|
+
const { timePartitioningField } = this.config;
|
|
142
|
+
return fields.map(($) => $.name).includes(timePartitioningField);
|
|
143
|
+
}
|
|
144
|
+
async addPartitioningToSchema(fields = []) {
|
|
145
|
+
/** check if class has valid table reference */
|
|
146
|
+
if (!this.hasValidTableReference())
|
|
147
|
+
return Promise.resolve();
|
|
148
|
+
/** return if table is already partitioned **/
|
|
149
|
+
if (await this.isTablePartitioned())
|
|
150
|
+
return Promise.resolve();
|
|
151
|
+
/** return if an invalid partition type has been requested**/
|
|
152
|
+
if (!this.hasValidTimePartitionType())
|
|
153
|
+
return Promise.resolve();
|
|
154
|
+
/** Return if invalid partitioning and field type combination */
|
|
155
|
+
if (this.hasHourAndDatePartitionConfig())
|
|
156
|
+
return Promise.resolve();
|
|
157
|
+
/** return if an invalid partition type has been requested**/
|
|
158
|
+
if (!this.hasValidCustomPartitionConfig())
|
|
159
|
+
return Promise.resolve();
|
|
160
|
+
/** return if an invalid partition type has been requested**/
|
|
161
|
+
if (!this.hasValidCustomPartitionConfig())
|
|
162
|
+
return Promise.resolve();
|
|
163
|
+
/** update fields with new schema option ** */
|
|
164
|
+
if (!this.hasValidTimePartitionOption())
|
|
165
|
+
return Promise.resolve();
|
|
166
|
+
/* Check if partition field has been provided */
|
|
167
|
+
if (!this.config.timePartitioningField)
|
|
168
|
+
return Promise.resolve();
|
|
169
|
+
// if (await !this.hasExistingSchema) return Promise.resolve();
|
|
170
|
+
// Field already exists on schema, skip
|
|
171
|
+
if (this.customFieldExists(fields))
|
|
172
|
+
return Promise.resolve();
|
|
173
|
+
fields.push(schema_1.getNewPartitionField(this.config));
|
|
174
|
+
/** log successful addition of partition column */
|
|
175
|
+
logs.addPartitionFieldColumn(this.table.id, this.config.timePartitioningField);
|
|
176
|
+
return Promise.resolve();
|
|
177
|
+
}
|
|
178
|
+
async updateTableMetadata(options) {
|
|
179
|
+
/** return if table is already partitioned **/
|
|
180
|
+
if (await this.isTablePartitioned())
|
|
181
|
+
return Promise.resolve();
|
|
182
|
+
/** return if an invalid partition type has been requested**/
|
|
183
|
+
if (!this.hasValidTimePartitionType())
|
|
184
|
+
return Promise.resolve();
|
|
185
|
+
/** update fields with new schema option ** */
|
|
186
|
+
if (!this.hasValidTimePartitionOption())
|
|
187
|
+
return Promise.resolve();
|
|
188
|
+
/** Return if invalid partitioning and field type combination */
|
|
189
|
+
if (this.hasHourAndDatePartitionConfig())
|
|
190
|
+
return Promise.resolve();
|
|
191
|
+
/** return if an invalid partition type has been requested**/
|
|
192
|
+
if (!this.hasValidCustomPartitionConfig())
|
|
193
|
+
return Promise.resolve();
|
|
194
|
+
// if (await !this.hasExistingSchema) return Promise.resolve();
|
|
195
|
+
if (this.config.timePartitioning) {
|
|
196
|
+
options.timePartitioning = { type: this.config.timePartitioning };
|
|
197
|
+
}
|
|
198
|
+
//TODO: Add check for skipping adding views partition field, this is not a feature that can be added .
|
|
199
|
+
if (this.config.timePartitioningField) {
|
|
200
|
+
options.timePartitioning = {
|
|
201
|
+
...options.timePartitioning,
|
|
202
|
+
field: this.config.timePartitioningField,
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
exports.Partitioning = Partitioning;
|
package/lib/bigquery/schema.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
*/
|
|
17
17
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
-
exports.RawChangelogSchema = exports.RawChangelogViewSchema = exports.documentIdField = exports.longitudeField = exports.latitudeField = exports.timestampField = exports.operationField = exports.eventIdField = exports.documentNameField = exports.dataField = void 0;
|
|
18
|
+
exports.getNewPartitionField = exports.RawChangelogSchema = exports.RawChangelogViewSchema = exports.documentPathParams = exports.documentIdField = exports.longitudeField = exports.latitudeField = exports.timestampField = exports.operationField = exports.eventIdField = exports.documentNameField = exports.dataField = void 0;
|
|
19
19
|
const bigQueryField = (name, type, mode, fields) => ({
|
|
20
20
|
fields,
|
|
21
21
|
mode: mode || "NULLABLE",
|
|
@@ -37,6 +37,12 @@ exports.documentIdField = {
|
|
|
37
37
|
type: "STRING",
|
|
38
38
|
description: "The document id as defined in the firestore database.",
|
|
39
39
|
};
|
|
40
|
+
exports.documentPathParams = {
|
|
41
|
+
name: "path_params",
|
|
42
|
+
mode: "NULLABLE",
|
|
43
|
+
type: "STRING",
|
|
44
|
+
description: "JSON string representing wildcard params with Firestore Document ids",
|
|
45
|
+
};
|
|
40
46
|
/*
|
|
41
47
|
* We cannot specify a schema for view creation, and all view columns default
|
|
42
48
|
* to the NULLABLE mode.
|
|
@@ -111,3 +117,13 @@ exports.RawChangelogSchema = {
|
|
|
111
117
|
exports.documentIdField,
|
|
112
118
|
],
|
|
113
119
|
};
|
|
120
|
+
// Helper function for Partitioned Changelogs field
|
|
121
|
+
exports.getNewPartitionField = (config) => {
|
|
122
|
+
const { timePartitioningField, timePartitioningFieldType } = config;
|
|
123
|
+
return {
|
|
124
|
+
name: timePartitioningField,
|
|
125
|
+
mode: "NULLABLE",
|
|
126
|
+
type: timePartitioningFieldType,
|
|
127
|
+
description: "The document TimePartition partition field selected by user",
|
|
128
|
+
};
|
|
129
|
+
};
|
package/lib/bigquery/snapshot.js
CHANGED
|
@@ -19,13 +19,13 @@ exports.buildLatestSnapshotViewQuery = exports.latestConsistentSnapshotView = vo
|
|
|
19
19
|
const sqlFormatter = require("sql-formatter");
|
|
20
20
|
const schema_1 = require("./schema");
|
|
21
21
|
const excludeFields = ["document_name", "document_id"];
|
|
22
|
-
exports.latestConsistentSnapshotView = (datasetId, tableName) => ({
|
|
23
|
-
query: buildLatestSnapshotViewQuery(datasetId, tableName, schema_1.timestampField.name,
|
|
22
|
+
exports.latestConsistentSnapshotView = (datasetId, tableName, schema, bqProjectId) => ({
|
|
23
|
+
query: buildLatestSnapshotViewQuery(datasetId, tableName, schema_1.timestampField.name, schema["fields"]
|
|
24
24
|
.map((field) => field.name)
|
|
25
|
-
.filter((name) => excludeFields.indexOf(name) === -1)),
|
|
25
|
+
.filter((name) => excludeFields.indexOf(name) === -1), bqProjectId),
|
|
26
26
|
useLegacySql: false,
|
|
27
27
|
});
|
|
28
|
-
function buildLatestSnapshotViewQuery(datasetId, tableName, timestampColumnName, groupByColumns) {
|
|
28
|
+
function buildLatestSnapshotViewQuery(datasetId, tableName, timestampColumnName, groupByColumns, bqProjectId) {
|
|
29
29
|
if (datasetId === "" || tableName === "" || timestampColumnName === "") {
|
|
30
30
|
throw Error(`Missing some query parameters!`);
|
|
31
31
|
}
|
|
@@ -56,7 +56,8 @@ function buildLatestSnapshotViewQuery(datasetId, tableName, timestampColumnName,
|
|
|
56
56
|
FIRST_VALUE(operation)
|
|
57
57
|
OVER(PARTITION BY document_name ORDER BY ${timestampColumnName} DESC) = "DELETE"
|
|
58
58
|
AS is_deleted
|
|
59
|
-
FROM \`${
|
|
59
|
+
FROM \`${bqProjectId ||
|
|
60
|
+
process.env.PROJECT_ID}.${datasetId}.${tableName}\`
|
|
60
61
|
ORDER BY document_name, ${timestampColumnName} DESC
|
|
61
62
|
)
|
|
62
63
|
WHERE NOT is_deleted
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateProject = void 0;
|
|
4
|
+
const { ProjectsClient } = require("@google-cloud/resource-manager");
|
|
5
|
+
/* TODO: searchProjectsAsync sometimes returns {}.
|
|
6
|
+
* Could be resource intensive, if checked on every records insert.
|
|
7
|
+
*/
|
|
8
|
+
exports.validateProject = async (id) => {
|
|
9
|
+
let isValid = false;
|
|
10
|
+
const client = new ProjectsClient();
|
|
11
|
+
const projects = client.searchProjectsAsync();
|
|
12
|
+
for await (const project of projects) {
|
|
13
|
+
if (project.projectId === id) {
|
|
14
|
+
isValid = true;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
return isValid;
|
|
18
|
+
};
|
package/lib/logs.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
*/
|
|
17
17
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
-
exports.
|
|
18
|
+
exports.tableCreationError = exports.invalidClustering = exports.hourAndDatePartitioningWarning = exports.invalidTableReference = exports.invalidProjectIdWarning = exports.cannotPartitionExistingTable = exports.removedClustering = exports.updatedClustering = exports.bigQueryTableInsertErrors = exports.firestoreTimePartitioningParametersWarning = exports.firestoreTimePartitionFieldError = exports.addPartitionFieldColumn = exports.addNewColumn = exports.timestampMissingValue = exports.error = exports.dataTypeInvalid = exports.dataInserting = exports.dataInsertRetried = exports.dataInserted = exports.complete = exports.bigQueryViewValidating = exports.bigQueryViewValidated = exports.bigQueryViewUpToDate = exports.bigQueryViewUpdating = exports.bigQueryViewUpdated = exports.bigQueryViewAlreadyExists = exports.bigQueryViewCreating = exports.bigQueryViewCreated = exports.bigQueryUserDefinedFunctionCreated = exports.bigQueryUserDefinedFunctionCreating = exports.bigQueryTableValidating = exports.bigQueryTableValidated = exports.bigQueryTableUpToDate = exports.bigQueryTableUpdating = exports.bigQueryTableUpdated = exports.bigQueryTableCreating = exports.bigQueryTableCreated = exports.bigQueryTableAlreadyExists = exports.bigQuerySchemaViewCreated = exports.bigQueryLatestSnapshotViewQueryCreated = exports.bigQueryErrorRecordingDocumentChange = exports.bigQueryDatasetExists = exports.bigQueryDatasetCreating = exports.bigQueryDatasetCreated = exports.arrayFieldInvalid = void 0;
|
|
19
19
|
const firebase_functions_1 = require("firebase-functions");
|
|
20
20
|
exports.arrayFieldInvalid = (fieldName) => {
|
|
21
21
|
firebase_functions_1.logger.warn(`Array field '${fieldName}' does not contain an array, skipping`);
|
|
@@ -114,6 +114,57 @@ exports.error = (err) => {
|
|
|
114
114
|
exports.timestampMissingValue = (fieldName) => {
|
|
115
115
|
firebase_functions_1.logger.warn(`Missing value for timestamp field: ${fieldName}, using default timestamp instead.`);
|
|
116
116
|
};
|
|
117
|
-
exports.
|
|
118
|
-
firebase_functions_1.logger.log(`Updated '${table}' table with a '
|
|
117
|
+
exports.addNewColumn = (table, field) => {
|
|
118
|
+
firebase_functions_1.logger.log(`Updated '${table}' table with a '${field}' column`);
|
|
119
|
+
};
|
|
120
|
+
exports.addPartitionFieldColumn = (table, field) => {
|
|
121
|
+
firebase_functions_1.logger.log(`Updated '${table}' table with a partition field '${field}' column`);
|
|
122
|
+
};
|
|
123
|
+
exports.firestoreTimePartitionFieldError = (documentName, fieldName, firestoreFieldName, firestoreFieldData) => {
|
|
124
|
+
firebase_functions_1.logger.warn(`Wrong type of Firestore Field for TimePartitioning. Accepts only strings in BigQuery format (DATE, DATETIME, TIMESTAMP) and Firestore Timestamp. Firestore Document field path: ${documentName}. Field name: ${firestoreFieldName}. Field data: ${firestoreFieldData}. Schema field "${fieldName}" value will be null.`);
|
|
125
|
+
};
|
|
126
|
+
exports.firestoreTimePartitioningParametersWarning = (fieldName, fieldType, firestoreFieldName, dataFirestoreField) => {
|
|
127
|
+
firebase_functions_1.logger.warn("All TimePartitioning option parameters need to be available to create new custom schema field");
|
|
128
|
+
!fieldName && firebase_functions_1.logger.warn(`Parameter missing: TIME_PARTITIONING_FIELD`);
|
|
129
|
+
!fieldType && firebase_functions_1.logger.warn(`Parameter missing: TIME_PARTITIONING_FIELD_TYPE`);
|
|
130
|
+
!firestoreFieldName &&
|
|
131
|
+
firebase_functions_1.logger.warn(`Parameter missing: TIME_PARTITIONING_FIRESTORE_FIELD`);
|
|
132
|
+
!dataFirestoreField &&
|
|
133
|
+
firebase_functions_1.logger.warn(`No data found in Firestore Document under selected field: "${firestoreFieldName}"`);
|
|
134
|
+
};
|
|
135
|
+
exports.bigQueryTableInsertErrors = (insertErrors) => {
|
|
136
|
+
firebase_functions_1.logger.warn(`Error when inserting data to table.`);
|
|
137
|
+
insertErrors.forEach((error) => {
|
|
138
|
+
firebase_functions_1.logger.warn("ROW DATA JSON:");
|
|
139
|
+
firebase_functions_1.logger.warn(error.row);
|
|
140
|
+
error.errors.forEach((error) => firebase_functions_1.logger.warn(`ROW ERROR MESSAGE: ${error.message}`));
|
|
141
|
+
});
|
|
142
|
+
};
|
|
143
|
+
exports.updatedClustering = (fields) => {
|
|
144
|
+
firebase_functions_1.logger.info(`Clustering updated with new settings fields: ${fields}`);
|
|
145
|
+
};
|
|
146
|
+
exports.removedClustering = (tableName) => {
|
|
147
|
+
firebase_functions_1.logger.info(`Clustering removed on ${tableName}`);
|
|
148
|
+
};
|
|
149
|
+
exports.cannotPartitionExistingTable = (table) => {
|
|
150
|
+
firebase_functions_1.logger.warn(`Cannot partition an existing table ${table.dataset.id}_${table.id}`);
|
|
151
|
+
};
|
|
152
|
+
/**
 * Warns that the configured BigQuery project id is invalid and data
 * synchronization will not proceed.
 * @param {string} bqProjectId - The project id that failed validation.
 */
function invalidProjectIdWarning(bqProjectId) {
    const message = `Invalid project Id ${bqProjectId}, data cannot be synchronized`;
    firebase_functions_1.logger.warn(message);
}
exports.invalidProjectIdWarning = invalidProjectIdWarning;
|
|
156
|
+
/**
 * Warns that no valid table reference is available, so partitioning
 * is skipped.
 */
function invalidTableReference() {
    const message = `No valid table reference is available. Skipping partitioning`;
    firebase_functions_1.logger.warn(message);
}
exports.invalidTableReference = invalidTableReference;
|
|
160
|
+
/**
 * Warns that hourly partitioning is incompatible with DATE columns
 * (which only support daily/monthly/yearly granularity); partitioning
 * is skipped.
 */
function hourAndDatePartitioningWarning() {
    const message = `Cannot partition table with hour partitioning and Date. For DATE columns, the partitions can have daily, monthly, or yearly granularity. Skipping partitioning`;
    firebase_functions_1.logger.warn(message);
}
exports.hourAndDatePartitioningWarning = hourAndDatePartitioningWarning;
|
|
164
|
+
/**
 * Warns that clustering could not be applied because the given fields are
 * not present on the expected table.
 * @param {string} fields - The clustering field name(s) that were not found.
 */
function invalidClustering(fields) {
    const message = `Unable to add clustering, field(s) ${fields} do not exist on the expected table`;
    firebase_functions_1.logger.warn(message);
}
exports.invalidClustering = invalidClustering;
|
|
168
|
+
exports.tableCreationError = (table, message) => {
|
|
169
|
+
firebase_functions_1.logger.warn(`Error caught creating table`, message);
|
|
119
170
|
};
|
package/package.json
CHANGED
|
@@ -5,14 +5,14 @@
|
|
|
5
5
|
"url": "github.com/firebase/extensions.git",
|
|
6
6
|
"directory": "firestore-bigquery-export/firestore-bigquery-change-tracker"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.1.
|
|
8
|
+
"version": "1.1.14",
|
|
9
9
|
"description": "Core change-tracker library for Cloud Firestore Collection BigQuery Exports",
|
|
10
10
|
"main": "./lib/index.js",
|
|
11
11
|
"scripts": {
|
|
12
12
|
"build": "npm run clean && npm run compile",
|
|
13
13
|
"clean": "rimraf lib",
|
|
14
14
|
"compile": "tsc",
|
|
15
|
-
"test": "jest",
|
|
15
|
+
"test:local": "firebase ext:dev:emulators:exec ./node_modules/.bin/jest --test-params=./src/__tests__/emulator-params.env --project=extensions-testing --config=./src/__tests__/firebase.json",
|
|
16
16
|
"prepare": "npm run build"
|
|
17
17
|
},
|
|
18
18
|
"files": [
|
|
@@ -23,11 +23,13 @@
|
|
|
23
23
|
"license": "Apache-2.0",
|
|
24
24
|
"dependencies": {
|
|
25
25
|
"@google-cloud/bigquery": "^4.7.0",
|
|
26
|
+
"@google-cloud/resource-manager": "^3.0.0",
|
|
26
27
|
"firebase-admin": "^8.0.0",
|
|
27
28
|
"firebase-functions": "^3.13.2",
|
|
28
29
|
"generate-schema": "^2.6.0",
|
|
29
30
|
"inquirer": "^6.4.0",
|
|
30
31
|
"lodash": "^4.17.14",
|
|
32
|
+
"node-fetch": "^2.6.1",
|
|
31
33
|
"sql-formatter": "^2.3.3",
|
|
32
34
|
"traverse": "^0.6.6"
|
|
33
35
|
},
|