@cumulus/db 21.3.1 → 21.3.2-testlerna.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -1
- package/dist/index.js +3 -3
- package/dist/lib/granule.d.ts +4 -33
- package/dist/lib/granule.js +10 -61
- package/dist/models/granule.js +2 -2
- package/dist/s3search/AsyncOperationS3Search.d.ts +20 -0
- package/dist/s3search/AsyncOperationS3Search.js +29 -0
- package/dist/s3search/CollectionS3Search.d.ts +39 -0
- package/dist/s3search/CollectionS3Search.js +113 -0
- package/dist/s3search/DuckDBSearchExecutor.d.ts +36 -0
- package/dist/s3search/DuckDBSearchExecutor.js +57 -0
- package/dist/s3search/ExecutionS3Search.d.ts +20 -0
- package/dist/s3search/ExecutionS3Search.js +29 -0
- package/dist/s3search/GranuleS3Search.d.ts +31 -0
- package/dist/s3search/GranuleS3Search.js +100 -0
- package/dist/s3search/PdrS3Search.d.ts +20 -0
- package/dist/s3search/PdrS3Search.js +29 -0
- package/dist/s3search/ProviderS3Search.d.ts +20 -0
- package/dist/s3search/ProviderS3Search.js +29 -0
- package/dist/s3search/ReconciliationReportS3Search.d.ts +20 -0
- package/dist/s3search/ReconciliationReportS3Search.js +29 -0
- package/dist/s3search/RuleS3Search.d.ts +20 -0
- package/dist/s3search/RuleS3Search.js +29 -0
- package/dist/s3search/StatsS3Search.d.ts +25 -0
- package/dist/s3search/StatsS3Search.js +51 -0
- package/dist/s3search/duckdbHelpers.d.ts +43 -0
- package/dist/s3search/duckdbHelpers.js +83 -0
- package/dist/s3search/s3TableSchemas.d.ts +11 -0
- package/dist/s3search/s3TableSchemas.js +272 -0
- package/dist/search/BaseSearch.d.ts +46 -2
- package/dist/search/BaseSearch.js +84 -22
- package/dist/search/CollectionSearch.d.ts +6 -4
- package/dist/search/CollectionSearch.js +2 -3
- package/dist/search/ExecutionSearch.d.ts +1 -1
- package/dist/search/ExecutionSearch.js +3 -3
- package/dist/search/GranuleSearch.d.ts +2 -3
- package/dist/search/GranuleSearch.js +3 -3
- package/dist/search/PdrSearch.js +1 -1
- package/dist/search/ReconciliationReportSearch.js +1 -1
- package/dist/search/RuleSearch.js +4 -4
- package/dist/search/StatsSearch.d.ts +15 -4
- package/dist/search/StatsSearch.js +12 -6
- package/dist/search/field-mapping.d.ts +1 -3
- package/dist/search/field-mapping.js +40 -19
- package/dist/test-duckdb-utils.d.ts +31 -0
- package/dist/test-duckdb-utils.js +125 -0
- package/dist/test-utils.js +6 -0
- package/dist/translate/async_operations.js +7 -3
- package/dist/translate/collections.js +6 -6
- package/dist/translate/executions.js +7 -7
- package/dist/translate/granules.js +16 -11
- package/dist/translate/pdr.js +4 -4
- package/dist/translate/providers.js +2 -2
- package/dist/translate/reconciliation_reports.js +5 -4
- package/dist/translate/rules.d.ts +1 -1
- package/dist/translate/rules.js +6 -6
- package/dist/types/file.d.ts +2 -0
- package/dist/types/granule.d.ts +1 -1
- package/package.json +12 -11
package/dist/search/GranuleSearch.js
CHANGED
|
@@ -17,9 +17,9 @@ const log = new logger_1.default({ sender: '@cumulus/db/GranuleSearch' });
|
|
|
17
17
|
* Class to build and execute db search query for granules
|
|
18
18
|
*/
|
|
19
19
|
class GranuleSearch extends BaseSearch_1.BaseSearch {
|
|
20
|
-
constructor(event) {
|
|
20
|
+
constructor(event, enableEstimate = true) {
|
|
21
21
|
// estimate the table rowcount by default
|
|
22
|
-
if (event?.queryStringParameters?.estimateTableRowCount !== 'false') {
|
|
22
|
+
if (enableEstimate && event?.queryStringParameters?.estimateTableRowCount !== 'false') {
|
|
23
23
|
(0, set_1.default)(event, 'queryStringParameters.estimateTableRowCount', 'true');
|
|
24
24
|
}
|
|
25
25
|
super(event, 'granule');
|
|
@@ -33,7 +33,7 @@ class GranuleSearch extends BaseSearch_1.BaseSearch {
|
|
|
33
33
|
buildBasicQuery(knex) {
|
|
34
34
|
const { collections: collectionsTable, providers: providersTable, pdrs: pdrsTable, } = tables_1.TableNames;
|
|
35
35
|
const countQuery = knex(this.tableName)
|
|
36
|
-
.count('*');
|
|
36
|
+
.count('* as count');
|
|
37
37
|
const searchQuery = knex(this.tableName)
|
|
38
38
|
.select(`${this.tableName}.*`)
|
|
39
39
|
.select({
|
package/dist/search/PdrSearch.js
CHANGED
|
@@ -26,7 +26,7 @@ class PdrSearch extends BaseSearch_1.BaseSearch {
|
|
|
26
26
|
buildBasicQuery(knex) {
|
|
27
27
|
const { collections: collectionsTable, providers: providersTable, executions: executionsTable, } = tables_1.TableNames;
|
|
28
28
|
const countQuery = knex(this.tableName)
|
|
29
|
-
.count('*');
|
|
29
|
+
.count('* as count');
|
|
30
30
|
const searchQuery = knex(this.tableName)
|
|
31
31
|
.select(`${this.tableName}.*`)
|
|
32
32
|
.select({
|
package/dist/search/ReconciliationReportSearch.js
CHANGED
|
@@ -26,7 +26,7 @@ class ReconciliationReportSearch extends BaseSearch_1.BaseSearch {
|
|
|
26
26
|
buildBasicQuery(knex) {
|
|
27
27
|
const { reconciliationReports: reconciliationReportsTable, } = tables_1.TableNames;
|
|
28
28
|
const countQuery = knex(this.tableName)
|
|
29
|
-
.count('*');
|
|
29
|
+
.count('* as count');
|
|
30
30
|
const searchQuery = knex(this.tableName)
|
|
31
31
|
.select(`${this.tableName}.*`)
|
|
32
32
|
.select({
|
package/dist/search/RuleSearch.js
CHANGED
|
@@ -26,7 +26,7 @@ class RuleSearch extends BaseSearch_1.BaseSearch {
|
|
|
26
26
|
buildBasicQuery(knex) {
|
|
27
27
|
const { collections: collectionsTable, providers: providersTable, } = tables_1.TableNames;
|
|
28
28
|
const countQuery = knex(this.tableName)
|
|
29
|
-
.count(
|
|
29
|
+
.count('* as count');
|
|
30
30
|
const searchQuery = knex(this.tableName)
|
|
31
31
|
.select(`${this.tableName}.*`)
|
|
32
32
|
.select({
|
|
@@ -77,18 +77,18 @@ class RuleSearch extends BaseSearch_1.BaseSearch {
|
|
|
77
77
|
*/
|
|
78
78
|
async translatePostgresRecordsToApiRecords(pgRecords) {
|
|
79
79
|
log.debug(`translatePostgresRecordsToApiRecords number of records ${pgRecords.length} `);
|
|
80
|
-
const apiRecords = pgRecords.map(
|
|
80
|
+
const apiRecords = pgRecords.map((record) => {
|
|
81
81
|
const providerPgRecord = record.providerName ? { name: record.providerName } : undefined;
|
|
82
82
|
const collectionPgRecord = record.collectionName ? {
|
|
83
83
|
name: record.collectionName,
|
|
84
84
|
version: record.collectionVersion,
|
|
85
85
|
} : undefined;
|
|
86
|
-
const apiRecord =
|
|
86
|
+
const apiRecord = (0, rules_1.translatePostgresRuleToApiRuleWithoutDbQuery)(record, collectionPgRecord, providerPgRecord);
|
|
87
87
|
return this.dbQueryParameters.fields
|
|
88
88
|
? (0, pick_1.default)(apiRecord, this.dbQueryParameters.fields)
|
|
89
89
|
: apiRecord;
|
|
90
90
|
});
|
|
91
|
-
return
|
|
91
|
+
return apiRecords;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
exports.RuleSearch = RuleSearch;
|
package/dist/search/StatsSearch.d.ts
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
import { Knex } from 'knex';
|
|
2
2
|
import { DbQueryParameters, QueryEvent } from '../types/search';
|
|
3
3
|
import { BaseSearch } from './BaseSearch';
|
|
4
|
+
export declare type TotalSummary = {
|
|
5
|
+
count_errors: number;
|
|
6
|
+
count_collections: number;
|
|
7
|
+
count_granules: number;
|
|
8
|
+
avg_processing_time: number;
|
|
9
|
+
};
|
|
10
|
+
declare type Aggregate = {
|
|
11
|
+
count: string;
|
|
12
|
+
aggregatedfield: string;
|
|
13
|
+
};
|
|
4
14
|
declare type Summary = {
|
|
5
15
|
dateFrom: string;
|
|
6
16
|
dateTo: string;
|
|
@@ -8,7 +18,7 @@ declare type Summary = {
|
|
|
8
18
|
aggregation: string;
|
|
9
19
|
unit: string;
|
|
10
20
|
};
|
|
11
|
-
declare type SummaryResult = {
|
|
21
|
+
export declare type SummaryResult = {
|
|
12
22
|
errors: Summary;
|
|
13
23
|
granules: Summary;
|
|
14
24
|
collections: Summary;
|
|
@@ -23,7 +33,7 @@ declare type AggregateRes = {
|
|
|
23
33
|
key: string;
|
|
24
34
|
count: number;
|
|
25
35
|
};
|
|
26
|
-
declare type ApiAggregateResult = {
|
|
36
|
+
export declare type ApiAggregateResult = {
|
|
27
37
|
meta: Meta;
|
|
28
38
|
count: AggregateRes[];
|
|
29
39
|
};
|
|
@@ -39,14 +49,15 @@ declare class StatsSearch extends BaseSearch {
|
|
|
39
49
|
* @param result - the postgres query results
|
|
40
50
|
* @returns the api object with the aggregate statistics
|
|
41
51
|
*/
|
|
42
|
-
|
|
52
|
+
protected formatAggregateResult(result: Record<string, Aggregate>): ApiAggregateResult;
|
|
43
53
|
/**
|
|
44
54
|
* Formats the postgres results into an API stats/summary response
|
|
45
55
|
*
|
|
46
56
|
* @param result - the knex summary query results
|
|
47
57
|
* @returns the api object with the summary statistics
|
|
48
58
|
*/
|
|
49
|
-
|
|
59
|
+
protected formatSummaryResult(result: TotalSummary): SummaryResult;
|
|
60
|
+
protected buildSummaryQuery(knex: Knex): Knex.QueryBuilder;
|
|
50
61
|
/**
|
|
51
62
|
* Queries postgres for a summary of statistics around the granules in the system
|
|
52
63
|
*
|
package/dist/search/StatsSearch.js
CHANGED
|
@@ -94,6 +94,12 @@ class StatsSearch extends BaseSearch_1.BaseSearch {
|
|
|
94
94
|
},
|
|
95
95
|
};
|
|
96
96
|
}
|
|
97
|
+
buildSummaryQuery(knex) {
|
|
98
|
+
const aggregateQuery = knex(this.tableName);
|
|
99
|
+
this.buildRangeQuery({ searchQuery: aggregateQuery });
|
|
100
|
+
aggregateQuery.select(knex.raw(`COUNT(CASE WHEN ${this.tableName}.error ->> 'Error' is not null THEN 1 END) AS count_errors`), knex.raw('COUNT(*) AS count_granules'), knex.raw(`AVG(${this.tableName}.duration) AS avg_processing_time`), knex.raw(`COUNT(DISTINCT ${this.tableName}.collection_cumulus_id) AS count_collections`));
|
|
101
|
+
return aggregateQuery;
|
|
102
|
+
}
|
|
97
103
|
/**
|
|
98
104
|
* Queries postgres for a summary of statistics around the granules in the system
|
|
99
105
|
*
|
|
@@ -102,9 +108,7 @@ class StatsSearch extends BaseSearch_1.BaseSearch {
|
|
|
102
108
|
*/
|
|
103
109
|
async summary(testKnex) {
|
|
104
110
|
const knex = testKnex ?? await (0, connection_1.getKnexClient)();
|
|
105
|
-
const aggregateQuery =
|
|
106
|
-
this.buildRangeQuery({ searchQuery: aggregateQuery });
|
|
107
|
-
aggregateQuery.select(knex.raw(`COUNT(CASE WHEN ${this.tableName}.error ->> 'Error' is not null THEN 1 END) AS count_errors`), knex.raw('COUNT(*) AS count_granules'), knex.raw(`AVG(${this.tableName}.duration) AS avg_processing_time`), knex.raw(`COUNT(DISTINCT ${this.tableName}.collection_cumulus_id) AS count_collections`));
|
|
111
|
+
const aggregateQuery = this.buildSummaryQuery(knex);
|
|
108
112
|
log.debug(`summary about to execute query: ${aggregateQuery?.toSQL().sql}`);
|
|
109
113
|
const aggregateQueryRes = await aggregateQuery;
|
|
110
114
|
return this.formatSummaryResult(aggregateQueryRes[0]);
|
|
@@ -133,11 +137,13 @@ class StatsSearch extends BaseSearch_1.BaseSearch {
|
|
|
133
137
|
* @param knex - the knex client to be used
|
|
134
138
|
*/
|
|
135
139
|
aggregateQueryField(query, knex) {
|
|
136
|
-
|
|
137
|
-
|
|
140
|
+
const normalizedKey = this.field === 'error.Error.keyword' ? 'error.Error' : this.field;
|
|
141
|
+
if (normalizedKey?.includes('.')) {
|
|
142
|
+
const [root, ...nested] = normalizedKey.split('.');
|
|
143
|
+
query.select(knex.raw(`${root} ->> '${nested.join('.')}' as aggregatedfield`));
|
|
138
144
|
}
|
|
139
145
|
else {
|
|
140
|
-
query.select(`${this.tableName}.${
|
|
146
|
+
query.select(`${this.tableName}.${normalizedKey} as aggregatedfield`);
|
|
141
147
|
}
|
|
142
148
|
query.modify((queryBuilder) => this.joinTables(queryBuilder))
|
|
143
149
|
.count('* as count')
|
package/dist/search/field-mapping.js
CHANGED
|
@@ -10,7 +10,7 @@ const log = new logger_1.default({ sender: '@cumulus/db/field-mapping' });
|
|
|
10
10
|
// functions to map the api search string field name and value to postgres db field
|
|
11
11
|
const granuleMapping = {
|
|
12
12
|
archived: (value) => ({
|
|
13
|
-
archived: value,
|
|
13
|
+
archived: value === 'true',
|
|
14
14
|
}),
|
|
15
15
|
beginningDateTime: (value) => ({
|
|
16
16
|
beginning_date_time: value,
|
|
@@ -72,13 +72,6 @@ const granuleMapping = {
|
|
|
72
72
|
error: (value) => ({
|
|
73
73
|
error: value,
|
|
74
74
|
}),
|
|
75
|
-
// nested error field
|
|
76
|
-
'error.Error': (value) => ({
|
|
77
|
-
'error.Error': value,
|
|
78
|
-
}),
|
|
79
|
-
'error.Error.keyword': (value) => ({
|
|
80
|
-
'error.Error': value,
|
|
81
|
-
}),
|
|
82
75
|
// The following fields require querying other tables
|
|
83
76
|
collectionId: (value) => {
|
|
84
77
|
const { name, version } = (value && (0, Collections_1.deconstructCollectionId)(value)) || {};
|
|
@@ -175,13 +168,6 @@ const executionMapping = {
|
|
|
175
168
|
duration: (value) => ({
|
|
176
169
|
duration: value && Number(value),
|
|
177
170
|
}),
|
|
178
|
-
// nested error field
|
|
179
|
-
'error.Error': (value) => ({
|
|
180
|
-
'error.Error': value,
|
|
181
|
-
}),
|
|
182
|
-
'error.Error.keyword': (value) => ({
|
|
183
|
-
'error.Error': value,
|
|
184
|
-
}),
|
|
185
171
|
execution: (value) => ({
|
|
186
172
|
url: value,
|
|
187
173
|
}),
|
|
@@ -212,7 +198,7 @@ const executionMapping = {
|
|
|
212
198
|
};
|
|
213
199
|
},
|
|
214
200
|
archived: (value) => ({
|
|
215
|
-
archived: value,
|
|
201
|
+
archived: value === 'true',
|
|
216
202
|
}),
|
|
217
203
|
};
|
|
218
204
|
const pdrMapping = {
|
|
@@ -386,6 +372,12 @@ const reconciliationReportMapping = {
|
|
|
386
372
|
updated_at: value && new Date(Number(value)),
|
|
387
373
|
}),
|
|
388
374
|
};
|
|
375
|
+
const nestedRootsByType = {
|
|
376
|
+
execution: new Set(['error']),
|
|
377
|
+
granule: new Set(['error', 'queryFields']),
|
|
378
|
+
pdr: new Set(['stats']),
|
|
379
|
+
reconciliationReport: new Set(['error']),
|
|
380
|
+
};
|
|
389
381
|
// type and its mapping
|
|
390
382
|
const supportedMappings = {
|
|
391
383
|
granule: granuleMapping,
|
|
@@ -397,6 +389,19 @@ const supportedMappings = {
|
|
|
397
389
|
rule: ruleMapping,
|
|
398
390
|
reconciliationReport: reconciliationReportMapping,
|
|
399
391
|
};
|
|
392
|
+
const toSnakeCase = (str) => str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
393
|
+
const mapNestedKey = (type, key) => {
|
|
394
|
+
const normalizedKey = key === 'error.Error.keyword' ? 'error.Error' : key;
|
|
395
|
+
const [root, ...nested] = normalizedKey.split('.');
|
|
396
|
+
const allowedRoots = nestedRootsByType[type];
|
|
397
|
+
if (!allowedRoots || !allowedRoots.has(root)) {
|
|
398
|
+
return undefined;
|
|
399
|
+
}
|
|
400
|
+
const mappedRoot = toSnakeCase(root);
|
|
401
|
+
if (nested.length === 0)
|
|
402
|
+
return mappedRoot;
|
|
403
|
+
return [mappedRoot, ...nested].join('.');
|
|
404
|
+
};
|
|
400
405
|
/**
|
|
401
406
|
* Map query string field to db field
|
|
402
407
|
*
|
|
@@ -407,11 +412,27 @@ const supportedMappings = {
|
|
|
407
412
|
* @returns db field
|
|
408
413
|
*/
|
|
409
414
|
const mapQueryStringFieldToDbField = (type, queryField) => {
|
|
410
|
-
|
|
411
|
-
|
|
415
|
+
const typeMapping = supportedMappings[type];
|
|
416
|
+
if (!typeMapping) {
|
|
417
|
+
log.warn(`No mapping found for type: ${type}`);
|
|
412
418
|
return undefined;
|
|
413
419
|
}
|
|
414
|
-
|
|
420
|
+
// Exact match (typed + custom logic)
|
|
421
|
+
const exactMapper = typeMapping[queryField.name];
|
|
422
|
+
if (exactMapper) {
|
|
423
|
+
return exactMapper(queryField.value);
|
|
424
|
+
}
|
|
425
|
+
// Nested fallback with type inference
|
|
426
|
+
if (queryField.name.includes('.')) {
|
|
427
|
+
const mappedKey = mapNestedKey(type, queryField.name);
|
|
428
|
+
if (mappedKey) {
|
|
429
|
+
return {
|
|
430
|
+
[mappedKey]: queryField.value,
|
|
431
|
+
};
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
log.warn(`No db mapping field found for type: ${type}, field ${JSON.stringify(queryField)}`);
|
|
435
|
+
return undefined;
|
|
415
436
|
};
|
|
416
437
|
exports.mapQueryStringFieldToDbField = mapQueryStringFieldToDbField;
|
|
417
438
|
//# sourceMappingURL=field-mapping.js.map
|
package/dist/test-duckdb-utils.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Knex } from 'knex';
|
|
2
|
+
import { DuckDBInstance, DuckDBConnection } from '@duckdb/node-api';
|
|
3
|
+
/**
|
|
4
|
+
* Creates a DuckDB in-memory instance and sets up S3/httpfs for testing.
|
|
5
|
+
* Configures S3-related settings on the DuckDB instance.
|
|
6
|
+
*
|
|
7
|
+
* @param {string} dbFilePath - The path to the DuckDB database file. Defaults to in-memory
|
|
8
|
+
* @returns {Promise<{ instance: DuckDBInstance, connection: DuckDBConnection }>}
|
|
9
|
+
* - The created DuckDB instance and the connection object for interacting with the database.
|
|
10
|
+
* - The connection is configured with HTTPFS for S3.
|
|
11
|
+
*/
|
|
12
|
+
export declare function setupDuckDBWithS3ForTesting(dbFilePath?: string): Promise<{
|
|
13
|
+
instance: DuckDBInstance;
|
|
14
|
+
connection: DuckDBConnection;
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Stages data into a temporary DuckDB table, exports it to Parquet (S3),
|
|
18
|
+
* and then loads it into the target table.
|
|
19
|
+
*
|
|
20
|
+
* @template T - Shape of the row object being inserted.
|
|
21
|
+
* @param connection - Active DuckDB connection.
|
|
22
|
+
* @param knexBuilder - Knex instance used to generate SQL insert statements.
|
|
23
|
+
* @param tableName - Name of the destination table.
|
|
24
|
+
* @param tableSql - Function that returns the CREATE TABLE SQL for a given table name.
|
|
25
|
+
* @param data - A single row or array of rows to insert.
|
|
26
|
+
* @param s3Path - Destination S3 path where the staged data will be exported as Parquet.
|
|
27
|
+
* @returns Promise that resolves when the staging, export, and load process completes.
|
|
28
|
+
*/
|
|
29
|
+
export declare function stageAndLoadDuckDBTableFromData<T extends Record<string, any>>(connection: DuckDBConnection, knexBuilder: Knex, tableName: string, tableSql: (tableName: string) => string, data: T | T[], s3Path: string): Promise<void>;
|
|
30
|
+
export declare function createDuckDBTables(connection: DuckDBConnection): Promise<void>;
|
|
31
|
+
//# sourceMappingURL=test-duckdb-utils.d.ts.map
|
package/dist/test-duckdb-utils.js
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createDuckDBTables = exports.stageAndLoadDuckDBTableFromData = exports.setupDuckDBWithS3ForTesting = void 0;
|
|
4
|
+
const node_api_1 = require("@duckdb/node-api");
|
|
5
|
+
const s3TableSchemas_1 = require("./s3search/s3TableSchemas");
|
|
6
|
+
const duckdbHelpers_1 = require("./s3search/duckdbHelpers");
|
|
7
|
+
/**
|
|
8
|
+
* Creates a DuckDB in-memory instance and sets up S3/httpfs for testing.
|
|
9
|
+
* Configures S3-related settings on the DuckDB instance.
|
|
10
|
+
*
|
|
11
|
+
* @param {string} dbFilePath - The path to the DuckDB database file. Defaults to in-memory
|
|
12
|
+
* @returns {Promise<{ instance: DuckDBInstance, connection: DuckDBConnection }>}
|
|
13
|
+
* - The created DuckDB instance and the connection object for interacting with the database.
|
|
14
|
+
* - The connection is configured with HTTPFS for S3.
|
|
15
|
+
*/
|
|
16
|
+
async function setupDuckDBWithS3ForTesting(dbFilePath = ':memory:') {
|
|
17
|
+
const instance = await node_api_1.DuckDBInstance.create(dbFilePath);
|
|
18
|
+
const connection = await instance.connect();
|
|
19
|
+
// Configure DuckDB HTTPFS for S3
|
|
20
|
+
await connection.run(`
|
|
21
|
+
INSTALL httpfs;
|
|
22
|
+
LOAD httpfs;
|
|
23
|
+
SET s3_region='us-east-1';
|
|
24
|
+
SET s3_access_key_id='test';
|
|
25
|
+
SET s3_secret_access_key='test';
|
|
26
|
+
SET s3_endpoint='localhost:4566';
|
|
27
|
+
SET s3_use_ssl=false;
|
|
28
|
+
SET s3_url_style='path';
|
|
29
|
+
`);
|
|
30
|
+
return { instance, connection };
|
|
31
|
+
}
|
|
32
|
+
exports.setupDuckDBWithS3ForTesting = setupDuckDBWithS3ForTesting;
|
|
33
|
+
/**
|
|
34
|
+
* Stages data into a temporary DuckDB table, exports it to Parquet (S3),
|
|
35
|
+
* and then loads it into the target table.
|
|
36
|
+
*
|
|
37
|
+
* @template T - Shape of the row object being inserted.
|
|
38
|
+
* @param connection - Active DuckDB connection.
|
|
39
|
+
* @param knexBuilder - Knex instance used to generate SQL insert statements.
|
|
40
|
+
* @param tableName - Name of the destination table.
|
|
41
|
+
* @param tableSql - Function that returns the CREATE TABLE SQL for a given table name.
|
|
42
|
+
* @param data - A single row or array of rows to insert.
|
|
43
|
+
* @param s3Path - Destination S3 path where the staged data will be exported as Parquet.
|
|
44
|
+
* @returns Promise that resolves when the staging, export, and load process completes.
|
|
45
|
+
*/
|
|
46
|
+
async function stageAndLoadDuckDBTableFromData(connection, knexBuilder, tableName, tableSql, data, s3Path) {
|
|
47
|
+
if (!data || (Array.isArray(data) && data.length === 0))
|
|
48
|
+
return;
|
|
49
|
+
const rows = Array.isArray(data) ? data : [data];
|
|
50
|
+
const tmpTableName = `${tableName}_tmp`;
|
|
51
|
+
// Create temporary staging table
|
|
52
|
+
await connection.run(tableSql(tmpTableName));
|
|
53
|
+
// Insert into staging table
|
|
54
|
+
if (tableName === 'executions') {
|
|
55
|
+
const execRows = rows;
|
|
56
|
+
const parentRows = execRows.filter((r) => !r.parent_cumulus_id);
|
|
57
|
+
const childRows = execRows.filter((r) => r.parent_cumulus_id);
|
|
58
|
+
if (parentRows.length > 0) {
|
|
59
|
+
const parentInsert = knexBuilder(tmpTableName)
|
|
60
|
+
.insert(parentRows)
|
|
61
|
+
.toSQL()
|
|
62
|
+
.toNative();
|
|
63
|
+
await connection.run(parentInsert.sql, (0, duckdbHelpers_1.prepareBindings)(parentInsert.bindings));
|
|
64
|
+
}
|
|
65
|
+
if (childRows.length > 0) {
|
|
66
|
+
const childInsert = knexBuilder(tmpTableName)
|
|
67
|
+
.insert(childRows)
|
|
68
|
+
.toSQL()
|
|
69
|
+
.toNative();
|
|
70
|
+
await connection.run(childInsert.sql, (0, duckdbHelpers_1.prepareBindings)(childInsert.bindings));
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
// Generic insert for other tables
|
|
75
|
+
const insertQuery = knexBuilder(tmpTableName)
|
|
76
|
+
.insert(rows)
|
|
77
|
+
.toSQL()
|
|
78
|
+
.toNative();
|
|
79
|
+
await connection.run(insertQuery.sql, (0, duckdbHelpers_1.prepareBindings)(insertQuery.bindings));
|
|
80
|
+
}
|
|
81
|
+
// Export staging table to Parquet (S3)
|
|
82
|
+
await connection.run(`
|
|
83
|
+
COPY ${tmpTableName}
|
|
84
|
+
TO '${s3Path}'
|
|
85
|
+
(FORMAT PARQUET);
|
|
86
|
+
`);
|
|
87
|
+
// Load from staging table into final table
|
|
88
|
+
if (tableName === 'executions') {
|
|
89
|
+
// Insert parents first
|
|
90
|
+
await connection.run(`
|
|
91
|
+
INSERT INTO ${tableName}
|
|
92
|
+
SELECT * FROM ${tmpTableName}
|
|
93
|
+
WHERE parent_cumulus_id IS NULL;
|
|
94
|
+
`);
|
|
95
|
+
// Insert children next
|
|
96
|
+
await connection.run(`
|
|
97
|
+
INSERT INTO ${tableName}
|
|
98
|
+
SELECT * FROM ${tmpTableName}
|
|
99
|
+
WHERE parent_cumulus_id IS NOT NULL;
|
|
100
|
+
`);
|
|
101
|
+
}
|
|
102
|
+
else {
|
|
103
|
+
await connection.run(`
|
|
104
|
+
INSERT INTO ${tableName}
|
|
105
|
+
SELECT * FROM ${tmpTableName};
|
|
106
|
+
`);
|
|
107
|
+
}
|
|
108
|
+
// Drop staging table
|
|
109
|
+
await connection.run(`DROP TABLE IF EXISTS ${tmpTableName};`);
|
|
110
|
+
}
|
|
111
|
+
exports.stageAndLoadDuckDBTableFromData = stageAndLoadDuckDBTableFromData;
|
|
112
|
+
async function createDuckDBTables(connection) {
|
|
113
|
+
await connection.run((0, s3TableSchemas_1.asyncOperationsS3TableSql)());
|
|
114
|
+
await connection.run((0, s3TableSchemas_1.collectionsS3TableSql)());
|
|
115
|
+
await connection.run((0, s3TableSchemas_1.providersS3TableSql)());
|
|
116
|
+
await connection.run((0, s3TableSchemas_1.granulesS3TableSql)());
|
|
117
|
+
await connection.run((0, s3TableSchemas_1.filesS3TableSql)());
|
|
118
|
+
await connection.run((0, s3TableSchemas_1.executionsS3TableSql)());
|
|
119
|
+
await connection.run((0, s3TableSchemas_1.granulesExecutionsS3TableSql)());
|
|
120
|
+
await connection.run((0, s3TableSchemas_1.pdrsS3TableSql)());
|
|
121
|
+
await connection.run((0, s3TableSchemas_1.reconciliationReportsS3TableSql)());
|
|
122
|
+
await connection.run((0, s3TableSchemas_1.rulesS3TableSql)());
|
|
123
|
+
}
|
|
124
|
+
exports.createDuckDBTables = createDuckDBTables;
|
|
125
|
+
//# sourceMappingURL=test-duckdb-utils.js.map
|
package/dist/test-utils.js
CHANGED
|
@@ -94,12 +94,15 @@ const fakeGranuleRecordFactory = (params) => ({
|
|
|
94
94
|
producer_granule_id: (0, crypto_random_string_1.default)({ length: 5 }),
|
|
95
95
|
status: 'completed',
|
|
96
96
|
created_at: new Date(),
|
|
97
|
+
updated_at: new Date(),
|
|
97
98
|
...params,
|
|
98
99
|
});
|
|
99
100
|
exports.fakeGranuleRecordFactory = fakeGranuleRecordFactory;
|
|
100
101
|
const fakeFileRecordFactory = (params) => ({
|
|
101
102
|
bucket: (0, crypto_random_string_1.default)({ length: 3 }),
|
|
102
103
|
key: (0, crypto_random_string_1.default)({ length: 3 }),
|
|
104
|
+
created_at: new Date(),
|
|
105
|
+
updated_at: new Date(),
|
|
103
106
|
...params,
|
|
104
107
|
});
|
|
105
108
|
exports.fakeFileRecordFactory = fakeFileRecordFactory;
|
|
@@ -110,6 +113,8 @@ const fakeAsyncOperationRecordFactory = (params) => ({
|
|
|
110
113
|
status: 'RUNNING',
|
|
111
114
|
output: { test: 'output' },
|
|
112
115
|
task_arn: (0, crypto_random_string_1.default)({ length: 3 }),
|
|
116
|
+
created_at: new Date(),
|
|
117
|
+
updated_at: new Date(),
|
|
113
118
|
...params,
|
|
114
119
|
});
|
|
115
120
|
exports.fakeAsyncOperationRecordFactory = fakeAsyncOperationRecordFactory;
|
|
@@ -117,6 +122,7 @@ const fakePdrRecordFactory = (params) => ({
|
|
|
117
122
|
name: `pdr${(0, crypto_random_string_1.default)({ length: 10 })}`,
|
|
118
123
|
status: 'running',
|
|
119
124
|
created_at: new Date(),
|
|
125
|
+
updated_at: new Date(),
|
|
120
126
|
...params,
|
|
121
127
|
});
|
|
122
128
|
exports.fakePdrRecordFactory = fakePdrRecordFactory;
|
package/dist/translate/async_operations.js
CHANGED
|
@@ -4,8 +4,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.translatePostgresAsyncOperationToApiAsyncOperation = exports.translateApiAsyncOperationToPostgresAsyncOperation = void 0;
|
|
7
|
+
const isObject_1 = __importDefault(require("lodash/isObject"));
|
|
7
8
|
const omit_1 = __importDefault(require("lodash/omit"));
|
|
8
9
|
const snake_camel_1 = require("snake-camel");
|
|
10
|
+
const util_1 = require("@cumulus/common/util");
|
|
9
11
|
const logger_1 = __importDefault(require("@cumulus/logger"));
|
|
10
12
|
const log = new logger_1.default({ sender: '@cumulus/db/translate/async-operations' });
|
|
11
13
|
/**
|
|
@@ -67,10 +69,12 @@ const translatePostgresAsyncOperationToApiAsyncOperation = (pgAsyncOperation) =>
|
|
|
67
69
|
description: pgAsyncOperation.description,
|
|
68
70
|
operationType: pgAsyncOperation.operation_type,
|
|
69
71
|
status: pgAsyncOperation.status,
|
|
70
|
-
output:
|
|
72
|
+
output: (0, isObject_1.default)(pgAsyncOperation.output)
|
|
73
|
+
? JSON.stringify(pgAsyncOperation.output)
|
|
74
|
+
: pgAsyncOperation.output,
|
|
71
75
|
taskArn: pgAsyncOperation.task_arn,
|
|
72
|
-
createdAt:
|
|
73
|
-
updatedAt:
|
|
76
|
+
createdAt: (0, util_1.returnNullOrUndefinedOrDate)(pgAsyncOperation.created_at)?.getTime(),
|
|
77
|
+
updatedAt: (0, util_1.returnNullOrUndefinedOrDate)(pgAsyncOperation.created_at)?.getTime(),
|
|
74
78
|
};
|
|
75
79
|
return apiAsyncOperation;
|
|
76
80
|
};
|
package/dist/translate/collections.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.translateApiCollectionToPostgresCollection = exports.translatePostgresCollectionToApiCollection = void 0;
|
|
4
|
-
const { removeNilProperties } = require('@cumulus/common/util');
|
|
4
|
+
const { parseIfJson, returnNullOrUndefinedOrDate, removeNilProperties } = require('@cumulus/common/util');
|
|
5
5
|
/**
|
|
6
6
|
* Translates a PostgresCollectionRecord object to a `CollectionRecord` API collection object
|
|
7
7
|
* @param {PostgresCollectionRecord} collectionRecord - PostgreSQL collection record to translate
|
|
8
8
|
* @returns {CollectionRecord} - Translated record
|
|
9
9
|
*/
|
|
10
10
|
const translatePostgresCollectionToApiCollection = (collectionRecord) => removeNilProperties(({
|
|
11
|
-
createdAt: collectionRecord.created_at.getTime(),
|
|
12
|
-
updatedAt: collectionRecord.updated_at.getTime(),
|
|
11
|
+
createdAt: returnNullOrUndefinedOrDate(collectionRecord.created_at).getTime(),
|
|
12
|
+
updatedAt: returnNullOrUndefinedOrDate(collectionRecord.updated_at).getTime(),
|
|
13
13
|
name: collectionRecord.name,
|
|
14
14
|
version: collectionRecord.version,
|
|
15
15
|
process: collectionRecord.process,
|
|
@@ -17,12 +17,12 @@ const translatePostgresCollectionToApiCollection = (collectionRecord) => removeN
|
|
|
17
17
|
duplicateHandling: collectionRecord.duplicate_handling,
|
|
18
18
|
granuleId: collectionRecord.granule_id_validation_regex,
|
|
19
19
|
granuleIdExtraction: collectionRecord.granule_id_extraction_regex,
|
|
20
|
-
files: collectionRecord.files,
|
|
20
|
+
files: parseIfJson(collectionRecord.files),
|
|
21
21
|
reportToEms: collectionRecord.report_to_ems,
|
|
22
22
|
sampleFileName: collectionRecord.sample_file_name,
|
|
23
23
|
ignoreFilesConfigForDiscovery: collectionRecord.ignore_files_config_for_discovery,
|
|
24
|
-
meta: collectionRecord.meta,
|
|
25
|
-
tags: collectionRecord.tags,
|
|
24
|
+
meta: parseIfJson(collectionRecord.meta),
|
|
25
|
+
tags: parseIfJson(collectionRecord.tags),
|
|
26
26
|
}));
|
|
27
27
|
exports.translatePostgresCollectionToApiCollection = translatePostgresCollectionToApiCollection;
|
|
28
28
|
/**
|
package/dist/translate/executions.js
CHANGED
|
@@ -24,10 +24,10 @@ const translatePostgresExecutionToApiExecutionWithoutDbQuery = ({ executionRecor
|
|
|
24
24
|
status: executionRecord.status,
|
|
25
25
|
arn: executionRecord.arn,
|
|
26
26
|
duration: executionRecord.duration,
|
|
27
|
-
error: executionRecord.error,
|
|
28
|
-
tasks: executionRecord.tasks,
|
|
29
|
-
originalPayload: executionRecord.original_payload,
|
|
30
|
-
finalPayload: executionRecord.final_payload,
|
|
27
|
+
error: (0, util_1.parseIfJson)(executionRecord.error),
|
|
28
|
+
tasks: (0, util_1.parseIfJson)(executionRecord.tasks),
|
|
29
|
+
originalPayload: (0, util_1.parseIfJson)(executionRecord.original_payload),
|
|
30
|
+
finalPayload: (0, util_1.parseIfJson)(executionRecord.final_payload),
|
|
31
31
|
type: executionRecord.workflow_name,
|
|
32
32
|
execution: executionRecord.url,
|
|
33
33
|
cumulusVersion: executionRecord.cumulus_version,
|
|
@@ -35,9 +35,9 @@ const translatePostgresExecutionToApiExecutionWithoutDbQuery = ({ executionRecor
|
|
|
35
35
|
collectionId,
|
|
36
36
|
parentArn,
|
|
37
37
|
archived: executionRecord.archived,
|
|
38
|
-
createdAt: executionRecord.created_at
|
|
39
|
-
updatedAt: executionRecord.updated_at
|
|
40
|
-
timestamp: executionRecord.timestamp?.getTime(),
|
|
38
|
+
createdAt: (0, util_1.returnNullOrUndefinedOrDate)(executionRecord.created_at)?.getTime(),
|
|
39
|
+
updatedAt: (0, util_1.returnNullOrUndefinedOrDate)(executionRecord.updated_at)?.getTime(),
|
|
40
|
+
timestamp: (0, util_1.returnNullOrUndefinedOrDate)(executionRecord.timestamp)?.getTime(),
|
|
41
41
|
};
|
|
42
42
|
return (0, util_1.removeNilProperties)(translatedRecord);
|
|
43
43
|
};
|
package/dist/translate/granules.js
CHANGED
|
@@ -30,31 +30,36 @@ const file_2 = require("./file");
|
|
|
30
30
|
*/
|
|
31
31
|
const translatePostgresGranuleToApiGranuleWithoutDbQuery = ({ granulePgRecord, collectionPgRecord, executionUrls = [], files = [], pdr, providerPgRecord, }) => (0, util_1.removeNilProperties)({
|
|
32
32
|
archived: granulePgRecord.archived,
|
|
33
|
-
beginningDateTime: granulePgRecord.beginning_date_time
|
|
33
|
+
beginningDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.beginning_date_time)
|
|
34
|
+
?.toISOString(),
|
|
34
35
|
cmrLink: granulePgRecord.cmr_link,
|
|
35
36
|
collectionId: (0, Collections_1.constructCollectionId)(collectionPgRecord.name, collectionPgRecord.version),
|
|
36
|
-
createdAt: granulePgRecord.created_at?.getTime(),
|
|
37
|
+
createdAt: new Date(granulePgRecord.created_at)?.getTime(),
|
|
37
38
|
duration: granulePgRecord.duration,
|
|
38
|
-
endingDateTime: granulePgRecord.ending_date_time?.toISOString(),
|
|
39
|
-
error: granulePgRecord.error,
|
|
39
|
+
endingDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.ending_date_time)?.toISOString(),
|
|
40
|
+
error: (0, util_1.parseIfJson)(granulePgRecord.error),
|
|
40
41
|
execution: executionUrls[0] ? executionUrls[0].url : undefined,
|
|
41
42
|
files: files.length > 0 ? files.map((file) => (0, file_2.translatePostgresFileToApiFile)(file)) : [],
|
|
42
43
|
granuleId: granulePgRecord.granule_id,
|
|
43
|
-
lastUpdateDateTime: granulePgRecord.last_update_date_time
|
|
44
|
+
lastUpdateDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.last_update_date_time)
|
|
45
|
+
?.toISOString(),
|
|
44
46
|
pdrName: pdr ? pdr.name : undefined,
|
|
45
|
-
processingEndDateTime: granulePgRecord.processing_end_date_time
|
|
46
|
-
|
|
47
|
+
processingEndDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.processing_end_date_time)
|
|
48
|
+
?.toISOString(),
|
|
49
|
+
processingStartDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.processing_start_date_time)
|
|
50
|
+
?.toISOString(),
|
|
47
51
|
producerGranuleId: granulePgRecord.producer_granule_id,
|
|
48
|
-
productionDateTime: granulePgRecord.production_date_time
|
|
52
|
+
productionDateTime: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.production_date_time)
|
|
53
|
+
?.toISOString(),
|
|
49
54
|
productVolume: granulePgRecord.product_volume,
|
|
50
55
|
provider: providerPgRecord ? providerPgRecord.name : undefined,
|
|
51
56
|
published: granulePgRecord.published,
|
|
52
|
-
queryFields: granulePgRecord.query_fields,
|
|
57
|
+
queryFields: (0, util_1.parseIfJson)(granulePgRecord.query_fields),
|
|
53
58
|
status: granulePgRecord.status,
|
|
54
|
-
timestamp: granulePgRecord.timestamp?.getTime(),
|
|
59
|
+
timestamp: (0, util_1.returnNullOrUndefinedOrDate)(granulePgRecord.timestamp)?.getTime(),
|
|
55
60
|
timeToArchive: granulePgRecord.time_to_archive,
|
|
56
61
|
timeToPreprocess: granulePgRecord.time_to_process,
|
|
57
|
-
updatedAt: granulePgRecord.updated_at?.getTime(),
|
|
62
|
+
updatedAt: new Date(granulePgRecord.updated_at)?.getTime(),
|
|
58
63
|
});
|
|
59
64
|
exports.translatePostgresGranuleToApiGranuleWithoutDbQuery = translatePostgresGranuleToApiGranuleWithoutDbQuery;
|
|
60
65
|
/**
|
package/dist/translate/pdr.js
CHANGED
|
@@ -55,17 +55,17 @@ const translatePostgresPdrToApiPdrWithoutDbQuery = ({ pdrPgRecord, collectionPgR
|
|
|
55
55
|
provider: providerPgRecord?.name,
|
|
56
56
|
collectionId: (0, Collections_1.constructCollectionId)(collectionPgRecord.name, collectionPgRecord.version),
|
|
57
57
|
status: pdrPgRecord.status,
|
|
58
|
-
createdAt: pdrPgRecord.created_at
|
|
58
|
+
createdAt: (0, util_1.returnNullOrUndefinedOrDate)(pdrPgRecord.created_at)?.getTime(),
|
|
59
59
|
progress: pdrPgRecord.progress,
|
|
60
60
|
execution: executionArn ? (0, Executions_1.getExecutionUrlFromArn)(executionArn) : undefined,
|
|
61
61
|
PANSent: pdrPgRecord.pan_sent,
|
|
62
62
|
PANmessage: pdrPgRecord.pan_message,
|
|
63
|
-
stats: pdrPgRecord.stats,
|
|
63
|
+
stats: (0, util_1.parseIfJson)(pdrPgRecord.stats),
|
|
64
64
|
address: pdrPgRecord.address,
|
|
65
65
|
originalUrl: pdrPgRecord.original_url,
|
|
66
|
-
timestamp: (
|
|
66
|
+
timestamp: (0, util_1.returnNullOrUndefinedOrDate)(pdrPgRecord.timestamp)?.getTime(),
|
|
67
67
|
duration: pdrPgRecord.duration,
|
|
68
|
-
updatedAt: pdrPgRecord.updated_at
|
|
68
|
+
updatedAt: (0, util_1.returnNullOrUndefinedOrDate)(pdrPgRecord.updated_at)?.getTime(),
|
|
69
69
|
});
|
|
70
70
|
exports.translatePostgresPdrToApiPdrWithoutDbQuery = translatePostgresPdrToApiPdrWithoutDbQuery;
|
|
71
71
|
/**
|
|
@@ -20,8 +20,8 @@ const translatePostgresProviderToApiProvider = (record) => {
|
|
|
20
20
|
port: record.port,
|
|
21
21
|
host: record.host,
|
|
22
22
|
protocol: record.protocol,
|
|
23
|
-
createdAt: record.created_at
|
|
24
|
-
updatedAt: record.updated_at
|
|
23
|
+
createdAt: (0, util_1.returnNullOrUndefinedOrDate)(record.created_at)?.getTime(),
|
|
24
|
+
updatedAt: (0, util_1.returnNullOrUndefinedOrDate)(record.updated_at)?.getTime(),
|
|
25
25
|
username: record.username,
|
|
26
26
|
password: record.password,
|
|
27
27
|
allowedRedirects: record.allowed_redirects,
|