@vizabi/reader-ddfcsv 4.5.3 → 4.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nyc_output/574fe7f0-58f6-4de2-9524-16e72b784218.json +1 -0
- package/.nyc_output/9aede69f-00e6-432d-bdeb-2c9f4084a6dc.json +1 -0
- package/.nyc_output/processinfo/574fe7f0-58f6-4de2-9524-16e72b784218.json +1 -0
- package/.nyc_output/processinfo/9aede69f-00e6-432d-bdeb-2c9f4084a6dc.json +1 -0
- package/.nyc_output/processinfo/index.json +1 -0
- package/LICENSE +0 -0
- package/README.md +2 -2
- package/coverage/base.css +224 -0
- package/coverage/block-navigation.js +87 -0
- package/coverage/favicon.png +0 -0
- package/coverage/index.html +161 -0
- package/coverage/prettify.css +1 -0
- package/coverage/prettify.js +2 -0
- package/coverage/sort-arrow-sprite.png +0 -0
- package/coverage/sorter.js +210 -0
- package/coverage/src/ddf-csv.ts.html +2866 -0
- package/coverage/src/ddfcsv-error.ts.html +148 -0
- package/coverage/src/ddfcsv-reader.ts.html +538 -0
- package/coverage/src/file-readers/backend-file-reader.ts.html +175 -0
- package/coverage/src/file-readers/github-path-adapter.ts.html +244 -0
- package/coverage/src/file-readers/index.html +131 -0
- package/coverage/src/index.html +176 -0
- package/coverage/src/index.ts.html +145 -0
- package/coverage/src/interfaces.ts.html +196 -0
- package/coverage/src/resource-selection-optimizer/in-clause-under-conjunction.ts.html +745 -0
- package/coverage/src/resource-selection-optimizer/index.html +131 -0
- package/coverage/src/resource-selection-optimizer/index.ts.html +118 -0
- package/coverage/src/test-cases/concepts.ts.html +166 -0
- package/coverage/src/test-cases/entities.ts.html +241 -0
- package/coverage/src/test-cases/index.html +131 -0
- package/dist/reader-ddfcsv.js +15 -1
- package/dist/reader-ddfcsv.js.map +1 -0
- package/dist/stats.html +4950 -0
- package/icon.png +0 -0
- package/lib/src/ddf-csv.d.ts +0 -0
- package/lib/src/ddf-csv.js +13 -14
- package/lib/src/ddf-csv.js.map +1 -1
- package/lib/src/ddfcsv-error.d.ts +0 -0
- package/lib/src/ddfcsv-error.js +0 -0
- package/lib/src/ddfcsv-error.js.map +0 -0
- package/lib/src/ddfcsv-reader.d.ts +0 -0
- package/lib/src/ddfcsv-reader.js +5 -5
- package/lib/src/ddfcsv-reader.js.map +1 -1
- package/lib/src/file-readers/backend-file-reader.d.ts +0 -0
- package/lib/src/file-readers/backend-file-reader.js +0 -0
- package/lib/src/file-readers/backend-file-reader.js.map +0 -0
- package/lib/src/file-readers/github-path-adapter.d.ts +0 -0
- package/lib/src/file-readers/github-path-adapter.js +0 -0
- package/lib/src/file-readers/github-path-adapter.js.map +0 -0
- package/lib/src/index.d.ts +0 -0
- package/lib/src/index.js +2 -2
- package/lib/src/index.js.map +0 -0
- package/lib/src/interfaces.d.ts +0 -0
- package/lib/src/interfaces.js +0 -0
- package/lib/src/interfaces.js.map +0 -0
- package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.d.ts +0 -0
- package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.js +16 -17
- package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.js.map +1 -1
- package/lib/src/resource-selection-optimizer/index.d.ts +0 -0
- package/lib/src/resource-selection-optimizer/index.js +2 -2
- package/lib/src/resource-selection-optimizer/index.js.map +1 -1
- package/lib-web/src/ddf-csv.d.ts +0 -0
- package/lib-web/src/ddf-csv.js +30 -34
- package/lib-web/src/ddf-csv.js.map +1 -1
- package/lib-web/src/ddfcsv-error.d.ts +0 -0
- package/lib-web/src/ddfcsv-error.js +5 -9
- package/lib-web/src/ddfcsv-error.js.map +1 -1
- package/lib-web/src/ddfcsv-reader.d.ts +0 -0
- package/lib-web/src/ddfcsv-reader.js +16 -19
- package/lib-web/src/ddfcsv-reader.js.map +1 -1
- package/lib-web/src/file-readers/frontend-file-reader.d.ts +0 -0
- package/lib-web/src/file-readers/frontend-file-reader.js +1 -5
- package/lib-web/src/file-readers/frontend-file-reader.js.map +1 -1
- package/lib-web/src/file-readers/github-path-adapter.d.ts +0 -0
- package/lib-web/src/file-readers/github-path-adapter.js +1 -4
- package/lib-web/src/file-readers/github-path-adapter.js.map +1 -1
- package/lib-web/src/index-web.d.ts +0 -0
- package/lib-web/src/index-web.js +9 -14
- package/lib-web/src/index-web.js.map +1 -1
- package/lib-web/src/interfaces.d.ts +0 -0
- package/lib-web/src/interfaces.js +1 -2
- package/lib-web/src/interfaces.js.map +0 -0
- package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.d.ts +0 -0
- package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.js +12 -17
- package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.js.map +1 -1
- package/lib-web/src/resource-selection-optimizer/index.d.ts +0 -0
- package/lib-web/src/resource-selection-optimizer/index.js +4 -7
- package/lib-web/src/resource-selection-optimizer/index.js.map +1 -1
- package/package.json +93 -129
- package/rollup.config.mjs +30 -0
- package/scripts/set-own-version.js +0 -0
- package/src/ddf-csv.ts +927 -927
- package/src/ddfcsv-error.ts +0 -0
- package/src/ddfcsv-reader.ts +151 -151
- package/src/file-readers/backend-file-reader.ts +0 -0
- package/src/file-readers/frontend-file-reader.ts +0 -0
- package/src/file-readers/github-path-adapter.ts +0 -0
- package/src/index-web.ts +0 -0
- package/src/index.ts +0 -0
- package/src/interfaces.ts +0 -0
- package/src/resource-selection-optimizer/in-clause-under-conjunction.ts +220 -220
- package/src/resource-selection-optimizer/index.ts +11 -11
- package/src/test-cases/concepts.ts +0 -0
- package/src/test-cases/entities.ts +52 -52
- package/test/assets-fixtures/world-50m.json +0 -0
- package/test/assets.spec.ts +0 -0
- package/test/common.ts +1 -1
- package/test/definition/concepts-definition.spec.ts +0 -0
- package/test/definition/datapoints-definition.spec.ts +1 -1
- package/test/definition/entities-definition.spec.ts +264 -264
- package/test/definition/schema-definition.spec.ts +0 -0
- package/test/diagnostics.spec.ts +0 -0
- package/test/features-service.spec.ts +95 -95
- package/test/high-load.spec.ts +0 -0
- package/test/main.spec.ts +0 -0
- package/test/multi-instances.spec.ts +0 -0
- package/test/result-fixtures/datapoints-assets.json +0 -0
- package/test/result-fixtures/in-clause-under-conjunction-1.json +0 -0
- package/test/result-fixtures/in-clause-under-conjunction-2.json +0 -0
- package/test/result-fixtures/multi-instances/concepts-sg.json +0 -0
- package/test/result-fixtures/multi-instances/concepts-soderstornsmodellen.json +0 -0
- package/test/result-fixtures/multi-instances/datapoints-sg.json +0 -0
- package/test/result-fixtures/multi-instances/datapoints-soderstornsmodellen.json +0 -0
- package/test/result-fixtures/multi-instances/entities-sg.json +0 -0
- package/test/result-fixtures/multi-instances/entities-soderstornsmodellen.json +0 -0
- package/test/result-fixtures/multi-instances/schema-sg.json +0 -0
- package/test/result-fixtures/multi-instances/schema-soderstornsmodellen.json +0 -0
- package/test/schema.spec.ts +0 -0
- package/test/tslint.json +0 -0
- package/tsconfig-web.json +41 -41
- package/tsconfig.json +40 -37
- package/tslint.json +0 -0
- package/.travis.yml +0 -37
- package/deploy.js +0 -87
- package/dist/reader-ddfcsv-polyfill.js +0 -2
- package/dist/reader-ddfcsv-polyfill.js.map +0 -1
- package/test/mocha.opts +0 -7
package/src/ddf-csv.ts
CHANGED
|
@@ -1,927 +1,927 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import { getAppropriatePlugin } from './resource-selection-optimizer';
|
|
5
|
-
import { CSV_PARSING_ERROR, DDF_ERROR, DdfCsvError, FILE_READING_ERROR, JSON_PARSING_ERROR } from './ddfcsv-error';
|
|
6
|
-
import { getFilePath, isSchemaQuery, validateQueryDefinitions, validateQueryStructure } from '@vizabi/ddf-query-validator';
|
|
7
|
-
|
|
8
|
-
import * as Papa from 'papaparse';
|
|
9
|
-
import { utcParse } from 'd3-time-format';
|
|
10
|
-
import { IBaseReaderOptions, IDatapackage } from './interfaces';
|
|
11
|
-
|
|
12
|
-
const isValidNumeric = val => typeof val !== 'number' && !val ? false : true;
|
|
13
|
-
|
|
14
|
-
export function ddfCsvReader (logger?: any) {
|
|
15
|
-
const internalConcepts = [
|
|
16
|
-
{ concept: 'concept', concept_type: 'string', domain: null },
|
|
17
|
-
{ concept: 'concept_type', concept_type: 'string', domain: null }
|
|
18
|
-
];
|
|
19
|
-
|
|
20
|
-
const operators = new Map([
|
|
21
|
-
/* logical operators */
|
|
22
|
-
[ '$and', (row, predicates) => predicates.every(p => applyFilterRow(row, p)) ],
|
|
23
|
-
[ '$or', (row, predicates) => predicates.some(p => applyFilterRow(row, p)) ],
|
|
24
|
-
[ '$not', (row, predicate) => !applyFilterRow(row, predicate) ],
|
|
25
|
-
[ '$nor', (row, predicates) => !predicates.some(p => applyFilterRow(row, p)) ],
|
|
26
|
-
|
|
27
|
-
/* equality operators */
|
|
28
|
-
[ '$eq', (rowValue, filterValue) => rowValue == filterValue ],
|
|
29
|
-
[ '$ne', (rowValue, filterValue) => rowValue != filterValue ],
|
|
30
|
-
[ '$gt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue > filterValue ],
|
|
31
|
-
[ '$gte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue >= filterValue ],
|
|
32
|
-
[ '$lt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue < filterValue ],
|
|
33
|
-
[ '$lte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue <= filterValue ],
|
|
34
|
-
[ '$in', (rowValue, filterValue) => filterValue.has(rowValue) ],
|
|
35
|
-
[ '$nin', (rowValue, filterValue) => !filterValue.has(rowValue) ],
|
|
36
|
-
]);
|
|
37
|
-
|
|
38
|
-
const keyValueLookup = new Map<string, any>();
|
|
39
|
-
const resourcesLookup = new Map();
|
|
40
|
-
|
|
41
|
-
let optimalFilesSet = [];
|
|
42
|
-
let datapackage;
|
|
43
|
-
let datapackagePromise;
|
|
44
|
-
let datasetWithConstraints = false;
|
|
45
|
-
|
|
46
|
-
function getDatasetInfo(baseOptions: IBaseReaderOptions) : Promise<Object> {
|
|
47
|
-
return (datapackagePromise || loadDataPackage(baseOptions));
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
function loadDataPackage (baseOptions: IBaseReaderOptions): Promise<IDatapackage> {
|
|
51
|
-
const datapackagePath = getFilePath(baseOptions.basePath);
|
|
52
|
-
const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('loadDataPackage');
|
|
53
|
-
|
|
54
|
-
return new Promise((resolve, reject) => {
|
|
55
|
-
if (datapackage) {
|
|
56
|
-
return resolve(datapackage);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
baseOptions.fileReader.readText(datapackagePath, (err, data) => {
|
|
60
|
-
if (err) {
|
|
61
|
-
error('file reading', err);
|
|
62
|
-
return reject(new DdfCsvError(FILE_READING_ERROR, err, datapackagePath));
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
try {
|
|
66
|
-
datapackage = JSON.parse(stripBom(data));
|
|
67
|
-
optimalFilesSet = [];
|
|
68
|
-
buildResourcesLookup(datapackage);
|
|
69
|
-
buildKeyValueLookup(datapackage);
|
|
70
|
-
} catch (parseErr) {
|
|
71
|
-
error('json file parsing', parseErr);
|
|
72
|
-
return reject(new DdfCsvError(JSON_PARSING_ERROR, parseErr.message, datapackagePath));
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
debug('datapackage content is ready');
|
|
76
|
-
|
|
77
|
-
resolve(datapackage);
|
|
78
|
-
});
|
|
79
|
-
});
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
async function loadConcepts (queryParam, options: IBaseReaderOptions): Promise<object> {
|
|
83
|
-
const { error } = options.diagnostic.prepareDiagnosticFor('loadConcepts');
|
|
84
|
-
// start off with internal concepts
|
|
85
|
-
setConceptsLookup(internalConcepts, options);
|
|
86
|
-
// query concepts
|
|
87
|
-
const conceptQuery = {
|
|
88
|
-
select: { key: [ 'concept' ], value: [ 'concept_type', 'domain' ] },
|
|
89
|
-
from: 'concepts'
|
|
90
|
-
};
|
|
91
|
-
|
|
92
|
-
let result;
|
|
93
|
-
|
|
94
|
-
// not using query() to circumvent the conceptPromise resolving
|
|
95
|
-
try {
|
|
96
|
-
const concepts = await queryData(conceptQuery, options);
|
|
97
|
-
buildConceptsLookup(concepts, options);
|
|
98
|
-
// with conceptsLookup built, we can parse other concept properties
|
|
99
|
-
// according to their concept_type
|
|
100
|
-
result = await reparseConcepts(options);
|
|
101
|
-
} catch (err) {
|
|
102
|
-
error('concepts processing', err);
|
|
103
|
-
throw err;
|
|
104
|
-
}
|
|
105
|
-
return result;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
function buildConceptsLookup (concepts, options) {
|
|
109
|
-
const entitySetMembershipConcepts = concepts
|
|
110
|
-
.filter(concept => concept.concept_type === 'entity_set')
|
|
111
|
-
.map(concept => ({
|
|
112
|
-
concept: 'is--' + concept.concept,
|
|
113
|
-
concept_type: 'boolean',
|
|
114
|
-
domain: null
|
|
115
|
-
}));
|
|
116
|
-
|
|
117
|
-
concepts = concepts
|
|
118
|
-
.concat(entitySetMembershipConcepts)
|
|
119
|
-
.concat(internalConcepts);
|
|
120
|
-
|
|
121
|
-
setConceptsLookup(concepts, options);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* Iterates resources for query and applies parsing according to concept_type
|
|
126
|
-
* of headers. Does not take into account join clause.
|
|
127
|
-
* Impure function as it parses data in-place.
|
|
128
|
-
* @return {[type]} [description]
|
|
129
|
-
*/
|
|
130
|
-
function reparseConcepts ({ conceptsLookup }) {
|
|
131
|
-
const parsingFunctions = new Map<string, Function>([
|
|
132
|
-
[ 'boolean', (str) => str === 'true' || str === 'TRUE' ],
|
|
133
|
-
[ 'measure', (str) => parseFloat(str) ]
|
|
134
|
-
]);
|
|
135
|
-
|
|
136
|
-
const resources = getResources([ 'concept' ]);
|
|
137
|
-
|
|
138
|
-
const resourceUpdates = [ ...resources ].map(resource => {
|
|
139
|
-
return resource.data.then(response => {
|
|
140
|
-
|
|
141
|
-
// first find out which resource concepts need parsing
|
|
142
|
-
const resourceConcepts = Object.keys(response.data[ 0 ]);
|
|
143
|
-
const parsingConcepts = new Map<string, Function>();
|
|
144
|
-
|
|
145
|
-
resourceConcepts.forEach(concept => {
|
|
146
|
-
const type = conceptsLookup.get(concept).concept_type;
|
|
147
|
-
const fn = parsingFunctions.get(type);
|
|
148
|
-
|
|
149
|
-
if (fn) {
|
|
150
|
-
parsingConcepts.set(concept, fn);
|
|
151
|
-
}
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
// then parse only those concepts
|
|
155
|
-
return response.data.forEach(row => {
|
|
156
|
-
for (const [ concept, parseFn ] of parsingConcepts) {
|
|
157
|
-
row[ concept ] = parseFn(row[ concept ]);
|
|
158
|
-
}
|
|
159
|
-
});
|
|
160
|
-
|
|
161
|
-
});
|
|
162
|
-
});
|
|
163
|
-
|
|
164
|
-
return Promise.all(resourceUpdates);
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// can only take single-dimensional key
|
|
168
|
-
function setConceptsLookup (concepts, options) {
|
|
169
|
-
options.conceptsLookup.clear();
|
|
170
|
-
concepts.forEach(row => options.conceptsLookup.set(row.concept, row));
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
function preValidateQueryStructure(queryParam, baseOptions): boolean {
|
|
174
|
-
if (queryParam.from == "datapoints" && queryParam.select.value.length == 0) return true;
|
|
175
|
-
return false;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
async function query (queryParam, _baseOptions: IBaseReaderOptions) {
|
|
179
|
-
const baseOptions = Object.assign({}, _baseOptions);
|
|
180
|
-
const { warning, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
|
|
181
|
-
let data;
|
|
182
|
-
|
|
183
|
-
if (preValidateQueryStructure(queryParam, baseOptions)) {
|
|
184
|
-
return Promise.resolve([]);
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
try {
|
|
188
|
-
await validateQueryStructure(queryParam, baseOptions);
|
|
189
|
-
baseOptions.datapackage = await (datapackagePromise || (datapackagePromise = loadDataPackage(baseOptions)));
|
|
190
|
-
baseOptions.resourcesLookup = resourcesLookup;
|
|
191
|
-
await loadConcepts(queryParam, baseOptions);
|
|
192
|
-
await validateQueryDefinitions(queryParam, baseOptions);
|
|
193
|
-
|
|
194
|
-
if (isSchemaQuery(queryParam)) {
|
|
195
|
-
data = await querySchema(queryParam, baseOptions);
|
|
196
|
-
} else {
|
|
197
|
-
const appropriatePlugin = datasetWithConstraints && getAppropriatePlugin(this, queryParam, baseOptions);
|
|
198
|
-
|
|
199
|
-
optimalFilesSet = [];
|
|
200
|
-
if (appropriatePlugin) {
|
|
201
|
-
const files = await appropriatePlugin.getRecommendedFilesSet();
|
|
202
|
-
optimalFilesSet = files;
|
|
203
|
-
queryParam.optimalFilesSet = [].concat(files, queryParam.optimalFilesSet);
|
|
204
|
-
|
|
205
|
-
warning('get custom optimal files list by a plugin', optimalFilesSet);
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
data = await queryData(queryParam, baseOptions);
|
|
209
|
-
}
|
|
210
|
-
} catch (err) {
|
|
211
|
-
error('general query error', err);
|
|
212
|
-
throw err;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
return data;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
function queryData (queryParam, _options: IBaseReaderOptions) {
|
|
219
|
-
const options = Object.assign({}, _options);
|
|
220
|
-
const { debug } = options.diagnostic.prepareDiagnosticFor('queryData');
|
|
221
|
-
const {
|
|
222
|
-
select: { key = [], value = [] },
|
|
223
|
-
from = '',
|
|
224
|
-
where = {},
|
|
225
|
-
join = {},
|
|
226
|
-
order_by = [],
|
|
227
|
-
language
|
|
228
|
-
} = queryParam;
|
|
229
|
-
const select = { key, value };
|
|
230
|
-
|
|
231
|
-
debug('start all data loading', queryParam);
|
|
232
|
-
|
|
233
|
-
const projection = new Set(select.key.concat(select.value));
|
|
234
|
-
const filterFields = getFilterFields(where).filter(field => from === 'entities' || !projection.has(field));
|
|
235
|
-
// load all relevant resources
|
|
236
|
-
const resourcesPromise = loadResources(select.key, [ ...select.value, ...filterFields ], language, options, queryParam);
|
|
237
|
-
// list of entities selected from a join clause, later insterted in where clause
|
|
238
|
-
const joinsPromise = getJoinFilters(join, queryParam, options);
|
|
239
|
-
// filter which ensures result only includes queried entity sets
|
|
240
|
-
const entitySetFilterPromise = getEntitySetFilter(select.key, queryParam, options);
|
|
241
|
-
|
|
242
|
-
return Promise.all([ resourcesPromise, entitySetFilterPromise, joinsPromise ])
|
|
243
|
-
.then(([ resourceResponses, entitySetFilter, joinFilters ]) => {
|
|
244
|
-
debug('finish all data loading', queryParam);
|
|
245
|
-
// create filter from where, join filters and entity set filters
|
|
246
|
-
const whereResolved = processWhere(where, joinFilters);
|
|
247
|
-
const filter = mergeFilters(entitySetFilter, whereResolved);
|
|
248
|
-
|
|
249
|
-
debug('dataTables processing', queryParam);
|
|
250
|
-
const dataTables = resourceResponses
|
|
251
|
-
// rename key-columns and remove irrelevant value-columns
|
|
252
|
-
.map(response => processResourceResponse(response, select, filterFields, options));
|
|
253
|
-
|
|
254
|
-
debug('queryResult processing', queryParam);
|
|
255
|
-
// join (reduce) data to one data table
|
|
256
|
-
const queryResult = joinData(select.key, 'overwrite', ...dataTables)
|
|
257
|
-
.filter(row => applyFilterRow(row, filter)) // apply filters (entity sets and where (including join))
|
|
258
|
-
.map(row => fillMissingValues(row, projection)) // fill any missing values with null values
|
|
259
|
-
.map(row => projectRow(row, projection)); // remove fields used only for filtering
|
|
260
|
-
|
|
261
|
-
debug('result ordering', queryParam);
|
|
262
|
-
orderData(queryResult, order_by);
|
|
263
|
-
debug('final result is ready', queryParam);
|
|
264
|
-
|
|
265
|
-
return parseTime(queryResult, options);
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
/**
|
|
270
|
-
* Parses time concept strings in result to Date objects
|
|
271
|
-
* @param result
|
|
272
|
-
* @param options
|
|
273
|
-
*/
|
|
274
|
-
function parseTime(result, options: IBaseReaderOptions) {
|
|
275
|
-
const conceptsLookup = options.conceptsLookup;
|
|
276
|
-
const concepts = Object.keys(result[0] || {});
|
|
277
|
-
const timeConcepts = concepts.map(c => conceptsLookup.get(c) || {}).filter(co => co.concept_type == 'time');
|
|
278
|
-
timeConcepts.forEach(({ concept }) => {
|
|
279
|
-
const parse = getTimeParser(concept, options);
|
|
280
|
-
result.forEach(row => {
|
|
281
|
-
row[concept] = parse(row[concept]);
|
|
282
|
-
});
|
|
283
|
-
});
|
|
284
|
-
return result;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
/**
|
|
288
|
-
* Time parsers for DDF built-in time concepts
|
|
289
|
-
* @param concept
|
|
290
|
-
*/
|
|
291
|
-
function getTimeParser(concept, options: IBaseReaderOptions) {
|
|
292
|
-
const { error } = options.diagnostic.prepareDiagnosticFor('queryData');
|
|
293
|
-
const parsers = {
|
|
294
|
-
year: utcParse('%Y'),
|
|
295
|
-
month: utcParse('%Y-%m'),
|
|
296
|
-
day: utcParse('%Y%m%d'),
|
|
297
|
-
hour: utcParse('%Y%m%dt%H'),
|
|
298
|
-
minute: utcParse('%Y%m%dt%H%M'),
|
|
299
|
-
second: utcParse('%Y%m%dt%H%M%S'),
|
|
300
|
-
week: utcParse('%Yw%V'),
|
|
301
|
-
quarter: utcParse('%Yq%q')
|
|
302
|
-
};
|
|
303
|
-
function tryParse(str) {
|
|
304
|
-
for (const i in parsers) {
|
|
305
|
-
const dateObject = parsers[i](str);
|
|
306
|
-
if (dateObject) {
|
|
307
|
-
return dateObject;
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
error('Could not parse time string: ' + str);
|
|
311
|
-
return null;
|
|
312
|
-
}
|
|
313
|
-
if (concept == 'time') {
|
|
314
|
-
return tryParse;
|
|
315
|
-
}
|
|
316
|
-
if (!parsers[concept]) {
|
|
317
|
-
error('No time parser found for time concept: ' + concept);
|
|
318
|
-
return str => str;
|
|
319
|
-
}
|
|
320
|
-
return parsers[concept];
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
function orderData (data, orderBy = []) {
|
|
324
|
-
if (orderBy.length === 0) {
|
|
325
|
-
return;
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
// process ["geo"] or [{"geo": "asc"}] to [{ concept: "geo", direction: 1 }];
|
|
329
|
-
const orderNormalized = orderBy.map(orderPart => {
|
|
330
|
-
if (typeof orderPart === 'string') {
|
|
331
|
-
return { concept: orderPart, direction: 1 };
|
|
332
|
-
} else {
|
|
333
|
-
const concept = Object.keys(orderPart)[ 0 ];
|
|
334
|
-
const direction = (orderPart[ concept ] === 'asc' ? 1 : -1);
|
|
335
|
-
|
|
336
|
-
return { concept, direction };
|
|
337
|
-
}
|
|
338
|
-
});
|
|
339
|
-
|
|
340
|
-
// sort by one or more fields
|
|
341
|
-
const n = orderNormalized.length;
|
|
342
|
-
|
|
343
|
-
data.sort((a, b) => {
|
|
344
|
-
for (let i = 0; i < n; i++) {
|
|
345
|
-
const order = orderNormalized[ i ];
|
|
346
|
-
|
|
347
|
-
if (a[ order.concept ] < b[ order.concept ]) {
|
|
348
|
-
return -1 * order.direction;
|
|
349
|
-
} else if (a[ order.concept ] > b[ order.concept ]) {
|
|
350
|
-
return 1 * order.direction;
|
|
351
|
-
}
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
return 0;
|
|
355
|
-
});
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
/**
|
|
359
|
-
* Replaces `$join` placeholders with relevant `{ "$in": [...] }` operator.
|
|
360
|
-
* Replaces $in- and $nin-arrays with sets for faster filtering
|
|
361
|
-
* @param {Object} where Where clause possibly containing $join placeholders as field values.
|
|
362
|
-
* @param {Object} joinFilters Collection of lists of entity or time values,
|
|
363
|
-
* coming from other tables defined in query `join` clause.
|
|
364
|
-
* @return {Object} Where clause with $join placeholders replaced by valid filter statements
|
|
365
|
-
*/
|
|
366
|
-
function processWhere (where, joinFilters) {
|
|
367
|
-
const result = {};
|
|
368
|
-
|
|
369
|
-
for (const field in where) {
|
|
370
|
-
const fieldValue = where[ field ];
|
|
371
|
-
|
|
372
|
-
if (includes([ '$and', '$or', '$nor' ], field)) {
|
|
373
|
-
result[ field ] = fieldValue.map(subFilter => processWhere(subFilter, joinFilters));
|
|
374
|
-
} else if (field === '$in' || field === '$nin') {
|
|
375
|
-
// prepare "$in" fields for optimized lookup
|
|
376
|
-
result[ field ] = new Set(fieldValue);
|
|
377
|
-
} else if (typeof joinFilters[ fieldValue ] !== 'undefined') {
|
|
378
|
-
// found a join!
|
|
379
|
-
// not assigning to result[field] because joinFilter can contain $and/$or statements in case of
|
|
380
|
-
// time concept (join-where is directly copied, not executed)
|
|
381
|
-
// otherwise could end up with where: { year: { $and: [{ ... }]}}, which is invalid
|
|
382
|
-
// (no boolean ops inside field objects)
|
|
383
|
-
// in case of entity join, joinFilters contains correct field
|
|
384
|
-
Object.assign(result, joinFilters[ fieldValue ]);
|
|
385
|
-
} else if (typeof fieldValue === 'object') {
|
|
386
|
-
// catches $not and fields with equality operator-objects
|
|
387
|
-
// { <field>: { "$lt": 1500 }}
|
|
388
|
-
result[ field ] = processWhere(fieldValue, joinFilters);
|
|
389
|
-
} else {
|
|
390
|
-
// catches rest, being all equality operators except for $in and $nin
|
|
391
|
-
// { "$lt": 1500 }
|
|
392
|
-
result[ field ] = fieldValue;
|
|
393
|
-
}
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
return result;
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
function patchFilterForOrClause(filter) {
|
|
400
|
-
|
|
401
|
-
function processFilter(where) {
|
|
402
|
-
const whereKeys = Object.keys(where);
|
|
403
|
-
for (const key of whereKeys) {
|
|
404
|
-
if (key == "$or") {
|
|
405
|
-
where[key] = where[key].reduce((res, value) => {
|
|
406
|
-
const valueKeys = Object.keys(value);
|
|
407
|
-
if (valueKeys.length > 1) {
|
|
408
|
-
for (const vKey of valueKeys) {
|
|
409
|
-
res.push({ [vKey]: value[vKey] });
|
|
410
|
-
}
|
|
411
|
-
} else {
|
|
412
|
-
res.push(value);
|
|
413
|
-
}
|
|
414
|
-
return res;
|
|
415
|
-
}, []);
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
return where;
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
return processFilter(filter);
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
function mergeFilters (...filters) {
|
|
425
|
-
return filters.reduce((a, b) => {
|
|
426
|
-
if (!isEmpty(b)) {
|
|
427
|
-
patchFilterForOrClause(b);
|
|
428
|
-
a.$and.push(b);
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
return a;
|
|
432
|
-
}, { $and: [] });
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
function querySchema (queryParam, baseOptions: IBaseReaderOptions) {
|
|
436
|
-
const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
|
|
437
|
-
const getSchemaFromCollection = collectionPar => {
|
|
438
|
-
debug(`get schema for collection ${collectionPar}`);
|
|
439
|
-
return baseOptions.datapackage.ddfSchema[ collectionPar ].map(
|
|
440
|
-
({ primaryKey, value }) => ({ key: primaryKey, value })
|
|
441
|
-
);
|
|
442
|
-
};
|
|
443
|
-
|
|
444
|
-
const collection = queryParam.from.split('.')[ 0 ];
|
|
445
|
-
|
|
446
|
-
if (baseOptions.datapackage.ddfSchema[ collection ]) {
|
|
447
|
-
return getSchemaFromCollection(collection);
|
|
448
|
-
} else if (collection === '*') {
|
|
449
|
-
return Object.keys(baseOptions.datapackage.ddfSchema)
|
|
450
|
-
.map(getSchemaFromCollection)
|
|
451
|
-
.reduce((a, b) => a.concat(b));
|
|
452
|
-
} else {
|
|
453
|
-
const message = `No valid collection (${collection}) for schema query`;
|
|
454
|
-
error(message);
|
|
455
|
-
throwError(new DdfCsvError(DDF_ERROR, message));
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
function fillMissingValues (row, projection) {
|
|
460
|
-
for (const field of projection) {
|
|
461
|
-
if (typeof row[ field ] === 'undefined') {
|
|
462
|
-
row[ field ] = null;
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
return row;
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
function applyFilterRow (row, filter) {
|
|
470
|
-
// implicit $and in filter object handled by .every()
|
|
471
|
-
return Object.keys(filter).every(filterKey => {
|
|
472
|
-
const operator = operators.get(filterKey);
|
|
473
|
-
|
|
474
|
-
if (operator) {
|
|
475
|
-
return operator(row, filter[ filterKey ]);
|
|
476
|
-
// assuming values are primitives not Number/Boolean/String objects
|
|
477
|
-
} else if (typeof filter[ filterKey ] !== 'object') {
|
|
478
|
-
// { <field>: <value> } is shorthand for { <field>: { $eq: <value> }}
|
|
479
|
-
return operators.get('$eq')(row[ filterKey ], filter[ filterKey ]);
|
|
480
|
-
} else {
|
|
481
|
-
// filter[filterKey] is an object and will thus contain
|
|
482
|
-
// an equality operator (no deep objects (like in Mongo) supported)
|
|
483
|
-
return applyFilterRow(row[ filterKey ], filter[ filterKey ]);
|
|
484
|
-
}
|
|
485
|
-
});
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
function getJoinFilters (join, queryParam, options) {
|
|
489
|
-
return Promise.all(Object.keys(join).map(joinID => getJoinFilter(joinID, join[ joinID ], queryParam, options)))
|
|
490
|
-
.then(results => results.reduce(mergeObjects, {}));
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
function mergeObjects (a, b) {
|
|
494
|
-
return Object.assign(a, b);
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
function getJoinFilter (joinID, join, queryParam, options) {
|
|
498
|
-
// assumption: join.key is same as field in where clause
|
|
499
|
-
// - where: { geo: $geo }, join: { "$geo": { key: geo, where: { ... }}}
|
|
500
|
-
// - where: { year: $year }, join: { "$year": { key: year, where { ... }}}
|
|
501
|
-
if (options.conceptsLookup.get(join.key).concept_type === 'time') {
|
|
502
|
-
// time, no query needed as time values are not explicit in the dataSource
|
|
503
|
-
// assumption: there are no time-properties. E.g. data like <year>,population
|
|
504
|
-
return Promise.resolve({ [ joinID ]: join.where });
|
|
505
|
-
} else {
|
|
506
|
-
// entity concept
|
|
507
|
-
return queryData({
|
|
508
|
-
select: { key: [ join.key ] },
|
|
509
|
-
where: join.where,
|
|
510
|
-
from: options.conceptsLookup.has(join.key) ? 'entities' : 'concepts'
|
|
511
|
-
}, Object.assign({ joinID }, options))
|
|
512
|
-
.then(result => ({
|
|
513
|
-
[ joinID ]: {
|
|
514
|
-
[ join.key ]: {
|
|
515
|
-
$in: new Set(result.map(row => row[ join.key ]))
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
}));
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
|
|
522
|
-
function getFilterFields (filter) {
|
|
523
|
-
const fields = [];
|
|
524
|
-
|
|
525
|
-
for (const field in filter) {
|
|
526
|
-
// no support for deeper object structures (mongo style)
|
|
527
|
-
if (includes([ '$and', '$or', '$not', '$nor' ], field)) {
|
|
528
|
-
filter[ field ].map(getFilterFields).forEach(subFields => fields.push(...subFields));
|
|
529
|
-
} else {
|
|
530
|
-
fields.push(field);
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
|
|
534
|
-
return [...new Set(fields)];
|
|
535
|
-
}
|
|
536
|
-
|
|
537
|
-
/**
|
|
538
|
-
* Filter concepts by type
|
|
539
|
-
* @param {Array} conceptStrings Array of concept strings to filter out. Default all concepts.
|
|
540
|
-
* @param {Array} conceptTypes Array of concept types to filter out
|
|
541
|
-
* @return {Array} Array of concept strings only of given types
|
|
542
|
-
*/
|
|
543
|
-
function filterConceptsByType (conceptTypes, queryKey, options) {
|
|
544
|
-
const conceptStrings = queryKey || Array.from(options.conceptsLookup.keys());
|
|
545
|
-
const concepts = [];
|
|
546
|
-
|
|
547
|
-
for (const conceptString of conceptStrings) {
|
|
548
|
-
const concept = options.conceptsLookup.get(conceptString);
|
|
549
|
-
|
|
550
|
-
if (includes(conceptTypes, concept.concept_type)) {
|
|
551
|
-
concepts.push(concept);
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
|
|
555
|
-
return concepts;
|
|
556
|
-
}
|
|
557
|
-
|
|
558
|
-
/**
 * Find the aliases an entity concept can have in a resource's primary key.
 * For a queried entity set these are sibling entity sets in the same domain
 * and the domain itself; for a queried entity domain these are its entity sets.
 * @param {Array} queryKey concept ids of the query key
 * @param {Array} resourceKey primary key of the resource being read
 * @return {Map} alias concept -> queried entity concept
 */
function getEntityConceptRenameMap (queryKey, resourceKey, options) {
  const resourceKeySet = new Set(resourceKey);
  const entityConceptTypes = [ 'entity_set', 'entity_domain' ];
  const queryEntityConcepts = filterConceptsByType(entityConceptTypes, queryKey, options);

  if (queryEntityConcepts.length === 0) {
    return new Map();
  }

  const allEntityConcepts = filterConceptsByType(entityConceptTypes, null, options);
  const renameMap = new Map();

  for (const concept of queryEntityConcepts) {
    for (const lookupConcept of allEntityConcepts) {
      // must appear in the resource key and not be the queried concept itself
      if (!resourceKeySet.has(lookupConcept.concept) || lookupConcept.concept === concept.concept) {
        continue;
      }

      const isAlias = concept.concept_type === 'entity_set'
        // other entity sets in the same domain, or the entity domain of the set
        ? (lookupConcept.domain === concept.domain || lookupConcept.concept === concept.domain)
        // concept_type === "entity_domain": entity sets of the entity domain
        : lookupConcept.domain === concept.concept;

      if (isAlias) {
        renameMap.set(lookupConcept.concept, concept.concept);
      }
    }
  }

  return renameMap;
}
|
|
595
|
-
|
|
596
|
-
/**
 * Get a "$in" filter containing all entities for each queried entity set.
 * Queries the entity set's domain entities and keeps those flagged with
 * the `is--<set>` membership column.
 * @param {Array} conceptStrings concept ids to inspect for entity sets
 * @return {Promise<Object>} map: set concept -> { $in: Set<entityId> }
 */
function getEntitySetFilter (conceptStrings, queryParam, options) {
  const entitySets = filterConceptsByType([ 'entity_set' ], conceptStrings, options);

  const promises = entitySets.map(concept => {
    const membershipQuery = {
      select: { key: [ concept.domain ], value: [ 'is--' + concept.concept ] },
      from: 'entities'
    };

    return queryData(membershipQuery, Object.assign({}, options)).then(result => {
      const members = result
        .filter(row => row[ 'is--' + concept.concept ])
        .map(row => row[ concept.domain ]);

      return { [ concept.concept ]: { $in: new Set(members) } };
    });
  });

  return Promise.all(promises)
    .then(results => results.reduce((a, b) => Object.assign(a, b), {}));
}
|
|
623
|
-
|
|
624
|
-
/**
|
|
625
|
-
* Returns all resources for a certain key value pair or multiple values for one key
|
|
626
|
-
* @param {Array} key The key of the requested resources
|
|
627
|
-
* @param {Array/string} value The value or values found in the requested resources
|
|
628
|
-
* @return {Array} Array of resource objects
|
|
629
|
-
*/
|
|
630
|
-
function getResources (key, value?) {
|
|
631
|
-
// value not given, load all resources for key
|
|
632
|
-
if (!value || value.length === 0 || key[0] === value) {
|
|
633
|
-
return new Set(
|
|
634
|
-
[ ...keyValueLookup
|
|
635
|
-
.get(createKeyString(key))
|
|
636
|
-
.values()
|
|
637
|
-
].reduce((a, b) => a.concat(b))
|
|
638
|
-
);
|
|
639
|
-
}
|
|
640
|
-
// multiple values
|
|
641
|
-
if (Array.isArray(value)) {
|
|
642
|
-
return value
|
|
643
|
-
.map(singleValue => getResources(key, singleValue))
|
|
644
|
-
.reduce((resultSet, resources) => new Set([ ...resultSet, ...resources ]), new Set());
|
|
645
|
-
}
|
|
646
|
-
// one key, one value
|
|
647
|
-
let oneKeyOneValueResourcesArray = keyValueLookup
|
|
648
|
-
.get(createKeyString(key))
|
|
649
|
-
.get(value);
|
|
650
|
-
|
|
651
|
-
if (oneKeyOneValueResourcesArray) {
|
|
652
|
-
oneKeyOneValueResourcesArray = oneKeyOneValueResourcesArray
|
|
653
|
-
.filter(v => isEmpty(optimalFilesSet) || includes(optimalFilesSet, v.path));
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
return new Set(oneKeyOneValueResourcesArray);
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
/**
 * Project a resource's rows down to the fields needed for select/filter and
 * rename entity headers to the concepts the query asked for.
 *
 * Renaming must happen AFTER projection to prevent ambiguity.
 * E.g. a resource with `<geo>,name,region` fields, `region` an entity set of
 * domain `geo`, queried as { select: { key: ["region"], value: ["name"] } }:
 * renaming first would yield headers `<region>,name,region`, which is invalid
 * and makes unambiguous projection impossible. Projecting first gives
 * `<geo>,name`, after which renaming is safe.
 */
function processResourceResponse (response, select, filterFields, options) {
  const resourcePK = response.resource.schema.primaryKey;
  // all fields used for select or filters
  const resourceProjection = new Set([ ...resourcePK, ...select.value, ...filterFields ]);
  // rename map to rename relevant entity headers to requested entity concepts
  const renameMap = getEntityConceptRenameMap(select.key, resourcePK, options);

  return response.data.map(row => {
    const projected = projectRow(row, resourceProjection); // drop fields not used for select or filter
    return renameHeaderRow(projected, renameMap);          // rename headers (after projection!)
  });
}
|
|
677
|
-
|
|
678
|
-
/**
 * Resolve the resources matching key/value and load each of them
 * (with translations for `language` when available).
 * @return {Promise<Array>} one { data, resource } response per resource
 */
function loadResources (key, value, language, options, queryParam) {
  const { debug } = options.diagnostic.prepareDiagnosticFor('loadResource');
  const resources = [ ...getResources(key, value) ];

  debug('resources list by query', { queryParam, resources });

  return Promise.all(resources.map(resource => loadResource(resource, language, options)));
}
|
|
688
|
-
|
|
689
|
-
/** Return a copy of `row` keeping only the fields present in `projectionSet`. */
function projectRow (row, projectionSet) {
  const result = {};

  Object.keys(row)
    .filter(concept => projectionSet.has(concept))
    .forEach(concept => {
      result[ concept ] = row[ concept ];
    });

  return result;
}
|
|
700
|
-
|
|
701
|
-
/** Return a copy of `row` with keys renamed via `renameMap`; unmapped keys are kept. */
function renameHeaderRow (row, renameMap) {
  const result = {};

  for (const [ concept, value ] of Object.entries(row)) {
    result[ renameMap.get(concept) || concept ] = value;
  }

  return result;
}
|
|
710
|
-
|
|
711
|
-
/**
 * Join multiple data tables into one, merging rows that share the same
 * canonical key via joinRow.
 * @param {Array} key concepts forming the row key
 * @param {string} joinMode 'overwrite' | 'translation' | 'overwriteWithError'
 * @param {...Array} data tables to join
 * @return {Array} merged rows
 */
function joinData (key, joinMode, ...data) {
  if (data.length === 1) {
    return data[ 0 ];
  }

  const canonicalKey = key.slice(0).sort();
  const rowsByKey = new Map();

  for (const table of data) {
    for (const row of table) {
      const keyString = canonicalKey.map(concept => row[ concept ]).join(',');
      const existing = rowsByKey.get(keyString);

      if (existing) {
        joinRow(existing, row, joinMode);
      } else {
        rowsByKey.set(keyString, Object.assign({}, row));
      }
    }
  }

  return [ ...rowsByKey.values() ];
}
|
|
734
|
-
|
|
735
|
-
/**
 * Merge `sourceRow` into `resultRow` in place, per `mode`:
 * - 'overwrite': source values win unconditionally
 * - 'translation': source values win unless empty string
 * - 'overwriteWithError': like overwrite, but conflicting values throw
 */
function joinRow (resultRow, sourceRow, mode) {
  switch (mode) {
    case 'overwrite':
      /* Simple alternative without empty value or error handling */
      Object.assign(resultRow, sourceRow);
      break;
    case 'translation':
      // Translation joining ignores empty values
      // and allows different values for strings (= translations)
      Object.entries(sourceRow)
        .filter(([ , value ]) => value !== '')
        .forEach(([ concept, value ]) => {
          resultRow[ concept ] = value;
        });
      break;
    case 'overwriteWithError':
      /* Alternative for "overwrite" with JOIN error detection */
      for (const concept in sourceRow) {
        const conflict = resultRow[ concept ] !== undefined && resultRow[ concept ] !== sourceRow[ concept ];

        if (conflict) {
          const errStr =
            `JOIN Error: two resources have different data for "${concept}": ${JSON.stringify(sourceRow)},${JSON.stringify(resultRow)}`;

          throwError(new DdfCsvError(DDF_ERROR, errStr));
        } else {
          resultRow[ concept ] = sourceRow[ concept ];
        }
      }
      break;
  }
}
|
|
767
|
-
|
|
768
|
-
function throwError (error: DdfCsvError) {
|
|
769
|
-
const currentLogger = logger || console;
|
|
770
|
-
|
|
771
|
-
currentLogger.error(error.message);
|
|
772
|
-
|
|
773
|
-
throw error;
|
|
774
|
-
}
|
|
775
|
-
|
|
776
|
-
/**
 * Canonical string for a key: sorted concepts joined by ','.
 * When `row` is given, joins that row's values for the sorted concepts instead.
 */
function createKeyString (key, row = false) {
  const canonicalKey = key.slice(0).sort();

  return row
    ? canonicalKey.map(concept => row[ concept ]).join(',')
    : canonicalKey.join(',');
}
|
|
785
|
-
|
|
786
|
-
/**
 * Load a resource's data file (cached as a promise on the resource object)
 * and, when a valid language is requested, its translation file; join both
 * into one data table ('translation' mode, so empty translations don't clobber).
 * @return {Promise<{data, resource}>}
 */
function loadResource (resource, language, options) {
  const { warning } = options.diagnostic.prepareDiagnosticFor('loadResource');

  if (typeof resource.data === 'undefined') {
    resource.data = loadFile(resource.path, options);
  }

  const filePromises = [ resource.data ];

  const languageValid = typeof language !== 'undefined' && includes(getLanguages(options), language);

  if (languageValid) {
    if (typeof resource.translations[ language ] === 'undefined') {
      const translationPath = `lang/${language}/${resource.path}`;

      // error loading translation file is expected when specific file is not translated
      // more correct would be to only resolve file-not-found errors but current solution is sufficient
      resource.translations[ language ] = loadFile(translationPath, options).catch(err => {
        warning(`translation file ${translationPath}`, err);
        return Promise.resolve({});
      });
    }

    filePromises.push(resource.translations[ language ]);
  }

  return Promise.all(filePromises).then(fileResponses => {
    // resp.data does not exist if translation file not found
    const filesData = fileResponses.map(resp => resp.data || []);
    const data = joinData(resource.schema.primaryKey, 'translation', ...filesData);

    return { data, resource };
  });
}
|
|
825
|
-
|
|
826
|
-
/** List the dataset's available translation language ids ([] when none declared). */
function getLanguages (options) {
  const translations = options.datapackage.translations;

  return translations ? translations.map(lang => lang.id) : [];
}
|
|
833
|
-
|
|
834
|
-
function loadFile (filePath, options) {
|
|
835
|
-
const { debug, error } = options.diagnostic.prepareDiagnosticFor('loadFile');
|
|
836
|
-
const fullFilePath = getFilePath(options.basePath, filePath);
|
|
837
|
-
|
|
838
|
-
debug(`start reading "${filePath}"`);
|
|
839
|
-
|
|
840
|
-
return new Promise((resolve, reject) => {
|
|
841
|
-
options.fileReader.readText(fullFilePath, (err, data) => {
|
|
842
|
-
if (err) {
|
|
843
|
-
error(`fail "${filePath}" reading`, err);
|
|
844
|
-
return reject(new DdfCsvError(FILE_READING_ERROR, err, fullFilePath));
|
|
845
|
-
}
|
|
846
|
-
|
|
847
|
-
Papa.parse(stripBom(data), {
|
|
848
|
-
header: true,
|
|
849
|
-
skipEmptyLines: true,
|
|
850
|
-
dynamicTyping: (headerName) => {
|
|
851
|
-
// skip parsing time/string concept types
|
|
852
|
-
const concept: any = options.conceptsLookup.get(headerName) || {};
|
|
853
|
-
|
|
854
|
-
return !includes(['time', 'string', 'entity_domain', 'entity_set'], concept.concept_type);
|
|
855
|
-
},
|
|
856
|
-
transform: value => {
|
|
857
|
-
return value === '' ? null : value
|
|
858
|
-
},
|
|
859
|
-
complete: result => {
|
|
860
|
-
debug(`finish reading "${filePath}"`);
|
|
861
|
-
resolve(result);
|
|
862
|
-
},
|
|
863
|
-
error: parseErr => {
|
|
864
|
-
error(`fail "${filePath}" parsing`, parseErr);
|
|
865
|
-
reject(new DdfCsvError(CSV_PARSING_ERROR, parseErr, filePath));
|
|
866
|
-
}
|
|
867
|
-
});
|
|
868
|
-
});
|
|
869
|
-
});
|
|
870
|
-
}
|
|
871
|
-
|
|
872
|
-
/**
 * Build (once) the resource-name -> resource lookup from the datapackage.
 * Normalizes primaryKey to an array, collects field enum constraints
 * (flagging the dataset as constraint-bearing) and attaches an empty
 * translations cache to each resource.
 */
function buildResourcesLookup (datapackagePar) {
  if (resourcesLookup.size > 0) {
    return resourcesLookup;
  }

  for (const resource of datapackagePar.resources) {
    if (!Array.isArray(resource.schema.primaryKey)) {
      resource.schema.primaryKey = [ resource.schema.primaryKey ];
    }

    const constraints = {};

    for (const field of resource.schema.fields) {
      if (field.constraints?.enum) {
        datasetWithConstraints = true; // dataset declares at least one enum constraint
        constraints[ field.name ] = field.constraints.enum;
      }
    }

    resource.constraints = constraints;
    resource.translations = {};
    resourcesLookup.set(resource.name, resource);
  }

  return resourcesLookup;
}
|
|
897
|
-
|
|
898
|
-
/**
 * Build (once) the lookup: canonical key string -> Map(value -> resources)
 * from the datapackage's ddfSchema.
 */
function buildKeyValueLookup (datapackagePar) {
  if (keyValueLookup.size > 0) {
    return keyValueLookup;
  }

  for (const collection in datapackagePar.ddfSchema) {
    for (const kvPair of datapackagePar.ddfSchema[ collection ]) {
      const key = createKeyString(kvPair.primaryKey);
      const resources = kvPair.resources.map(resourceName => resourcesLookup.get(resourceName));

      if (keyValueLookup.has(key)) {
        keyValueLookup.get(key).set(kvPair.value, resources);
      } else {
        keyValueLookup.set(key, new Map([ [ kvPair.value, resources ] ]));
      }
    }
  }

  return keyValueLookup;
}
|
|
920
|
-
|
|
921
|
-
return {
|
|
922
|
-
query,
|
|
923
|
-
queryData,
|
|
924
|
-
loadFile,
|
|
925
|
-
getDatasetInfo
|
|
926
|
-
};
|
|
927
|
-
}
|
|
1
|
+
import includes from 'lodash-es/includes';
|
|
2
|
+
import isEmpty from 'lodash-es/isEmpty';
|
|
3
|
+
import stripBom from 'strip-bom';
|
|
4
|
+
import { getAppropriatePlugin } from './resource-selection-optimizer';
|
|
5
|
+
import { CSV_PARSING_ERROR, DDF_ERROR, DdfCsvError, FILE_READING_ERROR, JSON_PARSING_ERROR } from './ddfcsv-error';
|
|
6
|
+
import { getFilePath, isSchemaQuery, validateQueryDefinitions, validateQueryStructure } from '@vizabi/ddf-query-validator';
|
|
7
|
+
|
|
8
|
+
import * as Papa from 'papaparse';
|
|
9
|
+
import { utcParse } from 'd3-time-format';
|
|
10
|
+
import { IBaseReaderOptions, IDatapackage } from './interfaces';
|
|
11
|
+
|
|
12
|
+
// A value qualifies for numeric comparison when it is a number (including 0 and NaN)
// or any truthy non-number; non-number falsy values (null, undefined, '') do not.
const isValidNumeric = val => typeof val === 'number' || Boolean(val);
|
|
13
|
+
|
|
14
|
+
export function ddfCsvReader (logger?: any) {
|
|
15
|
+
const internalConcepts = [
|
|
16
|
+
{ concept: 'concept', concept_type: 'string', domain: null },
|
|
17
|
+
{ concept: 'concept_type', concept_type: 'string', domain: null }
|
|
18
|
+
];
|
|
19
|
+
|
|
20
|
+
const operators = new Map([
|
|
21
|
+
/* logical operators */
|
|
22
|
+
[ '$and', (row, predicates) => predicates.every(p => applyFilterRow(row, p)) ],
|
|
23
|
+
[ '$or', (row, predicates) => predicates.some(p => applyFilterRow(row, p)) ],
|
|
24
|
+
[ '$not', (row, predicate) => !applyFilterRow(row, predicate) ],
|
|
25
|
+
[ '$nor', (row, predicates) => !predicates.some(p => applyFilterRow(row, p)) ],
|
|
26
|
+
|
|
27
|
+
/* equality operators */
|
|
28
|
+
[ '$eq', (rowValue, filterValue) => rowValue == filterValue ],
|
|
29
|
+
[ '$ne', (rowValue, filterValue) => rowValue != filterValue ],
|
|
30
|
+
[ '$gt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue > filterValue ],
|
|
31
|
+
[ '$gte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue >= filterValue ],
|
|
32
|
+
[ '$lt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue < filterValue ],
|
|
33
|
+
[ '$lte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue <= filterValue ],
|
|
34
|
+
[ '$in', (rowValue, filterValue) => filterValue.has(rowValue) ],
|
|
35
|
+
[ '$nin', (rowValue, filterValue) => !filterValue.has(rowValue) ],
|
|
36
|
+
]);
|
|
37
|
+
|
|
38
|
+
const keyValueLookup = new Map<string, any>();
|
|
39
|
+
const resourcesLookup = new Map();
|
|
40
|
+
|
|
41
|
+
let optimalFilesSet = [];
|
|
42
|
+
let datapackage;
|
|
43
|
+
let datapackagePromise;
|
|
44
|
+
let datasetWithConstraints = false;
|
|
45
|
+
|
|
46
|
+
function getDatasetInfo(baseOptions: IBaseReaderOptions) : Promise<Object> {
|
|
47
|
+
return (datapackagePromise || loadDataPackage(baseOptions));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
function loadDataPackage (baseOptions: IBaseReaderOptions): Promise<IDatapackage> {
|
|
51
|
+
const datapackagePath = getFilePath(baseOptions.basePath);
|
|
52
|
+
const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('loadDataPackage');
|
|
53
|
+
|
|
54
|
+
return new Promise((resolve, reject) => {
|
|
55
|
+
if (datapackage) {
|
|
56
|
+
return resolve(datapackage);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
baseOptions.fileReader.readText(datapackagePath, (err, data) => {
|
|
60
|
+
if (err) {
|
|
61
|
+
error('file reading', err);
|
|
62
|
+
return reject(new DdfCsvError(FILE_READING_ERROR, err, datapackagePath));
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
try {
|
|
66
|
+
datapackage = JSON.parse(stripBom(data));
|
|
67
|
+
optimalFilesSet = [];
|
|
68
|
+
buildResourcesLookup(datapackage);
|
|
69
|
+
buildKeyValueLookup(datapackage);
|
|
70
|
+
} catch (parseErr) {
|
|
71
|
+
error('json file parsing', parseErr);
|
|
72
|
+
return reject(new DdfCsvError(JSON_PARSING_ERROR, parseErr.message, datapackagePath));
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
debug('datapackage content is ready');
|
|
76
|
+
|
|
77
|
+
resolve(datapackage);
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
async function loadConcepts (queryParam, options: IBaseReaderOptions): Promise<object> {
|
|
83
|
+
const { error } = options.diagnostic.prepareDiagnosticFor('loadConcepts');
|
|
84
|
+
// start off with internal concepts
|
|
85
|
+
setConceptsLookup(internalConcepts, options);
|
|
86
|
+
// query concepts
|
|
87
|
+
const conceptQuery = {
|
|
88
|
+
select: { key: [ 'concept' ], value: [ 'concept_type', 'domain' ] },
|
|
89
|
+
from: 'concepts'
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
let result;
|
|
93
|
+
|
|
94
|
+
// not using query() to circumvent the conceptPromise resolving
|
|
95
|
+
try {
|
|
96
|
+
const concepts = await queryData(conceptQuery, options);
|
|
97
|
+
buildConceptsLookup(concepts, options);
|
|
98
|
+
// with conceptsLookup built, we can parse other concept properties
|
|
99
|
+
// according to their concept_type
|
|
100
|
+
result = await reparseConcepts(options);
|
|
101
|
+
} catch (err) {
|
|
102
|
+
error('concepts processing', err);
|
|
103
|
+
throw err;
|
|
104
|
+
}
|
|
105
|
+
return result;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
 * Extend the queried concepts with synthetic `is--<set>` boolean concepts
 * (entity-set membership flags) and the internal concepts, then (re)set the
 * concepts lookup.
 */
function buildConceptsLookup (concepts, options) {
  const membershipFlags = concepts
    .filter(concept => concept.concept_type === 'entity_set')
    .map(concept => ({
      concept: 'is--' + concept.concept,
      concept_type: 'boolean',
      domain: null
    }));

  setConceptsLookup(concepts.concat(membershipFlags, internalConcepts), options);
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Iterates resources for query and applies parsing according to concept_type
|
|
126
|
+
* of headers. Does not take into account join clause.
|
|
127
|
+
* Impure function as it parses data in-place.
|
|
128
|
+
* @return {[type]} [description]
|
|
129
|
+
*/
|
|
130
|
+
function reparseConcepts ({ conceptsLookup }) {
|
|
131
|
+
const parsingFunctions = new Map<string, Function>([
|
|
132
|
+
[ 'boolean', (str) => str === 'true' || str === 'TRUE' ],
|
|
133
|
+
[ 'measure', (str) => parseFloat(str) ]
|
|
134
|
+
]);
|
|
135
|
+
|
|
136
|
+
const resources = getResources([ 'concept' ]);
|
|
137
|
+
|
|
138
|
+
const resourceUpdates = [ ...resources ].map(resource => {
|
|
139
|
+
return resource.data.then(response => {
|
|
140
|
+
|
|
141
|
+
// first find out which resource concepts need parsing
|
|
142
|
+
const resourceConcepts = Object.keys(response.data[ 0 ]);
|
|
143
|
+
const parsingConcepts = new Map<string, Function>();
|
|
144
|
+
|
|
145
|
+
resourceConcepts.forEach(concept => {
|
|
146
|
+
const type = conceptsLookup.get(concept).concept_type;
|
|
147
|
+
const fn = parsingFunctions.get(type);
|
|
148
|
+
|
|
149
|
+
if (fn) {
|
|
150
|
+
parsingConcepts.set(concept, fn);
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
// then parse only those concepts
|
|
155
|
+
return response.data.forEach(row => {
|
|
156
|
+
for (const [ concept, parseFn ] of parsingConcepts) {
|
|
157
|
+
row[ concept ] = parseFn(row[ concept ]);
|
|
158
|
+
}
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
});
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
return Promise.all(resourceUpdates);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// can only take single-dimensional key
// Reset options.conceptsLookup to map each concept id to its full concept row.
function setConceptsLookup (concepts, options) {
  options.conceptsLookup.clear();

  for (const row of concepts) {
    options.conceptsLookup.set(row.concept, row);
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Cheap pre-check run before full validation: a datapoints query selecting no
 * values can be answered with an empty result immediately.
 * Uses strict equality and optional chaining so a malformed query (missing
 * select) falls through to proper validation instead of throwing here.
 * @return {boolean} true when the query trivially resolves to []
 */
function preValidateQueryStructure (queryParam, baseOptions) {
  return queryParam?.from === 'datapoints' && queryParam?.select?.value?.length === 0;
}
|
|
177
|
+
|
|
178
|
+
async function query (queryParam, _baseOptions: IBaseReaderOptions) {
|
|
179
|
+
const baseOptions = Object.assign({}, _baseOptions);
|
|
180
|
+
const { warning, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
|
|
181
|
+
let data;
|
|
182
|
+
|
|
183
|
+
if (preValidateQueryStructure(queryParam, baseOptions)) {
|
|
184
|
+
return Promise.resolve([]);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
try {
|
|
188
|
+
await validateQueryStructure(queryParam, baseOptions);
|
|
189
|
+
baseOptions.datapackage = await (datapackagePromise || (datapackagePromise = loadDataPackage(baseOptions)));
|
|
190
|
+
baseOptions.resourcesLookup = resourcesLookup;
|
|
191
|
+
await loadConcepts(queryParam, baseOptions);
|
|
192
|
+
await validateQueryDefinitions(queryParam, baseOptions);
|
|
193
|
+
|
|
194
|
+
if (isSchemaQuery(queryParam)) {
|
|
195
|
+
data = await querySchema(queryParam, baseOptions);
|
|
196
|
+
} else {
|
|
197
|
+
const appropriatePlugin = datasetWithConstraints && getAppropriatePlugin(this, queryParam, baseOptions);
|
|
198
|
+
|
|
199
|
+
optimalFilesSet = [];
|
|
200
|
+
if (appropriatePlugin) {
|
|
201
|
+
const files = await appropriatePlugin.getRecommendedFilesSet();
|
|
202
|
+
optimalFilesSet = files;
|
|
203
|
+
queryParam.optimalFilesSet = [].concat(files, queryParam.optimalFilesSet);
|
|
204
|
+
|
|
205
|
+
warning('get custom optimal files list by a plugin', optimalFilesSet);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
data = await queryData(queryParam, baseOptions);
|
|
209
|
+
}
|
|
210
|
+
} catch (err) {
|
|
211
|
+
error('general query error', err);
|
|
212
|
+
throw err;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
return data;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
function queryData (queryParam, _options: IBaseReaderOptions) {
|
|
219
|
+
const options = Object.assign({}, _options);
|
|
220
|
+
const { debug } = options.diagnostic.prepareDiagnosticFor('queryData');
|
|
221
|
+
const {
|
|
222
|
+
select: { key = [], value = [] },
|
|
223
|
+
from = '',
|
|
224
|
+
where = {},
|
|
225
|
+
join = {},
|
|
226
|
+
order_by = [],
|
|
227
|
+
language
|
|
228
|
+
} = queryParam;
|
|
229
|
+
const select = { key, value };
|
|
230
|
+
|
|
231
|
+
debug('start all data loading', queryParam);
|
|
232
|
+
|
|
233
|
+
const projection = new Set(select.key.concat(select.value));
|
|
234
|
+
const filterFields = getFilterFields(where).filter(field => from === 'entities' || !projection.has(field));
|
|
235
|
+
// load all relevant resources
|
|
236
|
+
const resourcesPromise = loadResources(select.key, [ ...select.value, ...filterFields ], language, options, queryParam);
|
|
237
|
+
// list of entities selected from a join clause, later insterted in where clause
|
|
238
|
+
const joinsPromise = getJoinFilters(join, queryParam, options);
|
|
239
|
+
// filter which ensures result only includes queried entity sets
|
|
240
|
+
const entitySetFilterPromise = getEntitySetFilter(select.key, queryParam, options);
|
|
241
|
+
|
|
242
|
+
return Promise.all([ resourcesPromise, entitySetFilterPromise, joinsPromise ])
|
|
243
|
+
.then(([ resourceResponses, entitySetFilter, joinFilters ]) => {
|
|
244
|
+
debug('finish all data loading', queryParam);
|
|
245
|
+
// create filter from where, join filters and entity set filters
|
|
246
|
+
const whereResolved = processWhere(where, joinFilters);
|
|
247
|
+
const filter = mergeFilters(entitySetFilter, whereResolved);
|
|
248
|
+
|
|
249
|
+
debug('dataTables processing', queryParam);
|
|
250
|
+
const dataTables = resourceResponses
|
|
251
|
+
// rename key-columns and remove irrelevant value-columns
|
|
252
|
+
.map(response => processResourceResponse(response, select, filterFields, options));
|
|
253
|
+
|
|
254
|
+
debug('queryResult processing', queryParam);
|
|
255
|
+
// join (reduce) data to one data table
|
|
256
|
+
const queryResult = joinData(select.key, 'overwrite', ...dataTables)
|
|
257
|
+
.filter(row => applyFilterRow(row, filter)) // apply filters (entity sets and where (including join))
|
|
258
|
+
.map(row => fillMissingValues(row, projection)) // fill any missing values with null values
|
|
259
|
+
.map(row => projectRow(row, projection)); // remove fields used only for filtering
|
|
260
|
+
|
|
261
|
+
debug('result ordering', queryParam);
|
|
262
|
+
orderData(queryResult, order_by);
|
|
263
|
+
debug('final result is ready', queryParam);
|
|
264
|
+
|
|
265
|
+
return parseTime(queryResult, options);
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* Parses time concept strings in result to Date objects
|
|
271
|
+
* @param result
|
|
272
|
+
* @param options
|
|
273
|
+
*/
|
|
274
|
+
function parseTime(result, options: IBaseReaderOptions) {
|
|
275
|
+
const conceptsLookup = options.conceptsLookup;
|
|
276
|
+
const concepts = Object.keys(result[0] || {});
|
|
277
|
+
const timeConcepts = concepts.map(c => conceptsLookup.get(c) || {}).filter(co => co.concept_type == 'time');
|
|
278
|
+
timeConcepts.forEach(({ concept }) => {
|
|
279
|
+
const parse = getTimeParser(concept, options);
|
|
280
|
+
result.forEach(row => {
|
|
281
|
+
row[concept] = parse(row[concept]);
|
|
282
|
+
});
|
|
283
|
+
});
|
|
284
|
+
return result;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* Time parsers for DDF built-in time concepts
|
|
289
|
+
* @param concept
|
|
290
|
+
*/
|
|
291
|
+
function getTimeParser(concept, options: IBaseReaderOptions) {
|
|
292
|
+
const { error } = options.diagnostic.prepareDiagnosticFor('queryData');
|
|
293
|
+
const parsers = {
|
|
294
|
+
year: utcParse('%Y'),
|
|
295
|
+
month: utcParse('%Y-%m'),
|
|
296
|
+
day: utcParse('%Y%m%d'),
|
|
297
|
+
hour: utcParse('%Y%m%dt%H'),
|
|
298
|
+
minute: utcParse('%Y%m%dt%H%M'),
|
|
299
|
+
second: utcParse('%Y%m%dt%H%M%S'),
|
|
300
|
+
week: utcParse('%Yw%V'),
|
|
301
|
+
quarter: utcParse('%Yq%q')
|
|
302
|
+
};
|
|
303
|
+
function tryParse(str) {
|
|
304
|
+
for (const i in parsers) {
|
|
305
|
+
const dateObject = parsers[i](str);
|
|
306
|
+
if (dateObject) {
|
|
307
|
+
return dateObject;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
error('Could not parse time string: ' + str);
|
|
311
|
+
return null;
|
|
312
|
+
}
|
|
313
|
+
if (concept == 'time') {
|
|
314
|
+
return tryParse;
|
|
315
|
+
}
|
|
316
|
+
if (!parsers[concept]) {
|
|
317
|
+
error('No time parser found for time concept: ' + concept);
|
|
318
|
+
return str => str;
|
|
319
|
+
}
|
|
320
|
+
return parsers[concept];
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
 * Sort `data` in place by the query's order_by clause.
 * Entries are either "concept" (ascending) or { concept: "asc"|"desc" }.
 */
function orderData (data, orderBy = []) {
  if (orderBy.length === 0) {
    return;
  }

  // process ["geo"] or [{"geo": "asc"}] to [{ concept: "geo", direction: 1 }]
  const orderNormalized = orderBy.map(orderPart => {
    if (typeof orderPart === 'string') {
      return { concept: orderPart, direction: 1 };
    }

    const concept = Object.keys(orderPart)[ 0 ];

    return { concept, direction: orderPart[ concept ] === 'asc' ? 1 : -1 };
  });

  // compare by each field in turn until one differs
  data.sort((a, b) => {
    for (const { concept, direction } of orderNormalized) {
      if (a[ concept ] < b[ concept ]) {
        return -direction;
      }
      if (a[ concept ] > b[ concept ]) {
        return direction;
      }
    }

    return 0;
  });
}
|
|
357
|
+
|
|
358
|
+
/**
 * Replaces `$join` placeholders with relevant `{ "$in": [...] }` operator.
 * Replaces $in- and $nin-arrays with sets for faster filtering
 * @param {Object} where Where clause possibly containing $join placeholders as field values.
 * @param {Object} joinFilters Collection of lists of entity or time values,
 *                             coming from other tables defined in query `join` clause.
 * @return {Object} Where clause with $join placeholders replaced by valid filter statements
 */
function processWhere (where, joinFilters) {
  const result = {};

  for (const field in where) {
    const fieldValue = where[ field ];

    if ([ '$and', '$or', '$nor' ].includes(field)) {
      result[ field ] = fieldValue.map(subFilter => processWhere(subFilter, joinFilters));
    } else if (field === '$in' || field === '$nin') {
      // prepare "$in" fields for optimized lookup
      result[ field ] = new Set(fieldValue);
    } else if (typeof joinFilters[ fieldValue ] !== 'undefined') {
      // found a join!
      // not assigning to result[field] because joinFilter can contain $and/$or statements in case of
      // time concept (join-where is directly copied, not executed)
      // otherwise could end up with where: { year: { $and: [{ ... }]}}, which is invalid
      // (no boolean ops inside field objects)
      // in case of entity join, joinFilters contains correct field
      Object.assign(result, joinFilters[ fieldValue ]);
    } else if (typeof fieldValue === 'object') {
      // catches $not and fields with equality operator-objects
      // { <field>: { "$lt": 1500 }}
      result[ field ] = processWhere(fieldValue, joinFilters);
    } else {
      // catches rest, being all equality operators except for $in and $nin
      result[ field ] = fieldValue;
    }
  }

  return result;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Normalizes every top-level `$or` clause of the filter (in place) so each
 * branch contains exactly one field: a branch such as `{ a: 1, b: 2 }`
 * (an implicit $and) is split into separate `{ a: 1 }, { b: 2 }` branches.
 * @param {Object} filter Filter object; mutated in place.
 * @return {Object} The same filter object with flattened $or branches.
 */
function patchFilterForOrClause(filter) {

  function processFilter(where) {
    // strict equality instead of the original loose `==` — keys are strings
    for (const key of Object.keys(where)) {
      if (key === '$or') {
        where[key] = where[key].reduce((branches, branch) => {
          const branchKeys = Object.keys(branch);
          if (branchKeys.length > 1) {
            // split a multi-field branch into one branch per field
            for (const branchKey of branchKeys) {
              branches.push({ [branchKey]: branch[branchKey] });
            }
          } else {
            branches.push(branch);
          }
          return branches;
        }, []);
      }
    }
    return where;
  }

  return processFilter(filter);
}
|
|
423
|
+
|
|
424
|
+
/**
 * Combines any number of filter objects into one `{ $and: [...] }` clause.
 * Empty filters are skipped; each kept filter has its $or branches flattened
 * via patchFilterForOrClause (mutating the input filter).
 * @param {...Object} filters Filters to merge.
 * @return {Object} Combined `{ $and: [...] }` filter.
 */
function mergeFilters (...filters) {
  const combined = { $and: [] };

  for (const filter of filters) {
    if (isEmpty(filter)) {
      continue;
    }
    patchFilterForOrClause(filter);
    combined.$and.push(filter);
  }

  return combined;
}
|
|
434
|
+
|
|
435
|
+
function querySchema (queryParam, baseOptions: IBaseReaderOptions) {
|
|
436
|
+
const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
|
|
437
|
+
const getSchemaFromCollection = collectionPar => {
|
|
438
|
+
debug(`get schema for collection ${collectionPar}`);
|
|
439
|
+
return baseOptions.datapackage.ddfSchema[ collectionPar ].map(
|
|
440
|
+
({ primaryKey, value }) => ({ key: primaryKey, value })
|
|
441
|
+
);
|
|
442
|
+
};
|
|
443
|
+
|
|
444
|
+
const collection = queryParam.from.split('.')[ 0 ];
|
|
445
|
+
|
|
446
|
+
if (baseOptions.datapackage.ddfSchema[ collection ]) {
|
|
447
|
+
return getSchemaFromCollection(collection);
|
|
448
|
+
} else if (collection === '*') {
|
|
449
|
+
return Object.keys(baseOptions.datapackage.ddfSchema)
|
|
450
|
+
.map(getSchemaFromCollection)
|
|
451
|
+
.reduce((a, b) => a.concat(b));
|
|
452
|
+
} else {
|
|
453
|
+
const message = `No valid collection (${collection}) for schema query`;
|
|
454
|
+
error(message);
|
|
455
|
+
throwError(new DdfCsvError(DDF_ERROR, message));
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
/**
 * Ensures every projected field exists on the row; missing fields become null.
 * @param {Object} row Result row; mutated in place.
 * @param {Iterable} projection Field names that must be present.
 * @return {Object} The same row.
 */
function fillMissingValues (row, projection) {
  for (const field of projection) {
    if (row[ field ] === undefined) {
      row[ field ] = null;
    }
  }

  return row;
}
|
|
468
|
+
|
|
469
|
+
/**
 * Tests one row against a filter object. Multiple keys in a filter object form
 * an implicit $and, which is what the .every() expresses.
 * @return {boolean} true if the row satisfies the filter.
 */
function applyFilterRow (row, filter) {
  return Object.keys(filter).every(filterKey => {
    const operator = operators.get(filterKey);
    const condition = filter[ filterKey ];

    if (operator) {
      return operator(row, condition);
    }
    if (typeof condition !== 'object') {
      // { <field>: <value> } is shorthand for { <field>: { $eq: <value> }};
      // values are assumed to be primitives, not Number/Boolean/String objects
      return operators.get('$eq')(row[ filterKey ], condition);
    }
    // condition is an operator-object like { $lt: 1500 };
    // deep (mongo-style) nesting is not supported
    return applyFilterRow(row[ filterKey ], condition);
  });
}
|
|
487
|
+
|
|
488
|
+
/**
 * Resolves every entry of the query `join` clause into a concrete filter and
 * merges them into one lookup keyed by join id (e.g. "$geo").
 * @return {Promise<Object>} joinID -> filter object
 */
function getJoinFilters (join, queryParam, options) {
  const pending = Object.keys(join)
    .map(joinID => getJoinFilter(joinID, join[ joinID ], queryParam, options));

  return Promise.all(pending)
    .then(results => results.reduce(mergeObjects, {}));
}
|
|
492
|
+
|
|
493
|
+
/** Shallowly merges b's own enumerable properties into a; returns the mutated a. */
function mergeObjects (a, b) {
  return Object.assign(a, b);
}
|
|
496
|
+
|
|
497
|
+
/**
 * Builds the filter a single $join placeholder stands for.
 * Time joins: the join's where clause is used verbatim, since time values are
 * not explicit entities in the dataSource (assumes no time-properties, e.g.
 * <year>,population). Entity joins: matching entities are queried and collected
 * into a `{ <key>: { $in: Set } }` filter.
 * Assumption: join.key equals the field used in the where clause —
 * where: { geo: "$geo" }, join: { "$geo": { key: "geo", where: { ... } } }.
 */
function getJoinFilter (joinID, join, queryParam, options) {
  const isTimeConcept = options.conceptsLookup.get(join.key).concept_type === 'time';

  if (isTimeConcept) {
    // no query needed: copy the where clause as-is
    return Promise.resolve({ [ joinID ]: join.where });
  }

  // entity concept: query the entities (or concepts) matching the join's where
  const subQuery = {
    select: { key: [ join.key ] },
    where: join.where,
    from: options.conceptsLookup.has(join.key) ? 'entities' : 'concepts'
  };

  return queryData(subQuery, Object.assign({ joinID }, options))
    .then(result => ({
      [ joinID ]: {
        [ join.key ]: {
          $in: new Set(result.map(row => row[ join.key ]))
        }
      }
    }));
}
|
|
521
|
+
|
|
522
|
+
/**
 * Collects the unique field names referenced anywhere in a filter, descending
 * into $and/$or/$not/$nor arrays. Deeper (mongo-style) object structures are
 * not supported.
 * @return {Array} Unique field names.
 */
function getFilterFields (filter) {
  const fields = [];

  for (const field of Object.keys(filter)) {
    if (includes([ '$and', '$or', '$not', '$nor' ], field)) {
      // boolean operator: recurse into every sub-filter
      for (const subFields of filter[ field ].map(getFilterFields)) {
        fields.push(...subFields);
      }
    } else {
      fields.push(field);
    }
  }

  return [ ...new Set(fields) ];
}
|
|
536
|
+
|
|
537
|
+
/**
 * Selects the concept objects (from options.conceptsLookup) whose concept_type
 * is one of the given types.
 * @param {Array} conceptTypes Concept types to keep.
 * @param {Array} queryKey Concept names to consider; defaults to all known concepts.
 * @param {Object} options Reader options carrying conceptsLookup.
 * @return {Array} Concept objects of the requested types.
 */
function filterConceptsByType (conceptTypes, queryKey, options) {
  const candidates = queryKey || Array.from(options.conceptsLookup.keys());
  const matching = [];

  for (const conceptString of candidates) {
    const concept = options.conceptsLookup.get(conceptString);

    if (includes(conceptTypes, concept.concept_type)) {
      matching.push(concept);
    }
  }

  return matching;
}
|
|
557
|
+
|
|
558
|
+
/**
 * Builds a rename map from entity-concept aliases (headers present in the
 * resource key) to the entity concepts actually requested in the query key.
 * E.g. querying key ["region"] against a resource keyed on "geo" requires the
 * "geo" header to be renamed to "region".
 * @return {Map} alias concept name -> requested concept name
 */
function getEntityConceptRenameMap (queryKey, resourceKey, options) {
  const resourceKeySet = new Set(resourceKey);
  const entityConceptTypes = [ 'entity_set', 'entity_domain' ];
  const queryEntityConcepts = filterConceptsByType(entityConceptTypes, queryKey, options);

  if (queryEntityConcepts.length === 0) {
    return new Map();
  }

  const allEntityConcepts = filterConceptsByType(entityConceptTypes, null, options);

  // true when lookupConcept (as found in the resource key) can stand in for concept
  const isAliasOf = (lookupConcept, concept) => {
    if (!resourceKeySet.has(lookupConcept.concept) || lookupConcept.concept === concept.concept) {
      // not present in the resource key, or it is the concept itself
      return false;
    }
    if (concept.concept_type === 'entity_set') {
      // sibling entity sets in the same domain, or the set's own entity domain
      return lookupConcept.domain === concept.domain || lookupConcept.concept === concept.domain;
    }
    // concept_type === 'entity_domain': entity sets belonging to this domain
    return lookupConcept.domain === concept.concept;
  };

  const renameMap = new Map();

  for (const concept of queryEntityConcepts) {
    for (const aliasConcept of allEntityConcepts) {
      if (isAliasOf(aliasConcept, concept)) {
        renameMap.set(aliasConcept.concept, concept.concept);
      }
    }
  }

  return renameMap;
}
|
|
595
|
+
|
|
596
|
+
/**
 * Builds a `{ <entity_set>: { $in: Set } }` filter for each entity-set concept
 * among the given concept strings, listing the domain entities whose
 * `is--<set>` flag is truthy.
 * @param {Array} conceptStrings Concepts to inspect for entity sets.
 * @return {Promise<Object>} Merged filter object for all entity-set concepts.
 */
function getEntitySetFilter (conceptStrings, queryParam, options) {
  const entitySetConcepts = filterConceptsByType([ 'entity_set' ], conceptStrings, options);

  const promises = entitySetConcepts.map(concept => {
    const flagColumn = 'is--' + concept.concept;

    return queryData(
      { select: { key: [ concept.domain ], value: [ flagColumn ] }, from: 'entities' },
      Object.assign({}, options)
    ).then(result => ({
      [ concept.concept ]: {
        $in: new Set(
          result
            .filter(row => row[ flagColumn ])
            .map(row => row[ concept.domain ])
        )
      }
    }));
  });

  return Promise.all(promises).then(results => {
    return results.reduce((a, b) => Object.assign(a, b), {});
  });
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Returns all resources for a certain key value pair or multiple values for one key
|
|
626
|
+
* @param {Array} key The key of the requested resources
|
|
627
|
+
* @param {Array/string} value The value or values found in the requested resources
|
|
628
|
+
* @return {Array} Array of resource objects
|
|
629
|
+
*/
|
|
630
|
+
function getResources (key, value?) {
|
|
631
|
+
// value not given, load all resources for key
|
|
632
|
+
if (!value || value.length === 0 || key[0] === value) {
|
|
633
|
+
return new Set(
|
|
634
|
+
[ ...keyValueLookup
|
|
635
|
+
.get(createKeyString(key))
|
|
636
|
+
.values()
|
|
637
|
+
].reduce((a, b) => a.concat(b))
|
|
638
|
+
);
|
|
639
|
+
}
|
|
640
|
+
// multiple values
|
|
641
|
+
if (Array.isArray(value)) {
|
|
642
|
+
return value
|
|
643
|
+
.map(singleValue => getResources(key, singleValue))
|
|
644
|
+
.reduce((resultSet, resources) => new Set([ ...resultSet, ...resources ]), new Set());
|
|
645
|
+
}
|
|
646
|
+
// one key, one value
|
|
647
|
+
let oneKeyOneValueResourcesArray = keyValueLookup
|
|
648
|
+
.get(createKeyString(key))
|
|
649
|
+
.get(value);
|
|
650
|
+
|
|
651
|
+
if (oneKeyOneValueResourcesArray) {
|
|
652
|
+
oneKeyOneValueResourcesArray = oneKeyOneValueResourcesArray
|
|
653
|
+
.filter(v => isEmpty(optimalFilesSet) || includes(optimalFilesSet, v.path));
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
return new Set(oneKeyOneValueResourcesArray);
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
/**
 * Projects a loaded resource's rows to the fields needed for select/filter and
 * renames entity headers to the concepts requested in the query.
 * Renaming must happen after projection to prevent ambiguity: e.g. a resource
 * with `<geo>,name,region` fields queried as { key: ["region"], value: ["name"] }
 * would, if renamed first, produce the invalid header set `<region>,name,region`.
 * Projecting first yields `<geo>,name`, which can then be renamed safely.
 */
function processResourceResponse (response, select, filterFields, options) {
  const resourcePK = response.resource.schema.primaryKey;
  // every field used by the select clause or by filters
  const resourceProjection = new Set([ ...resourcePK, ...select.value, ...filterFields ]);
  // maps relevant resource entity headers to the requested entity concepts
  const renameMap = getEntityConceptRenameMap(select.key, resourcePK, options);

  return response.data
    .map(row => projectRow(row, resourceProjection)) // drop fields not selected/filtered
    .map(row => renameHeaderRow(row, renameMap));    // rename AFTER projection
}
|
|
677
|
+
|
|
678
|
+
/**
 * Loads every resource matching the given key/value pair, in the requested
 * language when translations are available.
 * @return {Promise<Array>} Resolves to an array of { data, resource } pairs.
 */
function loadResources (key, value, language, options, queryParam) {
  const { debug } = options.diagnostic.prepareDiagnosticFor('loadResource');
  const resources = [ ...getResources(key, value) ];

  debug('resources list by query', { queryParam, resources });

  return Promise.all(
    resources.map(resource => loadResource(resource, language, options))
  );
}
|
|
688
|
+
|
|
689
|
+
/**
 * Returns a copy of the row containing only the fields present in projectionSet.
 * @param {Object} row Source row (not mutated).
 * @param {Set} projectionSet Field names to keep.
 * @return {Object} Projected row.
 */
function projectRow (row, projectionSet) {
  const projected = {};

  Object.keys(row)
    .filter(concept => projectionSet.has(concept))
    .forEach(concept => {
      projected[ concept ] = row[ concept ];
    });

  return projected;
}
|
|
700
|
+
|
|
701
|
+
/**
 * Returns a copy of the row with fields renamed according to renameMap;
 * fields without a mapping keep their original name.
 * @param {Object} row Source row (not mutated).
 * @param {Map} renameMap old field name -> new field name
 * @return {Object} Renamed row.
 */
function renameHeaderRow (row, renameMap) {
  const renamed = {};

  for (const [ concept, cell ] of Object.entries(row)) {
    renamed[ renameMap.get(concept) || concept ] = cell;
  }

  return renamed;
}
|
|
710
|
+
|
|
711
|
+
/**
 * Joins several row arrays on the given key. Rows with identical key values
 * are merged according to joinMode (see joinRow). A single dataset is returned
 * unchanged.
 * @return {Array} Joined rows.
 */
function joinData (key, joinMode, ...data) {
  if (data.length === 1) {
    return data[ 0 ];
  }

  const canonicalKey = key.slice(0).sort();
  const rowsByKey = new Map();

  for (const dataset of data) {
    for (const row of dataset) {
      const keyString = canonicalKey.map(concept => row[ concept ]).join(',');
      const existingRow = rowsByKey.get(keyString);

      if (existingRow) {
        joinRow(existingRow, row, joinMode);
      } else {
        // copy so later joins don't mutate the source rows
        rowsByKey.set(keyString, Object.assign({}, row));
      }
    }
  }

  return [ ...rowsByKey.values() ];
}
|
|
734
|
+
|
|
735
|
+
/**
 * Merges sourceRow into resultRow in place.
 * Modes:
 *  - 'overwrite': last value wins, no empty-value or conflict handling;
 *  - 'translation': like overwrite, but empty-string cells are skipped so
 *    untranslated values survive, and differing non-empty values are allowed
 *    (they are translations);
 *  - 'overwriteWithError': like overwrite, but conflicting non-equal values
 *    raise a DDF JOIN error.
 */
function joinRow (resultRow, sourceRow, mode) {
  switch (mode) {
    case 'overwrite':
      Object.assign(resultRow, sourceRow);
      break;
    case 'translation':
      for (const concept in sourceRow) {
        if (sourceRow[ concept ] !== '') {
          resultRow[ concept ] = sourceRow[ concept ];
        }
      }
      break;
    case 'overwriteWithError':
      for (const concept in sourceRow) {
        const existing = resultRow[ concept ];
        if (existing !== undefined && existing !== sourceRow[ concept ]) {
          // two resources disagree on the same cell: broken dataset
          const sourceRowStr = JSON.stringify(sourceRow);
          const resultRowStr = JSON.stringify(resultRow);
          const errStr =
            `JOIN Error: two resources have different data for "${concept}": ${sourceRowStr},${resultRowStr}`;

          throwError(new DdfCsvError(DDF_ERROR, errStr));
        } else {
          resultRow[ concept ] = sourceRow[ concept ];
        }
      }
      break;
  }
}
|
|
767
|
+
|
|
768
|
+
function throwError (error: DdfCsvError) {
|
|
769
|
+
const currentLogger = logger || console;
|
|
770
|
+
|
|
771
|
+
currentLogger.error(error.message);
|
|
772
|
+
|
|
773
|
+
throw error;
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
/**
 * Serializes a primary key to a canonical comma-joined string.
 * Without a row: joins the sorted key concepts (e.g. "geo,time").
 * With a row: joins the row's values for those concepts, in the same order.
 */
function createKeyString (key, row = false) {
  const canonicalKey = key.slice(0).sort();

  return row
    ? canonicalKey.map(concept => row[ concept ]).join(',')
    : canonicalKey.join(',');
}
|
|
785
|
+
|
|
786
|
+
/**
 * Loads a resource's data file and, when a valid language is requested, its
 * translation file; both promises are cached on the resource object. The data
 * rows are then overlaid with translated cells ('translation' join mode).
 * @return {Promise<{data, resource}>}
 */
function loadResource (resource, language, options) {
  const { warning } = options.diagnostic.prepareDiagnosticFor('loadResource');

  if (typeof resource.data === 'undefined') {
    resource.data = loadFile(resource.path, options);
  }

  const filePromises = [ resource.data ];

  const languageValid = typeof language !== 'undefined' && includes(getLanguages(options), language);

  if (languageValid) {
    if (typeof resource.translations[ language ] === 'undefined') {
      const translationPath = `lang/${language}/${resource.path}`;

      // a load error is expected when this particular file is not translated;
      // ideally only file-not-found would be swallowed, but resolving to an
      // empty result on any error is sufficient here
      resource.translations[ language ] = loadFile(translationPath, options)
        .catch(err => {
          warning(`translation file ${translationPath}`, err);
          return Promise.resolve({});
        });
    }

    filePromises.push(resource.translations[ language ]);
  }

  return Promise.all(filePromises).then(fileResponses => {
    // resp.data does not exist when the translation file was not found
    const filesData = fileResponses.map(resp => resp.data || []);
    const data = joinData(resource.schema.primaryKey, 'translation', ...filesData);

    return { data, resource };
  });
}
|
|
825
|
+
|
|
826
|
+
function getLanguages (options: {datapackage}): string[] {
|
|
827
|
+
if (!options.datapackage.translations) {
|
|
828
|
+
return [];
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
return options.datapackage.translations.map(lang => lang.id);
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
function loadFile (filePath, options) {
|
|
835
|
+
const { debug, error } = options.diagnostic.prepareDiagnosticFor('loadFile');
|
|
836
|
+
const fullFilePath = getFilePath(options.basePath, filePath);
|
|
837
|
+
|
|
838
|
+
debug(`start reading "${filePath}"`);
|
|
839
|
+
|
|
840
|
+
return new Promise((resolve, reject) => {
|
|
841
|
+
options.fileReader.readText(fullFilePath, (err, data) => {
|
|
842
|
+
if (err) {
|
|
843
|
+
error(`fail "${filePath}" reading`, err);
|
|
844
|
+
return reject(new DdfCsvError(FILE_READING_ERROR, err, fullFilePath));
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
Papa.parse(stripBom(data), {
|
|
848
|
+
header: true,
|
|
849
|
+
skipEmptyLines: true,
|
|
850
|
+
dynamicTyping: (headerName) => {
|
|
851
|
+
// skip parsing time/string concept types
|
|
852
|
+
const concept: any = options.conceptsLookup.get(headerName) || {};
|
|
853
|
+
|
|
854
|
+
return !includes(['time', 'string', 'entity_domain', 'entity_set'], concept.concept_type);
|
|
855
|
+
},
|
|
856
|
+
transform: value => {
|
|
857
|
+
return value === '' ? null : value
|
|
858
|
+
},
|
|
859
|
+
complete: result => {
|
|
860
|
+
debug(`finish reading "${filePath}"`);
|
|
861
|
+
resolve(result);
|
|
862
|
+
},
|
|
863
|
+
error: parseErr => {
|
|
864
|
+
error(`fail "${filePath}" parsing`, parseErr);
|
|
865
|
+
reject(new DdfCsvError(CSV_PARSING_ERROR, parseErr, filePath));
|
|
866
|
+
}
|
|
867
|
+
});
|
|
868
|
+
});
|
|
869
|
+
});
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
/**
 * Indexes datapackage resources by name (memoized in the shared resourcesLookup).
 * Each resource is normalized on the way in: primaryKey forced to an array,
 * enum constraints collected onto resource.constraints, and a translations
 * cache object attached.
 */
function buildResourcesLookup (datapackagePar) {
  // already built — resourcesLookup is shared state
  if (resourcesLookup.size > 0) {
    return resourcesLookup;
  }

  for (const resource of datapackagePar.resources) {
    if (!Array.isArray(resource.schema.primaryKey)) {
      resource.schema.primaryKey = [ resource.schema.primaryKey ];
    }

    // collect per-field enum constraints and flag that this dataset uses them
    const constraints = {};
    for (const field of resource.schema.fields) {
      if (field.constraints?.enum) {
        if (!datasetWithConstraints) datasetWithConstraints = true;
        constraints[ field.name ] = field.constraints.enum;
      }
    }
    resource.constraints = constraints;

    resource.translations = {};
    resourcesLookup.set(resource.name, resource);
  }

  return resourcesLookup;
}
|
|
897
|
+
|
|
898
|
+
/**
 * Indexes resources as "key string -> value -> resources" from the ddfSchema
 * (memoized in the shared keyValueLookup; expects resourcesLookup to be built).
 */
function buildKeyValueLookup (datapackagePar) {
  // already built — keyValueLookup is shared state
  if (keyValueLookup.size > 0) {
    return keyValueLookup;
  }

  for (const collection in datapackagePar.ddfSchema) {
    for (const kvPair of datapackagePar.ddfSchema[ collection ]) {
      const key = createKeyString(kvPair.primaryKey);
      const resources = kvPair.resources.map(resourceName => resourcesLookup.get(resourceName));

      const valueMap = keyValueLookup.get(key);
      if (valueMap) {
        valueMap.set(kvPair.value, resources);
      } else {
        keyValueLookup.set(key, new Map([ [ kvPair.value, resources ] ]));
      }
    }
  }

  return keyValueLookup;
}
|
|
920
|
+
|
|
921
|
+
return {
|
|
922
|
+
query,
|
|
923
|
+
queryData,
|
|
924
|
+
loadFile,
|
|
925
|
+
getDatasetInfo
|
|
926
|
+
};
|
|
927
|
+
}
|