@vizabi/reader-ddfcsv 4.5.3 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/.nyc_output/574fe7f0-58f6-4de2-9524-16e72b784218.json +1 -0
  2. package/.nyc_output/9aede69f-00e6-432d-bdeb-2c9f4084a6dc.json +1 -0
  3. package/.nyc_output/processinfo/574fe7f0-58f6-4de2-9524-16e72b784218.json +1 -0
  4. package/.nyc_output/processinfo/9aede69f-00e6-432d-bdeb-2c9f4084a6dc.json +1 -0
  5. package/.nyc_output/processinfo/index.json +1 -0
  6. package/LICENSE +0 -0
  7. package/README.md +2 -2
  8. package/coverage/base.css +224 -0
  9. package/coverage/block-navigation.js +87 -0
  10. package/coverage/favicon.png +0 -0
  11. package/coverage/index.html +161 -0
  12. package/coverage/prettify.css +1 -0
  13. package/coverage/prettify.js +2 -0
  14. package/coverage/sort-arrow-sprite.png +0 -0
  15. package/coverage/sorter.js +210 -0
  16. package/coverage/src/ddf-csv.ts.html +2866 -0
  17. package/coverage/src/ddfcsv-error.ts.html +148 -0
  18. package/coverage/src/ddfcsv-reader.ts.html +538 -0
  19. package/coverage/src/file-readers/backend-file-reader.ts.html +175 -0
  20. package/coverage/src/file-readers/github-path-adapter.ts.html +244 -0
  21. package/coverage/src/file-readers/index.html +131 -0
  22. package/coverage/src/index.html +176 -0
  23. package/coverage/src/index.ts.html +145 -0
  24. package/coverage/src/interfaces.ts.html +196 -0
  25. package/coverage/src/resource-selection-optimizer/in-clause-under-conjunction.ts.html +745 -0
  26. package/coverage/src/resource-selection-optimizer/index.html +131 -0
  27. package/coverage/src/resource-selection-optimizer/index.ts.html +118 -0
  28. package/coverage/src/test-cases/concepts.ts.html +166 -0
  29. package/coverage/src/test-cases/entities.ts.html +241 -0
  30. package/coverage/src/test-cases/index.html +131 -0
  31. package/dist/reader-ddfcsv.js +15 -1
  32. package/dist/reader-ddfcsv.js.map +1 -0
  33. package/dist/stats.html +4950 -0
  34. package/icon.png +0 -0
  35. package/lib/src/ddf-csv.d.ts +0 -0
  36. package/lib/src/ddf-csv.js +13 -14
  37. package/lib/src/ddf-csv.js.map +1 -1
  38. package/lib/src/ddfcsv-error.d.ts +0 -0
  39. package/lib/src/ddfcsv-error.js +0 -0
  40. package/lib/src/ddfcsv-error.js.map +0 -0
  41. package/lib/src/ddfcsv-reader.d.ts +0 -0
  42. package/lib/src/ddfcsv-reader.js +5 -5
  43. package/lib/src/ddfcsv-reader.js.map +1 -1
  44. package/lib/src/file-readers/backend-file-reader.d.ts +0 -0
  45. package/lib/src/file-readers/backend-file-reader.js +0 -0
  46. package/lib/src/file-readers/backend-file-reader.js.map +0 -0
  47. package/lib/src/file-readers/github-path-adapter.d.ts +0 -0
  48. package/lib/src/file-readers/github-path-adapter.js +0 -0
  49. package/lib/src/file-readers/github-path-adapter.js.map +0 -0
  50. package/lib/src/index.d.ts +0 -0
  51. package/lib/src/index.js +2 -2
  52. package/lib/src/index.js.map +0 -0
  53. package/lib/src/interfaces.d.ts +0 -0
  54. package/lib/src/interfaces.js +0 -0
  55. package/lib/src/interfaces.js.map +0 -0
  56. package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.d.ts +0 -0
  57. package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.js +16 -17
  58. package/lib/src/resource-selection-optimizer/in-clause-under-conjunction.js.map +1 -1
  59. package/lib/src/resource-selection-optimizer/index.d.ts +0 -0
  60. package/lib/src/resource-selection-optimizer/index.js +2 -2
  61. package/lib/src/resource-selection-optimizer/index.js.map +1 -1
  62. package/lib-web/src/ddf-csv.d.ts +0 -0
  63. package/lib-web/src/ddf-csv.js +30 -34
  64. package/lib-web/src/ddf-csv.js.map +1 -1
  65. package/lib-web/src/ddfcsv-error.d.ts +0 -0
  66. package/lib-web/src/ddfcsv-error.js +5 -9
  67. package/lib-web/src/ddfcsv-error.js.map +1 -1
  68. package/lib-web/src/ddfcsv-reader.d.ts +0 -0
  69. package/lib-web/src/ddfcsv-reader.js +16 -19
  70. package/lib-web/src/ddfcsv-reader.js.map +1 -1
  71. package/lib-web/src/file-readers/frontend-file-reader.d.ts +0 -0
  72. package/lib-web/src/file-readers/frontend-file-reader.js +1 -5
  73. package/lib-web/src/file-readers/frontend-file-reader.js.map +1 -1
  74. package/lib-web/src/file-readers/github-path-adapter.d.ts +0 -0
  75. package/lib-web/src/file-readers/github-path-adapter.js +1 -4
  76. package/lib-web/src/file-readers/github-path-adapter.js.map +1 -1
  77. package/lib-web/src/index-web.d.ts +0 -0
  78. package/lib-web/src/index-web.js +9 -14
  79. package/lib-web/src/index-web.js.map +1 -1
  80. package/lib-web/src/interfaces.d.ts +0 -0
  81. package/lib-web/src/interfaces.js +1 -2
  82. package/lib-web/src/interfaces.js.map +0 -0
  83. package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.d.ts +0 -0
  84. package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.js +12 -17
  85. package/lib-web/src/resource-selection-optimizer/in-clause-under-conjunction.js.map +1 -1
  86. package/lib-web/src/resource-selection-optimizer/index.d.ts +0 -0
  87. package/lib-web/src/resource-selection-optimizer/index.js +4 -7
  88. package/lib-web/src/resource-selection-optimizer/index.js.map +1 -1
  89. package/package.json +93 -129
  90. package/rollup.config.mjs +30 -0
  91. package/scripts/set-own-version.js +0 -0
  92. package/src/ddf-csv.ts +927 -927
  93. package/src/ddfcsv-error.ts +0 -0
  94. package/src/ddfcsv-reader.ts +151 -151
  95. package/src/file-readers/backend-file-reader.ts +0 -0
  96. package/src/file-readers/frontend-file-reader.ts +0 -0
  97. package/src/file-readers/github-path-adapter.ts +0 -0
  98. package/src/index-web.ts +0 -0
  99. package/src/index.ts +0 -0
  100. package/src/interfaces.ts +0 -0
  101. package/src/resource-selection-optimizer/in-clause-under-conjunction.ts +220 -220
  102. package/src/resource-selection-optimizer/index.ts +11 -11
  103. package/src/test-cases/concepts.ts +0 -0
  104. package/src/test-cases/entities.ts +52 -52
  105. package/test/assets-fixtures/world-50m.json +0 -0
  106. package/test/assets.spec.ts +0 -0
  107. package/test/common.ts +1 -1
  108. package/test/definition/concepts-definition.spec.ts +0 -0
  109. package/test/definition/datapoints-definition.spec.ts +1 -1
  110. package/test/definition/entities-definition.spec.ts +264 -264
  111. package/test/definition/schema-definition.spec.ts +0 -0
  112. package/test/diagnostics.spec.ts +0 -0
  113. package/test/features-service.spec.ts +95 -95
  114. package/test/high-load.spec.ts +0 -0
  115. package/test/main.spec.ts +0 -0
  116. package/test/multi-instances.spec.ts +0 -0
  117. package/test/result-fixtures/datapoints-assets.json +0 -0
  118. package/test/result-fixtures/in-clause-under-conjunction-1.json +0 -0
  119. package/test/result-fixtures/in-clause-under-conjunction-2.json +0 -0
  120. package/test/result-fixtures/multi-instances/concepts-sg.json +0 -0
  121. package/test/result-fixtures/multi-instances/concepts-soderstornsmodellen.json +0 -0
  122. package/test/result-fixtures/multi-instances/datapoints-sg.json +0 -0
  123. package/test/result-fixtures/multi-instances/datapoints-soderstornsmodellen.json +0 -0
  124. package/test/result-fixtures/multi-instances/entities-sg.json +0 -0
  125. package/test/result-fixtures/multi-instances/entities-soderstornsmodellen.json +0 -0
  126. package/test/result-fixtures/multi-instances/schema-sg.json +0 -0
  127. package/test/result-fixtures/multi-instances/schema-soderstornsmodellen.json +0 -0
  128. package/test/schema.spec.ts +0 -0
  129. package/test/tslint.json +0 -0
  130. package/tsconfig-web.json +41 -41
  131. package/tsconfig.json +40 -37
  132. package/tslint.json +0 -0
  133. package/.travis.yml +0 -37
  134. package/deploy.js +0 -87
  135. package/dist/reader-ddfcsv-polyfill.js +0 -2
  136. package/dist/reader-ddfcsv-polyfill.js.map +0 -1
  137. package/test/mocha.opts +0 -7
package/src/ddf-csv.ts CHANGED
@@ -1,927 +1,927 @@
1
- import * as includes from 'lodash.includes';
2
- import * as isEmpty from 'lodash.isempty';
3
- import * as stripBom from 'strip-bom';
4
- import { getAppropriatePlugin } from './resource-selection-optimizer';
5
- import { CSV_PARSING_ERROR, DDF_ERROR, DdfCsvError, FILE_READING_ERROR, JSON_PARSING_ERROR } from './ddfcsv-error';
6
- import { getFilePath, isSchemaQuery, validateQueryDefinitions, validateQueryStructure } from '@vizabi/ddf-query-validator';
7
-
8
- import * as Papa from 'papaparse';
9
- import { utcParse } from 'd3-time-format';
10
- import { IBaseReaderOptions, IDatapackage } from './interfaces';
11
-
12
- const isValidNumeric = val => typeof val !== 'number' && !val ? false : true;
13
-
14
- export function ddfCsvReader (logger?: any) {
15
- const internalConcepts = [
16
- { concept: 'concept', concept_type: 'string', domain: null },
17
- { concept: 'concept_type', concept_type: 'string', domain: null }
18
- ];
19
-
20
- const operators = new Map([
21
- /* logical operators */
22
- [ '$and', (row, predicates) => predicates.every(p => applyFilterRow(row, p)) ],
23
- [ '$or', (row, predicates) => predicates.some(p => applyFilterRow(row, p)) ],
24
- [ '$not', (row, predicate) => !applyFilterRow(row, predicate) ],
25
- [ '$nor', (row, predicates) => !predicates.some(p => applyFilterRow(row, p)) ],
26
-
27
- /* equality operators */
28
- [ '$eq', (rowValue, filterValue) => rowValue == filterValue ],
29
- [ '$ne', (rowValue, filterValue) => rowValue != filterValue ],
30
- [ '$gt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue > filterValue ],
31
- [ '$gte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue >= filterValue ],
32
- [ '$lt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue < filterValue ],
33
- [ '$lte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue <= filterValue ],
34
- [ '$in', (rowValue, filterValue) => filterValue.has(rowValue) ],
35
- [ '$nin', (rowValue, filterValue) => !filterValue.has(rowValue) ],
36
- ]);
37
-
38
- const keyValueLookup = new Map<string, any>();
39
- const resourcesLookup = new Map();
40
-
41
- let optimalFilesSet = [];
42
- let datapackage;
43
- let datapackagePromise;
44
- let datasetWithConstraints = false;
45
-
46
- function getDatasetInfo(baseOptions: IBaseReaderOptions) : Promise<Object> {
47
- return (datapackagePromise || loadDataPackage(baseOptions));
48
- }
49
-
50
- function loadDataPackage (baseOptions: IBaseReaderOptions): Promise<IDatapackage> {
51
- const datapackagePath = getFilePath(baseOptions.basePath);
52
- const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('loadDataPackage');
53
-
54
- return new Promise((resolve, reject) => {
55
- if (datapackage) {
56
- return resolve(datapackage);
57
- }
58
-
59
- baseOptions.fileReader.readText(datapackagePath, (err, data) => {
60
- if (err) {
61
- error('file reading', err);
62
- return reject(new DdfCsvError(FILE_READING_ERROR, err, datapackagePath));
63
- }
64
-
65
- try {
66
- datapackage = JSON.parse(stripBom(data));
67
- optimalFilesSet = [];
68
- buildResourcesLookup(datapackage);
69
- buildKeyValueLookup(datapackage);
70
- } catch (parseErr) {
71
- error('json file parsing', parseErr);
72
- return reject(new DdfCsvError(JSON_PARSING_ERROR, parseErr.message, datapackagePath));
73
- }
74
-
75
- debug('datapackage content is ready');
76
-
77
- resolve(datapackage);
78
- });
79
- });
80
- }
81
-
82
- async function loadConcepts (queryParam, options: IBaseReaderOptions): Promise<object> {
83
- const { error } = options.diagnostic.prepareDiagnosticFor('loadConcepts');
84
- // start off with internal concepts
85
- setConceptsLookup(internalConcepts, options);
86
- // query concepts
87
- const conceptQuery = {
88
- select: { key: [ 'concept' ], value: [ 'concept_type', 'domain' ] },
89
- from: 'concepts'
90
- };
91
-
92
- let result;
93
-
94
- // not using query() to circumvent the conceptPromise resolving
95
- try {
96
- const concepts = await queryData(conceptQuery, options);
97
- buildConceptsLookup(concepts, options);
98
- // with conceptsLookup built, we can parse other concept properties
99
- // according to their concept_type
100
- result = await reparseConcepts(options);
101
- } catch (err) {
102
- error('concepts processing', err);
103
- throw err;
104
- }
105
- return result;
106
- }
107
-
108
- function buildConceptsLookup (concepts, options) {
109
- const entitySetMembershipConcepts = concepts
110
- .filter(concept => concept.concept_type === 'entity_set')
111
- .map(concept => ({
112
- concept: 'is--' + concept.concept,
113
- concept_type: 'boolean',
114
- domain: null
115
- }));
116
-
117
- concepts = concepts
118
- .concat(entitySetMembershipConcepts)
119
- .concat(internalConcepts);
120
-
121
- setConceptsLookup(concepts, options);
122
- }
123
-
124
- /**
125
- * Iterates resources for query and applies parsing according to concept_type
126
- * of headers. Does not take into account join clause.
127
- * Impure function as it parses data in-place.
128
- * @return {[type]} [description]
129
- */
130
- function reparseConcepts ({ conceptsLookup }) {
131
- const parsingFunctions = new Map<string, Function>([
132
- [ 'boolean', (str) => str === 'true' || str === 'TRUE' ],
133
- [ 'measure', (str) => parseFloat(str) ]
134
- ]);
135
-
136
- const resources = getResources([ 'concept' ]);
137
-
138
- const resourceUpdates = [ ...resources ].map(resource => {
139
- return resource.data.then(response => {
140
-
141
- // first find out which resource concepts need parsing
142
- const resourceConcepts = Object.keys(response.data[ 0 ]);
143
- const parsingConcepts = new Map<string, Function>();
144
-
145
- resourceConcepts.forEach(concept => {
146
- const type = conceptsLookup.get(concept).concept_type;
147
- const fn = parsingFunctions.get(type);
148
-
149
- if (fn) {
150
- parsingConcepts.set(concept, fn);
151
- }
152
- });
153
-
154
- // then parse only those concepts
155
- return response.data.forEach(row => {
156
- for (const [ concept, parseFn ] of parsingConcepts) {
157
- row[ concept ] = parseFn(row[ concept ]);
158
- }
159
- });
160
-
161
- });
162
- });
163
-
164
- return Promise.all(resourceUpdates);
165
- }
166
-
167
- // can only take single-dimensional key
168
- function setConceptsLookup (concepts, options) {
169
- options.conceptsLookup.clear();
170
- concepts.forEach(row => options.conceptsLookup.set(row.concept, row));
171
- }
172
-
173
- function preValidateQueryStructure(queryParam, baseOptions): boolean {
174
- if (queryParam.from == "datapoints" && queryParam.select.value.length == 0) return true;
175
- return false;
176
- }
177
-
178
- async function query (queryParam, _baseOptions: IBaseReaderOptions) {
179
- const baseOptions = Object.assign({}, _baseOptions);
180
- const { warning, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
181
- let data;
182
-
183
- if (preValidateQueryStructure(queryParam, baseOptions)) {
184
- return Promise.resolve([]);
185
- }
186
-
187
- try {
188
- await validateQueryStructure(queryParam, baseOptions);
189
- baseOptions.datapackage = await (datapackagePromise || (datapackagePromise = loadDataPackage(baseOptions)));
190
- baseOptions.resourcesLookup = resourcesLookup;
191
- await loadConcepts(queryParam, baseOptions);
192
- await validateQueryDefinitions(queryParam, baseOptions);
193
-
194
- if (isSchemaQuery(queryParam)) {
195
- data = await querySchema(queryParam, baseOptions);
196
- } else {
197
- const appropriatePlugin = datasetWithConstraints && getAppropriatePlugin(this, queryParam, baseOptions);
198
-
199
- optimalFilesSet = [];
200
- if (appropriatePlugin) {
201
- const files = await appropriatePlugin.getRecommendedFilesSet();
202
- optimalFilesSet = files;
203
- queryParam.optimalFilesSet = [].concat(files, queryParam.optimalFilesSet);
204
-
205
- warning('get custom optimal files list by a plugin', optimalFilesSet);
206
- }
207
-
208
- data = await queryData(queryParam, baseOptions);
209
- }
210
- } catch (err) {
211
- error('general query error', err);
212
- throw err;
213
- }
214
-
215
- return data;
216
- }
217
-
218
- function queryData (queryParam, _options: IBaseReaderOptions) {
219
- const options = Object.assign({}, _options);
220
- const { debug } = options.diagnostic.prepareDiagnosticFor('queryData');
221
- const {
222
- select: { key = [], value = [] },
223
- from = '',
224
- where = {},
225
- join = {},
226
- order_by = [],
227
- language
228
- } = queryParam;
229
- const select = { key, value };
230
-
231
- debug('start all data loading', queryParam);
232
-
233
- const projection = new Set(select.key.concat(select.value));
234
- const filterFields = getFilterFields(where).filter(field => from === 'entities' || !projection.has(field));
235
- // load all relevant resources
236
- const resourcesPromise = loadResources(select.key, [ ...select.value, ...filterFields ], language, options, queryParam);
237
- // list of entities selected from a join clause, later insterted in where clause
238
- const joinsPromise = getJoinFilters(join, queryParam, options);
239
- // filter which ensures result only includes queried entity sets
240
- const entitySetFilterPromise = getEntitySetFilter(select.key, queryParam, options);
241
-
242
- return Promise.all([ resourcesPromise, entitySetFilterPromise, joinsPromise ])
243
- .then(([ resourceResponses, entitySetFilter, joinFilters ]) => {
244
- debug('finish all data loading', queryParam);
245
- // create filter from where, join filters and entity set filters
246
- const whereResolved = processWhere(where, joinFilters);
247
- const filter = mergeFilters(entitySetFilter, whereResolved);
248
-
249
- debug('dataTables processing', queryParam);
250
- const dataTables = resourceResponses
251
- // rename key-columns and remove irrelevant value-columns
252
- .map(response => processResourceResponse(response, select, filterFields, options));
253
-
254
- debug('queryResult processing', queryParam);
255
- // join (reduce) data to one data table
256
- const queryResult = joinData(select.key, 'overwrite', ...dataTables)
257
- .filter(row => applyFilterRow(row, filter)) // apply filters (entity sets and where (including join))
258
- .map(row => fillMissingValues(row, projection)) // fill any missing values with null values
259
- .map(row => projectRow(row, projection)); // remove fields used only for filtering
260
-
261
- debug('result ordering', queryParam);
262
- orderData(queryResult, order_by);
263
- debug('final result is ready', queryParam);
264
-
265
- return parseTime(queryResult, options);
266
- });
267
- }
268
-
269
- /**
270
- * Parses time concept strings in result to Date objects
271
- * @param result
272
- * @param options
273
- */
274
- function parseTime(result, options: IBaseReaderOptions) {
275
- const conceptsLookup = options.conceptsLookup;
276
- const concepts = Object.keys(result[0] || {});
277
- const timeConcepts = concepts.map(c => conceptsLookup.get(c) || {}).filter(co => co.concept_type == 'time');
278
- timeConcepts.forEach(({ concept }) => {
279
- const parse = getTimeParser(concept, options);
280
- result.forEach(row => {
281
- row[concept] = parse(row[concept]);
282
- });
283
- });
284
- return result;
285
- }
286
-
287
- /**
288
- * Time parsers for DDF built-in time concepts
289
- * @param concept
290
- */
291
- function getTimeParser(concept, options: IBaseReaderOptions) {
292
- const { error } = options.diagnostic.prepareDiagnosticFor('queryData');
293
- const parsers = {
294
- year: utcParse('%Y'),
295
- month: utcParse('%Y-%m'),
296
- day: utcParse('%Y%m%d'),
297
- hour: utcParse('%Y%m%dt%H'),
298
- minute: utcParse('%Y%m%dt%H%M'),
299
- second: utcParse('%Y%m%dt%H%M%S'),
300
- week: utcParse('%Yw%V'),
301
- quarter: utcParse('%Yq%q')
302
- };
303
- function tryParse(str) {
304
- for (const i in parsers) {
305
- const dateObject = parsers[i](str);
306
- if (dateObject) {
307
- return dateObject;
308
- }
309
- }
310
- error('Could not parse time string: ' + str);
311
- return null;
312
- }
313
- if (concept == 'time') {
314
- return tryParse;
315
- }
316
- if (!parsers[concept]) {
317
- error('No time parser found for time concept: ' + concept);
318
- return str => str;
319
- }
320
- return parsers[concept];
321
- }
322
-
323
- function orderData (data, orderBy = []) {
324
- if (orderBy.length === 0) {
325
- return;
326
- }
327
-
328
- // process ["geo"] or [{"geo": "asc"}] to [{ concept: "geo", direction: 1 }];
329
- const orderNormalized = orderBy.map(orderPart => {
330
- if (typeof orderPart === 'string') {
331
- return { concept: orderPart, direction: 1 };
332
- } else {
333
- const concept = Object.keys(orderPart)[ 0 ];
334
- const direction = (orderPart[ concept ] === 'asc' ? 1 : -1);
335
-
336
- return { concept, direction };
337
- }
338
- });
339
-
340
- // sort by one or more fields
341
- const n = orderNormalized.length;
342
-
343
- data.sort((a, b) => {
344
- for (let i = 0; i < n; i++) {
345
- const order = orderNormalized[ i ];
346
-
347
- if (a[ order.concept ] < b[ order.concept ]) {
348
- return -1 * order.direction;
349
- } else if (a[ order.concept ] > b[ order.concept ]) {
350
- return 1 * order.direction;
351
- }
352
- }
353
-
354
- return 0;
355
- });
356
- }
357
-
358
- /**
359
- * Replaces `$join` placeholders with relevant `{ "$in": [...] }` operator.
360
- * Replaces $in- and $nin-arrays with sets for faster filtering
361
- * @param {Object} where Where clause possibly containing $join placeholders as field values.
362
- * @param {Object} joinFilters Collection of lists of entity or time values,
363
- * coming from other tables defined in query `join` clause.
364
- * @return {Object} Where clause with $join placeholders replaced by valid filter statements
365
- */
366
- function processWhere (where, joinFilters) {
367
- const result = {};
368
-
369
- for (const field in where) {
370
- const fieldValue = where[ field ];
371
-
372
- if (includes([ '$and', '$or', '$nor' ], field)) {
373
- result[ field ] = fieldValue.map(subFilter => processWhere(subFilter, joinFilters));
374
- } else if (field === '$in' || field === '$nin') {
375
- // prepare "$in" fields for optimized lookup
376
- result[ field ] = new Set(fieldValue);
377
- } else if (typeof joinFilters[ fieldValue ] !== 'undefined') {
378
- // found a join!
379
- // not assigning to result[field] because joinFilter can contain $and/$or statements in case of
380
- // time concept (join-where is directly copied, not executed)
381
- // otherwise could end up with where: { year: { $and: [{ ... }]}}, which is invalid
382
- // (no boolean ops inside field objects)
383
- // in case of entity join, joinFilters contains correct field
384
- Object.assign(result, joinFilters[ fieldValue ]);
385
- } else if (typeof fieldValue === 'object') {
386
- // catches $not and fields with equality operator-objects
387
- // { <field>: { "$lt": 1500 }}
388
- result[ field ] = processWhere(fieldValue, joinFilters);
389
- } else {
390
- // catches rest, being all equality operators except for $in and $nin
391
- // { "$lt": 1500 }
392
- result[ field ] = fieldValue;
393
- }
394
- }
395
-
396
- return result;
397
- }
398
-
399
- function patchFilterForOrClause(filter) {
400
-
401
- function processFilter(where) {
402
- const whereKeys = Object.keys(where);
403
- for (const key of whereKeys) {
404
- if (key == "$or") {
405
- where[key] = where[key].reduce((res, value) => {
406
- const valueKeys = Object.keys(value);
407
- if (valueKeys.length > 1) {
408
- for (const vKey of valueKeys) {
409
- res.push({ [vKey]: value[vKey] });
410
- }
411
- } else {
412
- res.push(value);
413
- }
414
- return res;
415
- }, []);
416
- }
417
- }
418
- return where;
419
- }
420
-
421
- return processFilter(filter);
422
- }
423
-
424
- function mergeFilters (...filters) {
425
- return filters.reduce((a, b) => {
426
- if (!isEmpty(b)) {
427
- patchFilterForOrClause(b);
428
- a.$and.push(b);
429
- }
430
-
431
- return a;
432
- }, { $and: [] });
433
- }
434
-
435
- function querySchema (queryParam, baseOptions: IBaseReaderOptions) {
436
- const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
437
- const getSchemaFromCollection = collectionPar => {
438
- debug(`get schema for collection ${collectionPar}`);
439
- return baseOptions.datapackage.ddfSchema[ collectionPar ].map(
440
- ({ primaryKey, value }) => ({ key: primaryKey, value })
441
- );
442
- };
443
-
444
- const collection = queryParam.from.split('.')[ 0 ];
445
-
446
- if (baseOptions.datapackage.ddfSchema[ collection ]) {
447
- return getSchemaFromCollection(collection);
448
- } else if (collection === '*') {
449
- return Object.keys(baseOptions.datapackage.ddfSchema)
450
- .map(getSchemaFromCollection)
451
- .reduce((a, b) => a.concat(b));
452
- } else {
453
- const message = `No valid collection (${collection}) for schema query`;
454
- error(message);
455
- throwError(new DdfCsvError(DDF_ERROR, message));
456
- }
457
- }
458
-
459
- function fillMissingValues (row, projection) {
460
- for (const field of projection) {
461
- if (typeof row[ field ] === 'undefined') {
462
- row[ field ] = null;
463
- }
464
- }
465
-
466
- return row;
467
- }
468
-
469
- function applyFilterRow (row, filter) {
470
- // implicit $and in filter object handled by .every()
471
- return Object.keys(filter).every(filterKey => {
472
- const operator = operators.get(filterKey);
473
-
474
- if (operator) {
475
- return operator(row, filter[ filterKey ]);
476
- // assuming values are primitives not Number/Boolean/String objects
477
- } else if (typeof filter[ filterKey ] !== 'object') {
478
- // { <field>: <value> } is shorthand for { <field>: { $eq: <value> }}
479
- return operators.get('$eq')(row[ filterKey ], filter[ filterKey ]);
480
- } else {
481
- // filter[filterKey] is an object and will thus contain
482
- // an equality operator (no deep objects (like in Mongo) supported)
483
- return applyFilterRow(row[ filterKey ], filter[ filterKey ]);
484
- }
485
- });
486
- }
487
-
488
- function getJoinFilters (join, queryParam, options) {
489
- return Promise.all(Object.keys(join).map(joinID => getJoinFilter(joinID, join[ joinID ], queryParam, options)))
490
- .then(results => results.reduce(mergeObjects, {}));
491
- }
492
-
493
- function mergeObjects (a, b) {
494
- return Object.assign(a, b);
495
- }
496
-
497
- function getJoinFilter (joinID, join, queryParam, options) {
498
- // assumption: join.key is same as field in where clause
499
- // - where: { geo: $geo }, join: { "$geo": { key: geo, where: { ... }}}
500
- // - where: { year: $year }, join: { "$year": { key: year, where { ... }}}
501
- if (options.conceptsLookup.get(join.key).concept_type === 'time') {
502
- // time, no query needed as time values are not explicit in the dataSource
503
- // assumption: there are no time-properties. E.g. data like <year>,population
504
- return Promise.resolve({ [ joinID ]: join.where });
505
- } else {
506
- // entity concept
507
- return queryData({
508
- select: { key: [ join.key ] },
509
- where: join.where,
510
- from: options.conceptsLookup.has(join.key) ? 'entities' : 'concepts'
511
- }, Object.assign({ joinID }, options))
512
- .then(result => ({
513
- [ joinID ]: {
514
- [ join.key ]: {
515
- $in: new Set(result.map(row => row[ join.key ]))
516
- }
517
- }
518
- }));
519
- }
520
- }
521
-
522
- function getFilterFields (filter) {
523
- const fields = [];
524
-
525
- for (const field in filter) {
526
- // no support for deeper object structures (mongo style)
527
- if (includes([ '$and', '$or', '$not', '$nor' ], field)) {
528
- filter[ field ].map(getFilterFields).forEach(subFields => fields.push(...subFields));
529
- } else {
530
- fields.push(field);
531
- }
532
- }
533
-
534
- return [...new Set(fields)];
535
- }
536
-
537
- /**
538
- * Filter concepts by type
539
- * @param {Array} conceptStrings Array of concept strings to filter out. Default all concepts.
540
- * @param {Array} conceptTypes Array of concept types to filter out
541
- * @return {Array} Array of concept strings only of given types
542
- */
543
- function filterConceptsByType (conceptTypes, queryKey, options) {
544
- const conceptStrings = queryKey || Array.from(options.conceptsLookup.keys());
545
- const concepts = [];
546
-
547
- for (const conceptString of conceptStrings) {
548
- const concept = options.conceptsLookup.get(conceptString);
549
-
550
- if (includes(conceptTypes, concept.concept_type)) {
551
- concepts.push(concept);
552
- }
553
- }
554
-
555
- return concepts;
556
- }
557
-
558
- /**
559
- * Find the aliases an entity concept can have
560
- * @param {Array} conceptStrings An array of concept strings for which entity aliases
561
- * are found if they're entity concepts
562
- * @return {Map} Map with all aliases as keys and the entity concept as value
563
- */
564
- function getEntityConceptRenameMap (queryKey, resourceKey, options) {
565
- const resourceKeySet = new Set(resourceKey);
566
- const entityConceptTypes = [ 'entity_set', 'entity_domain' ];
567
- const queryEntityConcepts = filterConceptsByType(entityConceptTypes, queryKey, options);
568
-
569
- if (queryEntityConcepts.length === 0) {
570
- return new Map();
571
- }
572
-
573
- const allEntityConcepts = filterConceptsByType(entityConceptTypes, null, options);
574
-
575
- return queryEntityConcepts
576
- .map(concept => allEntityConcepts
577
- .filter(lookupConcept => {
578
- if (concept.concept_type === 'entity_set') {
579
- return resourceKeySet.has(lookupConcept.concept) &&
580
- lookupConcept.concept !== concept.concept && // not the actual concept
581
- (
582
- lookupConcept.domain === concept.domain || // other entity sets in entity domain
583
- lookupConcept.concept === concept.domain // entity domain of the entity set
584
- );
585
- } else {
586
- // concept_type == "entity_domain"
587
- return resourceKeySet.has(lookupConcept.concept) &&
588
- lookupConcept.concept !== concept.concept && // not the actual concept
589
- lookupConcept.domain === concept.concept; // entity sets of the entity domain
590
- }
591
- })
592
- .reduce((map, aliasConcept) => map.set(aliasConcept.concept, concept.concept), new Map())
593
- ).reduce((mapA, mapB) => new Map([ ...mapA, ...mapB ]), new Map());
594
- }
595
-
596
- /**
597
- * Get a "$in" filter containing all entities for a entity concept.
598
- * @param {Array} conceptStrings Array of concept strings for which entities should be found
599
- * @return {Array} Array of filter objects for each entity concept
600
- */
601
- function getEntitySetFilter (conceptStrings, queryParam, options) {
602
- const promises = filterConceptsByType([ 'entity_set' ], conceptStrings, options)
603
- .map(concept => queryData({
604
- select: { key: [ concept.domain ], value: [ 'is--' + concept.concept ] },
605
- from: 'entities'
606
- }, Object.assign({}, options))
607
- .then(result => ({
608
- [ concept.concept ]:
609
- {
610
- $in: new Set(
611
- result
612
- .filter(row => row[ 'is--' + concept.concept ])
613
- .map(row => row[ concept.domain ])
614
- )
615
- }
616
- }))
617
- );
618
-
619
- return Promise.all(promises).then(results => {
620
- return results.reduce((a, b) => Object.assign(a, b), {});
621
- });
622
- }
623
-
624
- /**
625
- * Returns all resources for a certain key value pair or multiple values for one key
626
- * @param {Array} key The key of the requested resources
627
- * @param {Array/string} value The value or values found in the requested resources
628
- * @return {Array} Array of resource objects
629
- */
630
- function getResources (key, value?) {
631
- // value not given, load all resources for key
632
- if (!value || value.length === 0 || key[0] === value) {
633
- return new Set(
634
- [ ...keyValueLookup
635
- .get(createKeyString(key))
636
- .values()
637
- ].reduce((a, b) => a.concat(b))
638
- );
639
- }
640
- // multiple values
641
- if (Array.isArray(value)) {
642
- return value
643
- .map(singleValue => getResources(key, singleValue))
644
- .reduce((resultSet, resources) => new Set([ ...resultSet, ...resources ]), new Set());
645
- }
646
- // one key, one value
647
- let oneKeyOneValueResourcesArray = keyValueLookup
648
- .get(createKeyString(key))
649
- .get(value);
650
-
651
- if (oneKeyOneValueResourcesArray) {
652
- oneKeyOneValueResourcesArray = oneKeyOneValueResourcesArray
653
- .filter(v => isEmpty(optimalFilesSet) || includes(optimalFilesSet, v.path));
654
- }
655
-
656
- return new Set(oneKeyOneValueResourcesArray);
657
- }
658
-
659
- function processResourceResponse (response, select, filterFields, options) {
660
- const resourcePK = response.resource.schema.primaryKey;
661
- // all fields used for select or filters
662
- const resourceProjection = new Set([ ...resourcePK, ...select.value, ...filterFields ]);
663
- // rename map to rename relevant entity headers to requested entity concepts
664
- const renameMap = getEntityConceptRenameMap(select.key, resourcePK, options);
665
-
666
- // Renaming must happen after projection to prevent ambiguity
667
- // E.g. a resource with `<geo>,name,region` fields.
668
- // Assume `region` is an entity set in domain `geo`.
669
- // { select: { key: ["region"], value: ["name"] } } is queried
670
- // If one did rename first the file would have headers `<region>,name,region`.
671
- // This would be invalid and make unambiguous projection impossible.
672
- // Thus we need to apply projection first with result: `<geo>,name`, then we can rename.
673
- return response.data
674
- .map(row => projectRow(row, resourceProjection)) // remove fields not used for select or filter
675
- .map(row => renameHeaderRow(row, renameMap)); // rename header rows (must happen **after** projection)
676
- }
677
-
678
- function loadResources (key, value, language, options, queryParam) {
679
- const { debug } = options.diagnostic.prepareDiagnosticFor('loadResource');
680
- const resources = getResources(key, value);
681
-
682
- debug('resources list by query', {queryParam, resources: [ ...resources ]});
683
-
684
- return Promise.all([ ...resources ].map(
685
- resource => loadResource(resource, language, options)
686
- ));
687
- }
688
-
689
- function projectRow (row, projectionSet) {
690
- const result = {};
691
-
692
- for (const concept of Object.keys(row)) {
693
- if (projectionSet.has(concept)) {
694
- result[ concept ] = row[ concept ];
695
- }
696
- }
697
-
698
- return result;
699
- }
700
-
701
- function renameHeaderRow (row, renameMap) {
702
- const result = {};
703
-
704
- for (const concept of Object.keys(row)) {
705
- result[ renameMap.get(concept) || concept ] = row[ concept ];
706
- }
707
-
708
- return result;
709
- }
710
-
711
- function joinData (key, joinMode, ...data) {
712
- if (data.length === 1) {
713
- return data[ 0 ];
714
- }
715
-
716
- const canonicalKey = key.slice(0).sort();
717
- const dataMap = data.reduce((result, dataPar) => {
718
- dataPar.forEach(row => {
719
- const keyString = canonicalKey.map(concept => row[ concept ]).join(',');
720
-
721
- if (result.has(keyString)) {
722
- const resultRow = result.get(keyString);
723
-
724
- joinRow(resultRow, row, joinMode);
725
- } else {
726
- result.set(keyString, Object.assign({}, row));
727
- }
728
- });
729
-
730
- return result;
731
- }, new Map());
732
- return [ ...dataMap.values() ];
733
- }
734
-
735
- function joinRow (resultRow, sourceRow, mode) {
736
- switch (mode) {
737
- case 'overwrite':
738
- /* Simple alternative without empty value or error handling */
739
- Object.assign(resultRow, sourceRow);
740
- break;
741
- case 'translation':
742
- // Translation joining ignores empty values
743
- // and allows different values for strings (= translations)
744
- for (const concept in sourceRow) {
745
- if (sourceRow[ concept ] !== '') {
746
- resultRow[ concept ] = sourceRow[ concept ];
747
- }
748
- }
749
- break;
750
- case 'overwriteWithError':
751
- /* Alternative for "overwrite" with JOIN error detection */
752
- for (const concept in sourceRow) {
753
- if (resultRow[ concept ] !== undefined && resultRow[ concept ] !== sourceRow[ concept ]) {
754
- const sourceRowStr = JSON.stringify(sourceRow);
755
- const resultRowStr = JSON.stringify(resultRow);
756
- const errStr =
757
- `JOIN Error: two resources have different data for "${concept}": ${sourceRowStr},${resultRowStr}`;
758
-
759
- throwError(new DdfCsvError(DDF_ERROR, errStr));
760
- } else {
761
- resultRow[ concept ] = sourceRow[ concept ];
762
- }
763
- }
764
- break;
765
- }
766
- }
767
-
768
- function throwError (error: DdfCsvError) {
769
- const currentLogger = logger || console;
770
-
771
- currentLogger.error(error.message);
772
-
773
- throw error;
774
- }
775
-
776
- function createKeyString (key, row = false) {
777
- const canonicalKey = key.slice(0).sort();
778
-
779
- if (!row) {
780
- return canonicalKey.join(',');
781
- } else {
782
- return canonicalKey.map(concept => row[ concept ]).join(',');
783
- }
784
- }
785
-
786
- function loadResource (resource, language, options) {
787
- const { warning } = options.diagnostic.prepareDiagnosticFor('loadResource');
788
- const filePromises = [];
789
-
790
- if (typeof resource.data === 'undefined') {
791
- resource.data = loadFile(resource.path, options);
792
- }
793
-
794
- filePromises.push(resource.data);
795
-
796
- const languageValid = typeof language !== 'undefined' && includes(getLanguages(options), language);
797
- const languageLoaded = typeof resource.translations[ language ] !== 'undefined';
798
-
799
- if (languageValid) {
800
- if (!languageLoaded) {
801
- const translationPath = `lang/${language}/${resource.path}`;
802
-
803
- // error loading translation file is expected when specific file is not translated
804
- // more correct would be to only resolve file-not-found errors but current solution is sufficient
805
- resource.translations[ language ] = loadFile(translationPath, options)
806
- .catch(err => {
807
- warning(`translation file ${translationPath}`, err);
808
- return Promise.resolve({});
809
- });
810
- }
811
-
812
- filePromises.push(resource.translations[ language ]);
813
- }
814
-
815
- return Promise.all(filePromises).then(fileResponses => {
816
- // resp.data does not exist if translation file not found
817
- const filesData = fileResponses.map(resp => resp.data || []);
818
- const primaryKey = resource.schema.primaryKey;
819
- const data = joinData(primaryKey, 'translation', ...filesData);
820
-
821
- return { data, resource };
822
- });
823
-
824
- }
825
-
826
- function getLanguages (options: {datapackage}): string[] {
827
- if (!options.datapackage.translations) {
828
- return [];
829
- }
830
-
831
- return options.datapackage.translations.map(lang => lang.id);
832
- }
833
-
834
- function loadFile (filePath, options) {
835
- const { debug, error } = options.diagnostic.prepareDiagnosticFor('loadFile');
836
- const fullFilePath = getFilePath(options.basePath, filePath);
837
-
838
- debug(`start reading "${filePath}"`);
839
-
840
- return new Promise((resolve, reject) => {
841
- options.fileReader.readText(fullFilePath, (err, data) => {
842
- if (err) {
843
- error(`fail "${filePath}" reading`, err);
844
- return reject(new DdfCsvError(FILE_READING_ERROR, err, fullFilePath));
845
- }
846
-
847
- Papa.parse(stripBom(data), {
848
- header: true,
849
- skipEmptyLines: true,
850
- dynamicTyping: (headerName) => {
851
- // skip parsing time/string concept types
852
- const concept: any = options.conceptsLookup.get(headerName) || {};
853
-
854
- return !includes(['time', 'string', 'entity_domain', 'entity_set'], concept.concept_type);
855
- },
856
- transform: value => {
857
- return value === '' ? null : value
858
- },
859
- complete: result => {
860
- debug(`finish reading "${filePath}"`);
861
- resolve(result);
862
- },
863
- error: parseErr => {
864
- error(`fail "${filePath}" parsing`, parseErr);
865
- reject(new DdfCsvError(CSV_PARSING_ERROR, parseErr, filePath));
866
- }
867
- });
868
- });
869
- });
870
- }
871
-
872
- function buildResourcesLookup (datapackagePar) {
873
- if (resourcesLookup.size > 0) {
874
- return resourcesLookup;
875
- }
876
-
877
- datapackagePar.resources.forEach(resource => {
878
- if (!Array.isArray(resource.schema.primaryKey)) {
879
- resource.schema.primaryKey = [ resource.schema.primaryKey ];
880
- }
881
-
882
- const constraints = resource.schema.fields.reduce((result, field) => {
883
- if (field.constraints?.enum) {
884
- if (!datasetWithConstraints) datasetWithConstraints = true;
885
- result[field.name] = field.constraints.enum;
886
- }
887
- return result;
888
- }, {});
889
- resource.constraints = constraints;
890
-
891
- resource.translations = {};
892
- resourcesLookup.set(resource.name, resource);
893
- });
894
-
895
- return resourcesLookup;
896
- }
897
-
898
- function buildKeyValueLookup (datapackagePar) {
899
- if (keyValueLookup.size > 0) {
900
- return keyValueLookup;
901
- }
902
-
903
- for (const collection in datapackagePar.ddfSchema) {
904
- datapackagePar.ddfSchema[ collection ].map(kvPair => {
905
- const key = createKeyString(kvPair.primaryKey);
906
- const resources = kvPair.resources.map(
907
- resourceName => resourcesLookup.get(resourceName)
908
- );
909
-
910
- if (keyValueLookup.has(key)) {
911
- keyValueLookup.get(key).set(kvPair.value, resources);
912
- } else {
913
- keyValueLookup.set(key, new Map([ [ kvPair.value, resources ] ]));
914
- }
915
- });
916
- }
917
-
918
- return keyValueLookup;
919
- }
920
-
921
- return {
922
- query,
923
- queryData,
924
- loadFile,
925
- getDatasetInfo
926
- };
927
- }
1
+ import includes from 'lodash-es/includes';
2
+ import isEmpty from 'lodash-es/isEmpty';
3
+ import stripBom from 'strip-bom';
4
+ import { getAppropriatePlugin } from './resource-selection-optimizer';
5
+ import { CSV_PARSING_ERROR, DDF_ERROR, DdfCsvError, FILE_READING_ERROR, JSON_PARSING_ERROR } from './ddfcsv-error';
6
+ import { getFilePath, isSchemaQuery, validateQueryDefinitions, validateQueryStructure } from '@vizabi/ddf-query-validator';
7
+
8
+ import * as Papa from 'papaparse';
9
+ import { utcParse } from 'd3-time-format';
10
+ import { IBaseReaderOptions, IDatapackage } from './interfaces';
11
+
12
+ const isValidNumeric = val => typeof val !== 'number' && !val ? false : true;
13
+
14
+ export function ddfCsvReader (logger?: any) {
15
+ const internalConcepts = [
16
+ { concept: 'concept', concept_type: 'string', domain: null },
17
+ { concept: 'concept_type', concept_type: 'string', domain: null }
18
+ ];
19
+
20
+ const operators = new Map([
21
+ /* logical operators */
22
+ [ '$and', (row, predicates) => predicates.every(p => applyFilterRow(row, p)) ],
23
+ [ '$or', (row, predicates) => predicates.some(p => applyFilterRow(row, p)) ],
24
+ [ '$not', (row, predicate) => !applyFilterRow(row, predicate) ],
25
+ [ '$nor', (row, predicates) => !predicates.some(p => applyFilterRow(row, p)) ],
26
+
27
+ /* equality operators */
28
+ [ '$eq', (rowValue, filterValue) => rowValue == filterValue ],
29
+ [ '$ne', (rowValue, filterValue) => rowValue != filterValue ],
30
+ [ '$gt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue > filterValue ],
31
+ [ '$gte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue >= filterValue ],
32
+ [ '$lt', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue < filterValue ],
33
+ [ '$lte', (rowValue, filterValue) => isValidNumeric(rowValue) && rowValue <= filterValue ],
34
+ [ '$in', (rowValue, filterValue) => filterValue.has(rowValue) ],
35
+ [ '$nin', (rowValue, filterValue) => !filterValue.has(rowValue) ],
36
+ ]);
37
+
38
+ const keyValueLookup = new Map<string, any>();
39
+ const resourcesLookup = new Map();
40
+
41
+ let optimalFilesSet = [];
42
+ let datapackage;
43
+ let datapackagePromise;
44
+ let datasetWithConstraints = false;
45
+
46
+ function getDatasetInfo(baseOptions: IBaseReaderOptions) : Promise<Object> {
47
+ return (datapackagePromise || loadDataPackage(baseOptions));
48
+ }
49
+
50
+ function loadDataPackage (baseOptions: IBaseReaderOptions): Promise<IDatapackage> {
51
+ const datapackagePath = getFilePath(baseOptions.basePath);
52
+ const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('loadDataPackage');
53
+
54
+ return new Promise((resolve, reject) => {
55
+ if (datapackage) {
56
+ return resolve(datapackage);
57
+ }
58
+
59
+ baseOptions.fileReader.readText(datapackagePath, (err, data) => {
60
+ if (err) {
61
+ error('file reading', err);
62
+ return reject(new DdfCsvError(FILE_READING_ERROR, err, datapackagePath));
63
+ }
64
+
65
+ try {
66
+ datapackage = JSON.parse(stripBom(data));
67
+ optimalFilesSet = [];
68
+ buildResourcesLookup(datapackage);
69
+ buildKeyValueLookup(datapackage);
70
+ } catch (parseErr) {
71
+ error('json file parsing', parseErr);
72
+ return reject(new DdfCsvError(JSON_PARSING_ERROR, parseErr.message, datapackagePath));
73
+ }
74
+
75
+ debug('datapackage content is ready');
76
+
77
+ resolve(datapackage);
78
+ });
79
+ });
80
+ }
81
+
82
+ async function loadConcepts (queryParam, options: IBaseReaderOptions): Promise<object> {
83
+ const { error } = options.diagnostic.prepareDiagnosticFor('loadConcepts');
84
+ // start off with internal concepts
85
+ setConceptsLookup(internalConcepts, options);
86
+ // query concepts
87
+ const conceptQuery = {
88
+ select: { key: [ 'concept' ], value: [ 'concept_type', 'domain' ] },
89
+ from: 'concepts'
90
+ };
91
+
92
+ let result;
93
+
94
+ // not using query() to circumvent the conceptPromise resolving
95
+ try {
96
+ const concepts = await queryData(conceptQuery, options);
97
+ buildConceptsLookup(concepts, options);
98
+ // with conceptsLookup built, we can parse other concept properties
99
+ // according to their concept_type
100
+ result = await reparseConcepts(options);
101
+ } catch (err) {
102
+ error('concepts processing', err);
103
+ throw err;
104
+ }
105
+ return result;
106
+ }
107
+
108
+ function buildConceptsLookup (concepts, options) {
109
+ const entitySetMembershipConcepts = concepts
110
+ .filter(concept => concept.concept_type === 'entity_set')
111
+ .map(concept => ({
112
+ concept: 'is--' + concept.concept,
113
+ concept_type: 'boolean',
114
+ domain: null
115
+ }));
116
+
117
+ concepts = concepts
118
+ .concat(entitySetMembershipConcepts)
119
+ .concat(internalConcepts);
120
+
121
+ setConceptsLookup(concepts, options);
122
+ }
123
+
124
+ /**
125
+ * Iterates resources for query and applies parsing according to concept_type
126
+ * of headers. Does not take into account join clause.
127
+ * Impure function as it parses data in-place.
128
+ * @return {[type]} [description]
129
+ */
130
+ function reparseConcepts ({ conceptsLookup }) {
131
+ const parsingFunctions = new Map<string, Function>([
132
+ [ 'boolean', (str) => str === 'true' || str === 'TRUE' ],
133
+ [ 'measure', (str) => parseFloat(str) ]
134
+ ]);
135
+
136
+ const resources = getResources([ 'concept' ]);
137
+
138
+ const resourceUpdates = [ ...resources ].map(resource => {
139
+ return resource.data.then(response => {
140
+
141
+ // first find out which resource concepts need parsing
142
+ const resourceConcepts = Object.keys(response.data[ 0 ]);
143
+ const parsingConcepts = new Map<string, Function>();
144
+
145
+ resourceConcepts.forEach(concept => {
146
+ const type = conceptsLookup.get(concept).concept_type;
147
+ const fn = parsingFunctions.get(type);
148
+
149
+ if (fn) {
150
+ parsingConcepts.set(concept, fn);
151
+ }
152
+ });
153
+
154
+ // then parse only those concepts
155
+ return response.data.forEach(row => {
156
+ for (const [ concept, parseFn ] of parsingConcepts) {
157
+ row[ concept ] = parseFn(row[ concept ]);
158
+ }
159
+ });
160
+
161
+ });
162
+ });
163
+
164
+ return Promise.all(resourceUpdates);
165
+ }
166
+
167
+ // can only take single-dimensional key
168
+ function setConceptsLookup (concepts, options) {
169
+ options.conceptsLookup.clear();
170
+ concepts.forEach(row => options.conceptsLookup.set(row.concept, row));
171
+ }
172
+
173
+ function preValidateQueryStructure(queryParam, baseOptions): boolean {
174
+ if (queryParam.from == "datapoints" && queryParam.select.value.length == 0) return true;
175
+ return false;
176
+ }
177
+
178
+ async function query (queryParam, _baseOptions: IBaseReaderOptions) {
179
+ const baseOptions = Object.assign({}, _baseOptions);
180
+ const { warning, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
181
+ let data;
182
+
183
+ if (preValidateQueryStructure(queryParam, baseOptions)) {
184
+ return Promise.resolve([]);
185
+ }
186
+
187
+ try {
188
+ await validateQueryStructure(queryParam, baseOptions);
189
+ baseOptions.datapackage = await (datapackagePromise || (datapackagePromise = loadDataPackage(baseOptions)));
190
+ baseOptions.resourcesLookup = resourcesLookup;
191
+ await loadConcepts(queryParam, baseOptions);
192
+ await validateQueryDefinitions(queryParam, baseOptions);
193
+
194
+ if (isSchemaQuery(queryParam)) {
195
+ data = await querySchema(queryParam, baseOptions);
196
+ } else {
197
+ const appropriatePlugin = datasetWithConstraints && getAppropriatePlugin(this, queryParam, baseOptions);
198
+
199
+ optimalFilesSet = [];
200
+ if (appropriatePlugin) {
201
+ const files = await appropriatePlugin.getRecommendedFilesSet();
202
+ optimalFilesSet = files;
203
+ queryParam.optimalFilesSet = [].concat(files, queryParam.optimalFilesSet);
204
+
205
+ warning('get custom optimal files list by a plugin', optimalFilesSet);
206
+ }
207
+
208
+ data = await queryData(queryParam, baseOptions);
209
+ }
210
+ } catch (err) {
211
+ error('general query error', err);
212
+ throw err;
213
+ }
214
+
215
+ return data;
216
+ }
217
+
218
+ function queryData (queryParam, _options: IBaseReaderOptions) {
219
+ const options = Object.assign({}, _options);
220
+ const { debug } = options.diagnostic.prepareDiagnosticFor('queryData');
221
+ const {
222
+ select: { key = [], value = [] },
223
+ from = '',
224
+ where = {},
225
+ join = {},
226
+ order_by = [],
227
+ language
228
+ } = queryParam;
229
+ const select = { key, value };
230
+
231
+ debug('start all data loading', queryParam);
232
+
233
+ const projection = new Set(select.key.concat(select.value));
234
+ const filterFields = getFilterFields(where).filter(field => from === 'entities' || !projection.has(field));
235
+ // load all relevant resources
236
+ const resourcesPromise = loadResources(select.key, [ ...select.value, ...filterFields ], language, options, queryParam);
237
+ // list of entities selected from a join clause, later insterted in where clause
238
+ const joinsPromise = getJoinFilters(join, queryParam, options);
239
+ // filter which ensures result only includes queried entity sets
240
+ const entitySetFilterPromise = getEntitySetFilter(select.key, queryParam, options);
241
+
242
+ return Promise.all([ resourcesPromise, entitySetFilterPromise, joinsPromise ])
243
+ .then(([ resourceResponses, entitySetFilter, joinFilters ]) => {
244
+ debug('finish all data loading', queryParam);
245
+ // create filter from where, join filters and entity set filters
246
+ const whereResolved = processWhere(where, joinFilters);
247
+ const filter = mergeFilters(entitySetFilter, whereResolved);
248
+
249
+ debug('dataTables processing', queryParam);
250
+ const dataTables = resourceResponses
251
+ // rename key-columns and remove irrelevant value-columns
252
+ .map(response => processResourceResponse(response, select, filterFields, options));
253
+
254
+ debug('queryResult processing', queryParam);
255
+ // join (reduce) data to one data table
256
+ const queryResult = joinData(select.key, 'overwrite', ...dataTables)
257
+ .filter(row => applyFilterRow(row, filter)) // apply filters (entity sets and where (including join))
258
+ .map(row => fillMissingValues(row, projection)) // fill any missing values with null values
259
+ .map(row => projectRow(row, projection)); // remove fields used only for filtering
260
+
261
+ debug('result ordering', queryParam);
262
+ orderData(queryResult, order_by);
263
+ debug('final result is ready', queryParam);
264
+
265
+ return parseTime(queryResult, options);
266
+ });
267
+ }
268
+
269
+ /**
270
+ * Parses time concept strings in result to Date objects
271
+ * @param result
272
+ * @param options
273
+ */
274
+ function parseTime(result, options: IBaseReaderOptions) {
275
+ const conceptsLookup = options.conceptsLookup;
276
+ const concepts = Object.keys(result[0] || {});
277
+ const timeConcepts = concepts.map(c => conceptsLookup.get(c) || {}).filter(co => co.concept_type == 'time');
278
+ timeConcepts.forEach(({ concept }) => {
279
+ const parse = getTimeParser(concept, options);
280
+ result.forEach(row => {
281
+ row[concept] = parse(row[concept]);
282
+ });
283
+ });
284
+ return result;
285
+ }
286
+
287
+ /**
288
+ * Time parsers for DDF built-in time concepts
289
+ * @param concept
290
+ */
291
+ function getTimeParser(concept, options: IBaseReaderOptions) {
292
+ const { error } = options.diagnostic.prepareDiagnosticFor('queryData');
293
+ const parsers = {
294
+ year: utcParse('%Y'),
295
+ month: utcParse('%Y-%m'),
296
+ day: utcParse('%Y%m%d'),
297
+ hour: utcParse('%Y%m%dt%H'),
298
+ minute: utcParse('%Y%m%dt%H%M'),
299
+ second: utcParse('%Y%m%dt%H%M%S'),
300
+ week: utcParse('%Yw%V'),
301
+ quarter: utcParse('%Yq%q')
302
+ };
303
+ function tryParse(str) {
304
+ for (const i in parsers) {
305
+ const dateObject = parsers[i](str);
306
+ if (dateObject) {
307
+ return dateObject;
308
+ }
309
+ }
310
+ error('Could not parse time string: ' + str);
311
+ return null;
312
+ }
313
+ if (concept == 'time') {
314
+ return tryParse;
315
+ }
316
+ if (!parsers[concept]) {
317
+ error('No time parser found for time concept: ' + concept);
318
+ return str => str;
319
+ }
320
+ return parsers[concept];
321
+ }
322
+
323
+ function orderData (data, orderBy = []) {
324
+ if (orderBy.length === 0) {
325
+ return;
326
+ }
327
+
328
+ // process ["geo"] or [{"geo": "asc"}] to [{ concept: "geo", direction: 1 }];
329
+ const orderNormalized = orderBy.map(orderPart => {
330
+ if (typeof orderPart === 'string') {
331
+ return { concept: orderPart, direction: 1 };
332
+ } else {
333
+ const concept = Object.keys(orderPart)[ 0 ];
334
+ const direction = (orderPart[ concept ] === 'asc' ? 1 : -1);
335
+
336
+ return { concept, direction };
337
+ }
338
+ });
339
+
340
+ // sort by one or more fields
341
+ const n = orderNormalized.length;
342
+
343
+ data.sort((a, b) => {
344
+ for (let i = 0; i < n; i++) {
345
+ const order = orderNormalized[ i ];
346
+
347
+ if (a[ order.concept ] < b[ order.concept ]) {
348
+ return -1 * order.direction;
349
+ } else if (a[ order.concept ] > b[ order.concept ]) {
350
+ return 1 * order.direction;
351
+ }
352
+ }
353
+
354
+ return 0;
355
+ });
356
+ }
357
+
358
+ /**
359
+ * Replaces `$join` placeholders with relevant `{ "$in": [...] }` operator.
360
+ * Replaces $in- and $nin-arrays with sets for faster filtering
361
+ * @param {Object} where Where clause possibly containing $join placeholders as field values.
362
+ * @param {Object} joinFilters Collection of lists of entity or time values,
363
+ * coming from other tables defined in query `join` clause.
364
+ * @return {Object} Where clause with $join placeholders replaced by valid filter statements
365
+ */
366
+ function processWhere (where, joinFilters) {
367
+ const result = {};
368
+
369
+ for (const field in where) {
370
+ const fieldValue = where[ field ];
371
+
372
+ if (includes([ '$and', '$or', '$nor' ], field)) {
373
+ result[ field ] = fieldValue.map(subFilter => processWhere(subFilter, joinFilters));
374
+ } else if (field === '$in' || field === '$nin') {
375
+ // prepare "$in" fields for optimized lookup
376
+ result[ field ] = new Set(fieldValue);
377
+ } else if (typeof joinFilters[ fieldValue ] !== 'undefined') {
378
+ // found a join!
379
+ // not assigning to result[field] because joinFilter can contain $and/$or statements in case of
380
+ // time concept (join-where is directly copied, not executed)
381
+ // otherwise could end up with where: { year: { $and: [{ ... }]}}, which is invalid
382
+ // (no boolean ops inside field objects)
383
+ // in case of entity join, joinFilters contains correct field
384
+ Object.assign(result, joinFilters[ fieldValue ]);
385
+ } else if (typeof fieldValue === 'object') {
386
+ // catches $not and fields with equality operator-objects
387
+ // { <field>: { "$lt": 1500 }}
388
+ result[ field ] = processWhere(fieldValue, joinFilters);
389
+ } else {
390
+ // catches rest, being all equality operators except for $in and $nin
391
+ // { "$lt": 1500 }
392
+ result[ field ] = fieldValue;
393
+ }
394
+ }
395
+
396
+ return result;
397
+ }
398
+
399
+ function patchFilterForOrClause(filter) {
400
+
401
+ function processFilter(where) {
402
+ const whereKeys = Object.keys(where);
403
+ for (const key of whereKeys) {
404
+ if (key == "$or") {
405
+ where[key] = where[key].reduce((res, value) => {
406
+ const valueKeys = Object.keys(value);
407
+ if (valueKeys.length > 1) {
408
+ for (const vKey of valueKeys) {
409
+ res.push({ [vKey]: value[vKey] });
410
+ }
411
+ } else {
412
+ res.push(value);
413
+ }
414
+ return res;
415
+ }, []);
416
+ }
417
+ }
418
+ return where;
419
+ }
420
+
421
+ return processFilter(filter);
422
+ }
423
+
424
+ function mergeFilters (...filters) {
425
+ return filters.reduce((a, b) => {
426
+ if (!isEmpty(b)) {
427
+ patchFilterForOrClause(b);
428
+ a.$and.push(b);
429
+ }
430
+
431
+ return a;
432
+ }, { $and: [] });
433
+ }
434
+
435
+ function querySchema (queryParam, baseOptions: IBaseReaderOptions) {
436
+ const { debug, error } = baseOptions.diagnostic.prepareDiagnosticFor('query');
437
+ const getSchemaFromCollection = collectionPar => {
438
+ debug(`get schema for collection ${collectionPar}`);
439
+ return baseOptions.datapackage.ddfSchema[ collectionPar ].map(
440
+ ({ primaryKey, value }) => ({ key: primaryKey, value })
441
+ );
442
+ };
443
+
444
+ const collection = queryParam.from.split('.')[ 0 ];
445
+
446
+ if (baseOptions.datapackage.ddfSchema[ collection ]) {
447
+ return getSchemaFromCollection(collection);
448
+ } else if (collection === '*') {
449
+ return Object.keys(baseOptions.datapackage.ddfSchema)
450
+ .map(getSchemaFromCollection)
451
+ .reduce((a, b) => a.concat(b));
452
+ } else {
453
+ const message = `No valid collection (${collection}) for schema query`;
454
+ error(message);
455
+ throwError(new DdfCsvError(DDF_ERROR, message));
456
+ }
457
+ }
458
+
459
+ function fillMissingValues (row, projection) {
460
+ for (const field of projection) {
461
+ if (typeof row[ field ] === 'undefined') {
462
+ row[ field ] = null;
463
+ }
464
+ }
465
+
466
+ return row;
467
+ }
468
+
469
+ function applyFilterRow (row, filter) {
470
+ // implicit $and in filter object handled by .every()
471
+ return Object.keys(filter).every(filterKey => {
472
+ const operator = operators.get(filterKey);
473
+
474
+ if (operator) {
475
+ return operator(row, filter[ filterKey ]);
476
+ // assuming values are primitives not Number/Boolean/String objects
477
+ } else if (typeof filter[ filterKey ] !== 'object') {
478
+ // { <field>: <value> } is shorthand for { <field>: { $eq: <value> }}
479
+ return operators.get('$eq')(row[ filterKey ], filter[ filterKey ]);
480
+ } else {
481
+ // filter[filterKey] is an object and will thus contain
482
+ // an equality operator (no deep objects (like in Mongo) supported)
483
+ return applyFilterRow(row[ filterKey ], filter[ filterKey ]);
484
+ }
485
+ });
486
+ }
487
+
488
+ function getJoinFilters (join, queryParam, options) {
489
+ return Promise.all(Object.keys(join).map(joinID => getJoinFilter(joinID, join[ joinID ], queryParam, options)))
490
+ .then(results => results.reduce(mergeObjects, {}));
491
+ }
492
+
493
+ function mergeObjects (a, b) {
494
+ return Object.assign(a, b);
495
+ }
496
+
497
+ function getJoinFilter (joinID, join, queryParam, options) {
498
+ // assumption: join.key is same as field in where clause
499
+ // - where: { geo: $geo }, join: { "$geo": { key: geo, where: { ... }}}
500
+ // - where: { year: $year }, join: { "$year": { key: year, where { ... }}}
501
+ if (options.conceptsLookup.get(join.key).concept_type === 'time') {
502
+ // time, no query needed as time values are not explicit in the dataSource
503
+ // assumption: there are no time-properties. E.g. data like <year>,population
504
+ return Promise.resolve({ [ joinID ]: join.where });
505
+ } else {
506
+ // entity concept
507
+ return queryData({
508
+ select: { key: [ join.key ] },
509
+ where: join.where,
510
+ from: options.conceptsLookup.has(join.key) ? 'entities' : 'concepts'
511
+ }, Object.assign({ joinID }, options))
512
+ .then(result => ({
513
+ [ joinID ]: {
514
+ [ join.key ]: {
515
+ $in: new Set(result.map(row => row[ join.key ]))
516
+ }
517
+ }
518
+ }));
519
+ }
520
+ }
521
+
522
+ function getFilterFields (filter) {
523
+ const fields = [];
524
+
525
+ for (const field in filter) {
526
+ // no support for deeper object structures (mongo style)
527
+ if (includes([ '$and', '$or', '$not', '$nor' ], field)) {
528
+ filter[ field ].map(getFilterFields).forEach(subFields => fields.push(...subFields));
529
+ } else {
530
+ fields.push(field);
531
+ }
532
+ }
533
+
534
+ return [...new Set(fields)];
535
+ }
536
+
537
+ /**
538
+ * Filter concepts by type
539
+ * @param {Array} conceptStrings Array of concept strings to filter out. Default all concepts.
540
+ * @param {Array} conceptTypes Array of concept types to filter out
541
+ * @return {Array} Array of concept strings only of given types
542
+ */
543
+ function filterConceptsByType (conceptTypes, queryKey, options) {
544
+ const conceptStrings = queryKey || Array.from(options.conceptsLookup.keys());
545
+ const concepts = [];
546
+
547
+ for (const conceptString of conceptStrings) {
548
+ const concept = options.conceptsLookup.get(conceptString);
549
+
550
+ if (includes(conceptTypes, concept.concept_type)) {
551
+ concepts.push(concept);
552
+ }
553
+ }
554
+
555
+ return concepts;
556
+ }
557
+
558
+ /**
559
+ * Find the aliases an entity concept can have
560
+ * @param {Array} conceptStrings An array of concept strings for which entity aliases
561
+ * are found if they're entity concepts
562
+ * @return {Map} Map with all aliases as keys and the entity concept as value
563
+ */
564
+ function getEntityConceptRenameMap (queryKey, resourceKey, options) {
565
+ const resourceKeySet = new Set(resourceKey);
566
+ const entityConceptTypes = [ 'entity_set', 'entity_domain' ];
567
+ const queryEntityConcepts = filterConceptsByType(entityConceptTypes, queryKey, options);
568
+
569
+ if (queryEntityConcepts.length === 0) {
570
+ return new Map();
571
+ }
572
+
573
+ const allEntityConcepts = filterConceptsByType(entityConceptTypes, null, options);
574
+
575
+ return queryEntityConcepts
576
+ .map(concept => allEntityConcepts
577
+ .filter(lookupConcept => {
578
+ if (concept.concept_type === 'entity_set') {
579
+ return resourceKeySet.has(lookupConcept.concept) &&
580
+ lookupConcept.concept !== concept.concept && // not the actual concept
581
+ (
582
+ lookupConcept.domain === concept.domain || // other entity sets in entity domain
583
+ lookupConcept.concept === concept.domain // entity domain of the entity set
584
+ );
585
+ } else {
586
+ // concept_type == "entity_domain"
587
+ return resourceKeySet.has(lookupConcept.concept) &&
588
+ lookupConcept.concept !== concept.concept && // not the actual concept
589
+ lookupConcept.domain === concept.concept; // entity sets of the entity domain
590
+ }
591
+ })
592
+ .reduce((map, aliasConcept) => map.set(aliasConcept.concept, concept.concept), new Map())
593
+ ).reduce((mapA, mapB) => new Map([ ...mapA, ...mapB ]), new Map());
594
+ }
595
+
596
+ /**
597
+ * Get a "$in" filter containing all entities for a entity concept.
598
+ * @param {Array} conceptStrings Array of concept strings for which entities should be found
599
+ * @return {Array} Array of filter objects for each entity concept
600
+ */
601
+ function getEntitySetFilter (conceptStrings, queryParam, options) {
602
+ const promises = filterConceptsByType([ 'entity_set' ], conceptStrings, options)
603
+ .map(concept => queryData({
604
+ select: { key: [ concept.domain ], value: [ 'is--' + concept.concept ] },
605
+ from: 'entities'
606
+ }, Object.assign({}, options))
607
+ .then(result => ({
608
+ [ concept.concept ]:
609
+ {
610
+ $in: new Set(
611
+ result
612
+ .filter(row => row[ 'is--' + concept.concept ])
613
+ .map(row => row[ concept.domain ])
614
+ )
615
+ }
616
+ }))
617
+ );
618
+
619
+ return Promise.all(promises).then(results => {
620
+ return results.reduce((a, b) => Object.assign(a, b), {});
621
+ });
622
+ }
623
+
624
+ /**
625
+ * Returns all resources for a certain key value pair or multiple values for one key
626
+ * @param {Array} key The key of the requested resources
627
+ * @param {Array/string} value The value or values found in the requested resources
628
+ * @return {Array} Array of resource objects
629
+ */
630
+ function getResources (key, value?) {
631
+ // value not given, load all resources for key
632
+ if (!value || value.length === 0 || key[0] === value) {
633
+ return new Set(
634
+ [ ...keyValueLookup
635
+ .get(createKeyString(key))
636
+ .values()
637
+ ].reduce((a, b) => a.concat(b))
638
+ );
639
+ }
640
+ // multiple values
641
+ if (Array.isArray(value)) {
642
+ return value
643
+ .map(singleValue => getResources(key, singleValue))
644
+ .reduce((resultSet, resources) => new Set([ ...resultSet, ...resources ]), new Set());
645
+ }
646
+ // one key, one value
647
+ let oneKeyOneValueResourcesArray = keyValueLookup
648
+ .get(createKeyString(key))
649
+ .get(value);
650
+
651
+ if (oneKeyOneValueResourcesArray) {
652
+ oneKeyOneValueResourcesArray = oneKeyOneValueResourcesArray
653
+ .filter(v => isEmpty(optimalFilesSet) || includes(optimalFilesSet, v.path));
654
+ }
655
+
656
+ return new Set(oneKeyOneValueResourcesArray);
657
+ }
658
+
659
+ function processResourceResponse (response, select, filterFields, options) {
660
+ const resourcePK = response.resource.schema.primaryKey;
661
+ // all fields used for select or filters
662
+ const resourceProjection = new Set([ ...resourcePK, ...select.value, ...filterFields ]);
663
+ // rename map to rename relevant entity headers to requested entity concepts
664
+ const renameMap = getEntityConceptRenameMap(select.key, resourcePK, options);
665
+
666
+ // Renaming must happen after projection to prevent ambiguity
667
+ // E.g. a resource with `<geo>,name,region` fields.
668
+ // Assume `region` is an entity set in domain `geo`.
669
+ // { select: { key: ["region"], value: ["name"] } } is queried
670
+ // If one did rename first the file would have headers `<region>,name,region`.
671
+ // This would be invalid and make unambiguous projection impossible.
672
+ // Thus we need to apply projection first with result: `<geo>,name`, then we can rename.
673
+ return response.data
674
+ .map(row => projectRow(row, resourceProjection)) // remove fields not used for select or filter
675
+ .map(row => renameHeaderRow(row, renameMap)); // rename header rows (must happen **after** projection)
676
+ }
677
+
678
+ function loadResources (key, value, language, options, queryParam) {
679
+ const { debug } = options.diagnostic.prepareDiagnosticFor('loadResource');
680
+ const resources = getResources(key, value);
681
+
682
+ debug('resources list by query', {queryParam, resources: [ ...resources ]});
683
+
684
+ return Promise.all([ ...resources ].map(
685
+ resource => loadResource(resource, language, options)
686
+ ));
687
+ }
688
+
689
+ function projectRow (row, projectionSet) {
690
+ const result = {};
691
+
692
+ for (const concept of Object.keys(row)) {
693
+ if (projectionSet.has(concept)) {
694
+ result[ concept ] = row[ concept ];
695
+ }
696
+ }
697
+
698
+ return result;
699
+ }
700
+
701
+ function renameHeaderRow (row, renameMap) {
702
+ const result = {};
703
+
704
+ for (const concept of Object.keys(row)) {
705
+ result[ renameMap.get(concept) || concept ] = row[ concept ];
706
+ }
707
+
708
+ return result;
709
+ }
710
+
711
+ function joinData (key, joinMode, ...data) {
712
+ if (data.length === 1) {
713
+ return data[ 0 ];
714
+ }
715
+
716
+ const canonicalKey = key.slice(0).sort();
717
+ const dataMap = data.reduce((result, dataPar) => {
718
+ dataPar.forEach(row => {
719
+ const keyString = canonicalKey.map(concept => row[ concept ]).join(',');
720
+
721
+ if (result.has(keyString)) {
722
+ const resultRow = result.get(keyString);
723
+
724
+ joinRow(resultRow, row, joinMode);
725
+ } else {
726
+ result.set(keyString, Object.assign({}, row));
727
+ }
728
+ });
729
+
730
+ return result;
731
+ }, new Map());
732
+ return [ ...dataMap.values() ];
733
+ }
734
+
735
+ function joinRow (resultRow, sourceRow, mode) {
736
+ switch (mode) {
737
+ case 'overwrite':
738
+ /* Simple alternative without empty value or error handling */
739
+ Object.assign(resultRow, sourceRow);
740
+ break;
741
+ case 'translation':
742
+ // Translation joining ignores empty values
743
+ // and allows different values for strings (= translations)
744
+ for (const concept in sourceRow) {
745
+ if (sourceRow[ concept ] !== '') {
746
+ resultRow[ concept ] = sourceRow[ concept ];
747
+ }
748
+ }
749
+ break;
750
+ case 'overwriteWithError':
751
+ /* Alternative for "overwrite" with JOIN error detection */
752
+ for (const concept in sourceRow) {
753
+ if (resultRow[ concept ] !== undefined && resultRow[ concept ] !== sourceRow[ concept ]) {
754
+ const sourceRowStr = JSON.stringify(sourceRow);
755
+ const resultRowStr = JSON.stringify(resultRow);
756
+ const errStr =
757
+ `JOIN Error: two resources have different data for "${concept}": ${sourceRowStr},${resultRowStr}`;
758
+
759
+ throwError(new DdfCsvError(DDF_ERROR, errStr));
760
+ } else {
761
+ resultRow[ concept ] = sourceRow[ concept ];
762
+ }
763
+ }
764
+ break;
765
+ }
766
+ }
767
+
768
+ function throwError (error: DdfCsvError) {
769
+ const currentLogger = logger || console;
770
+
771
+ currentLogger.error(error.message);
772
+
773
+ throw error;
774
+ }
775
+
776
+ function createKeyString (key, row = false) {
777
+ const canonicalKey = key.slice(0).sort();
778
+
779
+ if (!row) {
780
+ return canonicalKey.join(',');
781
+ } else {
782
+ return canonicalKey.map(concept => row[ concept ]).join(',');
783
+ }
784
+ }
785
+
786
+ function loadResource (resource, language, options) {
787
+ const { warning } = options.diagnostic.prepareDiagnosticFor('loadResource');
788
+ const filePromises = [];
789
+
790
+ if (typeof resource.data === 'undefined') {
791
+ resource.data = loadFile(resource.path, options);
792
+ }
793
+
794
+ filePromises.push(resource.data);
795
+
796
+ const languageValid = typeof language !== 'undefined' && includes(getLanguages(options), language);
797
+ const languageLoaded = typeof resource.translations[ language ] !== 'undefined';
798
+
799
+ if (languageValid) {
800
+ if (!languageLoaded) {
801
+ const translationPath = `lang/${language}/${resource.path}`;
802
+
803
+ // error loading translation file is expected when specific file is not translated
804
+ // more correct would be to only resolve file-not-found errors but current solution is sufficient
805
+ resource.translations[ language ] = loadFile(translationPath, options)
806
+ .catch(err => {
807
+ warning(`translation file ${translationPath}`, err);
808
+ return Promise.resolve({});
809
+ });
810
+ }
811
+
812
+ filePromises.push(resource.translations[ language ]);
813
+ }
814
+
815
+ return Promise.all(filePromises).then(fileResponses => {
816
+ // resp.data does not exist if translation file not found
817
+ const filesData = fileResponses.map(resp => resp.data || []);
818
+ const primaryKey = resource.schema.primaryKey;
819
+ const data = joinData(primaryKey, 'translation', ...filesData);
820
+
821
+ return { data, resource };
822
+ });
823
+
824
+ }
825
+
826
+ function getLanguages (options: {datapackage}): string[] {
827
+ if (!options.datapackage.translations) {
828
+ return [];
829
+ }
830
+
831
+ return options.datapackage.translations.map(lang => lang.id);
832
+ }
833
+
834
+ function loadFile (filePath, options) {
835
+ const { debug, error } = options.diagnostic.prepareDiagnosticFor('loadFile');
836
+ const fullFilePath = getFilePath(options.basePath, filePath);
837
+
838
+ debug(`start reading "${filePath}"`);
839
+
840
+ return new Promise((resolve, reject) => {
841
+ options.fileReader.readText(fullFilePath, (err, data) => {
842
+ if (err) {
843
+ error(`fail "${filePath}" reading`, err);
844
+ return reject(new DdfCsvError(FILE_READING_ERROR, err, fullFilePath));
845
+ }
846
+
847
+ Papa.parse(stripBom(data), {
848
+ header: true,
849
+ skipEmptyLines: true,
850
+ dynamicTyping: (headerName) => {
851
+ // skip parsing time/string concept types
852
+ const concept: any = options.conceptsLookup.get(headerName) || {};
853
+
854
+ return !includes(['time', 'string', 'entity_domain', 'entity_set'], concept.concept_type);
855
+ },
856
+ transform: value => {
857
+ return value === '' ? null : value
858
+ },
859
+ complete: result => {
860
+ debug(`finish reading "${filePath}"`);
861
+ resolve(result);
862
+ },
863
+ error: parseErr => {
864
+ error(`fail "${filePath}" parsing`, parseErr);
865
+ reject(new DdfCsvError(CSV_PARSING_ERROR, parseErr, filePath));
866
+ }
867
+ });
868
+ });
869
+ });
870
+ }
871
+
872
+ function buildResourcesLookup (datapackagePar) {
873
+ if (resourcesLookup.size > 0) {
874
+ return resourcesLookup;
875
+ }
876
+
877
+ datapackagePar.resources.forEach(resource => {
878
+ if (!Array.isArray(resource.schema.primaryKey)) {
879
+ resource.schema.primaryKey = [ resource.schema.primaryKey ];
880
+ }
881
+
882
+ const constraints = resource.schema.fields.reduce((result, field) => {
883
+ if (field.constraints?.enum) {
884
+ if (!datasetWithConstraints) datasetWithConstraints = true;
885
+ result[field.name] = field.constraints.enum;
886
+ }
887
+ return result;
888
+ }, {});
889
+ resource.constraints = constraints;
890
+
891
+ resource.translations = {};
892
+ resourcesLookup.set(resource.name, resource);
893
+ });
894
+
895
+ return resourcesLookup;
896
+ }
897
+
898
+ function buildKeyValueLookup (datapackagePar) {
899
+ if (keyValueLookup.size > 0) {
900
+ return keyValueLookup;
901
+ }
902
+
903
+ for (const collection in datapackagePar.ddfSchema) {
904
+ datapackagePar.ddfSchema[ collection ].map(kvPair => {
905
+ const key = createKeyString(kvPair.primaryKey);
906
+ const resources = kvPair.resources.map(
907
+ resourceName => resourcesLookup.get(resourceName)
908
+ );
909
+
910
+ if (keyValueLookup.has(key)) {
911
+ keyValueLookup.get(key).set(kvPair.value, resources);
912
+ } else {
913
+ keyValueLookup.set(key, new Map([ [ kvPair.value, resources ] ]));
914
+ }
915
+ });
916
+ }
917
+
918
+ return keyValueLookup;
919
+ }
920
+
921
+ return {
922
+ query,
923
+ queryData,
924
+ loadFile,
925
+ getDatasetInfo
926
+ };
927
+ }