rbql 0.25.0 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +26 -0
- package/README.md +2 -2
- package/cli_parser.js +0 -0
- package/csv_utils.js +37 -0
- package/index.js +0 -0
- package/package.json +1 -1
- package/rbql.js +21 -17
- package/rbql_csv.js +32 -47
package/.eslintrc.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"env": {
|
|
3
|
+
"browser": false,
|
|
4
|
+
"commonjs": true,
|
|
5
|
+
"es6": true,
|
|
6
|
+
"node": true
|
|
7
|
+
},
|
|
8
|
+
"parserOptions": {
|
|
9
|
+
"ecmaFeatures": {
|
|
10
|
+
"jsx": true
|
|
11
|
+
},
|
|
12
|
+
"sourceType": "module",
|
|
13
|
+
"ecmaVersion": 2018
|
|
14
|
+
},
|
|
15
|
+
"rules": {
|
|
16
|
+
"no-const-assign": "warn",
|
|
17
|
+
"no-this-before-super": "warn",
|
|
18
|
+
"no-undef": "warn",
|
|
19
|
+
"semi": [2, "always"],
|
|
20
|
+
"no-unreachable": "warn",
|
|
21
|
+
"no-unused-vars": "warn",
|
|
22
|
+
"constructor-super": "warn",
|
|
23
|
+
"no-trailing-spaces": "error",
|
|
24
|
+
"valid-typeof": "warn"
|
|
25
|
+
}
|
|
26
|
+
}
|
package/README.md
CHANGED
|
@@ -196,7 +196,7 @@ rbql.query_table(user_query, input_table, output_table, warnings).then(success_h
|
|
|
196
196
|
|
|
197
197
|
#### Example of query_csv() usage:
|
|
198
198
|
```
|
|
199
|
-
const
|
|
199
|
+
const rbql = require('rbql');
|
|
200
200
|
let user_query = 'SELECT a1, parseInt(a2) % 1000 WHERE a3 != "USA" LIMIT 5';
|
|
201
201
|
let error_handler = function(exception) {
|
|
202
202
|
console.log('Error: ' + String(exception));
|
|
@@ -207,7 +207,7 @@ let success_handler = function() {
|
|
|
207
207
|
console.log('warnings: ' + JSON.stringify(warnings));
|
|
208
208
|
console.log('output table: output.csv');
|
|
209
209
|
}
|
|
210
|
-
|
|
210
|
+
rbql.query_csv(user_query, 'input.csv', ',', 'quoted', 'output.csv', ',', 'quoted', 'utf-8', warnings).then(success_handler).catch(error_handler);
|
|
211
211
|
```
|
|
212
212
|
|
|
213
213
|
|
package/cli_parser.js
CHANGED
|
File without changes
|
package/csv_utils.js
CHANGED
|
@@ -39,6 +39,7 @@ function extract_next_field(src, dlm, preserve_quotes_and_whitespaces, allow_ext
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
function split_quoted_str(src, dlm, preserve_quotes_and_whitespaces=false) {
|
|
42
|
+
// This function is newline-agnostic i.e. it can also split records with multiline fields.
|
|
42
43
|
if (src.indexOf('"') == -1) // Optimization for most common case
|
|
43
44
|
return [src.split(dlm), false];
|
|
44
45
|
var result = [];
|
|
@@ -116,6 +117,41 @@ function smart_split(src, dlm, policy, preserve_quotes_and_whitespaces) {
|
|
|
116
117
|
}
|
|
117
118
|
|
|
118
119
|
|
|
120
|
+
/**
 * Accumulates physical text lines until they form one complete logical record
 * (a record may span several lines when a double-quoted field contains newlines).
 *
 * Usage protocol: call add_line() for each line; once `has_full_record` or
 * `has_comment_line` becomes true, consume the result and call reset() before
 * adding further lines.
 */
class MultilineRecordAggregator {
    /**
     * @param {string} comment_prefix - Lines starting with this prefix are treated
     *     as comments; a falsy value disables comment detection.
     */
    constructor(comment_prefix) {
        this.comment_prefix = comment_prefix;
        this.reset();
    }

    /**
     * Feed the next physical line into the aggregator.
     *
     * @param {string} line_text - The line, without its line separator.
     * @returns {boolean} true when the buffered lines now form a full record,
     *     false otherwise (including when the line was recognized as a comment).
     * @throws {Error} If called while a full record or a comment line is still pending.
     */
    add_line(line_text) {
        if (this.has_full_record || this.has_comment_line) {
            throw new Error('Invalid usage - record aggregator must be reset before adding new lines');
        }
        // A comment can only start a record: inside a multiline field the prefix is ordinary data.
        if (this.comment_prefix && this.rfc_line_buffer.length === 0 && line_text.startsWith(this.comment_prefix)) {
            this.has_comment_line = true;
            return false;
        }
        // match() returns null when there are no quotes, so compare explicitly to keep a real boolean.
        const match_list = line_text.match(/"/g);
        const has_unbalanced_double_quote = match_list !== null && match_list.length % 2 === 1;
        this.rfc_line_buffer.push(line_text);
        const num_lines = this.rfc_line_buffer.length;
        // A record is complete either as a single balanced line, or when an odd-quoted line
        // closes a record that was opened by an earlier odd-quoted line.
        this.has_full_record = has_unbalanced_double_quote ? num_lines > 1 : num_lines === 1;
        return this.has_full_record;
    }

    /**
     * @returns {boolean} true when at least one line is buffered but the record is not complete yet.
     */
    is_inside_multiline_record() {
        return this.rfc_line_buffer.length > 0 && !this.has_full_record;
    }

    /**
     * @param {string} line_separator - Separator inserted between the buffered lines.
     * @returns {string} The buffered lines joined into one string.
     */
    get_full_line(line_separator) {
        return this.rfc_line_buffer.join(line_separator);
    }

    /**
     * @returns {number} Number of physical lines currently buffered.
     */
    get_num_lines_in_record() {
        return this.rfc_line_buffer.length;
    }

    /**
     * Drop all buffered lines and clear both status flags.
     */
    reset() {
        this.rfc_line_buffer = [];
        this.has_full_record = false;
        this.has_comment_line = false;
    }
}
|
|
154
|
+
|
|
119
155
|
|
|
120
156
|
module.exports.split_quoted_str = split_quoted_str;
|
|
121
157
|
module.exports.split_whitespace_separated_str = split_whitespace_separated_str;
|
|
@@ -125,3 +161,4 @@ module.exports.rfc_quote_field = rfc_quote_field;
|
|
|
125
161
|
module.exports.unquote_field = unquote_field;
|
|
126
162
|
module.exports.unquote_fields = unquote_fields;
|
|
127
163
|
module.exports.split_lines = split_lines;
|
|
164
|
+
module.exports.MultilineRecordAggregator = MultilineRecordAggregator;
|
package/index.js
CHANGED
|
File without changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rbql",
|
|
3
|
-
"version": "0.25.
|
|
3
|
+
"version": "0.25.1",
|
|
4
4
|
"description": "Rainbow Query Language",
|
|
5
5
|
"keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
|
|
6
6
|
"scripts": {
|
package/rbql.js
CHANGED
|
@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
|
|
73
|
-
const RBQL_VERSION = '0.25.0';
|
|
73
|
+
const RBQL_VERSION = '0.25.1';
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
function check_if_brackets_match(opening_bracket, closing_bracket) {
|
|
@@ -1608,20 +1608,20 @@ function select_output_header(input_header, join_header, query_column_infos) {
|
|
|
1608
1608
|
}
|
|
1609
1609
|
|
|
1610
1610
|
|
|
1611
|
-
function
|
|
1612
|
-
let
|
|
1613
|
-
|
|
1614
|
-
for (let i = 0; i < keys.length; i++) {
|
|
1615
|
-
let key = keys[i];
|
|
1616
|
-
let record_id = inconsistent_records_info[key];
|
|
1617
|
-
entries.push([record_id, key]);
|
|
1618
|
-
}
|
|
1619
|
-
entries.sort(function(a, b) { return a[0] - b[0]; });
|
|
1611
|
+
/**
 * Pick the two entries with the smallest record numbers from a
 * "number of fields -> record number" mapping.
 *
 * @param {Map<number, number>} inconsistent_records_info - Keys are field counts,
 *     values are the record numbers stored for them; must hold at least two entries.
 * @returns {Array<number>} [record_num_1, num_fields_1, record_num_2, num_fields_2],
 *     ordered by ascending record number.
 */
function sample_first_two_inconsistent_records(inconsistent_records_info) {
    // Each entry is a [num_fields, record_num] pair; order them by record number.
    const by_record_num = [...inconsistent_records_info.entries()].sort((lhs, rhs) => lhs[1] - rhs[1]);
    assert(by_record_num.length > 1);
    const earliest = by_record_num[0];
    const runner_up = by_record_num[1];
    return [earliest[1], earliest[0], runner_up[1], runner_up[0]];
}
|
|
1619
|
+
|
|
1620
|
+
|
|
1621
|
+
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
|
|
1622
|
+
let [record_num_1, num_fields_1, record_num_2, num_fields_2] = sample_first_two_inconsistent_records(inconsistent_records_info);
|
|
1623
1623
|
let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
|
|
1624
|
-
warn_msg += `e.g. record ${
|
|
1624
|
+
warn_msg += `e.g. record ${record_num_1} -> ${num_fields_1} fields, record ${record_num_2} -> ${num_fields_2} fields`;
|
|
1625
1625
|
return warn_msg;
|
|
1626
1626
|
}
|
|
1627
1627
|
|
|
@@ -1691,7 +1691,7 @@ class TableIterator extends RBQLInputIterator {
|
|
|
1691
1691
|
this.normalize_column_names = normalize_column_names;
|
|
1692
1692
|
this.variable_prefix = variable_prefix;
|
|
1693
1693
|
this.nr = 0;
|
|
1694
|
-
this.fields_info = new
|
|
1694
|
+
this.fields_info = new Map();
|
|
1695
1695
|
this.stopped = false;
|
|
1696
1696
|
}
|
|
1697
1697
|
|
|
@@ -1727,13 +1727,13 @@ class TableIterator extends RBQLInputIterator {
|
|
|
1727
1727
|
let record = this.table[this.nr];
|
|
1728
1728
|
this.nr += 1;
|
|
1729
1729
|
let num_fields = record.length;
|
|
1730
|
-
if (!this.fields_info.
|
|
1731
|
-
this.fields_info
|
|
1730
|
+
if (!this.fields_info.has(num_fields))
|
|
1731
|
+
this.fields_info.set(num_fields, this.nr);
|
|
1732
1732
|
return record;
|
|
1733
1733
|
};
|
|
1734
1734
|
|
|
1735
1735
|
get_warnings() {
|
|
1736
|
-
if (
|
|
1736
|
+
if (this.fields_info.size > 1)
|
|
1737
1737
|
return [make_inconsistent_num_fields_warning('input', this.fields_info)];
|
|
1738
1738
|
return [];
|
|
1739
1739
|
};
|
|
@@ -1840,6 +1840,9 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1840
1840
|
if (rb_actions.hasOwnProperty(SELECT)) {
|
|
1841
1841
|
query_context.top_count = find_top(rb_actions);
|
|
1842
1842
|
if (rb_actions.hasOwnProperty(EXCEPT)) {
|
|
1843
|
+
if (rb_actions.hasOwnProperty(JOIN)) {
|
|
1844
|
+
throw new RbqlParsingError('EXCEPT and JOIN are not allowed in the same query');
|
|
1845
|
+
}
|
|
1843
1846
|
let [output_header, select_expression] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_header);
|
|
1844
1847
|
query_context.select_expression = select_expression;
|
|
1845
1848
|
query_context.writer.set_header(output_header);
|
|
@@ -1947,5 +1950,6 @@ exports.adhoc_parse_select_expression_to_column_infos = adhoc_parse_select_expre
|
|
|
1947
1950
|
exports.replace_star_count = replace_star_count;
|
|
1948
1951
|
exports.replace_star_vars_for_header_parsing = replace_star_vars_for_header_parsing;
|
|
1949
1952
|
exports.select_output_header = select_output_header;
|
|
1953
|
+
exports.sample_first_two_inconsistent_records = sample_first_two_inconsistent_records;
|
|
1950
1954
|
|
|
1951
1955
|
}(typeof exports === 'undefined' ? this.rbql = {} : exports));
|
package/rbql_csv.js
CHANGED
|
@@ -62,19 +62,9 @@ function remove_utf8_bom(line, assumed_source_encoding) {
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
|
|
65
|
-
let
|
|
66
|
-
let entries = [];
|
|
67
|
-
for (let i = 0; i < keys.length; i++) {
|
|
68
|
-
let key = keys[i];
|
|
69
|
-
let record_id = inconsistent_records_info[key];
|
|
70
|
-
entries.push([record_id, key]);
|
|
71
|
-
}
|
|
72
|
-
entries.sort(function(a, b) { return a[0] - b[0]; });
|
|
73
|
-
assert(entries.length > 1);
|
|
74
|
-
let [record_1, num_fields_1] = entries[0];
|
|
75
|
-
let [record_2, num_fields_2] = entries[1];
|
|
65
|
+
let [record_num_1, num_fields_1, record_num_2, num_fields_2] = rbql.sample_first_two_inconsistent_records(inconsistent_records_info);
|
|
76
66
|
let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
|
|
77
|
-
warn_msg += `e.g. record ${
|
|
67
|
+
warn_msg += `e.g. record ${record_num_1} -> ${num_fields_1} fields, record ${record_num_2} -> ${num_fields_2} fields`;
|
|
78
68
|
return warn_msg;
|
|
79
69
|
}
|
|
80
70
|
|
|
@@ -182,7 +172,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
182
172
|
|
|
183
173
|
this.table_name = table_name;
|
|
184
174
|
this.variable_prefix = variable_prefix;
|
|
185
|
-
this.comment_prefix =
|
|
175
|
+
this.comment_prefix = comment_prefix;
|
|
186
176
|
|
|
187
177
|
this.decoder = null;
|
|
188
178
|
if (encoding == 'utf-8' && this.csv_path === null) {
|
|
@@ -204,11 +194,11 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
204
194
|
this.utf8_bom_removed = false; // BOM doesn't get automatically removed by the decoder when utf-8 file is treated as latin-1
|
|
205
195
|
this.first_defective_line = null;
|
|
206
196
|
|
|
207
|
-
this.fields_info = new
|
|
197
|
+
this.fields_info = new Map();
|
|
208
198
|
this.NR = 0; // Record number
|
|
209
199
|
this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)
|
|
210
200
|
|
|
211
|
-
this.
|
|
201
|
+
this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix);
|
|
212
202
|
|
|
213
203
|
this.partially_decoded_line = '';
|
|
214
204
|
this.partially_decoded_line_ends_with_cr = false;
|
|
@@ -222,7 +212,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
222
212
|
|
|
223
213
|
this.produced_records_queue = new RecordQueue();
|
|
224
214
|
|
|
225
|
-
this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.
|
|
215
|
+
this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line_simple;
|
|
226
216
|
}
|
|
227
217
|
|
|
228
218
|
|
|
@@ -350,9 +340,14 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
350
340
|
};
|
|
351
341
|
|
|
352
342
|
|
|
353
|
-
|
|
354
|
-
if (this.comment_prefix
|
|
343
|
+
process_record_line_simple(line) {
|
|
344
|
+
if (this.comment_prefix && line.startsWith(this.comment_prefix))
|
|
355
345
|
return; // Just skip the line
|
|
346
|
+
this.process_record_line(line);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
process_record_line(line) {
|
|
356
351
|
this.NR += 1;
|
|
357
352
|
var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
|
|
358
353
|
if (warning) {
|
|
@@ -363,29 +358,20 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
363
358
|
}
|
|
364
359
|
}
|
|
365
360
|
let num_fields = record.length;
|
|
366
|
-
if (!this.fields_info.
|
|
367
|
-
this.fields_info
|
|
361
|
+
if (!this.fields_info.has(num_fields))
|
|
362
|
+
this.fields_info.set(num_fields, this.NR);
|
|
368
363
|
this.produced_records_queue.enqueue(record);
|
|
369
364
|
this.try_resolve_next_record();
|
|
370
365
|
};
|
|
371
366
|
|
|
372
367
|
|
|
373
368
|
process_partial_rfc_record_line(line) {
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
this.
|
|
380
|
-
} else if (this.rfc_line_buffer.length == 0 && has_unbalanced_double_quote) {
|
|
381
|
-
this.rfc_line_buffer.push(line);
|
|
382
|
-
} else if (!has_unbalanced_double_quote) {
|
|
383
|
-
this.rfc_line_buffer.push(line);
|
|
384
|
-
} else {
|
|
385
|
-
this.rfc_line_buffer.push(line);
|
|
386
|
-
let multiline_row = this.rfc_line_buffer.join('\n');
|
|
387
|
-
this.rfc_line_buffer = [];
|
|
388
|
-
this.process_record_line(multiline_row);
|
|
369
|
+
this.line_aggregator.add_line(line);
|
|
370
|
+
if (this.line_aggregator.has_comment_line) {
|
|
371
|
+
this.line_aggregator.reset();
|
|
372
|
+
} else if (this.line_aggregator.has_full_record) {
|
|
373
|
+
this.process_record_line(this.line_aggregator.get_full_line('\n'));
|
|
374
|
+
this.line_aggregator.reset();
|
|
389
375
|
}
|
|
390
376
|
};
|
|
391
377
|
|
|
@@ -432,13 +418,13 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
432
418
|
};
|
|
433
419
|
|
|
434
420
|
|
|
435
|
-
process_data_bulk(
|
|
436
|
-
let decoded_string =
|
|
421
|
+
process_data_bulk(data_blob) {
|
|
422
|
+
let decoded_string = data_blob.toString(this.encoding);
|
|
437
423
|
if (this.encoding == 'utf-8') {
|
|
438
424
|
// Using hacky comparison method from here: https://stackoverflow.com/a/32279283/2898283
|
|
439
425
|
// TODO get rid of this once TextDecoder is really fixed or when alternative method of reliable decoding appears
|
|
440
426
|
let control_buffer = Buffer.from(decoded_string, 'utf-8');
|
|
441
|
-
if (Buffer.compare(
|
|
427
|
+
if (Buffer.compare(data_blob, control_buffer) != 0) {
|
|
442
428
|
this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
|
|
443
429
|
return;
|
|
444
430
|
}
|
|
@@ -449,8 +435,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
449
435
|
for (let i = 0; i < lines.length; i++) {
|
|
450
436
|
this.process_line(lines[i]);
|
|
451
437
|
}
|
|
452
|
-
if (this.
|
|
453
|
-
this.process_record_line(this.
|
|
438
|
+
if (this.line_aggregator.is_inside_multiline_record()) {
|
|
439
|
+
this.process_record_line(this.line_aggregator.get_full_line('\n'));
|
|
454
440
|
}
|
|
455
441
|
this.input_exhausted = true;
|
|
456
442
|
this.try_resolve_next_record(); // Should be a NOOP here?
|
|
@@ -464,8 +450,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
464
450
|
this.partially_decoded_line = '';
|
|
465
451
|
this.process_line(last_line);
|
|
466
452
|
}
|
|
467
|
-
if (this.
|
|
468
|
-
this.process_record_line(this.
|
|
453
|
+
if (this.line_aggregator.is_inside_multiline_record()) {
|
|
454
|
+
this.process_record_line(this.line_aggregator.get_full_line('\n'));
|
|
469
455
|
}
|
|
470
456
|
this.try_resolve_next_record();
|
|
471
457
|
};
|
|
@@ -487,11 +473,11 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
487
473
|
} else {
|
|
488
474
|
let parent_iterator = this;
|
|
489
475
|
return new Promise(function(resolve, reject) {
|
|
490
|
-
fs.readFile(parent_iterator.csv_path, (err,
|
|
476
|
+
fs.readFile(parent_iterator.csv_path, (err, data_blob) => {
|
|
491
477
|
if (err) {
|
|
492
478
|
reject(err);
|
|
493
479
|
} else {
|
|
494
|
-
parent_iterator.process_data_bulk(
|
|
480
|
+
parent_iterator.process_data_bulk(data_blob);
|
|
495
481
|
resolve();
|
|
496
482
|
}
|
|
497
483
|
});
|
|
@@ -506,8 +492,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
506
492
|
result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
|
|
507
493
|
if (this.utf8_bom_removed)
|
|
508
494
|
result.push(`UTF-8 Byte Order Mark (BOM) was found and skipped in ${this.table_name} table`);
|
|
509
|
-
if (
|
|
510
|
-
result.push(make_inconsistent_num_fields_warning(
|
|
495
|
+
if (this.fields_info.size > 1)
|
|
496
|
+
result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
|
|
511
497
|
return result;
|
|
512
498
|
};
|
|
513
499
|
}
|
|
@@ -710,7 +696,6 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
710
696
|
input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
|
|
711
697
|
}
|
|
712
698
|
let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
|
|
713
|
-
// FIXME add on(error) handler to avoid async errors, see https://github.com/nodejs/node-v0.x-archive/issues/406
|
|
714
699
|
if (input_delim == '"' && input_policy == 'quoted')
|
|
715
700
|
throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
|
|
716
701
|
if (csv_encoding == 'latin-1')
|