rbql 0.25.0 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.json ADDED
@@ -0,0 +1,26 @@
1
+ {
2
+ "env": {
3
+ "browser": false,
4
+ "commonjs": true,
5
+ "es6": true,
6
+ "node": true
7
+ },
8
+ "parserOptions": {
9
+ "ecmaFeatures": {
10
+ "jsx": true
11
+ },
12
+ "sourceType": "module",
13
+ "ecmaVersion": 2018
14
+ },
15
+ "rules": {
16
+ "no-const-assign": "warn",
17
+ "no-this-before-super": "warn",
18
+ "no-undef": "warn",
19
+ "semi": [2, "always"],
20
+ "no-unreachable": "warn",
21
+ "no-unused-vars": "warn",
22
+ "constructor-super": "warn",
23
+ "no-trailing-spaces": "error",
24
+ "valid-typeof": "warn"
25
+ }
26
+ }
package/README.md CHANGED
@@ -196,7 +196,7 @@ rbql.query_table(user_query, input_table, output_table, warnings).then(success_h
196
196
 
197
197
  #### Example of query_csv() usage:
198
198
  ```
199
- const rbql_csv = require('rbql_csv');
199
+ const rbql = require('rbql');
200
200
  let user_query = 'SELECT a1, parseInt(a2) % 1000 WHERE a3 != "USA" LIMIT 5';
201
201
  let error_handler = function(exception) {
202
202
  console.log('Error: ' + String(exception));
@@ -207,7 +207,7 @@ let success_handler = function() {
207
207
  console.log('warnings: ' + JSON.stringify(warnings));
208
208
  console.log('output table: output.csv');
209
209
  }
210
- rbql_csv.query_csv(user_query, 'input.csv', ',', 'quoted', 'output.csv', ',', 'quoted', 'utf-8', warnings).then(success_handler).catch(error_handler);
210
+ rbql.query_csv(user_query, 'input.csv', ',', 'quoted', 'output.csv', ',', 'quoted', 'utf-8', warnings).then(success_handler).catch(error_handler);
211
211
  ```
212
212
 
213
213
 
package/cli_parser.js CHANGED
File without changes
package/csv_utils.js CHANGED
@@ -39,6 +39,7 @@ function extract_next_field(src, dlm, preserve_quotes_and_whitespaces, allow_ext
39
39
 
40
40
 
41
41
  function split_quoted_str(src, dlm, preserve_quotes_and_whitespaces=false) {
42
+ // This function is newline-agnostic i.e. it can also split records with multiline fields.
42
43
  if (src.indexOf('"') == -1) // Optimization for most common case
43
44
  return [src.split(dlm), false];
44
45
  var result = [];
@@ -116,6 +117,41 @@ function smart_split(src, dlm, policy, preserve_quotes_and_whitespaces) {
116
117
  }
117
118
 
118
119
 
120
+ class MultilineRecordAggregator {
121
+ constructor(comment_prefix) {
122
+ this.comment_prefix = comment_prefix;
123
+ this.reset();
124
+ }
125
+ add_line(line_text) {
126
+ if (this.has_full_record || this.has_comment_line) {
127
+ throw new Error('Invalid usage - record aggregator must be reset before adding new lines');
128
+ }
129
+ if (this.comment_prefix && this.rfc_line_buffer.length == 0 && line_text.startsWith(this.comment_prefix)) {
130
+ this.has_comment_line = true;
131
+ return false;
132
+ }
133
+ let match_list = line_text.match(/"/g);
134
+ let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
135
+ this.rfc_line_buffer.push(line_text);
136
+ this.has_full_record = (!has_unbalanced_double_quote && this.rfc_line_buffer.length == 1) || (has_unbalanced_double_quote && this.rfc_line_buffer.length > 1);
137
+ return this.has_full_record;
138
+ }
139
+ is_inside_multiline_record() {
140
+ return this.rfc_line_buffer.length && !this.has_full_record;
141
+ }
142
+ get_full_line(line_separator) {
143
+ return this.rfc_line_buffer.join(line_separator);
144
+ }
145
+ get_num_lines_in_record() {
146
+ return this.rfc_line_buffer.length;
147
+ }
148
+ reset() {
149
+ this.rfc_line_buffer = [];
150
+ this.has_full_record = false;
151
+ this.has_comment_line = false;
152
+ }
153
+ }
154
+
119
155
 
120
156
  module.exports.split_quoted_str = split_quoted_str;
121
157
  module.exports.split_whitespace_separated_str = split_whitespace_separated_str;
@@ -125,3 +161,4 @@ module.exports.rfc_quote_field = rfc_quote_field;
125
161
  module.exports.unquote_field = unquote_field;
126
162
  module.exports.unquote_fields = unquote_fields;
127
163
  module.exports.split_lines = split_lines;
164
+ module.exports.MultilineRecordAggregator = MultilineRecordAggregator;
package/index.js CHANGED
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rbql",
3
- "version": "0.25.0",
3
+ "version": "0.25.1",
4
4
  "description": "Rainbow Query Language",
5
5
  "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
6
6
  "scripts": {
package/rbql.js CHANGED
@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
70
70
 
71
71
 
72
72
  const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
73
- const RBQL_VERSION = '0.25.0';
73
+ const RBQL_VERSION = '0.25.1';
74
74
 
75
75
 
76
76
  function check_if_brackets_match(opening_bracket, closing_bracket) {
@@ -1608,20 +1608,20 @@ function select_output_header(input_header, join_header, query_column_infos) {
1608
1608
  }
1609
1609
 
1610
1610
 
1611
- function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
1612
- let keys = Object.keys(inconsistent_records_info);
1613
- let entries = [];
1614
- for (let i = 0; i < keys.length; i++) {
1615
- let key = keys[i];
1616
- let record_id = inconsistent_records_info[key];
1617
- entries.push([record_id, key]);
1618
- }
1619
- entries.sort(function(a, b) { return a[0] - b[0]; });
1611
+ function sample_first_two_inconsistent_records(inconsistent_records_info) {
1612
+ let entries = Array.from(inconsistent_records_info.entries());
1613
+ entries.sort(function(a, b) { return a[1] - b[1]; });
1620
1614
  assert(entries.length > 1);
1621
- let [record_1, num_fields_1] = entries[0];
1622
- let [record_2, num_fields_2] = entries[1];
1615
+ let [num_fields_1, record_num_1] = entries[0];
1616
+ let [num_fields_2, record_num_2] = entries[1];
1617
+ return [record_num_1, num_fields_1, record_num_2, num_fields_2];
1618
+ }
1619
+
1620
+
1621
+ function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
1622
+ let [record_num_1, num_fields_1, record_num_2, num_fields_2] = sample_first_two_inconsistent_records(inconsistent_records_info);
1623
1623
  let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
1624
- warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
1624
+ warn_msg += `e.g. record ${record_num_1} -> ${num_fields_1} fields, record ${record_num_2} -> ${num_fields_2} fields`;
1625
1625
  return warn_msg;
1626
1626
  }
1627
1627
 
@@ -1691,7 +1691,7 @@ class TableIterator extends RBQLInputIterator {
1691
1691
  this.normalize_column_names = normalize_column_names;
1692
1692
  this.variable_prefix = variable_prefix;
1693
1693
  this.nr = 0;
1694
- this.fields_info = new Object();
1694
+ this.fields_info = new Map();
1695
1695
  this.stopped = false;
1696
1696
  }
1697
1697
 
@@ -1727,13 +1727,13 @@ class TableIterator extends RBQLInputIterator {
1727
1727
  let record = this.table[this.nr];
1728
1728
  this.nr += 1;
1729
1729
  let num_fields = record.length;
1730
- if (!this.fields_info.hasOwnProperty(num_fields))
1731
- this.fields_info[num_fields] = this.nr;
1730
+ if (!this.fields_info.has(num_fields))
1731
+ this.fields_info.set(num_fields, this.nr);
1732
1732
  return record;
1733
1733
  };
1734
1734
 
1735
1735
  get_warnings() {
1736
- if (Object.keys(this.fields_info).length > 1)
1736
+ if (this.fields_info.size > 1)
1737
1737
  return [make_inconsistent_num_fields_warning('input', this.fields_info)];
1738
1738
  return [];
1739
1739
  };
@@ -1840,6 +1840,9 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
1840
1840
  if (rb_actions.hasOwnProperty(SELECT)) {
1841
1841
  query_context.top_count = find_top(rb_actions);
1842
1842
  if (rb_actions.hasOwnProperty(EXCEPT)) {
1843
+ if (rb_actions.hasOwnProperty(JOIN)) {
1844
+ throw new RbqlParsingError('EXCEPT and JOIN are not allowed in the same query');
1845
+ }
1843
1846
  let [output_header, select_expression] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_header);
1844
1847
  query_context.select_expression = select_expression;
1845
1848
  query_context.writer.set_header(output_header);
@@ -1947,5 +1950,6 @@ exports.adhoc_parse_select_expression_to_column_infos = adhoc_parse_select_expre
1947
1950
  exports.replace_star_count = replace_star_count;
1948
1951
  exports.replace_star_vars_for_header_parsing = replace_star_vars_for_header_parsing;
1949
1952
  exports.select_output_header = select_output_header;
1953
+ exports.sample_first_two_inconsistent_records = sample_first_two_inconsistent_records;
1950
1954
 
1951
1955
  }(typeof exports === 'undefined' ? this.rbql = {} : exports));
package/rbql_csv.js CHANGED
@@ -62,19 +62,9 @@ function remove_utf8_bom(line, assumed_source_encoding) {
62
62
 
63
63
 
64
64
  function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
65
- let keys = Object.keys(inconsistent_records_info);
66
- let entries = [];
67
- for (let i = 0; i < keys.length; i++) {
68
- let key = keys[i];
69
- let record_id = inconsistent_records_info[key];
70
- entries.push([record_id, key]);
71
- }
72
- entries.sort(function(a, b) { return a[0] - b[0]; });
73
- assert(entries.length > 1);
74
- let [record_1, num_fields_1] = entries[0];
75
- let [record_2, num_fields_2] = entries[1];
65
+ let [record_num_1, num_fields_1, record_num_2, num_fields_2] = rbql.sample_first_two_inconsistent_records(inconsistent_records_info);
76
66
  let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
77
- warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
67
+ warn_msg += `e.g. record ${record_num_1} -> ${num_fields_1} fields, record ${record_num_2} -> ${num_fields_2} fields`;
78
68
  return warn_msg;
79
69
  }
80
70
 
@@ -182,7 +172,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
182
172
 
183
173
  this.table_name = table_name;
184
174
  this.variable_prefix = variable_prefix;
185
- this.comment_prefix = (comment_prefix !== null && comment_prefix.length) ? comment_prefix : null;
175
+ this.comment_prefix = comment_prefix;
186
176
 
187
177
  this.decoder = null;
188
178
  if (encoding == 'utf-8' && this.csv_path === null) {
@@ -204,11 +194,11 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
204
194
  this.utf8_bom_removed = false; // BOM doesn't get automatically removed by the decoder when utf-8 file is treated as latin-1
205
195
  this.first_defective_line = null;
206
196
 
207
- this.fields_info = new Object();
197
+ this.fields_info = new Map();
208
198
  this.NR = 0; // Record number
209
199
  this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)
210
200
 
211
- this.rfc_line_buffer = [];
201
+ this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix);
212
202
 
213
203
  this.partially_decoded_line = '';
214
204
  this.partially_decoded_line_ends_with_cr = false;
@@ -222,7 +212,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
222
212
 
223
213
  this.produced_records_queue = new RecordQueue();
224
214
 
225
- this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line;
215
+ this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line_simple;
226
216
  }
227
217
 
228
218
 
@@ -350,9 +340,14 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
350
340
  };
351
341
 
352
342
 
353
- process_record_line(line) {
354
- if (this.comment_prefix !== null && line.startsWith(this.comment_prefix))
343
+ process_record_line_simple(line) {
344
+ if (this.comment_prefix && line.startsWith(this.comment_prefix))
355
345
  return; // Just skip the line
346
+ this.process_record_line(line);
347
+ }
348
+
349
+
350
+ process_record_line(line) {
356
351
  this.NR += 1;
357
352
  var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
358
353
  if (warning) {
@@ -363,29 +358,20 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
363
358
  }
364
359
  }
365
360
  let num_fields = record.length;
366
- if (!this.fields_info.hasOwnProperty(num_fields))
367
- this.fields_info[num_fields] = this.NR;
361
+ if (!this.fields_info.has(num_fields))
362
+ this.fields_info.set(num_fields, this.NR);
368
363
  this.produced_records_queue.enqueue(record);
369
364
  this.try_resolve_next_record();
370
365
  };
371
366
 
372
367
 
373
368
  process_partial_rfc_record_line(line) {
374
- if (this.comment_prefix !== null && this.rfc_line_buffer.length == 0 && line.startsWith(this.comment_prefix))
375
- return; // Just skip the line
376
- let match_list = line.match(/"/g);
377
- let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
378
- if (this.rfc_line_buffer.length == 0 && !has_unbalanced_double_quote) {
379
- this.process_record_line(line);
380
- } else if (this.rfc_line_buffer.length == 0 && has_unbalanced_double_quote) {
381
- this.rfc_line_buffer.push(line);
382
- } else if (!has_unbalanced_double_quote) {
383
- this.rfc_line_buffer.push(line);
384
- } else {
385
- this.rfc_line_buffer.push(line);
386
- let multiline_row = this.rfc_line_buffer.join('\n');
387
- this.rfc_line_buffer = [];
388
- this.process_record_line(multiline_row);
369
+ this.line_aggregator.add_line(line);
370
+ if (this.line_aggregator.has_comment_line) {
371
+ this.line_aggregator.reset();
372
+ } else if (this.line_aggregator.has_full_record) {
373
+ this.process_record_line(this.line_aggregator.get_full_line('\n'));
374
+ this.line_aggregator.reset();
389
375
  }
390
376
  };
391
377
 
@@ -432,13 +418,13 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
432
418
  };
433
419
 
434
420
 
435
- process_data_bulk(data_chunk) {
436
- let decoded_string = data_chunk.toString(this.encoding);
421
+ process_data_bulk(data_blob) {
422
+ let decoded_string = data_blob.toString(this.encoding);
437
423
  if (this.encoding == 'utf-8') {
438
424
  // Using hacky comparison method from here: https://stackoverflow.com/a/32279283/2898283
439
425
  // TODO get rid of this once TextDecoder is really fixed or when alternative method of reliable decoding appears
440
426
  let control_buffer = Buffer.from(decoded_string, 'utf-8');
441
- if (Buffer.compare(data_chunk, control_buffer) != 0) {
427
+ if (Buffer.compare(data_blob, control_buffer) != 0) {
442
428
  this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
443
429
  return;
444
430
  }
@@ -449,8 +435,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
449
435
  for (let i = 0; i < lines.length; i++) {
450
436
  this.process_line(lines[i]);
451
437
  }
452
- if (this.rfc_line_buffer.length > 0) {
453
- this.process_record_line(this.rfc_line_buffer.join('\n'));
438
+ if (this.line_aggregator.is_inside_multiline_record()) {
439
+ this.process_record_line(this.line_aggregator.get_full_line('\n'));
454
440
  }
455
441
  this.input_exhausted = true;
456
442
  this.try_resolve_next_record(); // Should be a NOOP here?
@@ -464,8 +450,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
464
450
  this.partially_decoded_line = '';
465
451
  this.process_line(last_line);
466
452
  }
467
- if (this.rfc_line_buffer.length > 0) {
468
- this.process_record_line(this.rfc_line_buffer.join('\n'));
453
+ if (this.line_aggregator.is_inside_multiline_record()) {
454
+ this.process_record_line(this.line_aggregator.get_full_line('\n'));
469
455
  }
470
456
  this.try_resolve_next_record();
471
457
  };
@@ -487,11 +473,11 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
487
473
  } else {
488
474
  let parent_iterator = this;
489
475
  return new Promise(function(resolve, reject) {
490
- fs.readFile(parent_iterator.csv_path, (err, data_chunk) => {
476
+ fs.readFile(parent_iterator.csv_path, (err, data_blob) => {
491
477
  if (err) {
492
478
  reject(err);
493
479
  } else {
494
- parent_iterator.process_data_bulk(data_chunk);
480
+ parent_iterator.process_data_bulk(data_blob);
495
481
  resolve();
496
482
  }
497
483
  });
@@ -506,8 +492,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
506
492
  result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
507
493
  if (this.utf8_bom_removed)
508
494
  result.push(`UTF-8 Byte Order Mark (BOM) was found and skipped in ${this.table_name} table`);
509
- if (Object.keys(this.fields_info).length > 1)
510
- result.push(make_inconsistent_num_fields_warning('input', this.fields_info));
495
+ if (this.fields_info.size > 1)
496
+ result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
511
497
  return result;
512
498
  };
513
499
  }
@@ -710,7 +696,6 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
710
696
  input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
711
697
  }
712
698
  let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
713
- // FIXME add on(error) handler to avoid async errors, see https://github.com/nodejs/node-v0.x-archive/issues/406
714
699
  if (input_delim == '"' && input_policy == 'quoted')
715
700
  throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
716
701
  if (csv_encoding == 'latin-1')