rbql 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DEV_README.md CHANGED
@@ -1,4 +1,8 @@
1
1
  # Publishing the package to npm
2
- 1. Run `git clean -fd` just in case.
3
- 2. Run `npm publish`.
2
+ 1. Update the version in package.json. Make sure it is in sync with the JS version expected by the unit tests - run the unit tests to check. The Python and JS versions don't have to be in sync!
3
+ 2. Run `git clean -fd` just in case.
4
+ 3. Run `npm publish`.
5
+
6
+ Note: you need to be authorized in order to publish, so on a new system you might need to run `npm adduser` first,
7
+ if you run `npm publish` without authorizing, it will prompt you to do it anyway, so no big deal.
4
8
 
package/README.md CHANGED
@@ -219,11 +219,14 @@ You can also check rbql-js cli app code as a usage example: [rbql-js cli source
219
219
 
220
220
 
221
221
  ### Installation:
222
- To use RBQL as CLI app you need to install it in global (-g) mode:
222
+ To use RBQL as CLI app you can install it in global (-g) mode:
223
223
  ```
224
224
  $ npm install -g rbql
225
225
  ```
226
226
 
227
+ RBQL can also be installed locally with `$ npm install rbql`, but then you would have to run it with `$ npx rbql-js ...` instead of `$ rbql-js ...`.
228
+
229
+
227
230
  ### Usage (non-interactive mode):
228
231
 
229
232
  ```
@@ -339,6 +342,13 @@ But it is also possible to override this selection directly in the query by addi
339
342
  Example: `select top 5 NR, * with (header)`
340
343
 
341
344
 
345
+ ### Pipe syntax for query chaining
346
+ You can chain consecutive queries via pipe `|` syntax. Example:
347
+ ```
348
+ SELECT a2 AS region, count(*) AS cnt GROUP BY a2 | SELECT * ORDER BY a.cnt DESC
349
+ ```
350
+
351
+
342
352
  ### User Defined Functions (UDF)
343
353
 
344
354
  RBQL supports User Defined Functions
package/cli_rbql.js CHANGED
@@ -16,6 +16,7 @@ var interactive_mode = false;
16
16
 
17
17
  // TODO implement colored output like in Python version
18
18
  // TODO implement query history like in Python version. "readline" modules allows to do that, see "completer" parameter.
19
+ // TODO switch to the built-in Node.js `util.parseArgs` function (added in 2022)
19
20
 
20
21
  // FIXME test readline on Win: disable interactive mode?
21
22
 
@@ -133,9 +134,9 @@ async function sample_lines(table_path) {
133
134
  }
134
135
 
135
136
 
136
- async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
137
+ async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces, comment_regex) {
137
138
  let table_stream = fs.createReadStream(table_path);
138
- let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
139
+ let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces, comment_regex);
139
140
  let sampled_records = await sampling_iterator.get_all_records(10);
140
141
  let warnings = sampling_iterator.get_warnings();
141
142
  return [sampled_records, warnings];
@@ -183,7 +184,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
183
184
  }
184
185
  }
185
186
  if (interactive_mode) {
186
- let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
187
+ let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false, /*comment_regex=*/null);
187
188
  console.log('\nOutput table preview:');
188
189
  console.log('====================================');
189
190
  print_colorized(records, delim, false, false);
@@ -210,6 +211,7 @@ async function run_with_js(args) {
210
211
  var csv_encoding = args['encoding'];
211
212
  var with_headers = args['with-headers'];
212
213
  var comment_prefix = args['comment-prefix'];
214
+ var comment_regex = args['comment-regex'];
213
215
  var trim_whitespaces = args['trim-spaces'];
214
216
  var output_delim = get_default(args, 'out-delim', null);
215
217
  var output_policy = get_default(args, 'out-policy', null);
@@ -231,7 +233,7 @@ async function run_with_js(args) {
231
233
  // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
232
234
  // * Streaming mode works a little faster (since we don't need to do the manual validation)
233
235
  // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
234
- await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
236
+ await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces, 'comment_regex': comment_regex});
235
237
  await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
236
238
  return true;
237
239
  } catch (e) {
@@ -251,8 +253,8 @@ function get_default_output_path(input_path, delim) {
251
253
  }
252
254
 
253
255
 
254
- async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
255
- let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
256
+ async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces, comment_regex) {
257
+ let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces, comment_regex);
256
258
  console.log('Input table preview:');
257
259
  console.log('====================================');
258
260
  print_colorized(records, delim, true, with_headers);
@@ -281,7 +283,7 @@ async function run_interactive_loop(args) {
281
283
  if (!delim)
282
284
  throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
283
285
  }
284
- await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
286
+ await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces'], args['comment-regex']);
285
287
  args.delim = delim;
286
288
  args.policy = policy;
287
289
  if (!args.output) {
@@ -367,7 +369,8 @@ function main() {
367
369
  '--delim': {'help': 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', 'metavar': 'DELIM'},
368
370
  '--policy': {'help': 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', 'metavar': 'POLICY'},
369
371
  '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
370
- '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
372
+ '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#"', 'metavar': 'PREFIX'},
373
+ '--comment-regex': {'help': 'Ignore lines in input and join tables that contain the comment REGEX.', 'metavar': 'REGEX'},
371
374
  '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
372
375
  '--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
373
376
  '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
package/csv_utils.js CHANGED
@@ -118,8 +118,9 @@ function smart_split(src, dlm, policy, preserve_quotes_and_whitespaces) {
118
118
 
119
119
 
120
120
  class MultilineRecordAggregator {
121
- constructor(comment_prefix) {
121
+ constructor(comment_prefix, comment_regex) {
122
122
  this.comment_prefix = comment_prefix;
123
+ this.comment_regex = comment_regex;
123
124
  this.reset();
124
125
  }
125
126
  add_line(line_text) {
@@ -130,6 +131,10 @@ class MultilineRecordAggregator {
130
131
  this.has_comment_line = true;
131
132
  return false;
132
133
  }
134
+ if (this.comment_regex && this.rfc_line_buffer.length == 0 && line_text.search(this.comment_regex) != -1) {
135
+ this.has_comment_line = true;
136
+ return false;
137
+ }
133
138
  let match_list = line_text.match(/"/g);
134
139
  let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
135
140
  this.rfc_line_buffer.push(line_text);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rbql",
3
- "version": "0.28.0",
3
+ "version": "0.29.0",
4
4
  "description": "Rainbow Query Language",
5
5
  "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
6
6
  "scripts": {
package/rbql.js CHANGED
@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
70
70
 
71
71
 
72
72
  const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
73
- const RBQL_VERSION = '0.27.0';
73
+ const RBQL_VERSION = '0.29.0';
74
74
 
75
75
 
76
76
  function check_if_brackets_match(opening_bracket, closing_bracket) {
@@ -1796,6 +1796,7 @@ class TableWriter extends RBQLOutputWriter {
1796
1796
  super();
1797
1797
  this.table = external_table;
1798
1798
  this.header = null;
1799
+ this.finished = false;
1799
1800
  }
1800
1801
 
1801
1802
  async write(fields) {
@@ -1806,6 +1807,33 @@ class TableWriter extends RBQLOutputWriter {
1806
1807
  set_header(header) {
1807
1808
  this.header = header;
1808
1809
  }
1810
+
1811
+ async finish() {
1812
+ this.finished = true;
1813
+ }
1814
+ }
1815
+
1816
+
1817
+ class TablePipe {
1818
+ constructor() {
1819
+ this.table = [];
1820
+ this.writer = new TableWriter(this.table);
1821
+ this.iterator = null;
1822
+ }
1823
+
1824
+ get_writer() {
1825
+ return this.writer;
1826
+ }
1827
+
1828
+ get_iterator() {
1829
+ if (!this.writer.finished) {
1830
+ throw new RbqlIOHandlingError("Trying to read from non-thread-safe table pipe while not finishing writing yet");
1831
+ }
1832
+ if (this.iterator === null) {
1833
+ this.iterator = new TableIterator(this.table, this.writer.header);
1834
+ }
1835
+ return this.iterator;
1836
+ }
1809
1837
  }
1810
1838
 
1811
1839
 
@@ -1925,7 +1953,12 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
1925
1953
  }
1926
1954
 
1927
1955
 
1928
- async function query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry=null, user_init_code='') {
1956
+ function split_query_to_stages(query_text) {
1957
+ return query_text.split(/\|[>]?[ ]*(?=(?:select|update)[ ])/i);
1958
+ }
1959
+
1960
+
1961
+ async function staged_query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code) {
1929
1962
  query_context = new RBQLContext(query_text, input_iterator, output_writer, user_init_code);
1930
1963
  await shallow_parse_input_query(query_text, input_iterator, join_tables_registry, query_context);
1931
1964
  await compile_and_run(query_context);
@@ -1937,6 +1970,20 @@ async function query(query_text, input_iterator, output_writer, output_warnings,
1937
1970
  }
1938
1971
 
1939
1972
 
1973
+ async function query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry=null, user_init_code='') {
1974
+ let query_stages = split_query_to_stages(query_text);
1975
+ let previous_pipe = null;
1976
+ for (let i = 0; i < query_stages.length; i++) {
1977
+ let query_stage_text = query_stages[i];
1978
+ let output_pipe = i + 1 < query_stages.length ? new TablePipe() : null;
1979
+ let stage_iterator = previous_pipe === null ? input_iterator : previous_pipe.get_iterator();
1980
+ let stage_writer = output_pipe === null ? output_writer : output_pipe.get_writer();
1981
+ await staged_query(query_stage_text, stage_iterator, stage_writer, output_warnings, join_tables_registry, user_init_code);
1982
+ previous_pipe = output_pipe;
1983
+ }
1984
+ }
1985
+
1986
+
1940
1987
  async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true, user_init_code='') {
1941
1988
  if (!normalize_column_names && input_column_names !== null && join_column_names !== null)
1942
1989
  ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names);
package/rbql_csv.js CHANGED
@@ -156,7 +156,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
156
156
  // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
157
157
  // get_record() - consumer
158
158
  // stream.on('data') - producer
159
- constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
159
+ constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false, comment_regex=null) {
160
160
  super();
161
161
  this.stream = stream;
162
162
  this.csv_path = csv_path;
@@ -173,6 +173,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
173
173
  this.table_name = table_name;
174
174
  this.variable_prefix = variable_prefix;
175
175
  this.comment_prefix = comment_prefix;
176
+ this.comment_regex = comment_regex;
176
177
  this.trim_whitespaces = trim_whitespaces;
177
178
 
178
179
  this.decoder = null;
@@ -199,7 +200,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
199
200
  this.NR = 0; // Record number
200
201
  this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)
201
202
 
202
- this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix);
203
+ this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix, comment_regex);
203
204
 
204
205
  this.partially_decoded_line = '';
205
206
  this.partially_decoded_line_ends_with_cr = false;
@@ -344,6 +345,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
344
345
  process_record_line_simple(line) {
345
346
  if (this.comment_prefix && line.startsWith(this.comment_prefix))
346
347
  return; // Just skip the line
348
+ if (this.comment_regex && line.search(this.comment_regex) != -1)
349
+ return; // Just skip the line
347
350
  this.process_record_line(line);
348
351
  }
349
352
 
@@ -680,7 +683,8 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
680
683
  this.stream = fs.createReadStream(this.table_path);
681
684
  }
682
685
  let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
683
- this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces);
686
+ let comment_regex = this.options && this.options.hasOwnProperty('comment_regex') ? this.options['comment_regex'] : null;
687
+ this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces, comment_regex);
684
688
  return this.record_iterator;
685
689
  };
686
690
 
@@ -701,6 +705,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
701
705
  input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
702
706
  }
703
707
  let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
708
+ let comment_regex = options && options.hasOwnProperty('comment_regex') ? options['comment_regex'] : null;
704
709
  let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
705
710
  if (input_delim == '"' && input_policy == 'quoted')
706
711
  throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
@@ -717,7 +722,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
717
722
  }
718
723
  let input_file_dir = input_path ? path.dirname(input_path) : null;
719
724
  let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
720
- let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
725
+ let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces, comment_regex);
721
726
  let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
722
727
 
723
728
  await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);