rbql 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DEV_README.md ADDED
@@ -0,0 +1,4 @@
1
+ # Publishing the package to npm
2
+ 1. Run `git clean -fd` just in case.
3
+ 2. Run `npm publish`.
4
+
package/README.md CHANGED
@@ -302,7 +302,7 @@ _UPDATE_ query produces a new table where original values are replaced according
302
302
  ### Aggregate functions and queries
303
303
 
304
304
  RBQL supports the following aggregate functions, which can also be used with _GROUP BY_ keyword:
305
- _COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
305
+ _COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _ANY_VALUE_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
306
306
 
307
307
  Limitation: aggregate functions inside JavaScript expressions are not supported, although you can use expressions inside aggregate functions.
308
308
  E.g. `MAX(float(a1) / 1000)` - valid; `MAX(a1) / 1000` - invalid.
package/cli_rbql.js CHANGED
@@ -133,9 +133,9 @@ async function sample_lines(table_path) {
133
133
  }
134
134
 
135
135
 
136
- async function sample_records(table_path, encoding, delim, policy) {
136
+ async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
137
137
  let table_stream = fs.createReadStream(table_path);
138
- let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy);
138
+ let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
139
139
  let sampled_records = await sampling_iterator.get_all_records(10);
140
140
  let warnings = sampling_iterator.get_warnings();
141
141
  return [sampled_records, warnings];
@@ -183,7 +183,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
183
183
  }
184
184
  }
185
185
  if (interactive_mode) {
186
- let [records, _warnings] = await sample_records(output_path, encoding, delim, policy);
186
+ let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
187
187
  console.log('\nOutput table preview:');
188
188
  console.log('====================================');
189
189
  print_colorized(records, delim, false, false);
@@ -210,6 +210,7 @@ async function run_with_js(args) {
210
210
  var csv_encoding = args['encoding'];
211
211
  var with_headers = args['with-headers'];
212
212
  var comment_prefix = args['comment-prefix'];
213
+ var trim_whitespaces = args['trim-spaces'];
213
214
  var output_delim = get_default(args, 'out-delim', null);
214
215
  var output_policy = get_default(args, 'out-policy', null);
215
216
  let init_source_file = get_default(args, 'init-source-file', null);
@@ -230,7 +231,7 @@ async function run_with_js(args) {
230
231
  // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
231
232
  // * Streaming mode works a little faster (since we don't need to do the manual validation)
232
233
  // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
233
- await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
234
+ await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
234
235
  await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
235
236
  return true;
236
237
  } catch (e) {
@@ -250,8 +251,8 @@ function get_default_output_path(input_path, delim) {
250
251
  }
251
252
 
252
253
 
253
- async function show_preview(input_path, encoding, delim, policy, with_headers) {
254
- let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
254
+ async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
255
+ let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
255
256
  console.log('Input table preview:');
256
257
  console.log('====================================');
257
258
  print_colorized(records, delim, true, with_headers);
@@ -280,7 +281,7 @@ async function run_interactive_loop(args) {
280
281
  if (!delim)
281
282
  throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
282
283
  }
283
- await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
284
+ await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
284
285
  args.delim = delim;
285
286
  args.policy = policy;
286
287
  if (!args.output) {
@@ -368,6 +369,7 @@ function main() {
368
369
  '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
369
370
  '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
370
371
  '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
372
+ '--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
371
373
  '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
372
374
  '--out-delim': {'help': 'Output delim. Use with "out-policy". Overrides out-format', 'metavar': 'DELIM'},
373
375
  '--out-policy': {'help': 'Output policy. Use with "out-delim". Overrides out-format', 'metavar': 'POLICY'},
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rbql",
3
- "version": "0.27.0",
3
+ "version": "0.28.0",
4
4
  "description": "Rainbow Query Language",
5
5
  "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
6
6
  "scripts": {
package/rbql.js CHANGED
@@ -289,6 +289,24 @@ function parse_number(val) {
289
289
  }
290
290
 
291
291
 
292
+ class AnyValueAggregator {
293
+ constructor() {
294
+ this.stats = new Map();
295
+ }
296
+
297
+ increment(key, val) {
298
+ var cur_aggr = this.stats.get(key);
299
+ if (cur_aggr === undefined) {
300
+ this.stats.set(key, val);
301
+ }
302
+ }
303
+
304
+ get_final(key) {
305
+ return this.stats.get(key);
306
+ }
307
+ }
308
+
309
+
292
310
  class MinAggregator {
293
311
  constructor() {
294
312
  this.stats = new Map();
@@ -310,7 +328,6 @@ class MinAggregator {
310
328
  }
311
329
 
312
330
 
313
-
314
331
  class MaxAggregator {
315
332
  constructor() {
316
333
  this.stats = new Map();
@@ -514,6 +531,11 @@ function init_aggregator(generator_name, val, post_proc=null) {
514
531
  return res;
515
532
  }
516
533
 
534
+ function ANY_VALUE(val) {
535
+ return query_context.aggregation_stage < 2 ? init_aggregator(AnyValueAggregator, val) : val;
536
+ }
537
+ const any_value = ANY_VALUE;
538
+ const Any_value = ANY_VALUE;
517
539
 
518
540
  function MIN(val) {
519
541
  return query_context.aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val;
@@ -521,7 +543,6 @@ function MIN(val) {
521
543
  const min = MIN;
522
544
  const Min = MIN;
523
545
 
524
-
525
546
  function MAX(val) {
526
547
  return query_context.aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val;
527
548
  }
@@ -1823,6 +1844,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
1823
1844
  if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE))
1824
1845
  throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries');
1825
1846
  query_context.aggregation_key_expression = '[' + combine_string_literals(rb_actions[GROUP_BY]['text'], string_literals) + ']';
1847
+ query_context.aggregation_stage = 1;
1826
1848
  }
1827
1849
 
1828
1850
 
package/rbql_csv.js CHANGED
@@ -156,7 +156,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
156
156
  // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
157
157
  // get_record() - consumer
158
158
  // stream.on('data') - producer
159
- constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
159
+ constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
160
160
  super();
161
161
  this.stream = stream;
162
162
  this.csv_path = csv_path;
@@ -173,6 +173,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
173
173
  this.table_name = table_name;
174
174
  this.variable_prefix = variable_prefix;
175
175
  this.comment_prefix = comment_prefix;
176
+ this.trim_whitespaces = trim_whitespaces;
176
177
 
177
178
  this.decoder = null;
178
179
  if (encoding == 'utf-8' && this.csv_path === null) {
@@ -350,6 +351,9 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
350
351
  process_record_line(line) {
351
352
  this.NR += 1;
352
353
  var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
354
+ if (this.trim_whitespaces) {
355
+ record = record.map((v) => v.trim());
356
+ }
353
357
  if (warning) {
354
358
  if (this.first_defective_line === null) {
355
359
  this.first_defective_line = this.NL;
@@ -675,7 +679,8 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
675
679
  } else {
676
680
  this.stream = fs.createReadStream(this.table_path);
677
681
  }
678
- this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
682
+ let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
683
+ this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces);
679
684
  return this.record_iterator;
680
685
  };
681
686
 
@@ -695,6 +700,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
695
700
  } else {
696
701
  input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
697
702
  }
703
+ let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
698
704
  let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
699
705
  if (input_delim == '"' && input_policy == 'quoted')
700
706
  throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
@@ -711,7 +717,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
711
717
  }
712
718
  let input_file_dir = input_path ? path.dirname(input_path) : null;
713
719
  let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
714
- let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
720
+ let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
715
721
  let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
716
722
 
717
723
  await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);