npm - rbql - Versions diffs - 0.27.0 → 0.28.0 - Mend

rbql 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/DEV_README.md ADDED Viewed

@@ -0,0 +1,4 @@
+# Publishing the package to npm
+1. Run `git clean -fd` just in case.
+2. Run `npm publish`.

package/README.md CHANGED Viewed

@@ -302,7 +302,7 @@ _UPDATE_ query produces a new table where original values are replaced according
 ### Aggregate functions and queries
 RBQL supports the following aggregate functions, which can also be used with _GROUP BY_ keyword:
-_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
+_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _ANY_VALUE_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
 Limitation: aggregate functions inside JavaScript expressions are not supported. Although you can use expressions inside aggregate functions.
 E.g. `MAX(float(a1) / 1000)` - valid; `MAX(a1) / 1000` - invalid.

package/cli_rbql.js CHANGED Viewed

@@ -133,9 +133,9 @@ async function sample_lines(table_path) {
 }
-async function sample_records(table_path, encoding, delim, policy) {
+async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
     let table_stream = fs.createReadStream(table_path);
-    let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy);
+    let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
     let sampled_records = await sampling_iterator.get_all_records(10);
     let warnings = sampling_iterator.get_warnings();
     return [sampled_records, warnings];
@@ -183,7 +183,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
             }
         }
         if (interactive_mode) {
-            let [records, _warnings] = await sample_records(output_path, encoding, delim, policy);
+            let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
             console.log('\nOutput table preview:');
             console.log('====================================');
             print_colorized(records, delim, false, false);
@@ -210,6 +210,7 @@ async function run_with_js(args) {
     var csv_encoding = args['encoding'];
     var with_headers = args['with-headers'];
     var comment_prefix = args['comment-prefix'];
+    var trim_whitespaces = args['trim-spaces'];
     var output_delim = get_default(args, 'out-delim', null);
     var output_policy = get_default(args, 'out-policy', null);
     let init_source_file = get_default(args, 'init-source-file', null);
@@ -230,7 +231,7 @@ async function run_with_js(args) {
         // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
         // * Streaming mode works a little faster (since we don't need to do the manual validation)
         // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
-        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
+        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
         await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
         return true;
     } catch (e) {
@@ -250,8 +251,8 @@ function get_default_output_path(input_path, delim) {
 }
-async function show_preview(input_path, encoding, delim, policy, with_headers) {
-    let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
+async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
+    let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
     console.log('Input table preview:');
     console.log('====================================');
     print_colorized(records, delim, true, with_headers);
@@ -280,7 +281,7 @@ async function run_interactive_loop(args) {
         if (!delim)
             throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
     }
-    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
+    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
     args.delim = delim;
     args.policy = policy;
     if (!args.output) {
@@ -368,6 +369,7 @@ function main() {
         '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
         '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
         '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
+        '--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
         '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
         '--out-delim': {'help': 'Output delim. Use with "out-policy". Overrides out-format', 'metavar': 'DELIM'},
         '--out-policy': {'help': 'Output policy. Use with "out-delim". Overrides out-format', 'metavar': 'POLICY'},

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "rbql",
-  "version": "0.27.0",
+  "version": "0.28.0",
   "description": "Rainbow Query Language",
   "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
   "scripts": {

package/rbql.js CHANGED Viewed

@@ -289,6 +289,24 @@ function parse_number(val) {
 }
+class AnyValueAggregator {
+    constructor() {
+        this.stats = new Map();
+    }
+    increment(key, val) {
+        var cur_aggr = this.stats.get(key);
+        if (cur_aggr === undefined) {
+            this.stats.set(key, val);
+        }
+    }
+    get_final(key) {
+        return this.stats.get(key);
+    }
+}
 class MinAggregator {
     constructor() {
         this.stats = new Map();
@@ -310,7 +328,6 @@ class MinAggregator {
 }
 class MaxAggregator {
     constructor() {
         this.stats = new Map();
@@ -514,6 +531,11 @@ function init_aggregator(generator_name, val, post_proc=null) {
     return res;
 }
+function ANY_VALUE(val) {
+    return query_context.aggregation_stage < 2 ? init_aggregator(AnyValueAggregator, val) : val;
+}
+const any_value = ANY_VALUE;
+const Any_value = ANY_VALUE;
 function MIN(val) {
     return query_context.aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val;
@@ -521,7 +543,6 @@ function MIN(val) {
 const min = MIN;
 const Min = MIN;
 function MAX(val) {
     return query_context.aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val;
 }
@@ -1823,6 +1844,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
         if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE))
             throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries');
         query_context.aggregation_key_expression = '[' + combine_string_literals(rb_actions[GROUP_BY]['text'], string_literals) + ']';
+        query_context.aggregation_stage = 1;
     }

package/rbql_csv.js CHANGED Viewed

@@ -156,7 +156,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
     // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
     // get_record() - consumer
     // stream.on('data') - producer
-    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
+    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
         super();
         this.stream = stream;
         this.csv_path = csv_path;
@@ -173,6 +173,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         this.table_name = table_name;
         this.variable_prefix = variable_prefix;
         this.comment_prefix = comment_prefix;
+        this.trim_whitespaces = trim_whitespaces;
         this.decoder = null;
         if (encoding == 'utf-8' && this.csv_path === null) {
@@ -350,6 +351,9 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
     process_record_line(line) {
         this.NR += 1;
         var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
+        if (this.trim_whitespaces) {
+            record = record.map((v) => v.trim());
+        }
         if (warning) {
             if (this.first_defective_line === null) {
                 this.first_defective_line = this.NL;
@@ -675,7 +679,8 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
         } else {
             this.stream = fs.createReadStream(this.table_path);
         }
-        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
+        let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
+        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces);
         return this.record_iterator;
     };
@@ -695,6 +700,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
     } else {
         input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
     }
+    let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
     let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
     if (input_delim == '"' && input_policy == 'quoted')
         throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
@@ -711,7 +717,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
     }
     let input_file_dir = input_path ? path.dirname(input_path) : null;
     let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
-    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
+    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
     let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
     await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);