npm - rbql - Versions diffs - 0.26.0 → 0.28.0 - Mend

rbql 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/DEV_README.md ADDED Viewed

@@ -0,0 +1,4 @@
+# Publishing the package to npm
+1. Run `git clean -fd` just in case.
+2. Run `npm publish`.

package/README.md CHANGED Viewed

@@ -302,7 +302,7 @@ _UPDATE_ query produces a new table where original values are replaced according
 ### Aggregate functions and queries
 RBQL supports the following aggregate functions, which can also be used with _GROUP BY_ keyword:
-_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
+_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _ANY_VALUE_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
 Limitation: aggregate functions inside JavaScript expressions are not supported. Although you can use expressions inside aggregate functions.
 E.g. `MAX(float(a1) / 1000)` - valid; `MAX(a1) / 1000` - invalid.

package/cli_rbql.js CHANGED Viewed

@@ -133,9 +133,9 @@ async function sample_lines(table_path) {
 }
-async function sample_records(table_path, encoding, delim, policy) {
+async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
     let table_stream = fs.createReadStream(table_path);
-    let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy);
+    let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
     let sampled_records = await sampling_iterator.get_all_records(10);
     let warnings = sampling_iterator.get_warnings();
     return [sampled_records, warnings];
@@ -183,7 +183,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
             }
         }
         if (interactive_mode) {
-            let [records, _warnings] = await sample_records(output_path, encoding, delim, policy);
+            let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
             console.log('\nOutput table preview:');
             console.log('====================================');
             print_colorized(records, delim, false, false);
@@ -210,6 +210,7 @@ async function run_with_js(args) {
     var csv_encoding = args['encoding'];
     var with_headers = args['with-headers'];
     var comment_prefix = args['comment-prefix'];
+    var trim_whitespaces = args['trim-spaces'];
     var output_delim = get_default(args, 'out-delim', null);
     var output_policy = get_default(args, 'out-policy', null);
     let init_source_file = get_default(args, 'init-source-file', null);
@@ -230,7 +231,7 @@ async function run_with_js(args) {
         // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
         // * Streaming mode works a little faster (since we don't need to do the manual validation)
         // TODO check if the current node installation doesn't have ICU enabled (which is typicaly provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
-        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
+        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
         await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
         return true;
     } catch (e) {
@@ -250,8 +251,8 @@ function get_default_output_path(input_path, delim) {
 }
-async function show_preview(input_path, encoding, delim, policy, with_headers) {
-    let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
+async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
+    let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
     console.log('Input table preview:');
     console.log('====================================');
     print_colorized(records, delim, true, with_headers);
@@ -280,7 +281,7 @@ async function run_interactive_loop(args) {
         if (!delim)
             throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
     }
-    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
+    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
     args.delim = delim;
     args.policy = policy;
     if (!args.output) {
@@ -368,6 +369,7 @@ function main() {
         '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
         '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
         '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
+        '--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
         '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
         '--out-delim': {'help': 'Output delim. Use with "out-policy". Overrides out-format', 'metavar': 'DELIM'},
         '--out-policy': {'help': 'Output policy. Use with "out-delim". Overrides out-format', 'metavar': 'POLICY'},

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "rbql",
-  "version": "0.26.0",
+  "version": "0.28.0",
   "description": "Rainbow Query Language",
   "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
   "scripts": {

package/rbql.js CHANGED Viewed

@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
 const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
-const RBQL_VERSION = '0.26.0';
+const RBQL_VERSION = '0.27.0';
 function check_if_brackets_match(opening_bracket, closing_bracket) {
@@ -130,30 +130,30 @@ function column_info_from_text_span(text_span, string_literals) {
     let subscript_str_match = /^([ab])\[___RBQL_STRING_LITERAL([0-9]+)___\]$/.exec(text_span);
     let as_alias_match = /^(.*) (as|AS) +([a-zA-Z][a-zA-Z0-9_]*) *$/.exec(text_span);
     if (as_alias_match !== null) {
-        return {table_name: null, column_index: null, column_name: as_alias_match[3], is_star: false, is_alias: true};
+        return {table_name: null, column_index: null, column_name: null, is_star: false, alias_name: as_alias_match[3]};
     }
     if (simple_var_match !== null) {
         if (text_span == rbql_star_marker)
-            return {table_name: null, column_index: null, column_name: null, is_star: true, is_alias: false};
+            return {table_name: null, column_index: null, column_name: null, is_star: true, alias_name: null};
         if (text_span.startsWith('___RBQL_STRING_LITERAL'))
             return null;
         let match = /^([ab])([0-9]+)$/.exec(text_span);
         if (match !== null) {
-            return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false, is_alias: false};
+            return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false, alias_name: null};
         }
         // Some examples for this branch: NR, NF
-        return {table_name: null, column_index: null, column_name: text_span, is_star: false, is_alias: false};
+        return {table_name: null, column_index: null, column_name: text_span, is_star: false, alias_name: null};
     } else if (attribute_match !== null) {
         let table_name = attribute_match[1];
         let column_name = attribute_match[2];
         if (column_name == rbql_star_marker) {
-            return {table_name: table_name, column_index: null, column_name: null, is_star: true, is_alias: false};
+            return {table_name: table_name, column_index: null, column_name: null, is_star: true, alias_name: null};
         }
-        return {table_name: null, column_index: null, column_name: column_name, is_star: false, is_alias: false};
+        return {table_name: null, column_index: null, column_name: column_name, is_star: false, alias_name: null};
     } else if (subscript_int_match != null) {
         let table_name = subscript_int_match[1];
         let column_index = parseInt(subscript_int_match[2]) - 1;
-        return {table_name: table_name, column_index: column_index, column_name: null, is_star: false, is_alias: false};
+        return {table_name: table_name, column_index: column_index, column_name: null, is_star: false, alias_name: null};
     } else if (subscript_str_match != null) {
         let table_name = subscript_str_match[1];
         let replaced_string_literal_id = subscript_str_match[2];
@@ -161,7 +161,7 @@ function column_info_from_text_span(text_span, string_literals) {
             let quoted_column_name = string_literals[replaced_string_literal_id];
             let unquoted_column_name = unquote_string(quoted_column_name);
             if (unquoted_column_name !== null && unquoted_column_name !== undefined) {
-                return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false, is_alias: false};
+                return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false, alias_name: null};
             }
         }
     }
@@ -289,6 +289,24 @@ function parse_number(val) {
 }
+class AnyValueAggregator {
+    constructor() {
+        this.stats = new Map();
+    }
+    increment(key, val) {
+        var cur_aggr = this.stats.get(key);
+        if (cur_aggr === undefined) {
+            this.stats.set(key, val);
+        }
+    }
+    get_final(key) {
+        return this.stats.get(key);
+    }
+}
 class MinAggregator {
     constructor() {
         this.stats = new Map();
@@ -310,7 +328,6 @@ class MinAggregator {
 }
 class MaxAggregator {
     constructor() {
         this.stats = new Map();
@@ -514,6 +531,11 @@ function init_aggregator(generator_name, val, post_proc=null) {
     return res;
 }
+function ANY_VALUE(val) {
+    return query_context.aggregation_stage < 2 ? init_aggregator(AnyValueAggregator, val) : val;
+}
+const any_value = ANY_VALUE;
+const Any_value = ANY_VALUE;
 function MIN(val) {
     return query_context.aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val;
@@ -521,7 +543,6 @@ function MIN(val) {
 const min = MIN;
 const Min = MIN;
 function MAX(val) {
     return query_context.aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val;
 }
@@ -1259,8 +1280,8 @@ function generate_init_statements(query_text, variables_map, join_variables_map,
 function replace_star_count(aggregate_expression) {
-    var rgx = /(^|,) *COUNT\( *\* *\) *(?:$|(?=,))/ig;
-    var result = aggregate_expression.replace(rgx, '$1 COUNT(1)');
+    var rgx = /(?:(?<=^)|(?<=,)) *COUNT\( *\* *\)/ig;
+    var result = aggregate_expression.replace(rgx, ' COUNT(1)');
     return str_strip(result);
 }
@@ -1580,13 +1601,22 @@ function select_output_header(input_header, join_header, query_column_infos) {
     if (input_header === null) {
         assert(join_header === null);
     }
+    let query_has_star = false;
+    let query_has_column_alias = false;
+    for (let qci of query_column_infos) {
+        query_has_star = query_has_star || (qci !== null && qci.is_star);
+        query_has_column_alias = query_has_column_alias || (qci !== null && qci.alias_name !== null);
+    }
     if (input_header === null) {
-        for (let qci of query_column_infos) {
-            if (qci !== null && qci.is_alias) {
-                throw new RbqlParsingError(`Specifying column alias "AS ${qci.column_name}" is not allowed if input table has no header`);
-            }
+        if (query_has_star && query_has_column_alias) {
+            throw new RbqlParsingError('Using both * (star) and AS alias in the same query is not allowed for input tables without header');
         }
-        return null;
+        if (!query_has_column_alias) {
+            // Input table has no header and query has no aliases therefore the output table will be without header.
+            return null;
+        }
+        input_header = [];
+        join_header = [];
     }
     if (join_header === null) {
         // This means there is no JOIN table.
@@ -1607,6 +1637,8 @@ function select_output_header(input_header, join_header, query_column_infos) {
             }
         } else if (qci.column_name !== null) {
             output_header.push(qci.column_name);
+        } else if (qci.alias_name !== null) {
+            output_header.push(qci.alias_name);
         } else if (qci.column_index !== null) {
             if (qci.table_name == 'a' && qci.column_index < input_header.length) {
                 output_header.push(input_header[qci.column_index]);
@@ -1812,6 +1844,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
         if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE))
             throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries');
         query_context.aggregation_key_expression = '[' + combine_string_literals(rb_actions[GROUP_BY]['text'], string_literals) + ']';
+        query_context.aggregation_stage = 1;
     }

package/rbql_csv.js CHANGED Viewed

@@ -156,7 +156,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
     // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
     // get_record() - consumer
     // stream.on('data') - producer
-    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
+    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
         super();
         this.stream = stream;
         this.csv_path = csv_path;
@@ -173,6 +173,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         this.table_name = table_name;
         this.variable_prefix = variable_prefix;
         this.comment_prefix = comment_prefix;
+        this.trim_whitespaces = trim_whitespaces;
         this.decoder = null;
         if (encoding == 'utf-8' && this.csv_path === null) {
@@ -350,6 +351,9 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
     process_record_line(line) {
         this.NR += 1;
         var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
+        if (this.trim_whitespaces) {
+            record = record.map((v) => v.trim());
+        }
         if (warning) {
             if (this.first_defective_line === null) {
                 this.first_defective_line = this.NL;
@@ -675,7 +679,8 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
         } else {
             this.stream = fs.createReadStream(this.table_path);
         }
-        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
+        let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
+        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces);
         return this.record_iterator;
     };
@@ -695,6 +700,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
     } else {
         input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
     }
+    let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
     let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
     if (input_delim == '"' && input_policy == 'quoted')
         throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
@@ -711,7 +717,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
     }
     let input_file_dir = input_path ? path.dirname(input_path) : null;
     let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
-    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
+    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
     let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
     await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);