npm - rbql - Versions diffs - 0.19.3 → 0.25.0 - Mend

rbql 0.19.3 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -32,7 +32,7 @@ The following two functions are available in the browser version:
 Run user query against input array of records and put the result set in the output array:
 ```
-async function query_table(user_query, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, normalize_column_names=true)
+async function query_table(user_query, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true)
 ```
 #### Parameters:
@@ -51,6 +51,8 @@ async function query_table(user_query, input_table, output_table, output_warning
   Names of _input_table_ columns which users of the app can use in their queries
 * _join_column_names_: **array**
   Names of _join_table_ columns which users of the app can use in their queries
+* _output_column_names_: **array**
+  Output column names will be stored in this array after the query completion.
 * _normalize_column_names_: **boolean**
   If set to true - column names provided with _input_column_names_ and _join_column_names_ will be normalized to "a" and "b" prefix forms e.g. "Age" -> "a.Age", "Sale price" -> "b['Sale price']".
   If set to false - column names can be used in user queries "as is".
@@ -137,7 +139,7 @@ The following 3 functions are available in Node version:
 Run user query against input_path CSV file and save it as output_path CSV file.
 ```
-async function rbql.query_csv(user_query, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings)
+async function rbql.query_csv(user_query, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null)
 ```
 #### Parameters:
@@ -161,6 +163,10 @@ async function rbql.query_csv(user_query, input_path, input_delim, input_policy,
   encoding of input, output and join tables (join table can be defined inside the user query)
 * _output_warnings_: **array**
   Warnings will be stored here after the query completion. If no warnings - the array would be empty
+* _with_headers_: **boolean**
+  If set to `true`, treat the first record in the input (and join) file as a header.
+* _comment_prefix_: **string**
+  Treat lines starting with the prefix as comments and skip them.
 ## Usage:
@@ -236,16 +242,14 @@ $ rbql-js --input input.csv --output result.csv
 ### Main Features
 * Use JavaScript expressions inside _SELECT_, _UPDATE_, _WHERE_ and _ORDER BY_ statements
-* Result set of any query immediately becomes a first-class table on it's own
-* Supports input tables with inconsistent number of fields per record
-* Output records appear in the same order as in input unless _ORDER BY_ is provided
-* Each record has a unique NR (record number) identifier
+* Supports multiple input formats
+* Result set of any query immediately becomes a first-class table on its own
+* No need to provide FROM statement in the query when the input table is defined by the current context.
 * Supports all main SQL keywords
 * Supports aggregate functions and GROUP BY queries
-* Provides some new useful query modes which traditional SQL engines do not have
-* Supports both _TOP_ and _LIMIT_ keywords
 * Supports user-defined functions (UDF)
-* Works out of the box, no external dependencies
+* Provides some new useful query modes which traditional SQL engines do not have
+* Lightweight, dependency-free, works out of the box
 #### Limitations:
@@ -284,24 +288,15 @@ RBQL for CSV files provides the following variables which you can use in your qu
    Description: Number of fields in the current record
 * _a.name_, _b.Person_age_, ... _a.{Good_alphanumeric_column_name}_
    Variable type: **string**
-   Description: Value of the field referenced by it's "name". You can use this notation if the field in the first (header) CSV line has a "good" alphanumeric name
+   Description: Value of the field referenced by its "name". You can use this notation if the field in the header has a "good" alphanumeric name
 * _a["object id"]_, _a['9.12341234']_, _b["%$ !! 10 20"]_ ... _a["Arbitrary column name!"]_
    Variable type: **string**
-   Description: Value of the field referenced by it's "name". You can use this notation to reference fields by arbitrary values in the first (header) CSV line, even when there is no header at all
-#### Notes:
-* You can mix all variable types in a single query, example:
-  ```select a1, b2 JOIN /path/to/b.csv ON a['Item Id'] == b.Identifier WHERE NR > 1 and parseInt(a.Weight) * 100 > parseInt(b["weight of the item"])```
-* Referencing fields by header names does not automatically skip the header line (you can use `where NR > 1` trick to skip it)
-* If you want to use RBQL as a library for your own app you can define your own custom variables and do not have to support the above mentioned CSV-related variables.
+   Description: Value of the field referenced by its "name". You can use this notation to reference fields by arbitrary values in the header
 ### UPDATE statement
-_UPDATE_ query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query. This prevents accidental data loss from poorly written queries.
-_UPDATE SET_ is synonym to _UPDATE_, because in RBQL there is no need to specify the source table.
+_UPDATE_ query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query.
 ### Aggregate functions and queries
@@ -316,11 +311,11 @@ There is a workaround for the limitation above for _ARRAY_AGG_ function which su
 ### JOIN statements
-Join table B can be referenced either by it's file path or by it's name - an arbitary string which user should provide before executing the JOIN query.
-RBQL supports _STRICT LEFT JOIN_ which is like _LEFT JOIN_, but generates an error if any key in left table "A" doesn't have exactly one matching key in the right table "B".
+Join table B can be referenced either by its file path or by its name - an arbitrary string which the user should provide before executing the JOIN query.
+RBQL supports _STRICT LEFT JOIN_ which is like _LEFT JOIN_, but generates an error if any key in the left table "A" doesn't have exactly one matching key in the right table "B".
+Table B path can be either relative to the working dir, relative to the main table or absolute.
 Limitation: _JOIN_ statements can't contain JavaScript expressions and must have the following form: _<JOIN\_KEYWORD> (/path/to/table.tsv | table_name ) ON a... == b... [AND a... == b... [AND ... ]]_
 ### SELECT EXCEPT statement
 SELECT EXCEPT can be used to select everything except specific columns. E.g. to select everything but columns 2 and 4, run: `SELECT * EXCEPT a2, a4`
@@ -337,6 +332,12 @@ RBQL does not support LIKE operator, instead it provides "like()" function which
 `SELECT * where like(a1, 'foo%bar')`
+### WITH (header) and WITH (noheader) statements
+You can set whether the input (and join) CSV file has a header using the environment configuration, e.g. the `--with-headers` CLI flag, a GUI checkbox, or a similar setting.
+But it is also possible to override this selection directly in the query by adding either `WITH (header)` or `WITH (noheader)` statement at the end of the query.
+Example: `select top 5 NR, * with (header)`
 ### User Defined Functions (UDF)
 RBQL supports User Defined Functions
@@ -346,8 +347,8 @@ You can define custom functions and/or import libraries in a special file: `~/.r
 ## Examples of RBQL queries
 * `select top 100 a1, a2 * 10, a4.length where a1 == "Buy" order by parseInt(a2) desc`
-* `select * order by Math.random() where NR > 1` - skip header record and random sort
-* `select top 20 a.vehicle_price.length / 10, a2 where NR > 1 && ["car", "plane", "boat"].indexOf(a['Vehicle type']) > -1 limit 20` - referencing columns by names from header record and skipping the header
+* `select * order by Math.random()` - random sort
+* `select top 20 a.vehicle_price.length / 10, a2 where parseInt(a.vehicle_price) < 500 && ["car", "plane", "boat"].indexOf(a['Vehicle type']) > -1 limit 20` - referencing columns by names from header
 * `update set a3 = 'NPC' where a3.indexOf('Non-playable character') != -1`
 * `select NR, *` - enumerate records, NR is 1-based
 * `select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3` - example of join query
@@ -355,16 +356,6 @@ You can define custom functions and/or import libraries in a special file: `~/.r
 * `select ...a1.split(':')` - Using JS "destructuring assignment" syntax to split one column into many. Do not try this with other SQL engines!
-### FAQ
-#### How do I skip header record in CSV files?
-You can use the following trick: add `... where NR > 1 ...` to your query
-And if you are doing math operation you can modify your query like this, example:
-`select parseInt(a3) * 1000, a2` -> `select NR > 1 ? parseInt(a3) * 1000 : a3, a2`
 ### References
 * [RBQL: Official Site](https://rbql.org/)

package/cli_rbql.js CHANGED Viewed

@@ -158,7 +158,7 @@ async function autodetect_delim_policy(table_path) {
 }
-function print_colorized(records, delim, show_column_names, skip_header) {
+function print_colorized(records, delim, show_column_names, with_headers) {
     let reset_color_code = '\x1b[0m';
     let color_codes = ['\x1b[0m', '\x1b[31m', '\x1b[32m', '\x1b[33m', '\x1b[34m', '\x1b[35m', '\x1b[36m', '\x1b[31;1m', '\x1b[32;1m', '\x1b[33;1m'];
     for (let r = 0; r < records.length; r++) {
@@ -166,7 +166,7 @@ function print_colorized(records, delim, show_column_names, skip_header) {
         for (let c = 0; c < records[r].length; c++) {
             let color_code = color_codes[c % color_codes.length];
             let field = records[r][c];
-            let colored_field = (!show_column_names || (skip_header && r == 0)) ? color_code + field : `${color_code}a${c + 1}:${field}`;
+            let colored_field = (!show_column_names || (with_headers && r == 0)) ? color_code + field : `${color_code}a${c + 1}:${field}`;
             out_fields.push(colored_field);
         }
         let out_line = out_fields.join(delim) + reset_color_code;
@@ -208,7 +208,7 @@ async function run_with_js(args) {
     var input_path = get_default(args, 'input', null);
     var output_path = get_default(args, 'output', null);
     var csv_encoding = args['encoding'];
-    var skip_header = args['skip-header'];
+    var with_headers = args['with-headers'];
     var comment_prefix = args['comment-prefix'];
     var output_delim = get_default(args, 'out-delim', null);
     var output_policy = get_default(args, 'out-policy', null);
@@ -229,8 +229,8 @@ async function run_with_js(args) {
         // * binary/latin-1 do not require the decoder anyway
         // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
         // * Streaming mode works a little faster (since we don't need to do the manual validation)
-        // TODO check if the current node installation doesn't have ICU enabled and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
-        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, skip_header, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
+        // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
+        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
         await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
         return true;
     } catch (e) {
@@ -250,11 +250,11 @@ function get_default_output_path(input_path, delim) {
 }
-async function show_preview(input_path, encoding, delim, policy, skip_header) {
+async function show_preview(input_path, encoding, delim, policy, with_headers) {
     let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
     console.log('Input table preview:');
     console.log('====================================');
-    print_colorized(records, delim, true, skip_header);
+    print_colorized(records, delim, true, with_headers);
     console.log('====================================\n');
     for (let warning of warnings) {
         show_warning(warning);
@@ -280,7 +280,7 @@ async function run_interactive_loop(args) {
         if (!delim)
             throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
     }
-    await show_preview(input_path, args['encoding'], delim, policy, args['skip-header']);
+    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
     args.delim = delim;
     args.policy = policy;
     if (!args.output) {
@@ -365,7 +365,7 @@ function main() {
         '--output': {'help': 'Write output table to FILE instead of stdout', 'metavar': 'FILE'},
         '--delim': {'help': 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', 'metavar': 'DELIM'},
         '--policy': {'help': 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', 'metavar': 'POLICY'},
-        '--skip-header': {'boolean': true, 'help': 'Skip header line in input and join tables. Roughly equivalent of ... WHERE NR > 1 ... in your Query'},
+        '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
         '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
         '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
         '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "rbql",
-  "version": "0.19.3",
+  "version": "0.25.0",
   "description": "Rainbow Query Language",
   "keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
   "scripts": {

package/rbql.js CHANGED Viewed

@@ -66,11 +66,117 @@ class RBQLContext {
     }
 }
-var query_context = null; // Needs to be global for MIN(), MAX(), etc functions
+var query_context = null; // Needs to be global for MIN(), MAX(), etc functions. TODO find a way to make it local.
 const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
-const RBQL_VERSION = '0.19.3';
+const RBQL_VERSION = '0.25.0';
+function check_if_brackets_match(opening_bracket, closing_bracket) { // Returns true only when the two characters form a matching pair of square, round or curly brackets.
+    return (opening_bracket == '[' && closing_bracket == ']') || (opening_bracket == '(' && closing_bracket == ')') || (opening_bracket == '{' && closing_bracket == '}');
+}
+function parse_root_bracket_level_text_spans(select_expression) { // Splits select_expression on commas located outside of all brackets and returns the trimmed pieces; throws RbqlParsingError when brackets are unbalanced or mismatched.
+    let text_spans = []; // parts of text separated by commas at the root parenthesis level
+    let last_pos = 0; // start offset of the span currently being scanned
+    let bracket_stack = []; // opening brackets that are still unclosed, innermost last
+    for (let i = 0; i < select_expression.length; i++) {
+        let cur_char = select_expression[i];
+        if (cur_char == ',' && bracket_stack.length == 0) { // a comma only separates spans when no bracket is open
+            text_spans.push(select_expression.substring(last_pos, i));
+            last_pos = i + 1;
+        } else if (['[', '{', '('].indexOf(cur_char) != -1) {
+            bracket_stack.push(cur_char);
+        } else if ([']', '}', ')'].indexOf(cur_char) != -1) {
+            if (bracket_stack.length && check_if_brackets_match(bracket_stack[bracket_stack.length - 1], cur_char)) { // the closing bracket must pair with the innermost open one
+                bracket_stack.pop();
+            } else {
+                throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching opening bracket for closing "${cur_char}"`);
+            }
+        }
+    }
+    if (bracket_stack.length) { // something was opened but never closed; the message reports the outermost such bracket
+        throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching closing bracket for opening "${bracket_stack[0]}"`);
+    }
+    text_spans.push(select_expression.substring(last_pos, select_expression.length)); // the final span has no trailing comma, so it is added here
+    text_spans = text_spans.map(span => span.trim());
+    return text_spans;
+}
+function unquote_string(quoted_str) { // Returns the content of a single-quoted or double-quoted literal with the escaped quote character and escaped backslashes unescaped; returns null when the input is not such a literal.
+    // It's possible to use eval here to unquote the quoted_column_name, but it would be a little barbaric, let's do it manually instead
+    if (!quoted_str || quoted_str.length < 2) // empty input, or too short to hold both an opening and a closing quote
+        return null;
+    if (quoted_str[0] == "'" && quoted_str[quoted_str.length - 1] == "'") {
+        return quoted_str.substring(1, quoted_str.length - 1).replace(/\\'/g, "'").replace(/\\\\/g, "\\");
+    } else if (quoted_str[0] == '"' && quoted_str[quoted_str.length - 1] == '"') {
+        return quoted_str.substring(1, quoted_str.length - 1).replace(/\\"/g, '"').replace(/\\\\/g, "\\");
+    } else {
+        return null;
+    }
+}
+function column_info_from_text_span(text_span, string_literals) { // Classifies one SELECT item and returns an object with table_name, column_index, column_name and is_star fields, or null when the item matches none of the recognized forms.
+    // This function is a rough equivalent of "column_info_from_node()" function in python version of RBQL
+    text_span = text_span.trim();
+    let rbql_star_marker = '__RBQL_INTERNAL_STAR';
+    let simple_var_match = /^[_a-zA-Z][_a-zA-Z0-9]*$/.exec(text_span); // a bare identifier
+    let attribute_match = /^([ab])\.([_a-zA-Z][_a-zA-Z0-9]*)$/.exec(text_span); // a or b followed by a dot and an identifier
+    let subscript_int_match = /^([ab])\[([0-9]+)\]$/.exec(text_span); // a or b subscripted with an integer
+    let subscript_str_match = /^([ab])\[___RBQL_STRING_LITERAL([0-9]+)___\]$/.exec(text_span); // a or b subscripted with a string literal placeholder
+    if (simple_var_match !== null) {
+        if (text_span == rbql_star_marker)
+            return {table_name: null, column_index: null, column_name: null, is_star: true};
+        if (text_span.startsWith('___RBQL_STRING_LITERAL')) // the whole item is a string literal placeholder, so no column name is derived
+            return null;
+        let match = /^([ab])([0-9]+)$/.exec(text_span);
+        if (match !== null) {
+            return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false}; // the number in the variable is 1-based, column_index is 0-based
+        }
+        // Some examples for this branch: NR, NF
+        return {table_name: null, column_index: null, column_name: text_span, is_star: false};
+    } else if (attribute_match !== null) {
+        let table_name = attribute_match[1];
+        let column_name = attribute_match[2];
+        if (column_name == rbql_star_marker) {
+            return {table_name: table_name, column_index: null, column_name: null, is_star: true};
+        }
+        return {table_name: null, column_index: null, column_name: column_name, is_star: false}; // only the attribute name is reported here; table_name is left null
+    } else if (subscript_int_match != null) {
+        let table_name = subscript_int_match[1];
+        let column_index = parseInt(subscript_int_match[2]) - 1; // 1-based subscript converted to a 0-based index
+        return {table_name: table_name, column_index: column_index, column_name: null, is_star: false};
+    } else if (subscript_str_match != null) {
+        let table_name = subscript_str_match[1];
+        let replaced_string_literal_id = subscript_str_match[2]; // captured as a string of digits
+        if (replaced_string_literal_id < string_literals.length) { // the digit string is coerced to a number by the comparison
+            let quoted_column_name = string_literals[replaced_string_literal_id];
+            let unquoted_column_name = unquote_string(quoted_column_name);
+            if (unquoted_column_name !== null && unquoted_column_name !== undefined) {
+                return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false};
+            }
+        }
+    }
+    return null; // any other expression: no column info can be derived
+}
+function adhoc_parse_select_expression_to_column_infos(select_expression, string_literals) { // Returns an array with one column info object (or null) per root-level comma-separated item of select_expression.
+    // It is acceptable for the algorithm to provide null column name when it could be theoretically possible to deduce the name.
+    // I.e. this algorithm guarantees precision but doesn't guarantee completeness in all theoretically possible queries.
+    // Although the algorithm should be complete in all practical scenarios, i.e. it should be hard to come up with the query that doesn't produce complete set of column names.
+    // The null column name just means that the output column will be named as col{i}, so the failure to detect the proper column name can be tolerated.
+    // Specifically this function guarantees the following:
+    // 1. The number of column_infos is correct and will match the number of fields in each record in the output - otherwise the exception should be thrown
+    // 2. If column_info at pos j is not null, it is guaranteed to correctly represent that column name in the output
+    let text_spans = parse_root_bracket_level_text_spans(select_expression); // throws RbqlParsingError on unbalanced brackets
+    let column_infos = text_spans.map(ts => column_info_from_text_span(ts, string_literals));
+    return column_infos;
+}
 function stable_compare(a, b) {
@@ -469,10 +575,10 @@ class TopWriter {
         this.top_count = top_count;
     }
-    write(record) {
+    async write(record) {
         if (this.top_count !== null && this.NW >= this.top_count)
             return false;
-        this.subwriter.write(record);
+        await this.subwriter.write(record);
         this.NW += 1;
         return true;
     }
@@ -489,10 +595,10 @@ class UniqWriter {
         this.seen = new Set();
     }
-    write(record) {
+    async write(record) {
         if (!add_to_set(this.seen, JSON.stringify(record)))
             return true;
-        if (!this.subwriter.write(record))
+        if (!await this.subwriter.write(record))
             return false;
         return true;
     }
@@ -509,7 +615,7 @@ class UniqCountWriter {
         this.records = new Map();
     }
-    write(record) {
+    async write(record) {
         var key = JSON.stringify(record);
         var old_val = this.records.get(key);
         if (old_val) {
@@ -524,7 +630,7 @@ class UniqCountWriter {
         for (var [key, value] of this.records) {
             let [count, record] = value;
             record.unshift(count);
-            if (!this.subwriter.write(record))
+            if (!await this.subwriter.write(record))
                 break;
         }
         await this.subwriter.finish();
@@ -539,7 +645,7 @@ class SortedWriter {
         this.unsorted_entries = [];
     }
-    write(stable_entry) {
+    async write(stable_entry) {
         this.unsorted_entries.push(stable_entry);
         return true;
     }
@@ -551,7 +657,7 @@ class SortedWriter {
             unsorted_entries.reverse();
         for (var i = 0; i < unsorted_entries.length; i++) {
             var entry = unsorted_entries[i];
-            if (!this.subwriter.write(entry[entry.length - 1]))
+            if (!await this.subwriter.write(entry[entry.length - 1]))
                 break;
         }
         await this.subwriter.finish();
@@ -575,7 +681,7 @@ class AggregateWriter {
             for (var ag of this.aggregators) {
                 out_fields.push(ag.get_final(key));
             }
-            if (!this.subwriter.write(out_fields))
+            if (!await this.subwriter.write(out_fields))
                 break;
         }
         await this.subwriter.finish();
@@ -635,13 +741,13 @@ function select_except(src, except_fields) {
 }
-function select_simple(sort_key, NR, out_fields) {
+async function select_simple(sort_key, NR, out_fields) {
     if (query_context.sort_key_expression !== null) {
         var sort_entry = sort_key.concat([NR, out_fields]);
-        if (!query_context.writer.write(sort_entry))
+        if (!await query_context.writer.write(sort_entry))
             return false;
     } else {
-        if (!query_context.writer.write(out_fields))
+        if (!await query_context.writer.write(out_fields))
             return false;
     }
     return true;
@@ -683,12 +789,12 @@ function select_aggregated(key, transparent_values) {
 }
-function select_unnested(sort_key, NR, folded_fields) {
+async function select_unnested(sort_key, NR, folded_fields) {
     let out_fields = folded_fields.slice();
     let unnest_pos = folded_fields.findIndex(val => val instanceof UnnestMarker);
     for (var i = 0; i < query_context.unnest_list.length; i++) {
         out_fields[unnest_pos] = query_context.unnest_list[i];
-        if (!select_simple(sort_key, NR, out_fields.slice()))
+        if (!await select_simple(sort_key, NR, out_fields.slice()))
             return false;
     }
     return true;
@@ -705,10 +811,10 @@ if (__RBQLMP__where_expression) {
     } else {
         let sort_key = [__RBQLMP__sort_key_expression];
         if (query_context.unnest_list !== null) {
-            if (!select_unnested(sort_key, NR, out_fields))
+            if (!await select_unnested(sort_key, NR, out_fields))
                 stop_flag = true;
         } else {
-            if (!select_simple(sort_key, NR, out_fields))
+            if (!await select_simple(sort_key, NR, out_fields))
                 stop_flag = true;
         }
     }
@@ -749,7 +855,7 @@ if (join_matches.length == 1 && (__RBQLMP__where_expression)) {
     NU += 1;
     __RBQLMP__update_expressions
 }
-if (!query_context.writer.write(up_fields))
+if (!await query_context.writer.write(up_fields))
     stop_flag = true;
 `;
@@ -761,7 +867,7 @@ if (__RBQLMP__where_expression) {
     NU += 1;
     __RBQLMP__update_expressions
 }
-if (!query_context.writer.write(up_fields))
+if (!await query_context.writer.write(up_fields))
     stop_flag = true;
 `;
@@ -866,7 +972,7 @@ async function compile_and_run(query_context) {
             if (lower_case_query.indexOf(' like ') != -1)
                 throw new SyntaxError(e.message + "\nRBQL doesn't support \"LIKE\" operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... "); // UT JSON
             if (lower_case_query.indexOf(' from ') != -1)
-                throw new SyntaxError(e.message + "\nRBQL doesn't use \"FROM\" keyword, e.g. you can query 'SELECT *' without FROM"); // UT JSON
+                throw new SyntaxError(e.message + "\nTip: If input table is defined by the environment, RBQL query should not have \"FROM\" keyword"); // UT JSON
             if (e && e.message && String(e.message).toLowerCase().indexOf('unexpected identifier') != -1) {
                 if (lower_case_query.indexOf(' and ') != -1)
                     throw new SyntaxError(e.message + "\nDid you use 'and' keyword in your query?\nJavaScript backend doesn't support 'and' keyword, use '&&' operator instead!");
@@ -893,6 +999,7 @@ const ORDER_BY = 'ORDER BY';
 const WHERE = 'WHERE';
 const LIMIT = 'LIMIT';
 const EXCEPT = 'EXCEPT';
+const WITH = 'WITH';
 function get_ambiguous_error_msg(variable_name) {
@@ -925,7 +1032,7 @@ function strip_comments(cline) {
 function combine_string_literals(backend_expression, string_literals) {
     for (var i = 0; i < string_literals.length; i++) {
-        backend_expression = replace_all(backend_expression, `###RBQL_STRING_LITERAL${i}###`, string_literals[i]);
+        backend_expression = replace_all(backend_expression, `___RBQL_STRING_LITERAL${i}___`, string_literals[i]);
     }
     return backend_expression;
 }
@@ -1172,6 +1279,24 @@ function replace_star_vars(rbql_expression) {
 }
+function replace_star_vars_for_header_parsing(rbql_expression) { // Returns rbql_expression where every standalone star item (plain, a-prefixed or b-prefixed) bounded by commas or by the string ends is replaced with its __RBQL_INTERNAL_STAR marker form.
+    let star_rgx = /(?:(?<=^)|(?<=,)) *(\*|a\.\*|b\.\*) *(?=$|,)/g;
+    let matches = get_all_matches(star_rgx, rbql_expression); // helper defined outside this block; presumably yields regex match objects, since the loop reads match.index, match[0] and match[1]
+    let last_pos = 0; // offset just past the previously replaced match
+    let result = '';
+    for (let match of matches) {
+        let star_expression = match[1];
+        let replacement_expression = {'*': '__RBQL_INTERNAL_STAR', 'a.*': 'a.__RBQL_INTERNAL_STAR', 'b.*': 'b.__RBQL_INTERNAL_STAR'}[star_expression];
+        if (last_pos < match.index) // copy the untouched text between the previous match and this one
+            result += rbql_expression.substring(last_pos, match.index);
+        result += replacement_expression; // the spaces matched around the star are dropped together with it
+        last_pos = match.index + match[0].length;
+    }
+    result += rbql_expression.substring(last_pos); // tail after the last match, or the whole string when nothing matched
+    return result;
+}
 function translate_update_expression(update_expression, input_variables_map, string_literals, indent) {
     let first_assignment = str_strip(update_expression.split('=')[0]);
     let first_assignment_error = `Unable to parse "UPDATE" expression: the expression must start with assignment, but "${first_assignment}" does not look like an assignable field name`;
@@ -1203,12 +1328,12 @@ function translate_update_expression(update_expression, input_variables_map, str
 function translate_select_expression(select_expression) {
-    var translated = replace_star_count(select_expression);
-    translated = replace_star_vars(translated);
-    translated = str_strip(translated);
+    let expression_without_stars = replace_star_count(select_expression);
+    let translated = str_strip(replace_star_vars(expression_without_stars));
+    let translated_for_header = str_strip(replace_star_vars_for_header_parsing(expression_without_stars));
     if (!translated.length)
         throw new RbqlParsingError('"SELECT" expression is empty');
-    return `[].concat([${translated}])`;
+    return [`[].concat([${translated}])`, translated_for_header];
 }
@@ -1225,7 +1350,7 @@ function separate_string_literals(rbql_expression) {
         string_literals.push(string_literal);
         var start_index = match_obj.index;
         format_parts.push(rbql_expression.substring(idx_before, start_index));
-        format_parts.push(`###RBQL_STRING_LITERAL${literal_id}###`);
+        format_parts.push(`___RBQL_STRING_LITERAL${literal_id}___`);
         idx_before = rgx.lastIndex;
     }
     format_parts.push(rbql_expression.substring(idx_before));
@@ -1269,8 +1394,13 @@ function locate_statements(rbql_expression) {
 function separate_actions(rbql_expression) {
     rbql_expression = str_strip(rbql_expression);
-    var ordered_statements = locate_statements(rbql_expression);
     var result = {};
+    let with_match = /^(.*)  *[Ww][Ii][Tt][Hh] *\(([a-z]{4,20})\) *$/.exec(rbql_expression);
+    if (with_match !== null) {
+        rbql_expression = with_match[1];
+        result[WITH] = with_match[2];
+    }
+    var ordered_statements = locate_statements(rbql_expression);
     for (var i = 0; i < ordered_statements.length; i++) {
         var statement_start = ordered_statements[i][0];
         var span_start = ordered_statements[i][1];
@@ -1305,7 +1435,7 @@ function separate_actions(rbql_expression) {
         if (statement == SELECT) {
             if (statement_start != 0)
                 throw new RbqlParsingError('SELECT keyword must be at the beginning of the query');
-            var match = /^ *TOP *([0-9]+) /i.exec(span);
+            let match = /^ *TOP *([0-9]+) /i.exec(span);
             if (match !== null) {
                 statement_params['top'] = parseInt(match[1]);
                 span = span.substr(match.index + match[0].length);
@@ -1347,7 +1477,7 @@ function find_top(rb_actions) {
 }
-function translate_except_expression(except_expression, input_variables_map, string_literals) {
+function translate_except_expression(except_expression, input_variables_map, string_literals, input_header) {
     let skip_vars = except_expression.split(',');
     skip_vars = skip_vars.map(str_strip);
     let skip_indices = [];
@@ -1358,8 +1488,9 @@ function translate_except_expression(except_expression, input_variables_map, str
         skip_indices.push(input_variables_map[var_name].index);
     }
     skip_indices = skip_indices.sort((a, b) => a - b);
+    let output_header = input_header === null ? null : select_except(input_header, skip_indices);
     let indices_str = skip_indices.join(',');
-    return `select_except(record_a, [${indices_str}])`;
+    return [output_header, `select_except(record_a, [${indices_str}])`];
 }
@@ -1428,7 +1559,7 @@ class HashJoinMap {
 function cleanup_query(query_text) {
+    // Runs strip_comments on every line of the query, drops lines that end up empty and joins the rest with single spaces.
+    // The new version additionally removes any run of semicolons at the very end of the resulting text.
-    return query_text.split('\n').map(strip_comments).filter(line => line.length).join(' ');
+    return query_text.split('\n').map(strip_comments).filter(line => line.length).join(' ').replace(/;+$/g, '');
 }
@@ -1439,6 +1570,44 @@ function remove_redundant_table_name(query_text) {
 }
+// Builds the list of output column names for a SELECT query.
+// Returns null when both `input_header` and `join_header` are null; if only one of them is null it is treated as an empty list.
+// Each entry of `query_column_infos` contributes as follows:
+//   - null entry: placeholder name 'colN', where N is the 1-based position of the new column in the header built so far;
+//   - star entry (`is_star`): all names of the input header, the join header, or both, depending on `table_name` ('a', 'b' or null);
+//   - entry with a non-null `column_name`: that name is used as is;
+//   - entry with a non-null `column_index`: the name found at that index in the 'a' (input) or 'b' (join) header,
+//     or the 'colN' placeholder when the table name is something else or the index is out of range.
+function select_output_header(input_header, join_header, query_column_infos) {
+    if (input_header === null && join_header === null)
+        return null;
+    if (input_header === null)
+        input_header = [];
+    if (join_header === null)
+        join_header = [];
+    let output_header = [];
+    for (let qci of query_column_infos) {
+        // TODO refactor this and the Python version: extract this code into a function that always returns something
+        if (qci === null) {
+            output_header.push('col' + (output_header.length + 1));
+        } else if (qci.is_star) {
+            if (qci.table_name === null) {
+                output_header = output_header.concat(input_header).concat(join_header);
+            } else if (qci.table_name === 'a') {
+                output_header = output_header.concat(input_header);
+            } else if (qci.table_name === 'b') {
+                output_header = output_header.concat(join_header);
+            }
+        } else if (qci.column_name !== null) {
+            output_header.push(qci.column_name);
+        } else if (qci.column_index !== null) {
+            if (qci.table_name == 'a' && qci.column_index < input_header.length) {
+                output_header.push(input_header[qci.column_index]);
+            } else if (qci.table_name == 'b' && qci.column_index < join_header.length) {
+                output_header.push(join_header[qci.column_index]);
+            } else {
+                output_header.push('col' + (output_header.length + 1));
+            }
+        } else { // Should never happen
+            output_header.push('col' + (output_header.length + 1));
+        }
+    }
+    return output_header;
+}
 function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
     let keys = Object.keys(inconsistent_records_info);
     let entries = [];
@@ -1468,16 +1637,22 @@ class RBQLInputIterator {
     async get_record() {
         throw new Error("Unable to call the interface method");
     }
+    // Hook called by shallow_parse_input_query() on the input and join iterators with the modifier name captured from a trailing `WITH (...)` clause.
+    // The base implementation ignores the modifier.
+    handle_query_modifier() {
+        return; // Reimplement if you need to handle a boolean query modifier that can be used like this: `SELECT * WITH (modifiername)`
+    }
     get_warnings() {
         return []; // Reimplement if your class can produce warnings
     }
+    // Resolves to the header of this table, or null when none is available (the default).
+    // shallow_parse_input_query() awaits this value and uses it to build the output header.
+    async get_header() {
+        return null; // Reimplement if your class can provide input header
+    }
 }
 class RBQLOutputWriter {
     constructor(){}
+    // Interface method: subclasses must override it. Because the new version is async, the error below surfaces as a rejected promise.
-    write(fields) {
+    async write(fields) {
         throw new Error("Unable to call the interface method");
     }
@@ -1488,6 +1663,10 @@ class RBQLOutputWriter {
     get_warnings() {
         return []; // Reimplement if your class can produce warnings
     };
+    // Called by shallow_parse_input_query() with the computed output header (which may be null). Ignored by default.
+    set_header() {
+        return; // Reimplement if your class can handle output headers in a meaningful way
+    }
 }
@@ -1558,6 +1737,10 @@ class TableIterator extends RBQLInputIterator {
             return [make_inconsistent_num_fields_warning('input', this.fields_info)];
         return [];
     };
+    // Resolves to this.column_names (presumably the column names given to the constructor; verify against the constructor).
+    async get_header() {
+        return this.column_names;
+    }
 }
@@ -1565,12 +1748,17 @@ class TableWriter extends RBQLOutputWriter {
     constructor(external_table) {
         super();
+        // Output records are pushed directly into the caller-provided array.
         this.table = external_table;
+        // Stays null until set_header() is called.
+        this.header = null;
     }
+    // Appends one output record to the external table and resolves to true.
-    write(fields) {
+    async write(fields) {
         this.table.push(fields);
         return true;
     };
+    // Remembers the output header (may be null); query_table() later copies it into `output_column_names`.
+    set_header(header) {
+        this.header = header;
+    }
 }
@@ -1595,9 +1783,12 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
     query_text = cleanup_query(query_text);
     var [format_expression, string_literals] = separate_string_literals(query_text);
     format_expression = remove_redundant_table_name(format_expression);
-    var input_variables_map = await input_iterator.get_variables_map(query_text);
     var rb_actions = separate_actions(format_expression);
+    if (rb_actions.hasOwnProperty(WITH)) {
+        input_iterator.handle_query_modifier(rb_actions[WITH]);
+    }
+    var input_variables_map = await input_iterator.get_variables_map(query_text);
     if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE))
         throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries');
@@ -1609,6 +1800,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
     }
     let join_variables_map = null;
+    let join_header = null;
     if (rb_actions.hasOwnProperty(JOIN)) {
         var [rhs_table_id, variable_pairs] = parse_join_expression(rb_actions[JOIN]['text']);
         if (join_tables_registry === null)
@@ -1616,7 +1808,11 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
         let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id);
         if (!join_record_iterator)
             throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`);
+        if (rb_actions.hasOwnProperty(WITH)) {
+            join_record_iterator.handle_query_modifier(rb_actions[WITH]);
+        }
         join_variables_map = await join_record_iterator.get_variables_map(query_text);
+        join_header = await join_record_iterator.get_header();
         let [lhs_variables, rhs_indices] = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals);
         let sql_join_type = {'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'LEFT OUTER JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}[rb_actions[JOIN]['join_subtype']];
         query_context.lhs_join_var_expression = lhs_variables.length == 1 ? lhs_variables[0] : 'JSON.stringify([' + lhs_variables.join(',') + '])';
@@ -1634,26 +1830,33 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
         query_context.where_expression = combine_string_literals(where_expression, string_literals);
     }
+    let input_header = await input_iterator.get_header();
     if (rb_actions.hasOwnProperty(UPDATE)) {
         var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals, ' '.repeat(8));
         query_context.update_expressions = combine_string_literals(update_expression, string_literals);
+        query_context.writer.set_header(input_header);
     }
     if (rb_actions.hasOwnProperty(SELECT)) {
         query_context.top_count = find_top(rb_actions);
-        query_context.writer = new TopWriter(query_context.writer, query_context.top_count);
+        if (rb_actions.hasOwnProperty(EXCEPT)) {
+            let [output_header, select_expression] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_header);
+            query_context.select_expression = select_expression;
+            query_context.writer.set_header(output_header);
+        } else {
+            let [select_expression, select_expression_for_ast] = translate_select_expression(rb_actions[SELECT]['text']);
+            query_context.select_expression = combine_string_literals(select_expression, string_literals);
+            let column_infos = adhoc_parse_select_expression_to_column_infos(select_expression_for_ast, string_literals);
+            let output_header = select_output_header(input_header, join_header, column_infos);
+            query_context.writer.set_header(output_header);
+        }
+        query_context.writer = new TopWriter(query_context.writer, query_context.top_count);
         if (rb_actions[SELECT].hasOwnProperty('distinct_count')) {
             query_context.writer = new UniqCountWriter(query_context.writer);
         } else if (rb_actions[SELECT].hasOwnProperty('distinct')) {
             query_context.writer = new UniqWriter(query_context.writer);
         }
-        if (rb_actions.hasOwnProperty(EXCEPT)) {
-            query_context.select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals);
-        } else {
-            let select_expression = translate_select_expression(rb_actions[SELECT]['text']);
-            query_context.select_expression = combine_string_literals(select_expression, string_literals);
-        }
     }
     if (rb_actions.hasOwnProperty(ORDER_BY)) {
@@ -1676,13 +1879,21 @@ async function query(query_text, input_iterator, output_writer, output_warnings,
 }
+// Runs `query_text` against the in-memory `input_table` and pushes the resulting records into `output_table`.
+// Parameters:
+//   output_warnings - passed through to query().
+//   join_table - optional second table for JOIN queries; when null no join registry is created.
+//   input_column_names, join_column_names - optional column names of the input and join tables that can be used in the query.
+//   output_column_names - optional array that must be empty on entry; the output column names are pushed into it after the query completes.
+//   normalize_column_names - forwarded to TableIterator and SingleTableRegistry; when false and both name lists are given, ensure_no_ambiguous_variables() is called first.
+//   user_init_code - passed through to query().
+// NOTE(review): `output_column_names` was inserted before `normalize_column_names`, so callers that passed
+// `normalize_column_names` / `user_init_code` positionally must be updated.
-async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, normalize_column_names=true, user_init_code='') {
+async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true, user_init_code='') {
     if (!normalize_column_names && input_column_names !== null && join_column_names !== null)
         ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names);
     let input_iterator = new TableIterator(input_table, input_column_names, normalize_column_names);
     let output_writer = new TableWriter(output_table);
     let join_tables_registry = join_table === null ? null : new SingleTableRegistry(join_table, join_column_names, normalize_column_names);
     await query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
+    if (output_column_names !== null) {
+        assert(output_column_names.length == 0, '`output_column_names` param must be an empty list or null');
+        // The writer's header stays null when no header was set during the query; in that case the array is left empty.
+        if (output_writer.header !== null) {
+            for (let column_name of output_writer.header) {
+                output_column_names.push(column_name);
+            }
+        }
+    }
 }
@@ -1716,6 +1927,7 @@ exports.exception_to_error_info = exception_to_error_info;
 // The functions below are exported just for unit tests, they are not part of the rbql API
+// TODO export these through a special unit_test proxy, e.g. exports.unit_test.parse_basic_variables = parse_basic_variables;
 exports.parse_basic_variables = parse_basic_variables;
 exports.parse_array_variables = parse_array_variables;
 exports.parse_dictionary_variables = parse_dictionary_variables;
@@ -1725,11 +1937,15 @@ exports.strip_comments = strip_comments;
 exports.separate_actions = separate_actions;
 exports.separate_string_literals = separate_string_literals;
 exports.combine_string_literals = combine_string_literals;
-exports.translate_except_expression = translate_except_expression;
 exports.parse_join_expression = parse_join_expression;
 exports.resolve_join_variables = resolve_join_variables;
 exports.translate_update_expression = translate_update_expression;
 exports.translate_select_expression = translate_select_expression;
+exports.translate_except_expression = translate_except_expression;
 exports.like_to_regex = like_to_regex;
+exports.adhoc_parse_select_expression_to_column_infos = adhoc_parse_select_expression_to_column_infos;
+exports.replace_star_count = replace_star_count;
+exports.replace_star_vars_for_header_parsing = replace_star_vars_for_header_parsing;
+exports.select_output_header = select_output_header;
 }(typeof exports === 'undefined' ? this.rbql = {} : exports));

package/rbql_csv.js CHANGED Viewed

@@ -117,11 +117,18 @@ function get_index_record(index_path, key) {
 }
-function find_table_path(table_id) {
+function find_table_path(main_table_dir, table_id) {
+    // If table_id is a relative path it could be relative either to the current directory or to the main table dir.
     var candidate_path = expanduser(table_id);
     if (fs.existsSync(candidate_path)) {
         return candidate_path;
     }
+    if (main_table_dir && !path.isAbsolute(candidate_path)) {
+        candidate_path = path.join(main_table_dir, candidate_path);
+        if (fs.existsSync(candidate_path)) {
+            return candidate_path;
+        }
+    }
     let table_names_settings_path = path.join(os.homedir(), '.rbql_table_names');
     var name_record = get_index_record(table_names_settings_path, table_id);
     if (name_record && name_record.length > 1 && fs.existsSync(name_record[1])) {
@@ -152,10 +159,6 @@ class RecordQueue {
         }
         return this.pull_stack.pop();
     }
-    return_to_pull_stack(record) {
-        this.pull_stack.push(record);
-    }
 }
@@ -163,7 +166,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
     // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
     // get_record() - consumer
     // stream.on('data') - producer
-    constructor(stream, csv_path, encoding, delim, policy, skip_headers=false, comment_prefix=null, table_name='input', variable_prefix='a') {
+    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
         super();
         this.stream = stream;
         this.csv_path = csv_path;
@@ -171,7 +174,12 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         this.encoding = encoding;
         this.delim = delim;
         this.policy = policy;
-        this.skip_headers = skip_headers;
+        this.has_header = has_header;
+        this.first_record = null;
+        this.first_record_should_be_emitted = !has_header;
+        this.header_preread_complete = false;
         this.table_name = table_name;
         this.variable_prefix = variable_prefix;
         this.comment_prefix = (comment_prefix !== null && comment_prefix.length) ? comment_prefix : null;
@@ -203,9 +211,13 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         this.rfc_line_buffer = [];
         this.partially_decoded_line = '';
+        this.partially_decoded_line_ends_with_cr = false;
+        // Holds an external "resolve" function which is called when everything is fine.
         this.resolve_current_record = null;
+        // Holds an external "reject" function which is called when an error has occurred.
         this.reject_current_record = null;
+        // Holds last exception if we don't have any reject callbacks from clients yet.
         this.current_exception = null;
         this.produced_records_queue = new RecordQueue();
@@ -213,27 +225,56 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line;
     }
-    handle_exception(exception) {
-        if (this.reject_current_record) {
+    // Overrides the header setting given to the constructor according to the query modifier:
+    //   'header' / 'headers'     - the first record is treated as a header and is not emitted as data;
+    //   'noheader' / 'noheaders' - the first record is treated as data and is emitted.
+    // Any other modifier value leaves the iterator unchanged.
+    handle_query_modifier(modifier) {
+        // For `... WITH (header) ...` syntax
+        if (['header', 'headers'].indexOf(modifier) != -1) {
+            this.has_header = true;
+            this.first_record_should_be_emitted = false;
+        }
+        if (['noheader', 'noheaders'].indexOf(modifier) != -1) {
+            this.has_header = false;
+            this.first_record_should_be_emitted = true;
+        }
+    }
+    // Forgets the resolve/reject pair stored by the pending get_record() call.
+    reset_external_callbacks() {
+        // Drop external callbacks simultaneously since promises can only resolve once, see: https://stackoverflow.com/a/18218542/2898283
+        this.reject_current_record = null;
+        this.resolve_current_record = null;
+    }
+    try_propagate_exception() {
+        if (this.current_exception && this.reject_current_record) {
             let reject = this.reject_current_record;
-            this.reject_current_record = null;
-            this.resolve_current_record = null;
+            let exception = this.current_exception;
+            this.reset_external_callbacks();
+            this.current_exception = null;
             reject(exception);
-        } else {
-            this.current_exception = exception;
         }
+    }
+    // Records `exception` (unless an earlier one is still unreported) and then tries to deliver the stored exception
+    // to a waiting get_record() caller via try_propagate_exception().
+    store_or_propagate_exception(exception) {
+        if (this.current_exception === null)
+            // Ignore subsequent exceptions if we already have an unreported error. This way we prioritize earlier errors over the more recent ones.
+            this.current_exception = exception;
+        this.try_propagate_exception();
     }
-    async preread_header() {
-        let header_record = await this.get_record();
-        if (header_record === null)
-            return null;
-        if (!this.skip_headers)
-            this.produced_records_queue.return_to_pull_stack(header_record);
+    async preread_first_record() {
+        if (this.header_preread_complete)
+            return;
+        this.first_record = await this.get_record();
+        this.header_preread_complete = true; // We must set header_preread_complete to true after calling get_record(), because get_record() uses it internally.
+        if (this.first_record === null) {
+            return;
+        }
         if (this.stream)
             this.stream.pause();
-        return header_record.slice();
+        this.first_record = this.first_record.slice();
     };
@@ -242,24 +283,37 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
         rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
         rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
-        let header_record = await this.preread_header(); // TODO optimize: do not start the stream if query_text doesn't seem to have dictionary or attribute -looking patterns
-        if (header_record) {
-            rbql.parse_attribute_variables(query_text, this.variable_prefix, header_record, 'CSV header line', variable_map);
-            rbql.parse_dictionary_variables(query_text, this.variable_prefix, header_record, variable_map);
+        await this.preread_first_record();
+        if (this.has_header && this.first_record) {
+            rbql.parse_attribute_variables(query_text, this.variable_prefix, this.first_record, 'CSV header line', variable_map);
+            rbql.parse_dictionary_variables(query_text, this.variable_prefix, this.first_record, variable_map);
         }
         return variable_map;
     };
+    // Makes sure the first record has been pre-read, then resolves to it when the table has a header, otherwise to null.
+    // The result is also null when has_header is set but no first record was read (first_record stays null).
+    async get_header() {
+        await this.preread_first_record();
+        return this.has_header ? this.first_record : null;
+    }
     try_resolve_next_record() {
+        // Settles the pending get_record() promise if that is currently possible; otherwise leaves it pending.
+        // A stored exception takes priority: propagating it rejects the promise and clears the callbacks, so the null check below returns.
+        this.try_propagate_exception();
         if (this.resolve_current_record === null)
             return;
-        let record = this.produced_records_queue.dequeue();
+        let record = null;
+        // The pre-read first record is handed out exactly once, and only when it is data rather than a header.
+        if (this.first_record_should_be_emitted && this.header_preread_complete) {
+            this.first_record_should_be_emitted = false;
+            record = this.first_record;
+        } else {
+            record = this.produced_records_queue.dequeue();
+        }
+        // No record available yet and the input is not exhausted: keep the promise pending.
         if (record === null && !this.input_exhausted)
             return;
         let resolve = this.resolve_current_record;
-        this.resolve_current_record = null;
-        this.reject_current_record = null;
+        this.reset_external_callbacks();
         resolve(record);
     };
@@ -275,9 +329,6 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
             parent_iterator.resolve_current_record = resolve;
             parent_iterator.reject_current_record = reject;
         });
-        if (this.current_exception) {
-            this.reject_current_record(this.current_exception);
-        }
         this.try_resolve_next_record();
         return current_record_promise;
     };
@@ -308,7 +359,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
             if (this.first_defective_line === null) {
                 this.first_defective_line = this.NL;
                 if (this.policy == 'quoted_rfc')
-                    this.handle_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`));
+                    this.store_or_propagate_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`));
             }
         }
         let num_fields = record.length;
@@ -359,19 +410,23 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
                 decoded_string = this.decoder.decode(data_chunk);
             } catch (e) {
                 if (e instanceof TypeError) {
-                    this.handle_exception(new RbqlIOHandlingError(utf_decoding_error));
+                    this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
                 } else {
-                    this.handle_exception(e);
+                    this.store_or_propagate_exception(e);
                 }
                 return;
             }
         } else {
             decoded_string = data_chunk.toString(this.encoding);
         }
+        let line_starts_with_lf = decoded_string.length && decoded_string[0] == '\n';
+        let first_line_index = line_starts_with_lf && this.partially_decoded_line_ends_with_cr ? 1 : 0;
+        this.partially_decoded_line_ends_with_cr = decoded_string.length && decoded_string[decoded_string.length - 1] == '\r';
         let lines = csv_utils.split_lines(decoded_string);
         lines[0] = this.partially_decoded_line + lines[0];
+        assert(first_line_index == 0 || lines[0].length == 0);
         this.partially_decoded_line = lines.pop();
-        for (let i = 0; i < lines.length; i++) {
+        for (let i = first_line_index; i < lines.length; i++) {
             this.process_line(lines[i]);
         }
     };
@@ -384,7 +439,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
             // TODO get rid of this once TextDecoder is really fixed or when alternative method of reliable decoding appears
             let control_buffer = Buffer.from(decoded_string, 'utf-8');
             if (Buffer.compare(data_chunk, control_buffer) != 0) {
-                this.handle_exception(new RbqlIOHandlingError(utf_decoding_error));
+                this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
                 return;
             }
         }
@@ -465,6 +520,7 @@ class CSVWriter extends rbql.RBQLOutputWriter {
         this.encoding = encoding;
         if (encoding)
             this.stream.setDefaultEncoding(encoding);
+        this.stream.on('error', (error_obj) => { this.store_first_error(error_obj); })
         this.delim = delim;
         this.policy = policy;
         this.line_separator = line_separator;
@@ -474,6 +530,8 @@ class CSVWriter extends rbql.RBQLOutputWriter {
         this.null_in_output = false;
         this.delim_in_simple_output = false;
+        this.header_len = null;
+        this.first_error = null;
         if (policy == 'simple') {
             this.polymorphic_join = this.simple_join;
@@ -491,6 +549,20 @@ class CSVWriter extends rbql.RBQLOutputWriter {
     }
+    // Handler for the output stream's 'error' event (registered in the constructor); later errors are dropped.
+    store_first_error(error_obj) {
+        // Store only first error because it is typically more important than the subsequent ones.
+        if (this.first_error === null)
+            this.first_error = error_obj;
+    }
+    // Writes the header as the first output line and remembers its length, which write() uses to validate every later record.
+    // A null header is ignored, so no length check is applied afterwards.
+    set_header(header) {
+        if (header !== null) {
+            this.header_len = header.length;
+            // NOTE(review): write() is async and its promise is neither awaited nor given a rejection handler here,
+            // so a rejection (e.g. an already recorded stream error) would go unhandled - confirm this is intended.
+            this.write(header);
+        }
+    }
     quoted_join(fields) {
         let delim = this.delim;
         var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
@@ -535,11 +607,20 @@ class CSVWriter extends rbql.RBQLOutputWriter {
     };
+    // Writes one record to the output stream: the joined fields followed by the line separator.
+    // The returned promise rejects with RbqlIOHandlingError when a header was set and the record has a different number of fields,
+    // rejects with the first recorded stream error if there is one, and otherwise resolves to true.
-    write(fields) {
+    async write(fields) {
+        if (this.header_len !== null && fields.length != this.header_len)
+            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
         this.normalize_fields(fields);
         this.stream.write(this.polymorphic_join(fields));
         this.stream.write(this.line_separator);
-        return true;
+        // first_error is sampled right after the stream.write() calls; an error recorded later is only visible to a later call.
+        let writer_error = this.first_error;
+        return new Promise(function(resolve, reject) {
+            if (writer_error !== null) {
+                reject(writer_error);
+            } else {
+                resolve(true);
+            }
+        });
     };
@@ -554,7 +635,11 @@ class CSVWriter extends rbql.RBQLOutputWriter {
         let close_stream_on_finish = this.close_stream_on_finish;
         let output_stream = this.stream;
         let output_encoding = this.encoding;
+        let writer_error = this.first_error;
         let finish_promise = new Promise(function(resolve, reject) {
+            if (writer_error !== null) {
+                reject(writer_error);
+            }
             if (close_stream_on_finish) {
                 output_stream.end('', output_encoding, () => { resolve(); });
             } else {
@@ -578,12 +663,13 @@ class CSVWriter extends rbql.RBQLOutputWriter {
 class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
-    constructor(delim, policy, encoding, skip_headers=false, comment_prefix=null, options=null) {
+    constructor(input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
         super();
+        this.input_file_dir = input_file_dir;
         this.delim = delim;
         this.policy = policy;
         this.encoding = encoding;
-        this.skip_headers = skip_headers;
+        this.has_header = has_header;
         this.comment_prefix = comment_prefix;
         this.stream = null;
         this.record_iterator = null;
@@ -594,7 +680,7 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
     }
     get_iterator_by_table_id(table_id) {
-        this.table_path = find_table_path(table_id);
+        this.table_path = find_table_path(this.input_file_dir, table_id);
         if (this.table_path === null) {
             throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
         }
@@ -603,19 +689,19 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
         } else {
             this.stream = fs.createReadStream(this.table_path);
         }
-        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.skip_headers, this.comment_prefix, table_id, 'b');
+        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
         return this.record_iterator;
     };
     get_warnings(output_warnings) {
+        // Appends a warning to `output_warnings` when a join iterator was created and the registry was configured with has_header.
+        // NOTE(review): this checks the registry's own has_header flag, not the iterator's, so a `WITH (noheader)` modifier applied to the iterator is not reflected here - confirm.
-        if (this.record_iterator && this.skip_headers) {
-            output_warnings.push(`The first (header) record was also skipped in the JOIN file: ${path.basename(this.table_path)}`);
+        if (this.record_iterator && this.has_header) {
+            output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
         }
     }
 }
-async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, skip_headers=false, comment_prefix=null, user_init_code='', options=null) {
+async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
     let input_stream = null;
     let bulk_input_path = null;
     if (options && options['bulk_read'] && input_path) {
@@ -624,6 +710,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
         input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
     }
     let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
+    // FIXME add on(error) handler to avoid async errors, see https://github.com/nodejs/node-v0.x-archive/issues/406
     if (input_delim == '"' && input_policy == 'quoted')
         throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
     if (csv_encoding == 'latin-1')
@@ -637,9 +724,9 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
     if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
         user_init_code = read_user_init_code(default_init_source_path);
     }
-    let join_tables_registry = new FileSystemCSVRegistry(input_delim, input_policy, csv_encoding, skip_headers, comment_prefix, options);
-    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, skip_headers, comment_prefix);
+    let input_file_dir = input_path ? path.dirname(input_path) : null;
+    let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
+    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
     let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
     await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);