rbql 0.19.3 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -36
- package/cli_rbql.js +9 -9
- package/package.json +1 -1
- package/rbql.js +260 -44
- package/rbql_csv.js +134 -47
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ The following two functions are avilable in the browser version:
|
|
|
32
32
|
Run user query against input array of records and put the result set in the output array:
|
|
33
33
|
|
|
34
34
|
```
|
|
35
|
-
async function query_table(user_query, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, normalize_column_names=true)
|
|
35
|
+
async function query_table(user_query, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true)
|
|
36
36
|
```
|
|
37
37
|
|
|
38
38
|
#### Parameters:
|
|
@@ -51,6 +51,8 @@ async function query_table(user_query, input_table, output_table, output_warning
|
|
|
51
51
|
Names of _input_table_ columns which users of the app can use in their queries
|
|
52
52
|
* _join_column_names_: **array**
|
|
53
53
|
Names of _join_table_ columns which users of the app can use in their queries
|
|
54
|
+
* _output_column_names_: **array**
|
|
55
|
+
Output column names will be stored in this array after the query completion.
|
|
54
56
|
* _normalize_column_names_: **boolean**
|
|
55
57
|
If set to true - column names provided with _input_column_names_ and _join_column_names_ will be normalized to "a" and "b" prefix forms e.g. "Age" -> "a.Age", "Sale price" -> "b['Sale price']".
|
|
56
58
|
If set to false - column names can be used in user queries "as is".
|
|
@@ -137,7 +139,7 @@ The following 3 functions are avilable in Node version:
|
|
|
137
139
|
Run user query against input_path CSV file and save it as output_path CSV file.
|
|
138
140
|
|
|
139
141
|
```
|
|
140
|
-
async function rbql.query_csv(user_query, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings)
|
|
142
|
+
async function rbql.query_csv(user_query, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null)
|
|
141
143
|
```
|
|
142
144
|
|
|
143
145
|
#### Parameters:
|
|
@@ -161,6 +163,10 @@ async function rbql.query_csv(user_query, input_path, input_delim, input_policy,
|
|
|
161
163
|
encoding of input, output and join tables (join table can be defined inside the user query)
|
|
162
164
|
* _output_warnings_: **array**
|
|
163
165
|
Warnings will be stored here after the query completion. If no warnings - the array would be empty
|
|
166
|
+
* _with_headers_: **boolean**
|
|
167
|
+
If set to `true`, treat the first record in the input (and join) file as a header.
|
|
168
|
+
* _comment_prefix_: **string**
|
|
169
|
+
Treat lines starting with the prefix as comments and skip them.
|
|
164
170
|
|
|
165
171
|
|
|
166
172
|
## Usage:
|
|
@@ -236,16 +242,14 @@ $ rbql-js --input input.csv --output result.csv
|
|
|
236
242
|
### Main Features
|
|
237
243
|
|
|
238
244
|
* Use JavaScript expressions inside _SELECT_, _UPDATE_, _WHERE_ and _ORDER BY_ statements
|
|
239
|
-
*
|
|
240
|
-
*
|
|
241
|
-
*
|
|
242
|
-
* Each record has a unique NR (record number) identifier
|
|
245
|
+
* Supports multiple input formats
|
|
246
|
+
* Result set of any query immediately becomes a first-class table on its own
|
|
247
|
+
* No need to provide FROM statement in the query when the input table is defined by the current context.
|
|
243
248
|
* Supports all main SQL keywords
|
|
244
249
|
* Supports aggregate functions and GROUP BY queries
|
|
245
|
-
* Provides some new useful query modes which traditional SQL engines do not have
|
|
246
|
-
* Supports both _TOP_ and _LIMIT_ keywords
|
|
247
250
|
* Supports user-defined functions (UDF)
|
|
248
|
-
*
|
|
251
|
+
* Provides some new useful query modes which traditional SQL engines do not have
|
|
252
|
+
* Lightweight, dependency-free, works out of the box
|
|
249
253
|
|
|
250
254
|
#### Limitations:
|
|
251
255
|
|
|
@@ -284,24 +288,15 @@ RBQL for CSV files provides the following variables which you can use in your qu
|
|
|
284
288
|
Description: Number of fields in the current record
|
|
285
289
|
* _a.name_, _b.Person_age_, ... _a.{Good_alphanumeric_column_name}_
|
|
286
290
|
Variable type: **string**
|
|
287
|
-
Description: Value of the field referenced by it's "name". You can use this notation if the field in the
|
|
291
|
+
Description: Value of the field referenced by its "name". You can use this notation if the field in the header has a "good" alphanumeric name
|
|
288
292
|
* _a["object id"]_, _a['9.12341234']_, _b["%$ !! 10 20"]_ ... _a["Arbitrary column name!"]_
|
|
289
293
|
Variable type: **string**
|
|
290
|
-
Description: Value of the field referenced by it's "name". You can use this notation to reference fields by arbitrary values in the
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
#### Notes:
|
|
294
|
-
* You can mix all variable types in a single query, example:
|
|
295
|
-
```select a1, b2 JOIN /path/to/b.csv ON a['Item Id'] == b.Identifier WHERE NR > 1 and parseInt(a.Weight) * 100 > parseInt(b["weight of the item"])```
|
|
296
|
-
* Referencing fields by header names does not automatically skip the header line (you can use `where NR > 1` trick to skip it)
|
|
297
|
-
* If you want to use RBQL as a library for your own app you can define your own custom variables and do not have to support the above mentioned CSV-related variables.
|
|
294
|
+
Description: Value of the field referenced by its "name". You can use this notation to reference fields by arbitrary values in the header
|
|
298
295
|
|
|
299
296
|
|
|
300
297
|
### UPDATE statement
|
|
301
298
|
|
|
302
|
-
_UPDATE_ query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query.
|
|
303
|
-
_UPDATE SET_ is synonym to _UPDATE_, because in RBQL there is no need to specify the source table.
|
|
304
|
-
|
|
299
|
+
_UPDATE_ query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query.
|
|
305
300
|
|
|
306
301
|
### Aggregate functions and queries
|
|
307
302
|
|
|
@@ -316,11 +311,11 @@ There is a workaround for the limitation above for _ARRAY_AGG_ function which su
|
|
|
316
311
|
|
|
317
312
|
### JOIN statements
|
|
318
313
|
|
|
319
|
-
Join table B can be referenced either by
|
|
320
|
-
RBQL supports _STRICT LEFT JOIN_ which is like _LEFT JOIN_, but generates an error if any key in left table "A" doesn't have exactly one matching key in the right table "B".
|
|
314
|
+
Join table B can be referenced either by its file path or by its name - an arbitrary string which the user should provide before executing the JOIN query.
|
|
315
|
+
RBQL supports _STRICT LEFT JOIN_ which is like _LEFT JOIN_, but generates an error if any key in the left table "A" doesn't have exactly one matching key in the right table "B".
|
|
316
|
+
Table B path can be either relative to the working dir, relative to the main table or absolute.
|
|
321
317
|
Limitation: _JOIN_ statements can't contain JavaScript expressions and must have the following form: _<JOIN\_KEYWORD> (/path/to/table.tsv | table_name ) ON a... == b... [AND a... == b... [AND ... ]]_
|
|
322
318
|
|
|
323
|
-
|
|
324
319
|
### SELECT EXCEPT statement
|
|
325
320
|
|
|
326
321
|
SELECT EXCEPT can be used to select everything except specific columns. E.g. to select everything but columns 2 and 4, run: `SELECT * EXCEPT a2, a4`
|
|
@@ -337,6 +332,12 @@ RBQL does not support LIKE operator, instead it provides "like()" function which
|
|
|
337
332
|
`SELECT * where like(a1, 'foo%bar')`
|
|
338
333
|
|
|
339
334
|
|
|
335
|
+
### WITH (header) and WITH (noheader) statements
|
|
336
|
+
You can set whether the input (and join) CSV file has a header or not using the environment configuration parameters, which could be the `--with-headers` CLI flag, a GUI checkbox, or something else.
|
|
337
|
+
But it is also possible to override this selection directly in the query by adding either `WITH (header)` or `WITH (noheader)` statement at the end of the query.
|
|
338
|
+
Example: `select top 5 NR, * with (header)`
|
|
339
|
+
|
|
340
|
+
|
|
340
341
|
### User Defined Functions (UDF)
|
|
341
342
|
|
|
342
343
|
RBQL supports User Defined Functions
|
|
@@ -346,8 +347,8 @@ You can define custom functions and/or import libraries in a special file: `~/.r
|
|
|
346
347
|
## Examples of RBQL queries
|
|
347
348
|
|
|
348
349
|
* `select top 100 a1, a2 * 10, a4.length where a1 == "Buy" order by parseInt(a2) desc`
|
|
349
|
-
* `select * order by Math.random()
|
|
350
|
-
* `select top 20 a.vehicle_price.length / 10, a2 where
|
|
350
|
+
* `select * order by Math.random()` - random sort
|
|
351
|
+
* `select top 20 a.vehicle_price.length / 10, a2 where parseInt(a.vehicle_price) < 500 && ["car", "plane", "boat"].indexOf(a['Vehicle type']) > -1 limit 20` - referencing columns by names from header
|
|
351
352
|
* `update set a3 = 'NPC' where a3.indexOf('Non-playable character') != -1`
|
|
352
353
|
* `select NR, *` - enumerate records, NR is 1-based
|
|
353
354
|
* `select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3` - example of join query
|
|
@@ -355,16 +356,6 @@ You can define custom functions and/or import libraries in a special file: `~/.r
|
|
|
355
356
|
* `select ...a1.split(':')` - Using JS "destructuring assignment" syntax to split one column into many. Do not try this with other SQL engines!
|
|
356
357
|
|
|
357
358
|
|
|
358
|
-
### FAQ
|
|
359
|
-
|
|
360
|
-
#### How do I skip header record in CSV files?
|
|
361
|
-
|
|
362
|
-
You can use the following trick: add `... where NR > 1 ...` to your query
|
|
363
|
-
|
|
364
|
-
And if you are doing math operation you can modify your query like this, example:
|
|
365
|
-
`select parseInt(a3) * 1000, a2` -> `select NR > 1 ? parseInt(a3) * 1000 : a3, a2`
|
|
366
|
-
|
|
367
|
-
|
|
368
359
|
### References
|
|
369
360
|
|
|
370
361
|
* [RBQL: Official Site](https://rbql.org/)
|
package/cli_rbql.js
CHANGED
|
@@ -158,7 +158,7 @@ async function autodetect_delim_policy(table_path) {
|
|
|
158
158
|
}
|
|
159
159
|
|
|
160
160
|
|
|
161
|
-
function print_colorized(records, delim, show_column_names,
|
|
161
|
+
function print_colorized(records, delim, show_column_names, with_headers) {
|
|
162
162
|
let reset_color_code = '\x1b[0m';
|
|
163
163
|
let color_codes = ['\x1b[0m', '\x1b[31m', '\x1b[32m', '\x1b[33m', '\x1b[34m', '\x1b[35m', '\x1b[36m', '\x1b[31;1m', '\x1b[32;1m', '\x1b[33;1m'];
|
|
164
164
|
for (let r = 0; r < records.length; r++) {
|
|
@@ -166,7 +166,7 @@ function print_colorized(records, delim, show_column_names, skip_header) {
|
|
|
166
166
|
for (let c = 0; c < records[r].length; c++) {
|
|
167
167
|
let color_code = color_codes[c % color_codes.length];
|
|
168
168
|
let field = records[r][c];
|
|
169
|
-
let colored_field = (!show_column_names || (
|
|
169
|
+
let colored_field = (!show_column_names || (with_headers && r == 0)) ? color_code + field : `${color_code}a${c + 1}:${field}`;
|
|
170
170
|
out_fields.push(colored_field);
|
|
171
171
|
}
|
|
172
172
|
let out_line = out_fields.join(delim) + reset_color_code;
|
|
@@ -208,7 +208,7 @@ async function run_with_js(args) {
|
|
|
208
208
|
var input_path = get_default(args, 'input', null);
|
|
209
209
|
var output_path = get_default(args, 'output', null);
|
|
210
210
|
var csv_encoding = args['encoding'];
|
|
211
|
-
var
|
|
211
|
+
var with_headers = args['with-headers'];
|
|
212
212
|
var comment_prefix = args['comment-prefix'];
|
|
213
213
|
var output_delim = get_default(args, 'out-delim', null);
|
|
214
214
|
var output_policy = get_default(args, 'out-policy', null);
|
|
@@ -229,8 +229,8 @@ async function run_with_js(args) {
|
|
|
229
229
|
// * binary/latin-1 do not require the decoder anyway
|
|
230
230
|
// * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
|
|
231
231
|
// * Streaming mode works a little faster (since we don't need to do the manual validation)
|
|
232
|
-
// TODO check if the current node installation doesn't have ICU enabled and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
|
|
233
|
-
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings,
|
|
232
|
+
// TODO check if the current node installation doesn't have ICU enabled (which is typicaly provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
|
|
233
|
+
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
|
|
234
234
|
await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
|
|
235
235
|
return true;
|
|
236
236
|
} catch (e) {
|
|
@@ -250,11 +250,11 @@ function get_default_output_path(input_path, delim) {
|
|
|
250
250
|
}
|
|
251
251
|
|
|
252
252
|
|
|
253
|
-
async function show_preview(input_path, encoding, delim, policy,
|
|
253
|
+
async function show_preview(input_path, encoding, delim, policy, with_headers) {
|
|
254
254
|
let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
|
|
255
255
|
console.log('Input table preview:');
|
|
256
256
|
console.log('====================================');
|
|
257
|
-
print_colorized(records, delim, true,
|
|
257
|
+
print_colorized(records, delim, true, with_headers);
|
|
258
258
|
console.log('====================================\n');
|
|
259
259
|
for (let warning of warnings) {
|
|
260
260
|
show_warning(warning);
|
|
@@ -280,7 +280,7 @@ async function run_interactive_loop(args) {
|
|
|
280
280
|
if (!delim)
|
|
281
281
|
throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
|
|
282
282
|
}
|
|
283
|
-
await show_preview(input_path, args['encoding'], delim, policy, args['
|
|
283
|
+
await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
|
|
284
284
|
args.delim = delim;
|
|
285
285
|
args.policy = policy;
|
|
286
286
|
if (!args.output) {
|
|
@@ -365,7 +365,7 @@ function main() {
|
|
|
365
365
|
'--output': {'help': 'Write output table to FILE instead of stdout', 'metavar': 'FILE'},
|
|
366
366
|
'--delim': {'help': 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', 'metavar': 'DELIM'},
|
|
367
367
|
'--policy': {'help': 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', 'metavar': 'POLICY'},
|
|
368
|
-
'--
|
|
368
|
+
'--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
|
|
369
369
|
'--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
|
|
370
370
|
'--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
|
|
371
371
|
'--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rbql",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Rainbow Query Language",
|
|
5
5
|
"keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
|
|
6
6
|
"scripts": {
|
package/rbql.js
CHANGED
|
@@ -66,11 +66,117 @@ class RBQLContext {
|
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
var query_context = null; // Needs to be global for MIN(), MAX(), etc functions
|
|
69
|
+
var query_context = null; // Needs to be global for MIN(), MAX(), etc functions. TODO find a way to make it local.
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
|
|
73
|
-
const RBQL_VERSION = '0.
|
|
73
|
+
const RBQL_VERSION = '0.25.0';
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
function check_if_brackets_match(opening_bracket, closing_bracket) {
|
|
77
|
+
return (opening_bracket == '[' && closing_bracket == ']') || (opening_bracket == '(' && closing_bracket == ')') || (opening_bracket == '{' && closing_bracket == '}');
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
function parse_root_bracket_level_text_spans(select_expression) {
|
|
82
|
+
let text_spans = []; // parts of text separated by commas at the root parenthesis level
|
|
83
|
+
let last_pos = 0;
|
|
84
|
+
let bracket_stack = [];
|
|
85
|
+
for (let i = 0; i < select_expression.length; i++) {
|
|
86
|
+
let cur_char = select_expression[i];
|
|
87
|
+
if (cur_char == ',' && bracket_stack.length == 0) {
|
|
88
|
+
text_spans.push(select_expression.substring(last_pos, i));
|
|
89
|
+
last_pos = i + 1;
|
|
90
|
+
} else if (['[', '{', '('].indexOf(cur_char) != -1) {
|
|
91
|
+
bracket_stack.push(cur_char);
|
|
92
|
+
} else if ([']', '}', ')'].indexOf(cur_char) != -1) {
|
|
93
|
+
if (bracket_stack.length && check_if_brackets_match(bracket_stack[bracket_stack.length - 1], cur_char)) {
|
|
94
|
+
bracket_stack.pop();
|
|
95
|
+
} else {
|
|
96
|
+
throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching opening bracket for closing "${cur_char}"`);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
if (bracket_stack.length) {
|
|
101
|
+
throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching closing bracket for opening "${bracket_stack[0]}"`);
|
|
102
|
+
}
|
|
103
|
+
text_spans.push(select_expression.substring(last_pos, select_expression.length));
|
|
104
|
+
text_spans = text_spans.map(span => span.trim());
|
|
105
|
+
return text_spans;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
function unquote_string(quoted_str) {
|
|
110
|
+
// It's possible to use eval here to unqoute the quoted_column_name, but it would be a little barbaric, let's do it manually instead
|
|
111
|
+
if (!quoted_str || quoted_str.length < 2)
|
|
112
|
+
return null;
|
|
113
|
+
if (quoted_str[0] == "'" && quoted_str[quoted_str.length - 1] == "'") {
|
|
114
|
+
return quoted_str.substring(1, quoted_str.length - 1).replace(/\\'/g, "'").replace(/\\\\/g, "\\");
|
|
115
|
+
} else if (quoted_str[0] == '"' && quoted_str[quoted_str.length - 1] == '"') {
|
|
116
|
+
return quoted_str.substring(1, quoted_str.length - 1).replace(/\\"/g, '"').replace(/\\\\/g, "\\");
|
|
117
|
+
} else {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
function column_info_from_text_span(text_span, string_literals) {
|
|
124
|
+
// This function is a rough equivalent of "column_info_from_node()" function in python version of RBQL
|
|
125
|
+
text_span = text_span.trim();
|
|
126
|
+
let rbql_star_marker = '__RBQL_INTERNAL_STAR';
|
|
127
|
+
let simple_var_match = /^[_a-zA-Z][_a-zA-Z0-9]*$/.exec(text_span);
|
|
128
|
+
let attribute_match = /^([ab])\.([_a-zA-Z][_a-zA-Z0-9]*)$/.exec(text_span);
|
|
129
|
+
let subscript_int_match = /^([ab])\[([0-9]+)\]$/.exec(text_span);
|
|
130
|
+
let subscript_str_match = /^([ab])\[___RBQL_STRING_LITERAL([0-9]+)___\]$/.exec(text_span);
|
|
131
|
+
if (simple_var_match !== null) {
|
|
132
|
+
if (text_span == rbql_star_marker)
|
|
133
|
+
return {table_name: null, column_index: null, column_name: null, is_star: true};
|
|
134
|
+
if (text_span.startsWith('___RBQL_STRING_LITERAL'))
|
|
135
|
+
return null;
|
|
136
|
+
let match = /^([ab])([0-9]+)$/.exec(text_span);
|
|
137
|
+
if (match !== null) {
|
|
138
|
+
return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false};
|
|
139
|
+
}
|
|
140
|
+
// Some examples for this branch: NR, NF
|
|
141
|
+
return {table_name: null, column_index: null, column_name: text_span, is_star: false};
|
|
142
|
+
} else if (attribute_match !== null) {
|
|
143
|
+
let table_name = attribute_match[1];
|
|
144
|
+
let column_name = attribute_match[2];
|
|
145
|
+
if (column_name == rbql_star_marker) {
|
|
146
|
+
return {table_name: table_name, column_index: null, column_name: null, is_star: true};
|
|
147
|
+
}
|
|
148
|
+
return {table_name: null, column_index: null, column_name: column_name, is_star: false};
|
|
149
|
+
} else if (subscript_int_match != null) {
|
|
150
|
+
let table_name = subscript_int_match[1];
|
|
151
|
+
let column_index = parseInt(subscript_int_match[2]) - 1;
|
|
152
|
+
return {table_name: table_name, column_index: column_index, column_name: null, is_star: false};
|
|
153
|
+
} else if (subscript_str_match != null) {
|
|
154
|
+
let table_name = subscript_str_match[1];
|
|
155
|
+
let replaced_string_literal_id = subscript_str_match[2];
|
|
156
|
+
if (replaced_string_literal_id < string_literals.length) {
|
|
157
|
+
let quoted_column_name = string_literals[replaced_string_literal_id];
|
|
158
|
+
let unquoted_column_name = unquote_string(quoted_column_name);
|
|
159
|
+
if (unquoted_column_name !== null && unquoted_column_name !== undefined) {
|
|
160
|
+
return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false};
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
return null;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
function adhoc_parse_select_expression_to_column_infos(select_expression, string_literals) {
|
|
169
|
+
// It is acceptable for the algorithm to provide null column name when it could be theorethically possible to deduce the name.
|
|
170
|
+
// I.e. this algorithm guarantees precision but doesn't guarantee completeness in all theorethically possible queries.
|
|
171
|
+
// Although the algorithm should be complete in all practical scenarios, i.e. it should be hard to come up with the query that doesn't produce complete set of column names.
|
|
172
|
+
// The null column name just means that the output column will be named as col{i}, so the failure to detect the proper column name can be tolerated.
|
|
173
|
+
// Specifically this function guarantees the following:
|
|
174
|
+
// 1. The number of column_infos is correct and will match the number of fields in each record in the output - otherwise the exception should be thrown
|
|
175
|
+
// 2. If column_info at pos j is not null, it is guaranteed to correctly represent that column name in the output
|
|
176
|
+
let text_spans = parse_root_bracket_level_text_spans(select_expression);
|
|
177
|
+
let column_infos = text_spans.map(ts => column_info_from_text_span(ts, string_literals));
|
|
178
|
+
return column_infos;
|
|
179
|
+
}
|
|
74
180
|
|
|
75
181
|
|
|
76
182
|
function stable_compare(a, b) {
|
|
@@ -469,10 +575,10 @@ class TopWriter {
|
|
|
469
575
|
this.top_count = top_count;
|
|
470
576
|
}
|
|
471
577
|
|
|
472
|
-
write(record) {
|
|
578
|
+
async write(record) {
|
|
473
579
|
if (this.top_count !== null && this.NW >= this.top_count)
|
|
474
580
|
return false;
|
|
475
|
-
this.subwriter.write(record);
|
|
581
|
+
await this.subwriter.write(record);
|
|
476
582
|
this.NW += 1;
|
|
477
583
|
return true;
|
|
478
584
|
}
|
|
@@ -489,10 +595,10 @@ class UniqWriter {
|
|
|
489
595
|
this.seen = new Set();
|
|
490
596
|
}
|
|
491
597
|
|
|
492
|
-
write(record) {
|
|
598
|
+
async write(record) {
|
|
493
599
|
if (!add_to_set(this.seen, JSON.stringify(record)))
|
|
494
600
|
return true;
|
|
495
|
-
if (!this.subwriter.write(record))
|
|
601
|
+
if (!await this.subwriter.write(record))
|
|
496
602
|
return false;
|
|
497
603
|
return true;
|
|
498
604
|
}
|
|
@@ -509,7 +615,7 @@ class UniqCountWriter {
|
|
|
509
615
|
this.records = new Map();
|
|
510
616
|
}
|
|
511
617
|
|
|
512
|
-
write(record) {
|
|
618
|
+
async write(record) {
|
|
513
619
|
var key = JSON.stringify(record);
|
|
514
620
|
var old_val = this.records.get(key);
|
|
515
621
|
if (old_val) {
|
|
@@ -524,7 +630,7 @@ class UniqCountWriter {
|
|
|
524
630
|
for (var [key, value] of this.records) {
|
|
525
631
|
let [count, record] = value;
|
|
526
632
|
record.unshift(count);
|
|
527
|
-
if (!this.subwriter.write(record))
|
|
633
|
+
if (!await this.subwriter.write(record))
|
|
528
634
|
break;
|
|
529
635
|
}
|
|
530
636
|
await this.subwriter.finish();
|
|
@@ -539,7 +645,7 @@ class SortedWriter {
|
|
|
539
645
|
this.unsorted_entries = [];
|
|
540
646
|
}
|
|
541
647
|
|
|
542
|
-
write(stable_entry) {
|
|
648
|
+
async write(stable_entry) {
|
|
543
649
|
this.unsorted_entries.push(stable_entry);
|
|
544
650
|
return true;
|
|
545
651
|
}
|
|
@@ -551,7 +657,7 @@ class SortedWriter {
|
|
|
551
657
|
unsorted_entries.reverse();
|
|
552
658
|
for (var i = 0; i < unsorted_entries.length; i++) {
|
|
553
659
|
var entry = unsorted_entries[i];
|
|
554
|
-
if (!this.subwriter.write(entry[entry.length - 1]))
|
|
660
|
+
if (!await this.subwriter.write(entry[entry.length - 1]))
|
|
555
661
|
break;
|
|
556
662
|
}
|
|
557
663
|
await this.subwriter.finish();
|
|
@@ -575,7 +681,7 @@ class AggregateWriter {
|
|
|
575
681
|
for (var ag of this.aggregators) {
|
|
576
682
|
out_fields.push(ag.get_final(key));
|
|
577
683
|
}
|
|
578
|
-
if (!this.subwriter.write(out_fields))
|
|
684
|
+
if (!await this.subwriter.write(out_fields))
|
|
579
685
|
break;
|
|
580
686
|
}
|
|
581
687
|
await this.subwriter.finish();
|
|
@@ -635,13 +741,13 @@ function select_except(src, except_fields) {
|
|
|
635
741
|
}
|
|
636
742
|
|
|
637
743
|
|
|
638
|
-
function select_simple(sort_key, NR, out_fields) {
|
|
744
|
+
async function select_simple(sort_key, NR, out_fields) {
|
|
639
745
|
if (query_context.sort_key_expression !== null) {
|
|
640
746
|
var sort_entry = sort_key.concat([NR, out_fields]);
|
|
641
|
-
if (!query_context.writer.write(sort_entry))
|
|
747
|
+
if (!await query_context.writer.write(sort_entry))
|
|
642
748
|
return false;
|
|
643
749
|
} else {
|
|
644
|
-
if (!query_context.writer.write(out_fields))
|
|
750
|
+
if (!await query_context.writer.write(out_fields))
|
|
645
751
|
return false;
|
|
646
752
|
}
|
|
647
753
|
return true;
|
|
@@ -683,12 +789,12 @@ function select_aggregated(key, transparent_values) {
|
|
|
683
789
|
}
|
|
684
790
|
|
|
685
791
|
|
|
686
|
-
function select_unnested(sort_key, NR, folded_fields) {
|
|
792
|
+
async function select_unnested(sort_key, NR, folded_fields) {
|
|
687
793
|
let out_fields = folded_fields.slice();
|
|
688
794
|
let unnest_pos = folded_fields.findIndex(val => val instanceof UnnestMarker);
|
|
689
795
|
for (var i = 0; i < query_context.unnest_list.length; i++) {
|
|
690
796
|
out_fields[unnest_pos] = query_context.unnest_list[i];
|
|
691
|
-
if (!select_simple(sort_key, NR, out_fields.slice()))
|
|
797
|
+
if (!await select_simple(sort_key, NR, out_fields.slice()))
|
|
692
798
|
return false;
|
|
693
799
|
}
|
|
694
800
|
return true;
|
|
@@ -705,10 +811,10 @@ if (__RBQLMP__where_expression) {
|
|
|
705
811
|
} else {
|
|
706
812
|
let sort_key = [__RBQLMP__sort_key_expression];
|
|
707
813
|
if (query_context.unnest_list !== null) {
|
|
708
|
-
if (!select_unnested(sort_key, NR, out_fields))
|
|
814
|
+
if (!await select_unnested(sort_key, NR, out_fields))
|
|
709
815
|
stop_flag = true;
|
|
710
816
|
} else {
|
|
711
|
-
if (!select_simple(sort_key, NR, out_fields))
|
|
817
|
+
if (!await select_simple(sort_key, NR, out_fields))
|
|
712
818
|
stop_flag = true;
|
|
713
819
|
}
|
|
714
820
|
}
|
|
@@ -749,7 +855,7 @@ if (join_matches.length == 1 && (__RBQLMP__where_expression)) {
|
|
|
749
855
|
NU += 1;
|
|
750
856
|
__RBQLMP__update_expressions
|
|
751
857
|
}
|
|
752
|
-
if (!query_context.writer.write(up_fields))
|
|
858
|
+
if (!await query_context.writer.write(up_fields))
|
|
753
859
|
stop_flag = true;
|
|
754
860
|
`;
|
|
755
861
|
|
|
@@ -761,7 +867,7 @@ if (__RBQLMP__where_expression) {
|
|
|
761
867
|
NU += 1;
|
|
762
868
|
__RBQLMP__update_expressions
|
|
763
869
|
}
|
|
764
|
-
if (!query_context.writer.write(up_fields))
|
|
870
|
+
if (!await query_context.writer.write(up_fields))
|
|
765
871
|
stop_flag = true;
|
|
766
872
|
`;
|
|
767
873
|
|
|
@@ -866,7 +972,7 @@ async function compile_and_run(query_context) {
|
|
|
866
972
|
if (lower_case_query.indexOf(' like ') != -1)
|
|
867
973
|
throw new SyntaxError(e.message + "\nRBQL doesn't support \"LIKE\" operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... "); // UT JSON
|
|
868
974
|
if (lower_case_query.indexOf(' from ') != -1)
|
|
869
|
-
throw new SyntaxError(e.message + "\
|
|
975
|
+
throw new SyntaxError(e.message + "\nTip: If input table is defined by the environment, RBQL query should not have \"FROM\" keyword"); // UT JSON
|
|
870
976
|
if (e && e.message && String(e.message).toLowerCase().indexOf('unexpected identifier') != -1) {
|
|
871
977
|
if (lower_case_query.indexOf(' and ') != -1)
|
|
872
978
|
throw new SyntaxError(e.message + "\nDid you use 'and' keyword in your query?\nJavaScript backend doesn't support 'and' keyword, use '&&' operator instead!");
|
|
@@ -893,6 +999,7 @@ const ORDER_BY = 'ORDER BY';
|
|
|
893
999
|
const WHERE = 'WHERE';
|
|
894
1000
|
const LIMIT = 'LIMIT';
|
|
895
1001
|
const EXCEPT = 'EXCEPT';
|
|
1002
|
+
const WITH = 'WITH';
|
|
896
1003
|
|
|
897
1004
|
|
|
898
1005
|
function get_ambiguous_error_msg(variable_name) {
|
|
@@ -925,7 +1032,7 @@ function strip_comments(cline) {
|
|
|
925
1032
|
|
|
926
1033
|
function combine_string_literals(backend_expression, string_literals) {
|
|
927
1034
|
for (var i = 0; i < string_literals.length; i++) {
|
|
928
|
-
backend_expression = replace_all(backend_expression,
|
|
1035
|
+
backend_expression = replace_all(backend_expression, `___RBQL_STRING_LITERAL${i}___`, string_literals[i]);
|
|
929
1036
|
}
|
|
930
1037
|
return backend_expression;
|
|
931
1038
|
}
|
|
@@ -1172,6 +1279,24 @@ function replace_star_vars(rbql_expression) {
|
|
|
1172
1279
|
}
|
|
1173
1280
|
|
|
1174
1281
|
|
|
1282
|
+
function replace_star_vars_for_header_parsing(rbql_expression) {
|
|
1283
|
+
let star_rgx = /(?:(?<=^)|(?<=,)) *(\*|a\.\*|b\.\*) *(?=$|,)/g;
|
|
1284
|
+
let matches = get_all_matches(star_rgx, rbql_expression);
|
|
1285
|
+
let last_pos = 0;
|
|
1286
|
+
let result = '';
|
|
1287
|
+
for (let match of matches) {
|
|
1288
|
+
let star_expression = match[1];
|
|
1289
|
+
let replacement_expression = {'*': '__RBQL_INTERNAL_STAR', 'a.*': 'a.__RBQL_INTERNAL_STAR', 'b.*': 'b.__RBQL_INTERNAL_STAR'}[star_expression];
|
|
1290
|
+
if (last_pos < match.index)
|
|
1291
|
+
result += rbql_expression.substring(last_pos, match.index);
|
|
1292
|
+
result += replacement_expression;
|
|
1293
|
+
last_pos = match.index + match[0].length;
|
|
1294
|
+
}
|
|
1295
|
+
result += rbql_expression.substring(last_pos);
|
|
1296
|
+
return result;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
|
|
1175
1300
|
function translate_update_expression(update_expression, input_variables_map, string_literals, indent) {
|
|
1176
1301
|
let first_assignment = str_strip(update_expression.split('=')[0]);
|
|
1177
1302
|
let first_assignment_error = `Unable to parse "UPDATE" expression: the expression must start with assignment, but "${first_assignment}" does not look like an assignable field name`;
|
|
@@ -1203,12 +1328,12 @@ function translate_update_expression(update_expression, input_variables_map, str
|
|
|
1203
1328
|
|
|
1204
1329
|
|
|
1205
1330
|
function translate_select_expression(select_expression) {
|
|
1206
|
-
|
|
1207
|
-
translated = replace_star_vars(
|
|
1208
|
-
|
|
1331
|
+
let expression_without_stars = replace_star_count(select_expression);
|
|
1332
|
+
let translated = str_strip(replace_star_vars(expression_without_stars));
|
|
1333
|
+
let translated_for_header = str_strip(replace_star_vars_for_header_parsing(expression_without_stars));
|
|
1209
1334
|
if (!translated.length)
|
|
1210
1335
|
throw new RbqlParsingError('"SELECT" expression is empty');
|
|
1211
|
-
return `[].concat([${translated}])
|
|
1336
|
+
return [`[].concat([${translated}])`, translated_for_header];
|
|
1212
1337
|
}
|
|
1213
1338
|
|
|
1214
1339
|
|
|
@@ -1225,7 +1350,7 @@ function separate_string_literals(rbql_expression) {
|
|
|
1225
1350
|
string_literals.push(string_literal);
|
|
1226
1351
|
var start_index = match_obj.index;
|
|
1227
1352
|
format_parts.push(rbql_expression.substring(idx_before, start_index));
|
|
1228
|
-
format_parts.push(
|
|
1353
|
+
format_parts.push(`___RBQL_STRING_LITERAL${literal_id}___`);
|
|
1229
1354
|
idx_before = rgx.lastIndex;
|
|
1230
1355
|
}
|
|
1231
1356
|
format_parts.push(rbql_expression.substring(idx_before));
|
|
@@ -1269,8 +1394,13 @@ function locate_statements(rbql_expression) {
|
|
|
1269
1394
|
|
|
1270
1395
|
function separate_actions(rbql_expression) {
|
|
1271
1396
|
rbql_expression = str_strip(rbql_expression);
|
|
1272
|
-
var ordered_statements = locate_statements(rbql_expression);
|
|
1273
1397
|
var result = {};
|
|
1398
|
+
let with_match = /^(.*) *[Ww][Ii][Tt][Hh] *\(([a-z]{4,20})\) *$/.exec(rbql_expression);
|
|
1399
|
+
if (with_match !== null) {
|
|
1400
|
+
rbql_expression = with_match[1];
|
|
1401
|
+
result[WITH] = with_match[2];
|
|
1402
|
+
}
|
|
1403
|
+
var ordered_statements = locate_statements(rbql_expression);
|
|
1274
1404
|
for (var i = 0; i < ordered_statements.length; i++) {
|
|
1275
1405
|
var statement_start = ordered_statements[i][0];
|
|
1276
1406
|
var span_start = ordered_statements[i][1];
|
|
@@ -1305,7 +1435,7 @@ function separate_actions(rbql_expression) {
|
|
|
1305
1435
|
if (statement == SELECT) {
|
|
1306
1436
|
if (statement_start != 0)
|
|
1307
1437
|
throw new RbqlParsingError('SELECT keyword must be at the beginning of the query');
|
|
1308
|
-
|
|
1438
|
+
let match = /^ *TOP *([0-9]+) /i.exec(span);
|
|
1309
1439
|
if (match !== null) {
|
|
1310
1440
|
statement_params['top'] = parseInt(match[1]);
|
|
1311
1441
|
span = span.substr(match.index + match[0].length);
|
|
@@ -1347,7 +1477,7 @@ function find_top(rb_actions) {
|
|
|
1347
1477
|
}
|
|
1348
1478
|
|
|
1349
1479
|
|
|
1350
|
-
function translate_except_expression(except_expression, input_variables_map, string_literals) {
|
|
1480
|
+
function translate_except_expression(except_expression, input_variables_map, string_literals, input_header) {
|
|
1351
1481
|
let skip_vars = except_expression.split(',');
|
|
1352
1482
|
skip_vars = skip_vars.map(str_strip);
|
|
1353
1483
|
let skip_indices = [];
|
|
@@ -1358,8 +1488,9 @@ function translate_except_expression(except_expression, input_variables_map, str
|
|
|
1358
1488
|
skip_indices.push(input_variables_map[var_name].index);
|
|
1359
1489
|
}
|
|
1360
1490
|
skip_indices = skip_indices.sort((a, b) => a - b);
|
|
1491
|
+
let output_header = input_header === null ? null : select_except(input_header, skip_indices);
|
|
1361
1492
|
let indices_str = skip_indices.join(',');
|
|
1362
|
-
return `select_except(record_a, [${indices_str}])
|
|
1493
|
+
return [output_header, `select_except(record_a, [${indices_str}])`];
|
|
1363
1494
|
}
|
|
1364
1495
|
|
|
1365
1496
|
|
|
@@ -1428,7 +1559,7 @@ class HashJoinMap {
|
|
|
1428
1559
|
|
|
1429
1560
|
|
|
1430
1561
|
function cleanup_query(query_text) {
|
|
1431
|
-
return query_text.split('\n').map(strip_comments).filter(line => line.length).join(' ');
|
|
1562
|
+
return query_text.split('\n').map(strip_comments).filter(line => line.length).join(' ').replace(/;+$/g, '');
|
|
1432
1563
|
}
|
|
1433
1564
|
|
|
1434
1565
|
|
|
@@ -1439,6 +1570,44 @@ function remove_redundant_table_name(query_text) {
|
|
|
1439
1570
|
}
|
|
1440
1571
|
|
|
1441
1572
|
|
|
1573
|
+
/**
 * Build the output header of a SELECT query from the input/join headers and the parsed column descriptors.
 *
 * Each descriptor contributes to the result as follows:
 *   - null/undefined descriptor: a positional fallback name `colN` (N is the 1-based output position);
 *   - star descriptor (`is_star`): the whole input header, the whole join header or both, depending on `table_name`;
 *   - descriptor with `column_name`: that name;
 *   - descriptor with `column_index`: the name at that index in the header of table `a` or `b`,
 *     or the fallback name when the table is unknown or the index is out of range.
 *
 * @param {?Array<string>} input_header - Column names of the input table (`a`), or null when unavailable.
 * @param {?Array<string>} join_header - Column names of the join table (`b`), or null when unavailable.
 * @param {Iterable<?Object>} query_column_infos - One descriptor per SELECT item. The properties
 *     `is_star`, `table_name`, `column_name` and `column_index` are read; a missing property is treated like null.
 * @returns {?Array<string>} Output column names, or null when neither header is available.
 */
function select_output_header(input_header, join_header, query_column_infos) {
    if (input_header == null && join_header == null)
        return null;
    // Local copies of the references are used so that the parameters are never reassigned.
    const a_header = input_header == null ? [] : input_header;
    const b_header = join_header == null ? [] : join_header;
    const output_header = [];
    // TODO keep this logic in sync with the python version.
    const push_default_name = () => { output_header.push('col' + (output_header.length + 1)); };
    for (const qci of query_column_infos) {
        if (qci == null) {
            push_default_name();
        } else if (qci.is_star) {
            if (qci.table_name == null) {
                output_header.push(...a_header, ...b_header);
            } else if (qci.table_name === 'a') {
                output_header.push(...a_header);
            } else if (qci.table_name === 'b') {
                output_header.push(...b_header);
            }
        } else if (qci.column_name != null) {
            output_header.push(qci.column_name);
        } else if (qci.column_index != null) {
            let source_header = null;
            if (qci.table_name === 'a') {
                source_header = a_header;
            } else if (qci.table_name === 'b') {
                source_header = b_header;
            }
            // Both bounds are checked so that an invalid index can never put `undefined` into the header.
            if (source_header !== null && qci.column_index >= 0 && qci.column_index < source_header.length) {
                output_header.push(source_header[qci.column_index]);
            } else {
                push_default_name();
            }
        } else { // Should never happen
            push_default_name();
        }
    }
    return output_header;
}
|
|
1609
|
+
|
|
1610
|
+
|
|
1442
1611
|
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
|
|
1443
1612
|
let keys = Object.keys(inconsistent_records_info);
|
|
1444
1613
|
let entries = [];
|
|
@@ -1468,16 +1637,22 @@ class RBQLInputIterator {
|
|
|
1468
1637
|
async get_record() {
|
|
1469
1638
|
throw new Error("Unable to call the interface method");
|
|
1470
1639
|
}
|
|
1640
|
+
handle_query_modifier() {
|
|
1641
|
+
return; // Reimplement if you need to handle a boolean query modifier that can be used like this: `SELECT * WITH (modifiername)`
|
|
1642
|
+
}
|
|
1471
1643
|
get_warnings() {
|
|
1472
1644
|
return []; // Reimplement if your class can produce warnings
|
|
1473
1645
|
}
|
|
1646
|
+
async get_header() {
|
|
1647
|
+
return null; // Reimplement if your class can provide input header
|
|
1648
|
+
}
|
|
1474
1649
|
}
|
|
1475
1650
|
|
|
1476
1651
|
|
|
1477
1652
|
class RBQLOutputWriter {
|
|
1478
1653
|
constructor(){}
|
|
1479
1654
|
|
|
1480
|
-
write(fields) {
|
|
1655
|
+
async write(fields) {
|
|
1481
1656
|
throw new Error("Unable to call the interface method");
|
|
1482
1657
|
}
|
|
1483
1658
|
|
|
@@ -1488,6 +1663,10 @@ class RBQLOutputWriter {
|
|
|
1488
1663
|
get_warnings() {
|
|
1489
1664
|
return []; // Reimplement if your class can produce warnings
|
|
1490
1665
|
};
|
|
1666
|
+
|
|
1667
|
+
set_header() {
|
|
1668
|
+
return; // Reimplement if your class can handle output headers in a meaningful way
|
|
1669
|
+
}
|
|
1491
1670
|
}
|
|
1492
1671
|
|
|
1493
1672
|
|
|
@@ -1558,6 +1737,10 @@ class TableIterator extends RBQLInputIterator {
|
|
|
1558
1737
|
return [make_inconsistent_num_fields_warning('input', this.fields_info)];
|
|
1559
1738
|
return [];
|
|
1560
1739
|
};
|
|
1740
|
+
|
|
1741
|
+
async get_header() {
|
|
1742
|
+
return this.column_names;
|
|
1743
|
+
}
|
|
1561
1744
|
}
|
|
1562
1745
|
|
|
1563
1746
|
|
|
@@ -1565,12 +1748,17 @@ class TableWriter extends RBQLOutputWriter {
|
|
|
1565
1748
|
constructor(external_table) {
|
|
1566
1749
|
super();
|
|
1567
1750
|
this.table = external_table;
|
|
1751
|
+
this.header = null;
|
|
1568
1752
|
}
|
|
1569
1753
|
|
|
1570
|
-
write(fields) {
|
|
1754
|
+
async write(fields) {
|
|
1571
1755
|
this.table.push(fields);
|
|
1572
1756
|
return true;
|
|
1573
1757
|
};
|
|
1758
|
+
|
|
1759
|
+
set_header(header) {
|
|
1760
|
+
this.header = header;
|
|
1761
|
+
}
|
|
1574
1762
|
}
|
|
1575
1763
|
|
|
1576
1764
|
|
|
@@ -1595,9 +1783,12 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1595
1783
|
query_text = cleanup_query(query_text);
|
|
1596
1784
|
var [format_expression, string_literals] = separate_string_literals(query_text);
|
|
1597
1785
|
format_expression = remove_redundant_table_name(format_expression);
|
|
1598
|
-
var input_variables_map = await input_iterator.get_variables_map(query_text);
|
|
1599
1786
|
|
|
1600
1787
|
var rb_actions = separate_actions(format_expression);
|
|
1788
|
+
if (rb_actions.hasOwnProperty(WITH)) {
|
|
1789
|
+
input_iterator.handle_query_modifier(rb_actions[WITH]);
|
|
1790
|
+
}
|
|
1791
|
+
var input_variables_map = await input_iterator.get_variables_map(query_text);
|
|
1601
1792
|
|
|
1602
1793
|
if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE))
|
|
1603
1794
|
throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries');
|
|
@@ -1609,6 +1800,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1609
1800
|
}
|
|
1610
1801
|
|
|
1611
1802
|
let join_variables_map = null;
|
|
1803
|
+
let join_header = null;
|
|
1612
1804
|
if (rb_actions.hasOwnProperty(JOIN)) {
|
|
1613
1805
|
var [rhs_table_id, variable_pairs] = parse_join_expression(rb_actions[JOIN]['text']);
|
|
1614
1806
|
if (join_tables_registry === null)
|
|
@@ -1616,7 +1808,11 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1616
1808
|
let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id);
|
|
1617
1809
|
if (!join_record_iterator)
|
|
1618
1810
|
throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`);
|
|
1811
|
+
if (rb_actions.hasOwnProperty(WITH)) {
|
|
1812
|
+
join_record_iterator.handle_query_modifier(rb_actions[WITH]);
|
|
1813
|
+
}
|
|
1619
1814
|
join_variables_map = await join_record_iterator.get_variables_map(query_text);
|
|
1815
|
+
join_header = await join_record_iterator.get_header();
|
|
1620
1816
|
let [lhs_variables, rhs_indices] = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals);
|
|
1621
1817
|
let sql_join_type = {'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'LEFT OUTER JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}[rb_actions[JOIN]['join_subtype']];
|
|
1622
1818
|
query_context.lhs_join_var_expression = lhs_variables.length == 1 ? lhs_variables[0] : 'JSON.stringify([' + lhs_variables.join(',') + '])';
|
|
@@ -1634,26 +1830,33 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1634
1830
|
query_context.where_expression = combine_string_literals(where_expression, string_literals);
|
|
1635
1831
|
}
|
|
1636
1832
|
|
|
1833
|
+
let input_header = await input_iterator.get_header();
|
|
1637
1834
|
if (rb_actions.hasOwnProperty(UPDATE)) {
|
|
1638
1835
|
var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals, ' '.repeat(8));
|
|
1639
1836
|
query_context.update_expressions = combine_string_literals(update_expression, string_literals);
|
|
1837
|
+
query_context.writer.set_header(input_header);
|
|
1640
1838
|
}
|
|
1641
1839
|
|
|
1642
1840
|
if (rb_actions.hasOwnProperty(SELECT)) {
|
|
1643
1841
|
query_context.top_count = find_top(rb_actions);
|
|
1644
|
-
|
|
1842
|
+
if (rb_actions.hasOwnProperty(EXCEPT)) {
|
|
1843
|
+
let [output_header, select_expression] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_header);
|
|
1844
|
+
query_context.select_expression = select_expression;
|
|
1845
|
+
query_context.writer.set_header(output_header);
|
|
1846
|
+
} else {
|
|
1847
|
+
let [select_expression, select_expression_for_ast] = translate_select_expression(rb_actions[SELECT]['text']);
|
|
1848
|
+
query_context.select_expression = combine_string_literals(select_expression, string_literals);
|
|
1849
|
+
let column_infos = adhoc_parse_select_expression_to_column_infos(select_expression_for_ast, string_literals);
|
|
1850
|
+
let output_header = select_output_header(input_header, join_header, column_infos);
|
|
1851
|
+
query_context.writer.set_header(output_header);
|
|
1852
|
+
}
|
|
1645
1853
|
|
|
1854
|
+
query_context.writer = new TopWriter(query_context.writer, query_context.top_count);
|
|
1646
1855
|
if (rb_actions[SELECT].hasOwnProperty('distinct_count')) {
|
|
1647
1856
|
query_context.writer = new UniqCountWriter(query_context.writer);
|
|
1648
1857
|
} else if (rb_actions[SELECT].hasOwnProperty('distinct')) {
|
|
1649
1858
|
query_context.writer = new UniqWriter(query_context.writer);
|
|
1650
1859
|
}
|
|
1651
|
-
if (rb_actions.hasOwnProperty(EXCEPT)) {
|
|
1652
|
-
query_context.select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals);
|
|
1653
|
-
} else {
|
|
1654
|
-
let select_expression = translate_select_expression(rb_actions[SELECT]['text']);
|
|
1655
|
-
query_context.select_expression = combine_string_literals(select_expression, string_literals);
|
|
1656
|
-
}
|
|
1657
1860
|
}
|
|
1658
1861
|
|
|
1659
1862
|
if (rb_actions.hasOwnProperty(ORDER_BY)) {
|
|
@@ -1676,13 +1879,21 @@ async function query(query_text, input_iterator, output_writer, output_warnings,
|
|
|
1676
1879
|
}
|
|
1677
1880
|
|
|
1678
1881
|
|
|
1679
|
-
async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, normalize_column_names=true, user_init_code='') {
|
|
1882
|
+
/**
 * Run a user query against an input array of records and put the result set into the output array.
 *
 * @param {string} query_text - Query to run.
 * @param {Array<Array>} input_table - Input records.
 * @param {Array<Array>} output_table - Array which receives the output records.
 * @param {Array<string>} output_warnings - Array which receives the warnings; passed through to query().
 * @param {?Array<Array>} [join_table=null] - Records of the join table, or null when there is no join table.
 * @param {?Array<string>} [input_column_names=null] - Names of the input table columns usable in the query.
 * @param {?Array<string>} [join_column_names=null] - Names of the join table columns usable in the query.
 * @param {?Array<string>} [output_column_names=null] - Must be null or an empty array; output column names
 *     are appended to this array after the query completion when the output header is known.
 * @param {boolean} [normalize_column_names=true] - Passed to the input iterator and the join table registry;
 *     when false and both column name lists are given, the query is checked for ambiguous variables first.
 * @param {string} [user_init_code=''] - Passed through to query().
 */
async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true, user_init_code='') {
    // Validate the out-parameter before doing any work: checking it only after the query has run
    // would report a bad argument too late, when `output_table` and `output_warnings` are already modified.
    if (output_column_names !== null)
        assert(output_column_names.length == 0, '`output_column_names` param must be an empty list or null');
    if (!normalize_column_names && input_column_names !== null && join_column_names !== null)
        ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names);
    let input_iterator = new TableIterator(input_table, input_column_names, normalize_column_names);
    let output_writer = new TableWriter(output_table);
    let join_tables_registry = join_table === null ? null : new SingleTableRegistry(join_table, join_column_names, normalize_column_names);
    await query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    // The writer header stays null when no output header could be determined; nothing is reported in that case.
    if (output_column_names !== null && output_writer.header !== null) {
        for (let column_name of output_writer.header) {
            output_column_names.push(column_name);
        }
    }
}
|
|
1687
1898
|
|
|
1688
1899
|
|
|
@@ -1716,6 +1927,7 @@ exports.exception_to_error_info = exception_to_error_info;
|
|
|
1716
1927
|
|
|
1717
1928
|
|
|
1718
1929
|
// The functions below are exported just for unit tests, they are not part of the rbql API
|
|
1930
|
+
// TODO exports through the special unit_test proxy e.g. exports.unit_test.parse_basic_variables = parse_basic_variables;
|
|
1719
1931
|
exports.parse_basic_variables = parse_basic_variables;
|
|
1720
1932
|
exports.parse_array_variables = parse_array_variables;
|
|
1721
1933
|
exports.parse_dictionary_variables = parse_dictionary_variables;
|
|
@@ -1725,11 +1937,15 @@ exports.strip_comments = strip_comments;
|
|
|
1725
1937
|
exports.separate_actions = separate_actions;
|
|
1726
1938
|
exports.separate_string_literals = separate_string_literals;
|
|
1727
1939
|
exports.combine_string_literals = combine_string_literals;
|
|
1728
|
-
exports.translate_except_expression = translate_except_expression;
|
|
1729
1940
|
exports.parse_join_expression = parse_join_expression;
|
|
1730
1941
|
exports.resolve_join_variables = resolve_join_variables;
|
|
1731
1942
|
exports.translate_update_expression = translate_update_expression;
|
|
1732
1943
|
exports.translate_select_expression = translate_select_expression;
|
|
1944
|
+
exports.translate_except_expression = translate_except_expression;
|
|
1733
1945
|
exports.like_to_regex = like_to_regex;
|
|
1946
|
+
exports.adhoc_parse_select_expression_to_column_infos = adhoc_parse_select_expression_to_column_infos;
|
|
1947
|
+
exports.replace_star_count = replace_star_count;
|
|
1948
|
+
exports.replace_star_vars_for_header_parsing = replace_star_vars_for_header_parsing;
|
|
1949
|
+
exports.select_output_header = select_output_header;
|
|
1734
1950
|
|
|
1735
1951
|
}(typeof exports === 'undefined' ? this.rbql = {} : exports));
|
package/rbql_csv.js
CHANGED
|
@@ -117,11 +117,18 @@ function get_index_record(index_path, key) {
|
|
|
117
117
|
}
|
|
118
118
|
|
|
119
119
|
|
|
120
|
-
function find_table_path(table_id) {
|
|
120
|
+
function find_table_path(main_table_dir, table_id) {
|
|
121
|
+
// If table_id is a relative path it could be relative either to the current directory or to the main table dir.
|
|
121
122
|
var candidate_path = expanduser(table_id);
|
|
122
123
|
if (fs.existsSync(candidate_path)) {
|
|
123
124
|
return candidate_path;
|
|
124
125
|
}
|
|
126
|
+
if (main_table_dir && !path.isAbsolute(candidate_path)) {
|
|
127
|
+
candidate_path = path.join(main_table_dir, candidate_path);
|
|
128
|
+
if (fs.existsSync(candidate_path)) {
|
|
129
|
+
return candidate_path;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
125
132
|
let table_names_settings_path = path.join(os.homedir(), '.rbql_table_names');
|
|
126
133
|
var name_record = get_index_record(table_names_settings_path, table_id);
|
|
127
134
|
if (name_record && name_record.length > 1 && fs.existsSync(name_record[1])) {
|
|
@@ -152,10 +159,6 @@ class RecordQueue {
|
|
|
152
159
|
}
|
|
153
160
|
return this.pull_stack.pop();
|
|
154
161
|
}
|
|
155
|
-
|
|
156
|
-
return_to_pull_stack(record) {
|
|
157
|
-
this.pull_stack.push(record);
|
|
158
|
-
}
|
|
159
162
|
}
|
|
160
163
|
|
|
161
164
|
|
|
@@ -163,7 +166,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
163
166
|
// CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
|
|
164
167
|
// get_record() - consumer
|
|
165
168
|
// stream.on('data') - producer
|
|
166
|
-
constructor(stream, csv_path, encoding, delim, policy,
|
|
169
|
+
constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
|
|
167
170
|
super();
|
|
168
171
|
this.stream = stream;
|
|
169
172
|
this.csv_path = csv_path;
|
|
@@ -171,7 +174,12 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
171
174
|
this.encoding = encoding;
|
|
172
175
|
this.delim = delim;
|
|
173
176
|
this.policy = policy;
|
|
174
|
-
|
|
177
|
+
|
|
178
|
+
this.has_header = has_header;
|
|
179
|
+
this.first_record = null;
|
|
180
|
+
this.first_record_should_be_emitted = !has_header;
|
|
181
|
+
this.header_preread_complete = false;
|
|
182
|
+
|
|
175
183
|
this.table_name = table_name;
|
|
176
184
|
this.variable_prefix = variable_prefix;
|
|
177
185
|
this.comment_prefix = (comment_prefix !== null && comment_prefix.length) ? comment_prefix : null;
|
|
@@ -203,9 +211,13 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
203
211
|
this.rfc_line_buffer = [];
|
|
204
212
|
|
|
205
213
|
this.partially_decoded_line = '';
|
|
214
|
+
this.partially_decoded_line_ends_with_cr = false;
|
|
206
215
|
|
|
216
|
+
// Holds an external "resolve" function which is called when everything is fine.
|
|
207
217
|
this.resolve_current_record = null;
|
|
218
|
+
// Holds an external "reject" function which is called when an error has occurred.
|
|
208
219
|
this.reject_current_record = null;
|
|
220
|
+
// Holds last exception if we don't have any reject callbacks from clients yet.
|
|
209
221
|
this.current_exception = null;
|
|
210
222
|
|
|
211
223
|
this.produced_records_queue = new RecordQueue();
|
|
@@ -213,27 +225,56 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
213
225
|
this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line;
|
|
214
226
|
}
|
|
215
227
|
|
|
216
|
-
|
|
217
|
-
|
|
228
|
+
|
|
229
|
+
handle_query_modifier(modifier) {
|
|
230
|
+
// For `... WITH (header) ...` syntax
|
|
231
|
+
if (['header', 'headers'].indexOf(modifier) != -1) {
|
|
232
|
+
this.has_header = true;
|
|
233
|
+
this.first_record_should_be_emitted = false;
|
|
234
|
+
}
|
|
235
|
+
if (['noheader', 'noheaders'].indexOf(modifier) != -1) {
|
|
236
|
+
this.has_header = false;
|
|
237
|
+
this.first_record_should_be_emitted = true;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
reset_external_callbacks() {
|
|
243
|
+
// Drop external callbacks simultaneously since promises can only resolve once, see: https://stackoverflow.com/a/18218542/2898283
|
|
244
|
+
this.reject_current_record = null;
|
|
245
|
+
this.resolve_current_record = null;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
try_propagate_exception() {
|
|
249
|
+
if (this.current_exception && this.reject_current_record) {
|
|
218
250
|
let reject = this.reject_current_record;
|
|
219
|
-
|
|
220
|
-
this.
|
|
251
|
+
let exception = this.current_exception;
|
|
252
|
+
this.reset_external_callbacks();
|
|
253
|
+
this.current_exception = null;
|
|
221
254
|
reject(exception);
|
|
222
|
-
} else {
|
|
223
|
-
this.current_exception = exception;
|
|
224
255
|
}
|
|
256
|
+
}
|
|
257
|
+
|
|
225
258
|
|
|
259
|
+
store_or_propagate_exception(exception) {
|
|
260
|
+
if (this.current_exception === null)
|
|
261
|
+
// Ignore subsequent exceptions if we already have an unreported error. This way we prioritize earlier errors over the more recent ones.
|
|
262
|
+
this.current_exception = exception;
|
|
263
|
+
this.try_propagate_exception();
|
|
226
264
|
}
|
|
227
265
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
if (
|
|
231
|
-
return
|
|
232
|
-
|
|
233
|
-
|
|
266
|
+
|
|
267
|
+
async preread_first_record() {
|
|
268
|
+
if (this.header_preread_complete)
|
|
269
|
+
return;
|
|
270
|
+
this.first_record = await this.get_record();
|
|
271
|
+
this.header_preread_complete = true; // We must set header_preread_complete to true after calling get_record(), because get_record() uses it internally.
|
|
272
|
+
if (this.first_record === null) {
|
|
273
|
+
return;
|
|
274
|
+
}
|
|
234
275
|
if (this.stream)
|
|
235
276
|
this.stream.pause();
|
|
236
|
-
|
|
277
|
+
this.first_record = this.first_record.slice();
|
|
237
278
|
};
|
|
238
279
|
|
|
239
280
|
|
|
@@ -242,24 +283,37 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
242
283
|
rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
|
|
243
284
|
rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
|
|
244
285
|
|
|
245
|
-
|
|
246
|
-
if (
|
|
247
|
-
rbql.parse_attribute_variables(query_text, this.variable_prefix,
|
|
248
|
-
rbql.parse_dictionary_variables(query_text, this.variable_prefix,
|
|
286
|
+
await this.preread_first_record();
|
|
287
|
+
if (this.has_header && this.first_record) {
|
|
288
|
+
rbql.parse_attribute_variables(query_text, this.variable_prefix, this.first_record, 'CSV header line', variable_map);
|
|
289
|
+
rbql.parse_dictionary_variables(query_text, this.variable_prefix, this.first_record, variable_map);
|
|
249
290
|
}
|
|
250
291
|
return variable_map;
|
|
251
292
|
};
|
|
252
293
|
|
|
294
|
+
async get_header() {
|
|
295
|
+
await this.preread_first_record();
|
|
296
|
+
return this.has_header ? this.first_record : null;
|
|
297
|
+
}
|
|
298
|
+
|
|
253
299
|
|
|
254
300
|
try_resolve_next_record() {
|
|
301
|
+
this.try_propagate_exception();
|
|
255
302
|
if (this.resolve_current_record === null)
|
|
256
303
|
return;
|
|
257
|
-
|
|
304
|
+
|
|
305
|
+
let record = null;
|
|
306
|
+
if (this.first_record_should_be_emitted && this.header_preread_complete) {
|
|
307
|
+
this.first_record_should_be_emitted = false;
|
|
308
|
+
record = this.first_record;
|
|
309
|
+
} else {
|
|
310
|
+
record = this.produced_records_queue.dequeue();
|
|
311
|
+
}
|
|
312
|
+
|
|
258
313
|
if (record === null && !this.input_exhausted)
|
|
259
314
|
return;
|
|
260
315
|
let resolve = this.resolve_current_record;
|
|
261
|
-
this.
|
|
262
|
-
this.reject_current_record = null;
|
|
316
|
+
this.reset_external_callbacks();
|
|
263
317
|
resolve(record);
|
|
264
318
|
};
|
|
265
319
|
|
|
@@ -275,9 +329,6 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
275
329
|
parent_iterator.resolve_current_record = resolve;
|
|
276
330
|
parent_iterator.reject_current_record = reject;
|
|
277
331
|
});
|
|
278
|
-
if (this.current_exception) {
|
|
279
|
-
this.reject_current_record(this.current_exception);
|
|
280
|
-
}
|
|
281
332
|
this.try_resolve_next_record();
|
|
282
333
|
return current_record_promise;
|
|
283
334
|
};
|
|
@@ -308,7 +359,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
308
359
|
if (this.first_defective_line === null) {
|
|
309
360
|
this.first_defective_line = this.NL;
|
|
310
361
|
if (this.policy == 'quoted_rfc')
|
|
311
|
-
this.
|
|
362
|
+
this.store_or_propagate_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`));
|
|
312
363
|
}
|
|
313
364
|
}
|
|
314
365
|
let num_fields = record.length;
|
|
@@ -359,19 +410,23 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
359
410
|
decoded_string = this.decoder.decode(data_chunk);
|
|
360
411
|
} catch (e) {
|
|
361
412
|
if (e instanceof TypeError) {
|
|
362
|
-
this.
|
|
413
|
+
this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
|
|
363
414
|
} else {
|
|
364
|
-
this.
|
|
415
|
+
this.store_or_propagate_exception(e);
|
|
365
416
|
}
|
|
366
417
|
return;
|
|
367
418
|
}
|
|
368
419
|
} else {
|
|
369
420
|
decoded_string = data_chunk.toString(this.encoding);
|
|
370
421
|
}
|
|
422
|
+
let line_starts_with_lf = decoded_string.length && decoded_string[0] == '\n';
|
|
423
|
+
let first_line_index = line_starts_with_lf && this.partially_decoded_line_ends_with_cr ? 1 : 0;
|
|
424
|
+
this.partially_decoded_line_ends_with_cr = decoded_string.length && decoded_string[decoded_string.length - 1] == '\r';
|
|
371
425
|
let lines = csv_utils.split_lines(decoded_string);
|
|
372
426
|
lines[0] = this.partially_decoded_line + lines[0];
|
|
427
|
+
assert(first_line_index == 0 || lines[0].length == 0);
|
|
373
428
|
this.partially_decoded_line = lines.pop();
|
|
374
|
-
for (let i =
|
|
429
|
+
for (let i = first_line_index; i < lines.length; i++) {
|
|
375
430
|
this.process_line(lines[i]);
|
|
376
431
|
}
|
|
377
432
|
};
|
|
@@ -384,7 +439,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
384
439
|
// TODO get rid of this once TextDecoder is really fixed or when alternative method of reliable decoding appears
|
|
385
440
|
let control_buffer = Buffer.from(decoded_string, 'utf-8');
|
|
386
441
|
if (Buffer.compare(data_chunk, control_buffer) != 0) {
|
|
387
|
-
this.
|
|
442
|
+
this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
|
|
388
443
|
return;
|
|
389
444
|
}
|
|
390
445
|
}
|
|
@@ -465,6 +520,7 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
465
520
|
this.encoding = encoding;
|
|
466
521
|
if (encoding)
|
|
467
522
|
this.stream.setDefaultEncoding(encoding);
|
|
523
|
+
this.stream.on('error', (error_obj) => { this.store_first_error(error_obj); })
|
|
468
524
|
this.delim = delim;
|
|
469
525
|
this.policy = policy;
|
|
470
526
|
this.line_separator = line_separator;
|
|
@@ -474,6 +530,8 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
474
530
|
|
|
475
531
|
this.null_in_output = false;
|
|
476
532
|
this.delim_in_simple_output = false;
|
|
533
|
+
this.header_len = null;
|
|
534
|
+
this.first_error = null;
|
|
477
535
|
|
|
478
536
|
if (policy == 'simple') {
|
|
479
537
|
this.polymorphic_join = this.simple_join;
|
|
@@ -491,6 +549,20 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
491
549
|
}
|
|
492
550
|
|
|
493
551
|
|
|
552
|
+
store_first_error(error_obj) {
|
|
553
|
+
// Store only first error because it is typically more important than the subsequent ones.
|
|
554
|
+
if (this.first_error === null)
|
|
555
|
+
this.first_error = error_obj;
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
set_header(header) {
|
|
559
|
+
if (header !== null) {
|
|
560
|
+
this.header_len = header.length;
|
|
561
|
+
this.write(header);
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
|
|
494
566
|
quoted_join(fields) {
|
|
495
567
|
let delim = this.delim;
|
|
496
568
|
var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
|
|
@@ -535,11 +607,20 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
535
607
|
};
|
|
536
608
|
|
|
537
609
|
|
|
538
|
-
write(fields) {
|
|
610
|
+
async write(fields) {
|
|
611
|
+
if (this.header_len !== null && fields.length != this.header_len)
|
|
612
|
+
throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
|
|
539
613
|
this.normalize_fields(fields);
|
|
540
614
|
this.stream.write(this.polymorphic_join(fields));
|
|
541
615
|
this.stream.write(this.line_separator);
|
|
542
|
-
|
|
616
|
+
let writer_error = this.first_error;
|
|
617
|
+
return new Promise(function(resolve, reject) {
|
|
618
|
+
if (writer_error !== null) {
|
|
619
|
+
reject(writer_error);
|
|
620
|
+
} else {
|
|
621
|
+
resolve(true);
|
|
622
|
+
}
|
|
623
|
+
});
|
|
543
624
|
};
|
|
544
625
|
|
|
545
626
|
|
|
@@ -554,7 +635,11 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
554
635
|
let close_stream_on_finish = this.close_stream_on_finish;
|
|
555
636
|
let output_stream = this.stream;
|
|
556
637
|
let output_encoding = this.encoding;
|
|
638
|
+
let writer_error = this.first_error;
|
|
557
639
|
let finish_promise = new Promise(function(resolve, reject) {
|
|
640
|
+
if (writer_error !== null) {
|
|
641
|
+
reject(writer_error);
|
|
642
|
+
}
|
|
558
643
|
if (close_stream_on_finish) {
|
|
559
644
|
output_stream.end('', output_encoding, () => { resolve(); });
|
|
560
645
|
} else {
|
|
@@ -578,12 +663,13 @@ class CSVWriter extends rbql.RBQLOutputWriter {
|
|
|
578
663
|
|
|
579
664
|
|
|
580
665
|
class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
|
|
581
|
-
constructor(delim, policy, encoding,
|
|
666
|
+
constructor(input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
|
|
582
667
|
super();
|
|
668
|
+
this.input_file_dir = input_file_dir;
|
|
583
669
|
this.delim = delim;
|
|
584
670
|
this.policy = policy;
|
|
585
671
|
this.encoding = encoding;
|
|
586
|
-
this.
|
|
672
|
+
this.has_header = has_header;
|
|
587
673
|
this.comment_prefix = comment_prefix;
|
|
588
674
|
this.stream = null;
|
|
589
675
|
this.record_iterator = null;
|
|
@@ -594,7 +680,7 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
|
|
|
594
680
|
}
|
|
595
681
|
|
|
596
682
|
get_iterator_by_table_id(table_id) {
|
|
597
|
-
this.table_path = find_table_path(table_id);
|
|
683
|
+
this.table_path = find_table_path(this.input_file_dir, table_id);
|
|
598
684
|
if (this.table_path === null) {
|
|
599
685
|
throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
|
|
600
686
|
}
|
|
@@ -603,19 +689,19 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
|
|
|
603
689
|
} else {
|
|
604
690
|
this.stream = fs.createReadStream(this.table_path);
|
|
605
691
|
}
|
|
606
|
-
this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.
|
|
692
|
+
this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
|
|
607
693
|
return this.record_iterator;
|
|
608
694
|
};
|
|
609
695
|
|
|
610
696
|
get_warnings(output_warnings) {
|
|
611
|
-
if (this.record_iterator && this.
|
|
612
|
-
output_warnings.push(`The first
|
|
697
|
+
if (this.record_iterator && this.has_header) {
|
|
698
|
+
output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
|
|
613
699
|
}
|
|
614
700
|
}
|
|
615
701
|
}
|
|
616
702
|
|
|
617
703
|
|
|
618
|
-
async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings,
|
|
704
|
+
async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
|
|
619
705
|
let input_stream = null;
|
|
620
706
|
let bulk_input_path = null;
|
|
621
707
|
if (options && options['bulk_read'] && input_path) {
|
|
@@ -624,6 +710,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
624
710
|
input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
|
|
625
711
|
}
|
|
626
712
|
let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
|
|
713
|
+
// FIXME add on(error) handler to avoid async errors, see https://github.com/nodejs/node-v0.x-archive/issues/406
|
|
627
714
|
if (input_delim == '"' && input_policy == 'quoted')
|
|
628
715
|
throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
|
|
629
716
|
if (csv_encoding == 'latin-1')
|
|
@@ -637,9 +724,9 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
637
724
|
if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
|
|
638
725
|
user_init_code = read_user_init_code(default_init_source_path);
|
|
639
726
|
}
|
|
640
|
-
|
|
641
|
-
let join_tables_registry = new FileSystemCSVRegistry(input_delim, input_policy, csv_encoding,
|
|
642
|
-
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy,
|
|
727
|
+
let input_file_dir = input_path ? path.dirname(input_path) : null;
|
|
728
|
+
let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
|
|
729
|
+
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
|
|
643
730
|
let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
|
|
644
731
|
|
|
645
732
|
await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
|