rbql 0.28.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +0 -0
- package/DEV_README.md +6 -2
- package/README.md +11 -1
- package/cli_parser.js +0 -0
- package/cli_rbql.js +11 -8
- package/csv_utils.js +23 -8
- package/index.js +0 -0
- package/package.json +1 -1
- package/rbql.js +49 -2
- package/rbql_csv.js +25 -20
package/.eslintrc.json
CHANGED
|
File without changes
|
package/DEV_README.md
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
# Publishing the package to npm
|
|
2
|
-
1.
|
|
3
|
-
2. Run `
|
|
2
|
+
1. Update version in package.json. Make sure it is synced with unit tests js version - run the unit tests. Python and JS versions don't have to be in sync!
|
|
3
|
+
2. Run `git clean -fd` just in case.
|
|
4
|
+
3. Run `npm publish`.
|
|
5
|
+
|
|
6
|
+
Note: you need to be authorized in order to publish, so in the new system you might need to run `npm adduser` first,
|
|
7
|
+
if you run `npm publish` without authorizing it would prompt you to do it anyway, so no big deal.
|
|
4
8
|
|
package/README.md
CHANGED
|
@@ -219,11 +219,14 @@ You can also check rbql-js cli app code as a usage example: [rbql-js cli source
|
|
|
219
219
|
|
|
220
220
|
|
|
221
221
|
### Installation:
|
|
222
|
-
To use RBQL as CLI app you
|
|
222
|
+
To use RBQL as CLI app you can install it in global (-g) mode:
|
|
223
223
|
```
|
|
224
224
|
$ npm install -g rbql
|
|
225
225
|
```
|
|
226
226
|
|
|
227
|
+
RBQL can also be installed locally with `$ npm install rbql`, but then you would have to run it with `$ npx rbql-js ...` instead of `$ rbql-js ...`.
|
|
228
|
+
|
|
229
|
+
|
|
227
230
|
### Usage (non-interactive mode):
|
|
228
231
|
|
|
229
232
|
```
|
|
@@ -339,6 +342,13 @@ But it is also possible to override this selection directly in the query by addi
|
|
|
339
342
|
Example: `select top 5 NR, * with (header)`
|
|
340
343
|
|
|
341
344
|
|
|
345
|
+
### Pipe syntax for query chaining
|
|
346
|
+
You can chain consecutive queries via pipe `|` syntax. Example:
|
|
347
|
+
```
|
|
348
|
+
SELECT a2 AS region, count(*) AS cnt GROUP BY a2 | SELECT * ORDER BY a.cnt DESC
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
|
|
342
352
|
### User Defined Functions (UDF)
|
|
343
353
|
|
|
344
354
|
RBQL supports User Defined Functions
|
package/cli_parser.js
CHANGED
|
File without changes
|
package/cli_rbql.js
CHANGED
|
@@ -16,6 +16,7 @@ var interactive_mode = false;
|
|
|
16
16
|
|
|
17
17
|
// TODO implement colored output like in Python version
|
|
18
18
|
// TODO implement query history like in Python version. "readline" modules allows to do that, see "completer" parameter.
|
|
19
|
+
// TODO switch to built-in node util parseArgs module (added in 2022)
|
|
19
20
|
|
|
20
21
|
// FIXME test readline on Win: disable interactive mode?
|
|
21
22
|
|
|
@@ -133,9 +134,9 @@ async function sample_lines(table_path) {
|
|
|
133
134
|
}
|
|
134
135
|
|
|
135
136
|
|
|
136
|
-
async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
|
|
137
|
+
async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces, comment_regex) {
|
|
137
138
|
let table_stream = fs.createReadStream(table_path);
|
|
138
|
-
let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
|
|
139
|
+
let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces, comment_regex);
|
|
139
140
|
let sampled_records = await sampling_iterator.get_all_records(10);
|
|
140
141
|
let warnings = sampling_iterator.get_warnings();
|
|
141
142
|
return [sampled_records, warnings];
|
|
@@ -183,7 +184,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
|
|
|
183
184
|
}
|
|
184
185
|
}
|
|
185
186
|
if (interactive_mode) {
|
|
186
|
-
let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
|
|
187
|
+
let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false, /*comment_regex=*/null);
|
|
187
188
|
console.log('\nOutput table preview:');
|
|
188
189
|
console.log('====================================');
|
|
189
190
|
print_colorized(records, delim, false, false);
|
|
@@ -210,6 +211,7 @@ async function run_with_js(args) {
|
|
|
210
211
|
var csv_encoding = args['encoding'];
|
|
211
212
|
var with_headers = args['with-headers'];
|
|
212
213
|
var comment_prefix = args['comment-prefix'];
|
|
214
|
+
var comment_regex = args['comment-regex'];
|
|
213
215
|
var trim_whitespaces = args['trim-spaces'];
|
|
214
216
|
var output_delim = get_default(args, 'out-delim', null);
|
|
215
217
|
var output_policy = get_default(args, 'out-policy', null);
|
|
@@ -231,7 +233,7 @@ async function run_with_js(args) {
|
|
|
231
233
|
// * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
|
|
232
234
|
// * Streaming mode works a little faster (since we don't need to do the manual validation)
|
|
233
235
|
// TODO check if the current node installation doesn't have ICU enabled (which is typicaly provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
|
|
234
|
-
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
|
|
236
|
+
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces, 'comment_regex': comment_regex});
|
|
235
237
|
await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
|
|
236
238
|
return true;
|
|
237
239
|
} catch (e) {
|
|
@@ -251,8 +253,8 @@ function get_default_output_path(input_path, delim) {
|
|
|
251
253
|
}
|
|
252
254
|
|
|
253
255
|
|
|
254
|
-
async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
|
|
255
|
-
let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
|
|
256
|
+
async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces, comment_regex) {
|
|
257
|
+
let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces, comment_regex);
|
|
256
258
|
console.log('Input table preview:');
|
|
257
259
|
console.log('====================================');
|
|
258
260
|
print_colorized(records, delim, true, with_headers);
|
|
@@ -281,7 +283,7 @@ async function run_interactive_loop(args) {
|
|
|
281
283
|
if (!delim)
|
|
282
284
|
throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
|
|
283
285
|
}
|
|
284
|
-
await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
|
|
286
|
+
await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces'], args['comment-regex']);
|
|
285
287
|
args.delim = delim;
|
|
286
288
|
args.policy = policy;
|
|
287
289
|
if (!args.output) {
|
|
@@ -367,7 +369,8 @@ function main() {
|
|
|
367
369
|
'--delim': {'help': 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', 'metavar': 'DELIM'},
|
|
368
370
|
'--policy': {'help': 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', 'metavar': 'POLICY'},
|
|
369
371
|
'--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
|
|
370
|
-
'--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#"
|
|
372
|
+
'--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#"', 'metavar': 'PREFIX'},
|
|
373
|
+
'--comment-regex': {'help': 'Ignore lines in input and join tables that contain the comment REGEX.', 'metavar': 'REGEX'},
|
|
371
374
|
'--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
|
|
372
375
|
'--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
|
|
373
376
|
'--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
|
package/csv_utils.js
CHANGED
|
@@ -106,20 +106,30 @@ function split_whitespace_separated_str(src, preserve_whitespaces=false) {
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
|
|
109
|
+
function get_polymorphic_split_function(dlm, policy, preserve_quotes_and_whitespaces) {
|
|
110
|
+
// TODO consider moving this function to rbql_csv.js
|
|
111
|
+
if (policy === 'simple') {
|
|
112
|
+
return (src) => [src.split(dlm), false];
|
|
113
|
+
} else if (policy === 'whitespace') {
|
|
114
|
+
return (src) => [split_whitespace_separated_str(src, preserve_quotes_and_whitespaces), false];
|
|
115
|
+
} else if (policy === 'monocolumn') {
|
|
116
|
+
return (src) => [[src], false];
|
|
117
|
+
} else if (policy === 'quoted' || policy === 'quoted_rfc') {
|
|
118
|
+
return (src) => split_quoted_str(src, dlm, preserve_quotes_and_whitespaces);
|
|
119
|
+
} else {
|
|
120
|
+
throw new Error(`Unsupported splitting policy: ${policy}`);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
109
124
|
function smart_split(src, dlm, policy, preserve_quotes_and_whitespaces) {
|
|
110
|
-
|
|
111
|
-
return [src.split(dlm), false];
|
|
112
|
-
if (policy === 'whitespace')
|
|
113
|
-
return [split_whitespace_separated_str(src, preserve_quotes_and_whitespaces), false];
|
|
114
|
-
if (policy === 'monocolumn')
|
|
115
|
-
return [[src], false];
|
|
116
|
-
return split_quoted_str(src, dlm, preserve_quotes_and_whitespaces);
|
|
125
|
+
return get_polymorphic_split_function(dlm, policy, preserve_quotes_and_whitespaces)(src);
|
|
117
126
|
}
|
|
118
127
|
|
|
119
128
|
|
|
120
129
|
class MultilineRecordAggregator {
|
|
121
|
-
constructor(comment_prefix) {
|
|
130
|
+
constructor(comment_prefix, comment_regex) {
|
|
122
131
|
this.comment_prefix = comment_prefix;
|
|
132
|
+
this.comment_regex = comment_regex;
|
|
123
133
|
this.reset();
|
|
124
134
|
}
|
|
125
135
|
add_line(line_text) {
|
|
@@ -130,6 +140,10 @@ class MultilineRecordAggregator {
|
|
|
130
140
|
this.has_comment_line = true;
|
|
131
141
|
return false;
|
|
132
142
|
}
|
|
143
|
+
if (this.comment_regex && this.rfc_line_buffer.length == 0 && line_text.search(this.comment_regex) != -1) {
|
|
144
|
+
this.has_comment_line = true;
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
133
147
|
let match_list = line_text.match(/"/g);
|
|
134
148
|
let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
|
|
135
149
|
this.rfc_line_buffer.push(line_text);
|
|
@@ -156,6 +170,7 @@ class MultilineRecordAggregator {
|
|
|
156
170
|
module.exports.split_quoted_str = split_quoted_str;
|
|
157
171
|
module.exports.split_whitespace_separated_str = split_whitespace_separated_str;
|
|
158
172
|
module.exports.smart_split = smart_split;
|
|
173
|
+
module.exports.get_polymorphic_split_function = get_polymorphic_split_function;
|
|
159
174
|
module.exports.quote_field = quote_field;
|
|
160
175
|
module.exports.rfc_quote_field = rfc_quote_field;
|
|
161
176
|
module.exports.unquote_field = unquote_field;
|
package/index.js
CHANGED
|
File without changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rbql",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.30.0",
|
|
4
4
|
"description": "Rainbow Query Language",
|
|
5
5
|
"keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
|
|
6
6
|
"scripts": {
|
package/rbql.js
CHANGED
|
@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
|
|
73
|
-
const RBQL_VERSION = '0.28.0';
|
|
73
|
+
const RBQL_VERSION = '0.30.0';
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
function check_if_brackets_match(opening_bracket, closing_bracket) {
|
|
@@ -1796,6 +1796,7 @@ class TableWriter extends RBQLOutputWriter {
|
|
|
1796
1796
|
super();
|
|
1797
1797
|
this.table = external_table;
|
|
1798
1798
|
this.header = null;
|
|
1799
|
+
this.finished = false;
|
|
1799
1800
|
}
|
|
1800
1801
|
|
|
1801
1802
|
async write(fields) {
|
|
@@ -1806,6 +1807,33 @@ class TableWriter extends RBQLOutputWriter {
|
|
|
1806
1807
|
set_header(header) {
|
|
1807
1808
|
this.header = header;
|
|
1808
1809
|
}
|
|
1810
|
+
|
|
1811
|
+
async finish() {
|
|
1812
|
+
this.finished = true;
|
|
1813
|
+
}
|
|
1814
|
+
}
|
|
1815
|
+
|
|
1816
|
+
|
|
1817
|
+
class TablePipe {
|
|
1818
|
+
constructor() {
|
|
1819
|
+
this.table = [];
|
|
1820
|
+
this.writer = new TableWriter(this.table);
|
|
1821
|
+
this.iterator = null;
|
|
1822
|
+
}
|
|
1823
|
+
|
|
1824
|
+
get_writer() {
|
|
1825
|
+
return this.writer;
|
|
1826
|
+
}
|
|
1827
|
+
|
|
1828
|
+
get_iterator() {
|
|
1829
|
+
if (!this.writer.finished) {
|
|
1830
|
+
throw new RbqlIOHandlingError("Trying to read from non-thread-safe table pipe while not finishing writing yet");
|
|
1831
|
+
}
|
|
1832
|
+
if (this.iterator === null) {
|
|
1833
|
+
this.iterator = new TableIterator(this.table, this.writer.header);
|
|
1834
|
+
}
|
|
1835
|
+
return this.iterator;
|
|
1836
|
+
}
|
|
1809
1837
|
}
|
|
1810
1838
|
|
|
1811
1839
|
|
|
@@ -1925,7 +1953,12 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1925
1953
|
}
|
|
1926
1954
|
|
|
1927
1955
|
|
|
1928
|
-
|
|
1956
|
+
function split_query_to_stages(query_text) {
|
|
1957
|
+
return query_text.split(/\|[>]?[ ]*(?=(?:select|update)[ ])/i);
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
|
|
1961
|
+
async function staged_query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code) {
|
|
1929
1962
|
query_context = new RBQLContext(query_text, input_iterator, output_writer, user_init_code);
|
|
1930
1963
|
await shallow_parse_input_query(query_text, input_iterator, join_tables_registry, query_context);
|
|
1931
1964
|
await compile_and_run(query_context);
|
|
@@ -1937,6 +1970,20 @@ async function query(query_text, input_iterator, output_writer, output_warnings,
|
|
|
1937
1970
|
}
|
|
1938
1971
|
|
|
1939
1972
|
|
|
1973
|
+
async function query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry=null, user_init_code='') {
|
|
1974
|
+
let query_stages = split_query_to_stages(query_text);
|
|
1975
|
+
let previous_pipe = null;
|
|
1976
|
+
for (let i = 0; i < query_stages.length; i++) {
|
|
1977
|
+
let query_stage_text = query_stages[i];
|
|
1978
|
+
let output_pipe = i + 1 < query_stages.length ? new TablePipe() : null;
|
|
1979
|
+
let stage_iterator = previous_pipe === null ? input_iterator : previous_pipe.get_iterator();
|
|
1980
|
+
let stage_writer = output_pipe === null ? output_writer : output_pipe.get_writer();
|
|
1981
|
+
await staged_query(query_stage_text, stage_iterator, stage_writer, output_warnings, join_tables_registry, user_init_code);
|
|
1982
|
+
previous_pipe = output_pipe;
|
|
1983
|
+
}
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1986
|
+
|
|
1940
1987
|
async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true, user_init_code='') {
|
|
1941
1988
|
if (!normalize_column_names && input_column_names !== null && join_column_names !== null)
|
|
1942
1989
|
ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names);
|
package/rbql_csv.js
CHANGED
|
@@ -14,9 +14,6 @@ class RbqlIOHandlingError extends Error {}
|
|
|
14
14
|
class AssertionError extends Error {}
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
// TODO performance improvement: replace smart_split() with polymorphic_split()
|
|
18
|
-
|
|
19
|
-
|
|
20
17
|
function assert(condition, message=null) {
|
|
21
18
|
if (!condition) {
|
|
22
19
|
if (!message) {
|
|
@@ -156,7 +153,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
156
153
|
// CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
|
|
157
154
|
// get_record() - consumer
|
|
158
155
|
// stream.on('data') - producer
|
|
159
|
-
constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
|
|
156
|
+
constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false, comment_regex=null) {
|
|
160
157
|
super();
|
|
161
158
|
this.stream = stream;
|
|
162
159
|
this.csv_path = csv_path;
|
|
@@ -173,6 +170,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
173
170
|
this.table_name = table_name;
|
|
174
171
|
this.variable_prefix = variable_prefix;
|
|
175
172
|
this.comment_prefix = comment_prefix;
|
|
173
|
+
this.comment_regex = comment_regex;
|
|
176
174
|
this.trim_whitespaces = trim_whitespaces;
|
|
177
175
|
|
|
178
176
|
this.decoder = null;
|
|
@@ -199,7 +197,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
199
197
|
this.NR = 0; // Record number
|
|
200
198
|
this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)
|
|
201
199
|
|
|
202
|
-
this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix);
|
|
200
|
+
this.line_aggregator = new csv_utils.MultilineRecordAggregator(comment_prefix, comment_regex);
|
|
203
201
|
|
|
204
202
|
this.partially_decoded_line = '';
|
|
205
203
|
this.partially_decoded_line_ends_with_cr = false;
|
|
@@ -214,6 +212,8 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
214
212
|
this.produced_records_queue = new RecordQueue();
|
|
215
213
|
|
|
216
214
|
this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line_simple;
|
|
215
|
+
|
|
216
|
+
this.polymorphic_split = csv_utils.get_polymorphic_split_function(this.delim, this.policy, false);
|
|
217
217
|
}
|
|
218
218
|
|
|
219
219
|
|
|
@@ -344,13 +344,15 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
344
344
|
process_record_line_simple(line) {
|
|
345
345
|
if (this.comment_prefix && line.startsWith(this.comment_prefix))
|
|
346
346
|
return; // Just skip the line
|
|
347
|
+
if (this.comment_regex && line.search(this.comment_regex) != -1)
|
|
348
|
+
return; // Just skip the line
|
|
347
349
|
this.process_record_line(line);
|
|
348
350
|
}
|
|
349
351
|
|
|
350
352
|
|
|
351
353
|
process_record_line(line) {
|
|
352
354
|
this.NR += 1;
|
|
353
|
-
var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
|
|
355
|
+
var [record, warning] = this.polymorphic_split(line);
|
|
354
356
|
if (this.trim_whitespaces) {
|
|
355
357
|
record = record.map((v) => v.trim());
|
|
356
358
|
}
|
|
@@ -661,32 +663,34 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
|
|
|
661
663
|
this.encoding = encoding;
|
|
662
664
|
this.has_header = has_header;
|
|
663
665
|
this.comment_prefix = comment_prefix;
|
|
664
|
-
this.stream = null;
|
|
665
|
-
this.record_iterator = null;
|
|
666
|
-
|
|
667
666
|
this.options = options;
|
|
668
|
-
this.
|
|
669
|
-
this.table_path = null;
|
|
667
|
+
this.active_join_files = [];
|
|
670
668
|
}
|
|
671
669
|
|
|
672
670
|
get_iterator_by_table_id(table_id) {
|
|
673
|
-
|
|
674
|
-
|
|
671
|
+
let stream = null;
|
|
672
|
+
let table_path = find_table_path(this.input_file_dir, table_id);
|
|
673
|
+
if (table_path === null) {
|
|
675
674
|
throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
|
|
676
675
|
}
|
|
676
|
+
let bulk_input_path = null;
|
|
677
677
|
if (this.options && this.options['bulk_read']) {
|
|
678
|
-
|
|
678
|
+
bulk_input_path = table_path;
|
|
679
679
|
} else {
|
|
680
|
-
|
|
680
|
+
stream = fs.createReadStream(table_path);
|
|
681
681
|
}
|
|
682
682
|
let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
|
|
683
|
-
|
|
684
|
-
|
|
683
|
+
let comment_regex = this.options && this.options.hasOwnProperty('comment_regex') ? this.options['comment_regex'] : null;
|
|
684
|
+
let record_iterator = new CSVRecordIterator(stream, bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces, comment_regex);
|
|
685
|
+
this.active_join_files.push({'table_path': table_path, 'input_stream': stream, 'record_iterator': record_iterator});
|
|
686
|
+
return record_iterator;
|
|
685
687
|
};
|
|
686
688
|
|
|
687
689
|
get_warnings(output_warnings) {
|
|
688
|
-
if (this.
|
|
689
|
-
|
|
690
|
+
if (this.has_header) {
|
|
691
|
+
for (let active_join_file of this.active_join_files) {
|
|
692
|
+
output_warnings.push(`The first record in JOIN file ${path.basename(active_join_file.table_path)} was also treated as header (and skipped)`);
|
|
693
|
+
}
|
|
690
694
|
}
|
|
691
695
|
}
|
|
692
696
|
}
|
|
@@ -701,6 +705,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
701
705
|
input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
|
|
702
706
|
}
|
|
703
707
|
let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
|
|
708
|
+
let comment_regex = options && options.hasOwnProperty('comment_regex') ? options['comment_regex'] : null;
|
|
704
709
|
let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
|
|
705
710
|
if (input_delim == '"' && input_policy == 'quoted')
|
|
706
711
|
throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
|
|
@@ -717,7 +722,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
717
722
|
}
|
|
718
723
|
let input_file_dir = input_path ? path.dirname(input_path) : null;
|
|
719
724
|
let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
|
|
720
|
-
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
|
|
725
|
+
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces, comment_regex);
|
|
721
726
|
let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
|
|
722
727
|
|
|
723
728
|
await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
|