rbql 0.26.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DEV_README.md +4 -0
- package/README.md +1 -1
- package/cli_rbql.js +9 -7
- package/package.json +1 -1
- package/rbql.js +51 -18
- package/rbql_csv.js +9 -3
package/DEV_README.md
ADDED
package/README.md
CHANGED
|
@@ -302,7 +302,7 @@ _UPDATE_ query produces a new table where original values are replaced according
|
|
|
302
302
|
### Aggregate functions and queries
|
|
303
303
|
|
|
304
304
|
RBQL supports the following aggregate functions, which can also be used with _GROUP BY_ keyword:
|
|
305
|
-
_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
|
|
305
|
+
_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _ANY_VALUE_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
|
|
306
306
|
|
|
307
307
|
Limitation: aggregate functions inside JavaScript expressions are not supported. Although you can use expressions inside aggregate functions.
|
|
308
308
|
E.g. `MAX(float(a1) / 1000)` - valid; `MAX(a1) / 1000` - invalid.
|
package/cli_rbql.js
CHANGED
|
@@ -133,9 +133,9 @@ async function sample_lines(table_path) {
|
|
|
133
133
|
}
|
|
134
134
|
|
|
135
135
|
|
|
136
|
-
async function sample_records(table_path, encoding, delim, policy) {
|
|
136
|
+
async function sample_records(table_path, encoding, delim, policy, comment_prefix, trim_whitespaces) {
|
|
137
137
|
let table_stream = fs.createReadStream(table_path);
|
|
138
|
-
let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy);
|
|
138
|
+
let sampling_iterator = new rbql_csv.CSVRecordIterator(table_stream, null, encoding, delim, policy, /*has_header=*/false, comment_prefix, 'input', 'a', trim_whitespaces);
|
|
139
139
|
let sampled_records = await sampling_iterator.get_all_records(10);
|
|
140
140
|
let warnings = sampling_iterator.get_warnings();
|
|
141
141
|
return [sampled_records, warnings];
|
|
@@ -183,7 +183,7 @@ async function handle_query_success(warnings, output_path, encoding, delim, poli
|
|
|
183
183
|
}
|
|
184
184
|
}
|
|
185
185
|
if (interactive_mode) {
|
|
186
|
-
let [records, _warnings] = await sample_records(output_path, encoding, delim, policy);
|
|
186
|
+
let [records, _warnings] = await sample_records(output_path, encoding, delim, policy, /*comment_prefix=*/null, /*trim_whitespaces=*/false);
|
|
187
187
|
console.log('\nOutput table preview:');
|
|
188
188
|
console.log('====================================');
|
|
189
189
|
print_colorized(records, delim, false, false);
|
|
@@ -210,6 +210,7 @@ async function run_with_js(args) {
|
|
|
210
210
|
var csv_encoding = args['encoding'];
|
|
211
211
|
var with_headers = args['with-headers'];
|
|
212
212
|
var comment_prefix = args['comment-prefix'];
|
|
213
|
+
var trim_whitespaces = args['trim-spaces'];
|
|
213
214
|
var output_delim = get_default(args, 'out-delim', null);
|
|
214
215
|
var output_policy = get_default(args, 'out-policy', null);
|
|
215
216
|
let init_source_file = get_default(args, 'init-source-file', null);
|
|
@@ -230,7 +231,7 @@ async function run_with_js(args) {
|
|
|
230
231
|
// * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
|
|
231
232
|
// * Streaming mode works a little faster (since we don't need to do the manual validation)
|
|
232
233
|
// TODO check if the current node installation doesn't have ICU enabled (which is typicaly provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
|
|
233
|
-
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code);
|
|
234
|
+
await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code, {'trim_whitespaces': trim_whitespaces});
|
|
234
235
|
await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
|
|
235
236
|
return true;
|
|
236
237
|
} catch (e) {
|
|
@@ -250,8 +251,8 @@ function get_default_output_path(input_path, delim) {
|
|
|
250
251
|
}
|
|
251
252
|
|
|
252
253
|
|
|
253
|
-
async function show_preview(input_path, encoding, delim, policy, with_headers) {
|
|
254
|
-
let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
|
|
254
|
+
async function show_preview(input_path, encoding, delim, policy, with_headers, comment_prefix, trim_whitespaces) {
|
|
255
|
+
let [records, warnings] = await sample_records(input_path, encoding, delim, policy, comment_prefix, trim_whitespaces);
|
|
255
256
|
console.log('Input table preview:');
|
|
256
257
|
console.log('====================================');
|
|
257
258
|
print_colorized(records, delim, true, with_headers);
|
|
@@ -280,7 +281,7 @@ async function run_interactive_loop(args) {
|
|
|
280
281
|
if (!delim)
|
|
281
282
|
throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
|
|
282
283
|
}
|
|
283
|
-
await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
|
|
284
|
+
await show_preview(input_path, args['encoding'], delim, policy, args['with-headers'], args['comment-prefix'], args['trim-spaces']);
|
|
284
285
|
args.delim = delim;
|
|
285
286
|
args.policy = policy;
|
|
286
287
|
if (!args.output) {
|
|
@@ -368,6 +369,7 @@ function main() {
|
|
|
368
369
|
'--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
|
|
369
370
|
'--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
|
|
370
371
|
'--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
|
|
372
|
+
'--trim-spaces': {'boolean': true, 'help': 'Trim leading and trailing spaces from fields'},
|
|
371
373
|
'--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
|
|
372
374
|
'--out-delim': {'help': 'Output delim. Use with "out-policy". Overrides out-format', 'metavar': 'DELIM'},
|
|
373
375
|
'--out-policy': {'help': 'Output policy. Use with "out-delim". Overrides out-format', 'metavar': 'POLICY'},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rbql",
|
|
3
|
-
"version": "0.26.0",
|
|
3
|
+
"version": "0.28.0",
|
|
4
4
|
"description": "Rainbow Query Language",
|
|
5
5
|
"keywords": ["CSV", "TSV", "spreadsheet", "SQL", "SQL-like", "transpiler", "CLI", "command-line", "library", "browser", "Node", "select", "update", "join"],
|
|
6
6
|
"scripts": {
|
package/rbql.js
CHANGED
|
@@ -70,7 +70,7 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions.
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
|
|
73
|
-
const RBQL_VERSION = '0.
|
|
73
|
+
const RBQL_VERSION = '0.27.0';
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
function check_if_brackets_match(opening_bracket, closing_bracket) {
|
|
@@ -130,30 +130,30 @@ function column_info_from_text_span(text_span, string_literals) {
|
|
|
130
130
|
let subscript_str_match = /^([ab])\[___RBQL_STRING_LITERAL([0-9]+)___\]$/.exec(text_span);
|
|
131
131
|
let as_alias_match = /^(.*) (as|AS) +([a-zA-Z][a-zA-Z0-9_]*) *$/.exec(text_span);
|
|
132
132
|
if (as_alias_match !== null) {
|
|
133
|
-
return {table_name: null, column_index: null, column_name:
|
|
133
|
+
return {table_name: null, column_index: null, column_name: null, is_star: false, alias_name: as_alias_match[3]};
|
|
134
134
|
}
|
|
135
135
|
if (simple_var_match !== null) {
|
|
136
136
|
if (text_span == rbql_star_marker)
|
|
137
|
-
return {table_name: null, column_index: null, column_name: null, is_star: true,
|
|
137
|
+
return {table_name: null, column_index: null, column_name: null, is_star: true, alias_name: null};
|
|
138
138
|
if (text_span.startsWith('___RBQL_STRING_LITERAL'))
|
|
139
139
|
return null;
|
|
140
140
|
let match = /^([ab])([0-9]+)$/.exec(text_span);
|
|
141
141
|
if (match !== null) {
|
|
142
|
-
return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false,
|
|
142
|
+
return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false, alias_name: null};
|
|
143
143
|
}
|
|
144
144
|
// Some examples for this branch: NR, NF
|
|
145
|
-
return {table_name: null, column_index: null, column_name: text_span, is_star: false,
|
|
145
|
+
return {table_name: null, column_index: null, column_name: text_span, is_star: false, alias_name: null};
|
|
146
146
|
} else if (attribute_match !== null) {
|
|
147
147
|
let table_name = attribute_match[1];
|
|
148
148
|
let column_name = attribute_match[2];
|
|
149
149
|
if (column_name == rbql_star_marker) {
|
|
150
|
-
return {table_name: table_name, column_index: null, column_name: null, is_star: true,
|
|
150
|
+
return {table_name: table_name, column_index: null, column_name: null, is_star: true, alias_name: null};
|
|
151
151
|
}
|
|
152
|
-
return {table_name: null, column_index: null, column_name: column_name, is_star: false,
|
|
152
|
+
return {table_name: null, column_index: null, column_name: column_name, is_star: false, alias_name: null};
|
|
153
153
|
} else if (subscript_int_match != null) {
|
|
154
154
|
let table_name = subscript_int_match[1];
|
|
155
155
|
let column_index = parseInt(subscript_int_match[2]) - 1;
|
|
156
|
-
return {table_name: table_name, column_index: column_index, column_name: null, is_star: false,
|
|
156
|
+
return {table_name: table_name, column_index: column_index, column_name: null, is_star: false, alias_name: null};
|
|
157
157
|
} else if (subscript_str_match != null) {
|
|
158
158
|
let table_name = subscript_str_match[1];
|
|
159
159
|
let replaced_string_literal_id = subscript_str_match[2];
|
|
@@ -161,7 +161,7 @@ function column_info_from_text_span(text_span, string_literals) {
|
|
|
161
161
|
let quoted_column_name = string_literals[replaced_string_literal_id];
|
|
162
162
|
let unquoted_column_name = unquote_string(quoted_column_name);
|
|
163
163
|
if (unquoted_column_name !== null && unquoted_column_name !== undefined) {
|
|
164
|
-
return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false,
|
|
164
|
+
return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false, alias_name: null};
|
|
165
165
|
}
|
|
166
166
|
}
|
|
167
167
|
}
|
|
@@ -289,6 +289,24 @@ function parse_number(val) {
|
|
|
289
289
|
}
|
|
290
290
|
|
|
291
291
|
|
|
292
|
+
class AnyValueAggregator {
|
|
293
|
+
constructor() {
|
|
294
|
+
this.stats = new Map();
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
increment(key, val) {
|
|
298
|
+
var cur_aggr = this.stats.get(key);
|
|
299
|
+
if (cur_aggr === undefined) {
|
|
300
|
+
this.stats.set(key, val);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
get_final(key) {
|
|
305
|
+
return this.stats.get(key);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
|
|
292
310
|
class MinAggregator {
|
|
293
311
|
constructor() {
|
|
294
312
|
this.stats = new Map();
|
|
@@ -310,7 +328,6 @@ class MinAggregator {
|
|
|
310
328
|
}
|
|
311
329
|
|
|
312
330
|
|
|
313
|
-
|
|
314
331
|
class MaxAggregator {
|
|
315
332
|
constructor() {
|
|
316
333
|
this.stats = new Map();
|
|
@@ -514,6 +531,11 @@ function init_aggregator(generator_name, val, post_proc=null) {
|
|
|
514
531
|
return res;
|
|
515
532
|
}
|
|
516
533
|
|
|
534
|
+
function ANY_VALUE(val) {
|
|
535
|
+
return query_context.aggregation_stage < 2 ? init_aggregator(AnyValueAggregator, val) : val;
|
|
536
|
+
}
|
|
537
|
+
const any_value = ANY_VALUE;
|
|
538
|
+
const Any_value = ANY_VALUE;
|
|
517
539
|
|
|
518
540
|
function MIN(val) {
|
|
519
541
|
return query_context.aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val;
|
|
@@ -521,7 +543,6 @@ function MIN(val) {
|
|
|
521
543
|
const min = MIN;
|
|
522
544
|
const Min = MIN;
|
|
523
545
|
|
|
524
|
-
|
|
525
546
|
function MAX(val) {
|
|
526
547
|
return query_context.aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val;
|
|
527
548
|
}
|
|
@@ -1259,8 +1280,8 @@ function generate_init_statements(query_text, variables_map, join_variables_map,
|
|
|
1259
1280
|
|
|
1260
1281
|
|
|
1261
1282
|
function replace_star_count(aggregate_expression) {
|
|
1262
|
-
var rgx = /(
|
|
1263
|
-
var result = aggregate_expression.replace(rgx, '
|
|
1283
|
+
var rgx = /(?:(?<=^)|(?<=,)) *COUNT\( *\* *\)/ig;
|
|
1284
|
+
var result = aggregate_expression.replace(rgx, ' COUNT(1)');
|
|
1264
1285
|
return str_strip(result);
|
|
1265
1286
|
}
|
|
1266
1287
|
|
|
@@ -1580,13 +1601,22 @@ function select_output_header(input_header, join_header, query_column_infos) {
|
|
|
1580
1601
|
if (input_header === null) {
|
|
1581
1602
|
assert(join_header === null);
|
|
1582
1603
|
}
|
|
1604
|
+
let query_has_star = false;
|
|
1605
|
+
let query_has_column_alias = false;
|
|
1606
|
+
for (let qci of query_column_infos) {
|
|
1607
|
+
query_has_star = query_has_star || (qci !== null && qci.is_star);
|
|
1608
|
+
query_has_column_alias = query_has_column_alias || (qci !== null && qci.alias_name !== null);
|
|
1609
|
+
}
|
|
1583
1610
|
if (input_header === null) {
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
throw new RbqlParsingError(`Specifying column alias "AS ${qci.column_name}" is not allowed if input table has no header`);
|
|
1587
|
-
}
|
|
1611
|
+
if (query_has_star && query_has_column_alias) {
|
|
1612
|
+
throw new RbqlParsingError('Using both * (star) and AS alias in the same query is not allowed for input tables without header');
|
|
1588
1613
|
}
|
|
1589
|
-
|
|
1614
|
+
if (!query_has_column_alias) {
|
|
1615
|
+
// Input table has no header and query has no aliases therefore the output table will be without header.
|
|
1616
|
+
return null;
|
|
1617
|
+
}
|
|
1618
|
+
input_header = [];
|
|
1619
|
+
join_header = [];
|
|
1590
1620
|
}
|
|
1591
1621
|
if (join_header === null) {
|
|
1592
1622
|
// This means there is no JOIN table.
|
|
@@ -1607,6 +1637,8 @@ function select_output_header(input_header, join_header, query_column_infos) {
|
|
|
1607
1637
|
}
|
|
1608
1638
|
} else if (qci.column_name !== null) {
|
|
1609
1639
|
output_header.push(qci.column_name);
|
|
1640
|
+
} else if (qci.alias_name !== null) {
|
|
1641
|
+
output_header.push(qci.alias_name);
|
|
1610
1642
|
} else if (qci.column_index !== null) {
|
|
1611
1643
|
if (qci.table_name == 'a' && qci.column_index < input_header.length) {
|
|
1612
1644
|
output_header.push(input_header[qci.column_index]);
|
|
@@ -1812,6 +1844,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
|
|
|
1812
1844
|
if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE))
|
|
1813
1845
|
throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries');
|
|
1814
1846
|
query_context.aggregation_key_expression = '[' + combine_string_literals(rb_actions[GROUP_BY]['text'], string_literals) + ']';
|
|
1847
|
+
query_context.aggregation_stage = 1;
|
|
1815
1848
|
}
|
|
1816
1849
|
|
|
1817
1850
|
|
package/rbql_csv.js
CHANGED
|
@@ -156,7 +156,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
156
156
|
// CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
|
|
157
157
|
// get_record() - consumer
|
|
158
158
|
// stream.on('data') - producer
|
|
159
|
-
constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
|
|
159
|
+
constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a', trim_whitespaces=false) {
|
|
160
160
|
super();
|
|
161
161
|
this.stream = stream;
|
|
162
162
|
this.csv_path = csv_path;
|
|
@@ -173,6 +173,7 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
173
173
|
this.table_name = table_name;
|
|
174
174
|
this.variable_prefix = variable_prefix;
|
|
175
175
|
this.comment_prefix = comment_prefix;
|
|
176
|
+
this.trim_whitespaces = trim_whitespaces;
|
|
176
177
|
|
|
177
178
|
this.decoder = null;
|
|
178
179
|
if (encoding == 'utf-8' && this.csv_path === null) {
|
|
@@ -350,6 +351,9 @@ class CSVRecordIterator extends rbql.RBQLInputIterator {
|
|
|
350
351
|
process_record_line(line) {
|
|
351
352
|
this.NR += 1;
|
|
352
353
|
var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
|
|
354
|
+
if (this.trim_whitespaces) {
|
|
355
|
+
record = record.map((v) => v.trim());
|
|
356
|
+
}
|
|
353
357
|
if (warning) {
|
|
354
358
|
if (this.first_defective_line === null) {
|
|
355
359
|
this.first_defective_line = this.NL;
|
|
@@ -675,7 +679,8 @@ class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
|
|
|
675
679
|
} else {
|
|
676
680
|
this.stream = fs.createReadStream(this.table_path);
|
|
677
681
|
}
|
|
678
|
-
|
|
682
|
+
let trim_whitespaces = this.options && this.options['trim_whitespaces'] ? true : false;
|
|
683
|
+
this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b', trim_whitespaces);
|
|
679
684
|
return this.record_iterator;
|
|
680
685
|
};
|
|
681
686
|
|
|
@@ -695,6 +700,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
695
700
|
} else {
|
|
696
701
|
input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
|
|
697
702
|
}
|
|
703
|
+
let trim_whitespaces = options && options['trim_whitespaces'] ? true : false;
|
|
698
704
|
let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
|
|
699
705
|
if (input_delim == '"' && input_policy == 'quoted')
|
|
700
706
|
throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
|
|
@@ -711,7 +717,7 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
|
|
|
711
717
|
}
|
|
712
718
|
let input_file_dir = input_path ? path.dirname(input_path) : null;
|
|
713
719
|
let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
|
|
714
|
-
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
|
|
720
|
+
let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix, 'input', 'a', trim_whitespaces);
|
|
715
721
|
let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
|
|
716
722
|
|
|
717
723
|
await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
|