node-red-contrib-prib-functions 0.23.2 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/.github/copilot-instructions.md +36 -0
  2. package/README.md +153 -140
  3. package/columnar/columnar.html +258 -0
  4. package/columnar/columnar.js +1055 -0
  5. package/columnar/icons/columnar.svg +38 -0
  6. package/fileSystem/filesystem.html +299 -0
  7. package/fileSystem/filesystem.js +170 -0
  8. package/gitlab/gitlab.html +191 -0
  9. package/gitlab/gitlab.js +248 -0
  10. package/gitlab/icons/gitlab.svg +17 -0
  11. package/lib/AlphaBeta.js +32 -0
  12. package/lib/GraphDB.js +40 -9
  13. package/lib/MinMax.js +17 -0
  14. package/lib/Tree.js +64 -0
  15. package/lib/objectExtensions.js +28 -5
  16. package/lib/timeDimension.js +36 -0
  17. package/lib/typedInput.js +18 -2
  18. package/logisticRegression/icons/logisticregression.svg +22 -0
  19. package/logisticRegression/logisticRegression.html +136 -0
  20. package/logisticRegression/logisticRegression.js +83 -0
  21. package/package.json +21 -9
  22. package/test/02-graphdb.js +46 -0
  23. package/test/columnar.js +509 -0
  24. package/test/data/.config.nodes.json +114 -70
  25. package/test/data/.config.nodes.json.backup +104 -71
  26. package/test/data/.config.runtime.json +2 -1
  27. package/test/data/.config.runtime.json.backup +2 -1
  28. package/test/data/.config.users.json +3 -2
  29. package/test/data/.config.users.json.backup +3 -2
  30. package/test/data/.flow.json.backup +1545 -369
  31. package/test/data/flow.json +1457 -270
  32. package/test/data/package-lock.json +11 -11
  33. package/test/data/shares/.config.nodes.json +611 -0
  34. package/test/data/shares/.config.nodes.json.backup +589 -0
  35. package/test/data/shares/.config.runtime.json +5 -0
  36. package/test/data/shares/.config.runtime.json.backup +4 -0
  37. package/test/data/shares/.config.users.json +33 -0
  38. package/test/data/shares/.config.users.json.backup +33 -0
  39. package/test/data/shares/.flow.json.backup +230 -0
  40. package/test/data/shares/.flow_cred.json.backup +3 -0
  41. package/test/data/shares/flow.json +267 -0
  42. package/test/data/shares/flow_cred.json +3 -0
  43. package/test/data/shares/package.json +6 -0
  44. package/test/data/shares/settings.js +544 -0
  45. package/test/dataAnalysisExtensions.js +93 -93
  46. package/test/logisticRegression.js +379 -0
  47. package/test/transform.js +11 -11
  48. package/test/transformConfluence.js +4 -2
  49. package/test/transformNumPy.js +3 -1
  50. package/test/transformXLSX.js +4 -2
  51. package/test/transformXML.js +4 -2
  52. package/test-runner.js +400 -0
  53. package/test.parq +0 -0
  54. package/test_select.js +37 -0
  55. package/testing/test.js +8 -7
  56. package/transform/transform.html +23 -2
  57. package/transform/transform.js +239 -283
  58. package/transform/xlsx2.js +74 -0
package/columnar/columnar.js
@@ -0,0 +1,1055 @@
+ const logger = new (require("node-red-contrib-logger"))("Columnar Store");
+ logger.sendInfo("Copyright 2025 Jaroslav Peter Prib");
+
+ // Implementation based on Apache Parquet specification concepts
+ // Uses columnar storage, compression, and metadata principles from the spec
+ const fs = require('fs');
+ const path = require('path');
+ const zlib = require('zlib');
+
+ // Simple columnar storage implementation inspired by Apache Parquet specification
+ class SimpleColumnarStore {
+   static async writeRecords(records, filePath) {
+     if (!records || records.length === 0) {
+       throw new Error("No records to write");
+     }
+
+     // Extract schema from first record (following Parquet schema concepts)
+     const schema = {};
+     const firstRecord = records[0];
+     for (const [key, value] of Object.entries(firstRecord)) {
+       schema[key] = typeof value;
+     }
+
+     // Organize data by columns (columnar storage principle)
+     const columns = {};
+     for (const key of Object.keys(schema)) {
+       columns[key] = records.map(record => record[key] || null);
+     }
+
+     // Create metadata (following Parquet metadata concepts)
+     const metadata = {
+       version: 1,
+       schema: schema,
+       numRows: records.length,
+       numColumns: Object.keys(schema).length,
+       created: new Date().toISOString(),
+       compression: 'gzip'
+     };
+
+     // Serialize and compress data (following Parquet compression concepts)
+     const data = {
+       metadata: metadata,
+       columns: columns
+     };
+
+     const jsonData = JSON.stringify(data);
+     const compressed = zlib.gzipSync(jsonData);
+
+     // Write to file with magic bytes (inspired by Parquet file format)
+     const magic = Buffer.from('PARQ');
+     const metadataSize = Buffer.alloc(4);
+     metadataSize.writeUInt32LE(compressed.length, 0);
+
+     const output = Buffer.concat([magic, metadataSize, compressed]);
+     fs.writeFileSync(filePath, output);
+
+     return {
+       filePath: filePath,
+       recordsWritten: records.length,
+       schema: schema,
+       compressedSize: output.length
+     };
+   }
+   // improved writeRecords: uses true columnar binary buffers with individual compression
+   // Now supports appending to existing files
+   static async writeRecords(records, filePath, append = false) {
+     if (!records || records.length === 0) {
+       throw new Error("No records to write");
+     }
+
+     // If appending and file exists, read existing data and merge
+     let existingRecords = [];
+     let existingSchema = {};
+     if (append && fs.existsSync(filePath)) {
+       try {
+         const existingData = await this.readRecords(filePath);
+         existingRecords = existingData.records;
+         existingSchema = existingData.schema;
+       } catch (e) {
+         // If we can't read the existing file, treat it as non-existent
+         existingRecords = [];
+         existingSchema = {};
+       }
+     }
+
+     // Combine existing and new records
+     const allRecords = [...existingRecords, ...records];
+
+     // infer schema types from all records (merge schemas)
+     const schema = {...existingSchema};
+     for (const record of allRecords) {
+       for (const [k, v] of Object.entries(record)) {
+         if (!(k in schema)) {
+           schema[k] = typeof v;
+         }
+       }
+     }
+
+     // build column buffers (binary) rather than JSON
+     const columnBuffers = [];
+     const columnMeta = [];
+
+     for (const [col, type] of Object.entries(schema)) {
+       let buf;
+       switch (type) {
+         case 'number': {
+           const arr = new Float64Array(allRecords.length);
+           allRecords.forEach((r,i)=> arr[i] = r[col] == null ? NaN : r[col]);
+           buf = Buffer.from(arr.buffer);
+           break;
+         }
+         case 'boolean': {
+           const arr = Buffer.alloc(allRecords.length);
+           allRecords.forEach((r,i)=> arr[i] = r[col] ? 1 : 0);
+           buf = arr;
+           break;
+         }
+         case 'string': {
+           const parts = [];
+           allRecords.forEach(r=>{
+             const s = r[col] == null ? '' : String(r[col]);
+             const sb = Buffer.from(s,'utf8');
+             const len = Buffer.alloc(4);
+             len.writeUInt32LE(sb.length,0);
+             parts.push(len,sb);
+           });
+           buf = Buffer.concat(parts);
+           break;
+         }
+         default: {
+           const json = allRecords.map(r=>r[col]);
+           buf = Buffer.from(JSON.stringify(json),'utf8');
+         }
+       }
+       const comp = zlib.gzipSync(buf);
+       columnMeta.push({name: col, type: type, length: comp.length});
+       columnBuffers.push(comp);
+     }
+
+     const metadata = {
+       version: 1,
+       schema: schema,
+       numRows: allRecords.length,
+       numColumns: columnMeta.length,
+       created: new Date().toISOString(),
+       compression: 'gzip',
+       columns: columnMeta
+     };
+
+     const metaBuf = Buffer.from(JSON.stringify(metadata),'utf8');
+     const metaSize = Buffer.alloc(4);
+     metaSize.writeUInt32LE(metaBuf.length,0);
+
+     const magic = Buffer.from('PARQ');
+
+     const pieces = [magic, metaSize, metaBuf];
+     for (const comp of columnBuffers) {
+       const sizeBuf = Buffer.alloc(4);
+       sizeBuf.writeUInt32LE(comp.length,0);
+       pieces.push(sizeBuf, comp);
+     }
+     const output = Buffer.concat(pieces);
+     fs.writeFileSync(filePath, output);
+     return {
+       filePath,
+       recordsWritten: records.length,
+       totalRecords: allRecords.length,
+       schema,
+       fileSize: output.length,
+       appended: append && existingRecords.length > 0
+     };
+   }
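Note that the class body defines writeRecords twice; under JavaScript class semantics the second, binary append-capable version above silently replaces the first JSON-based one (the same pattern recurs for readRecords below). A minimal round-trip sketch against the surviving implementation, with an illustrative require path, file path and records:

const { SimpleColumnarStore } = require("./columnar/columnar.js"); // path is illustrative
(async () => {
  const rows = [{ id: 1, name: "a", active: true }, { id: 2, name: "b", active: false }];
  await SimpleColumnarStore.writeRecords(rows, "/tmp/example.parq");                                        // create
  await SimpleColumnarStore.writeRecords([{ id: 3, name: "c", active: true }], "/tmp/example.parq", true);  // append
})();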
+
+   // Convenience method to append records to existing file
+   static async appendRecords(records, filePath) {
+     return this.writeRecords(records, filePath, true);
+   }
+
+   static async readRecords(filePath) {
+     if (!fs.existsSync(filePath)) {
+       throw new Error("File does not exist: " + filePath);
+     }
+
+     const buffer = fs.readFileSync(filePath);
+
+     // Check magic bytes
+     const magic = buffer.slice(0, 4).toString();
+     if (magic !== 'PARQ') {
+       throw new Error("Invalid file format - not a columnar file");
+     }
+
+     // Read compressed data size
+     const dataSize = buffer.readUInt32LE(4);
+     const compressed = buffer.slice(8, 8 + dataSize);
+
+     // Decompress and parse
+     const jsonData = zlib.gunzipSync(compressed).toString();
+     const data = JSON.parse(jsonData);
+
+     // Reconstruct records from columnar data
+     const records = [];
+     const numRows = data.metadata.numRows;
+
+     for (let i = 0; i < numRows; i++) {
+       const record = {};
+       for (const [columnName, columnData] of Object.entries(data.columns)) {
+         record[columnName] = columnData[i];
+       }
+       records.push(record);
+     }
+
+     return {
+       records: records,
+       count: records.length,
+       schema: data.metadata.schema,
+       metadata: data.metadata,
+       filePath: filePath
+     };
+   }
+   static async readRecords(filePath) {
+     if (!fs.existsSync(filePath)) {
+       throw new Error("File does not exist: " + filePath);
+     }
+
+     const buffer = fs.readFileSync(filePath);
+
+     // Check magic bytes
+     const magic = buffer.slice(0,4).toString();
+     if (magic !== 'PARQ') throw new Error('Invalid format');
+
+     let offset = 4;
+     const metaSize = buffer.readUInt32LE(offset); offset +=4;
+     const metaBuf = buffer.slice(offset, offset+metaSize); offset += metaSize;
+     const metadata = JSON.parse(metaBuf.toString('utf8'));
+
+     const records = [];
+     const numRows = metadata.numRows;
+
+     // read each column sequentially according to metadata
+     const columns = {};
+     for (const colMeta of metadata.columns) {
+       const colSize = buffer.readUInt32LE(offset); offset +=4;
+       const comp = buffer.slice(offset, offset+colSize); offset += colSize;
+       const raw = zlib.gunzipSync(comp);
+       // decode according to type
+       let arr;
+       switch (colMeta.type) {
+         case 'number': arr = new Float64Array(raw.buffer, raw.byteOffset, numRows); break;
+         case 'boolean': arr = Uint8Array.from(raw); break;
+         case 'string': {
+           const vals = [];
+           let p = 0;
+           for (let i=0;i<numRows;i++) {
+             const len = raw.readUInt32LE(p); p+=4;
+             vals.push(raw.slice(p,p+len).toString('utf8')); p+=len;
+           }
+           arr = vals;
+           break;
+         }
+         default: arr = JSON.parse(raw.toString('utf8'));
+       }
+       columns[colMeta.name] = arr;
+     }
+
+     for (let i=0;i<numRows;i++) {
+       const rec = {};
+       for (const colName of Object.keys(columns)) {
+         const colArr = columns[colName];
+         rec[colName] = colArr[i];
+       }
+       records.push(rec);
+     }
+
+     return {records, count: records.length, schema: metadata.schema, metadata, filePath};
+   }
+
+   static async queryRecords(filePath, options = {}) {
+     const { limit, filter, ridMap } = options;
+     const data = await this.readRecords(filePath);
+
+     let records = data.records;
+     let totalScanned = records.length;
+
+     // if a ridMap is provided we intersect the sets for each column
+     if (ridMap) {
+       let hits;
+       for (const col in ridMap) {
+         const rids = ridMap[col];
+         if (!Array.isArray(rids) && !(rids instanceof Set))
+           throw Error('ridMap values must be array or set');
+         const set = new Set(rids);
+         if (hits == null) hits = set;
+         else hits = new Set([...hits].filter(x => set.has(x)));
+       }
+       if (hits) {
+         records = records.filter((_r, i) => hits.has(i));
+       }
+     } else if (filter) {
+       if (typeof filter === 'function') {
+         records = records.filter(filter);
+       } else if (typeof filter === 'object') {
+         // treat object as column->predicate map and intersect rids
+         let ridSets = [];
+         for (const [col, pred] of Object.entries(filter)) {
+           if (typeof pred !== 'function') continue;
+           const set = new Set();
+           const colArr = data.records.map(r => r[col]);
+           colArr.forEach((val, i) => {
+             try {
+               if (pred(val)) set.add(i);
+             } catch (e) {}
+           });
+           ridSets.push(set);
+         }
+         if (ridSets.length > 0) {
+           let common = ridSets.shift();
+           ridSets.forEach(s => {
+             common = new Set([...common].filter(x => s.has(x)));
+           });
+           records = [...common].sort((a, b) => a - b).map(i => data.records[i]);
+           totalScanned = data.records.length;
+         }
+       }
+     }
+
+     // Apply limit if provided
+     if (limit && limit > 0) {
+       records = records.slice(0, limit);
+     }
+
+     return {
+       records: records,
+       count: records.length,
+       totalScanned: totalScanned,
+       schema: data.schema,
+       filePath: filePath,
+       filtered: !!(filter || ridMap)
+     };
+   }
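For orientation, a brief sketch of the three filter shapes queryRecords accepts (path and field names are illustrative; calls assumed to run inside an async function):

// predicate over whole records, plus a row limit
await SimpleColumnarStore.queryRecords("/tmp/example.parq", { filter: r => r.active, limit: 10 });
// column -> predicate map; the matching row ids of every predicate are intersected
await SimpleColumnarStore.queryRecords("/tmp/example.parq", { filter: { id: v => v > 1, active: v => v === true } });
// explicit ridMap of row indexes per column, also intersected
await SimpleColumnarStore.queryRecords("/tmp/example.parq", { ridMap: { id: [0, 2], active: new Set([2]) } });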
+
+   // Full-featured SQL query engine with JOIN support. Supports:
+   // SELECT [col1,col2,…|*|COUNT(*)|SUM(col)|AVG(col)|etc] FROM ?
+   // [INNER|LEFT|RIGHT|FULL OUTER] JOIN filePath ON <join condition>
+   // [WHERE <expression>]
+   // [GROUP BY col1,col2,…]
+   // [HAVING <expression>]
+   // [ORDER BY col1 [ASC|DESC], …]
+   // [LIMIT n]
+   // Parameters: Use $param or :param syntax (e.g., WHERE age > $age)
+   // Expressions may use JavaScript syntax with record fields.
+   static async sqlQuery(filePath, sql, parsedTokens = null, parameters = {}) {
+     if (typeof sql !== 'string' || !sql.trim()) {
+       throw new Error('sql must be a non-empty string');
+     }
+
+     const data = await this.readRecords(filePath);
+
+     // Parse query components first (before parameter substitution)
+     const tokens = parsedTokens || this._parseSql(sql);
+
+     let records = [...data.records];
+
+     // JOIN operations
+     if (tokens.joins && tokens.joins.length > 0) {
+       for (const join of tokens.joins) {
+         const joinData = await this.readRecords(join.filePath);
+         records = this._applyJoin(records, joinData.records, join);
+       }
+     }
+
+     // WHERE clause - use prepared statement if available
+     if (tokens.where) {
+       let whereExpr = tokens.where;
+       if (tokens.wherePrepared && tokens.whereParamLocations) {
+         // Use prepared statement approach
+         whereExpr = this._substituteParametersInPrepared(
+           tokens.wherePrepared,
+           tokens.whereParamLocations,
+           { msg: parameters.msg, flow: parameters.flow, global: parameters.global }
+         );
+       }
+       records = this._applyWhere(records, whereExpr);
+     }
+
+     // GROUP BY or aggregates without GROUP BY
+     if (tokens.groupBy && tokens.groupBy.length > 0) {
+       records = this._applyGroupBy(records, tokens.groupBy, tokens.select);
+     } else if (tokens.hasAggregates) {
+       // Aggregates without GROUP BY
+       records = this._applyGroupBy(records, [], tokens.select);
+     }
+
+     // HAVING clause (after grouping) - use prepared statement if available
+     if (tokens.having) {
+       let havingExpr = tokens.having;
+       if (tokens.havingPrepared && tokens.havingParamLocations) {
+         // Use prepared statement approach
+         havingExpr = this._substituteParametersInPrepared(
+           tokens.havingPrepared,
+           tokens.havingParamLocations,
+           { msg: parameters.msg, flow: parameters.flow, global: parameters.global }
+         );
+       }
+       records = this._applyWhere(records, havingExpr);
+     }
+
+     // ORDER BY
+     if (tokens.orderBy && tokens.orderBy.length > 0) {
+       records = this._applyOrderBy(records, tokens.orderBy);
+     }
+
+     // LIMIT
+     if (tokens.limit) {
+       records = records.slice(0, tokens.limit);
+     }
+
+     // Final projection (SELECT columns)
+     // Always apply if not SELECT *, or if we have JOINs (to handle alias.column syntax)
+     const hasJoins = tokens.joins && tokens.joins.length > 0;
+     if (tokens.select && tokens.select !== '*' && (!tokens.hasAggregates || hasJoins)) {
+       let selectExpr = tokens.select;
+       // Substitute parameters in SELECT clause if prepared statement exists
+       if (tokens.selectPrepared && tokens.selectParamLocations) {
+         selectExpr = this._substituteParametersInPrepared(
+           tokens.selectPrepared,
+           tokens.selectParamLocations,
+           { msg: parameters.msg, flow: parameters.flow, global: parameters.global }
+         );
+       }
+       records = this._projectColumns(records, selectExpr);
+     }
+
+     return records;
+   }
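Despite the "$param or :param" wording in the comment above, the parameter helpers below only recognise :msg.*, :flow.* and :global.* markers. A hedged usage sketch (path, columns and values are illustrative; inside an async function):

const rows = await SimpleColumnarStore.sqlQuery(
  "/tmp/users.parq",
  "SELECT name, COUNT(*) AS n FROM ? WHERE age > :msg.payload.minAge GROUP BY name ORDER BY n DESC LIMIT 5",
  null,
  { msg: { payload: { minAge: 21 } } }
);
// a join reads a second file, e.g.:
// "SELECT u.name, o.total FROM ? u INNER JOIN 'orders.parq' o ON u.id = o.userId"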
+
+   // Extract and prepare parameters from an expression (prepared statement approach)
+   // Returns {expression: prepared_expr, paramLocations: [{marker, ctxType, path}]}
+   static _extractAndPrepareParameters(expr) {
+     const paramLocations = [];
+     let paramIndex = 0;
+
+     const preparedExpr = expr.replace(/:(msg|flow|global)\.[a-zA-Z0-9_$.]+/g, (match) => {
+       // Extract context type and path from marker like ":msg.payload.minAge"
+       const ctxType = match.split('.')[0].slice(1); // Get 'msg' from ":msg"
+       const path = match.slice(1 + ctxType.length + 1); // Get "payload.minAge" after ":msg."
+       paramLocations.push({ marker: match, ctxType, path, index: paramIndex });
+       // Replace with a placeholder that won't interfere with JavaScript parsing
+       paramIndex++;
+       return `__PARAM_${paramLocations.length - 1}__`;
+     });
+
+     return { preparedExpr, paramLocations };
+   }
+
+   // Substitute prepared parameters with actual values at runtime
+   static _substituteParametersInPrepared(preparedExpr, paramLocations, context = {}) {
+     function getDeep(obj, path) {
+       if (!obj || typeof path !== 'string') return undefined;
+       return path.split('.').reduce((acc, part) => (acc && acc[part] !== undefined ? acc[part] : undefined), obj);
+     }
+
+     let resultExpr = preparedExpr;
+
+     for (const param of paramLocations) {
+       let value;
+       if (param.ctxType === 'msg') {
+         value = getDeep(context.msg, param.path);
+       } else if (param.ctxType === 'flow') {
+         value = getDeep(context.flow, param.path);
+       } else if (param.ctxType === 'global') {
+         value = getDeep(context.global, param.path);
+       } else {
+         throw new Error(`Invalid parameter context: ${param.ctxType}`);
+       }
+
+       if (value === undefined) {
+         throw new Error(`Parameter '${param.marker}' not provided`);
+       }
+
+       // Safely convert value to JavaScript literal
+       let safeValue;
+       if (typeof value === 'string') {
+         safeValue = JSON.stringify(value);
+       } else if (typeof value === 'number' || typeof value === 'boolean') {
+         safeValue = String(value);
+       } else {
+         safeValue = JSON.stringify(value);
+       }
+
+       resultExpr = resultExpr.replace(`__PARAM_${param.index}__`, safeValue);
+     }
+
+     return resultExpr;
+   }
+
+   // Legacy function for backward compatibility (deprecated)
+   static _substituteParameters(sql, context = {}) {
+     // Only allow :msg.something, :flow.something, :global.something
+     function getDeep(obj, path) {
+       if (!obj || typeof path !== 'string') return undefined;
+       return path.split('.').reduce((acc, part) => (acc && acc[part] !== undefined ? acc[part] : undefined), obj);
+     }
+     return sql.replace(/:(msg|flow|global)\.[a-zA-Z0-9_$.]+/g, (match) => {
+       // Extract context type and path
+       const [_, ctxType, ...pathParts] = match.split(/[:.]/);
+       const path = match.slice(1 + ctxType.length + 1); // skip : and ctxType.
+       let value;
+       if (ctxType === 'msg') {
+         value = getDeep(context.msg, path);
+       } else if (ctxType === 'flow') {
+         value = getDeep(context.flow, path);
+       } else if (ctxType === 'global') {
+         value = getDeep(context.global, path);
+       } else {
+         throw new Error(`Invalid parameter context: ${ctxType}`);
+       }
+       if (value === undefined) {
+         throw new Error(`Parameter '${match}' not provided`);
+       }
+       // Only allow safe values
+       if (typeof value === 'string') {
+         return JSON.stringify(value);
+       } else if (typeof value === 'number' || typeof value === 'boolean') {
+         return String(value);
+       } else {
+         return JSON.stringify(value);
+       }
+     });
+   }
+   static _parseSql(sql) {
+     const result = {
+       select: '*',
+       hasAggregates: false,
+       from: '?',
+       joins: [],
+       where: null,
+       groupBy: [],
+       having: null,
+       orderBy: [],
+       limit: null
+     };
+
+     // Case-insensitive regex matching
+     const selectMatch = sql.match(/SELECT\s+(.+?)\s+FROM/i);
+     if (!selectMatch) throw new Error('Invalid SQL: missing SELECT clause');
+     result.select = selectMatch[1].trim();
+     result.hasAggregates = /\b(COUNT|SUM|AVG|MIN|MAX)\s*\(/i.test(result.select);
+
+     // Prepare parameters for SELECT clause
+     const selectPrepared = this._extractAndPrepareParameters(result.select);
+     result.selectPrepared = selectPrepared.preparedExpr;
+     result.selectParamLocations = selectPrepared.paramLocations;
+
+     // Parse JOINs (INNER, LEFT, RIGHT, FULL OUTER)
+     // Handles optional table alias before JOIN: "FROM ? u INNER JOIN 'file' o ON u.id = o.uid"
+     const joinRegex = /(?:FROM|JOIN)\s+[?'"]?\S+[?'"]?\s+\w+\s+(INNER\s+|LEFT\s+|RIGHT\s+|FULL\s+OUTER\s+)?JOIN\s+['"]([^'"]+)['"]\s+\w+\s+ON\s+(.+?)(?:(?:INNER|LEFT|RIGHT|FULL|WHERE|GROUP|HAVING|ORDER|LIMIT)\s|$)/gi;
+     let joinMatch;
+     while ((joinMatch = joinRegex.exec(sql)) !== null) {
+       const joinType = (joinMatch[1] || 'INNER').trim().toUpperCase();
+       const filePath = joinMatch[2];
+       const onCondition = joinMatch[3].trim();
+       result.joins.push({ joinType, filePath, onCondition });
+     }
+
+     const whereMatch = sql.match(/WHERE\s+(.+?)(?:GROUP\s+BY|HAVING|ORDER\s+BY|LIMIT|$)/i);
+     if (whereMatch) {
+       result.where = whereMatch[1].trim();
+       // Prepare parameters for WHERE clause
+       const prepared = this._extractAndPrepareParameters(result.where);
+       result.wherePrepared = prepared.preparedExpr;
+       result.whereParamLocations = prepared.paramLocations;
+     }
+
+     const groupMatch = sql.match(/GROUP\s+BY\s+(.+?)(?:HAVING|ORDER\s+BY|LIMIT|$)/i);
+     if (groupMatch) {
+       result.groupBy = groupMatch[1].trim().split(/\s*,\s*/).map(c => c.trim());
+     }
+
+     const havingMatch = sql.match(/HAVING\s+(.+?)(?:ORDER\s+BY|LIMIT|$)/i);
+     if (havingMatch) {
+       result.having = havingMatch[1].trim();
+       // Prepare parameters for HAVING clause
+       const prepared = this._extractAndPrepareParameters(result.having);
+       result.havingPrepared = prepared.preparedExpr;
+       result.havingParamLocations = prepared.paramLocations;
+     }
+
+     const orderMatch = sql.match(/ORDER\s+BY\s+(.+?)(?:LIMIT|$)/i);
+     if (orderMatch) {
+       const orderParts = orderMatch[1].trim().split(/\s*,\s*/);
+       result.orderBy = orderParts.map(part => {
+         const m = part.match(/(\S+)\s+(ASC|DESC)?/i);
+         return {
+           column: m[1],
+           direction: (m[2] || 'ASC').toUpperCase()
+         };
+       });
+     }
+
+     const limitMatch = sql.match(/LIMIT\s+(\d+)/i);
+     if (limitMatch) result.limit = parseInt(limitMatch[1]);
+
+     return result;
+   }
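For a feel of what the parser yields, a query such as "SELECT name, age FROM ? WHERE age > 21 ORDER BY age DESC LIMIT 2" should come back roughly as:

// approximate token object produced by _parseSql for the query above
{
  select: "name, age", hasAggregates: false, from: "?", joins: [],
  where: "age > 21", wherePrepared: "age > 21", whereParamLocations: [],
  groupBy: [], having: null,
  orderBy: [{ column: "age", direction: "DESC" }], limit: 2
}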
+
+   static _applyJoin(leftRecords, rightRecords, joinSpec) {
+     const { joinType, onCondition } = joinSpec;
+     const result = [];
+
+     // Parse the ON condition: e.g., "l.id = r.id"
+     const onMatch = onCondition.match(/(\w+)\.(\w+)\s*=\s*(\w+)\.(\w+)/);
+     if (!onMatch) throw new Error('Invalid JOIN condition syntax');
+     const [, lAlias, lCol, rAlias, rCol] = onMatch;
+
+     // Build a map for quick lookup
+     const rightMap = {};
+     rightRecords.forEach((r, idx) => {
+       const key = String(r[rCol]);
+       if (!rightMap[key]) rightMap[key] = [];
+       rightMap[key].push({ record: r, index: idx });
+     });
+
+     const processedLeft = new Set();
+     const processedRight = new Set();
+
+     // INNER, LEFT, RIGHT, FULL OUTER
+     if (joinType === 'INNER' || joinType === 'LEFT' || joinType === 'FULL OUTER') {
+       leftRecords.forEach((l, lIdx) => {
+         const key = String(l[lCol]);
+         const matches = rightMap[key] || [];
+
+         if (matches.length > 0) {
+           matches.forEach(({ record: r, index: rIdx }) => {
+             result.push(Object.assign({}, l, r));
+             processedRight.add(rIdx);
+           });
+         } else if (joinType !== 'INNER') {
+           // LEFT or FULL OUTER: include unmatched left rows
+           result.push(l);
+         }
+         processedLeft.add(lIdx);
+       });
+     }
+
+     if (joinType === 'RIGHT' || joinType === 'FULL OUTER') {
+       rightRecords.forEach((r, rIdx) => {
+         if (!processedRight.has(rIdx)) {
+           result.push(r);
+         }
+       });
+     }
+
+     return result;
+   }
+
+   static _applyWhere(records, whereExpr) {
+     // Convert SQL comparison operators to JavaScript
+     // Order matters: replace compound operators first, then single =
+     let jsExpr = whereExpr
+       .replace(/<>/g, '!==') // SQL !=
+       .replace(/!=/g, '!==') // SQL != (alternative)
+       .replace(/\bAND\b/gi, '&&') // SQL AND
+       .replace(/\bOR\b/gi, '||') // SQL OR
+       .replace(/\bNOT\b/gi, '!') // SQL NOT
+       .replace(/([^=!<>])=([^=])/g, '$1===$2'); // SQL = becomes === (not part of compound operators)
+
+     let fn;
+     try {
+       fn = new Function('r', 'with(r){return ' + jsExpr + ';}');
+     } catch (e) {
+       throw new Error('Invalid WHERE expression: ' + e.message);
+     }
+     return records.filter(r => {
+       try {
+         return fn(r);
+       } catch (_e) {
+         return false;
+       }
+     });
+   }
+
+   static _applyGroupBy(records, groupCols, selectExpr) {
+     const groups = {};
+
+     // Check if SELECT has only aggregates (no group by columns)
+     const hasOnlyAggregates = groupCols.length === 0 && /^\s*(COUNT|SUM|AVG|MIN|MAX|COUNT\s+DISTINCT)\s*\(/i.test(selectExpr);
+
+     if (hasOnlyAggregates) {
+       // Single aggregate over all records
+       const row = {};
+       const aggregates = this._extractAggregates(selectExpr, records);
+       Object.assign(row, aggregates);
+       return [row];
+     }
+
+     // Build groups
+     records.forEach(r => {
+       const key = groupCols.map(c => String(r[c])).join('|');
+       if (!groups[key]) {
+         groups[key] = {
+           records: [],
+           groupValues: {}
+         };
+         groupCols.forEach(c => {
+           groups[key].groupValues[c] = r[c];
+         });
+       }
+       groups[key].records.push(r);
+     });
+
+     // Compute aggregates
+     const result = [];
+     for (const key in groups) {
+       const group = groups[key];
+       const row = { ...group.groupValues };
+
+       // Parse aggregates from selectExpr
+       const aggregates = this._extractAggregates(selectExpr, group.records);
+       Object.assign(row, aggregates);
+
+       result.push(row);
+     }
+
+     return result;
+   }
+
+   static _extractAggregates(selectExpr, records) {
+     const result = {};
+     const aggRegex = /(COUNT|SUM|AVG|MIN|MAX|COUNT\s+DISTINCT)\s*\(\s*([^)]+)\s*\)\s*(?:AS|as)?\s*(\w+)?/g;
+
+     let match;
+     while ((match = aggRegex.exec(selectExpr)) !== null) {
+       const [, aggFunc, colExpr, alias] = match;
+       const colName = alias || aggFunc.toUpperCase() + '_' + colExpr;
+
+       const values = records.map(r => {
+         try {
+           if (colExpr === '*') return 1;
+           const fn = new Function('r', 'with(r){return ' + colExpr + ';}');
+           return fn(r);
+         } catch (_e) {
+           return null;
+         }
+       }).filter(v => v != null);
+
+       if (aggFunc.toUpperCase() === 'COUNT') {
+         result[colName] = records.length;
+       } else if (aggFunc.toUpperCase() === 'COUNT DISTINCT') {
+         result[colName] = new Set(values).size;
+       } else if (aggFunc.toUpperCase() === 'SUM') {
+         result[colName] = values.reduce((a, b) => a + b, 0);
+       } else if (aggFunc.toUpperCase() === 'AVG') {
+         result[colName] = values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;
+       } else if (aggFunc.toUpperCase() === 'MIN') {
+         result[colName] = Math.min(...values);
+       } else if (aggFunc.toUpperCase() === 'MAX') {
+         result[colName] = Math.max(...values);
+       }
+     }
+
+     return result;
+   }
+
+   static _applyOrderBy(records, orderBy) {
+     return records.sort((a, b) => {
+       for (const { column, direction } of orderBy) {
+         const valA = a[column];
+         const valB = b[column];
+         let cmp = 0;
+         if (valA < valB) cmp = -1;
+         else if (valA > valB) cmp = 1;
+         if (cmp !== 0) return direction === 'DESC' ? -cmp : cmp;
+       }
+       return 0;
+     });
+   }
+
+   static _projectColumns(records, selectExpr) {
+
+     // Split selectExpr on commas, but ignore commas inside quotes
+     function splitSelectColumns(expr) {
+       const cols = [];
+       let current = '';
+       let inSingle = false, inDouble = false;
+       for (let i = 0; i < expr.length; i++) {
+         const ch = expr[i];
+         if (ch === "'" && !inDouble) inSingle = !inSingle;
+         else if (ch === '"' && !inSingle) inDouble = !inDouble;
+         if (ch === ',' && !inSingle && !inDouble) {
+           cols.push(current.trim());
+           current = '';
+         } else {
+           current += ch;
+         }
+       }
+       if (current.trim()) cols.push(current.trim());
+       return cols;
+     }
+
+     const cols = splitSelectColumns(selectExpr).map(c => {
+       // Support quoted identifiers for output name
+       // Match: expr [AS name] or expr [AS "name"]
+       const m = c.match(/(.+?)(?:\s+(?:AS|as)\s+((?:"[^"]+")|(?:'[^']+')|\w+))?$/);
+       const expr = m[1].trim();
+       let outputName = m[2];
+       if (outputName) {
+         outputName = outputName.replace(/^['"]|['"]$/g, ''); // Remove quotes if present
+       } else {
+         outputName = expr;
+       }
+       return { expr, outputName };
+     });
+
+     return records.map(r => {
+       const o = {};
+       cols.forEach(c => {
+         // Check if expr is a simple column reference or contains parameters/expressions
+         if (/^[a-zA-Z_][a-zA-Z0-9_.]*$/.test(c.expr)) {
+           // Simple column reference (e.g., "id", "u.id")
+           let sourceCol = c.expr;
+           if (c.expr.includes('.')) {
+             sourceCol = c.expr.split('.')[1]; // Extract column name after alias
+           }
+           o[c.outputName] = r[sourceCol];
+         } else {
+           // Computed expression - evaluate it (handles parameters and JS expressions)
+           try {
+             const fn = new Function('r', 'with(r){return ' + c.expr + ';}');
+             o[c.outputName] = fn(r);
+           } catch (e) {
+             o[c.outputName] = undefined;
+           }
+         }
+       });
+       return o;
+     });
+   }
+
+   static async getSchema(filePath) {
+     if (!fs.existsSync(filePath)) {
+       throw new Error("File does not exist: " + filePath);
+     }
+
+     const buffer = fs.readFileSync(filePath);
+
+     // Check magic bytes
+     const magic = buffer.slice(0, 4).toString();
+     if (magic !== 'PARQ') {
+       throw new Error("Invalid file format");
+     }
+
+     // Read compressed data size
+     const dataSize = buffer.readUInt32LE(4);
+     const compressed = buffer.slice(8, 8 + dataSize);
+
+     // Decompress and parse metadata only
+     const jsonData = zlib.gunzipSync(compressed).toString();
+     const data = JSON.parse(jsonData);
+
+     return {
+       schema: data.metadata.schema,
+       fields: Object.keys(data.metadata.schema),
+       metadata: data.metadata,
+       filePath: filePath
+     };
+   }
+
+   static async getMetadata(filePath) {
+     const stats = fs.statSync(filePath);
+     const schemaData = await this.getSchema(filePath);
+
+     return {
+       filePath: filePath,
+       fileSize: stats.size,
+       numRows: schemaData.metadata.numRows,
+       numColumns: schemaData.metadata.numColumns,
+       schema: schemaData.schema,
+       created: schemaData.metadata.created,
+       modified: stats.mtime,
+       compression: schemaData.metadata.compression,
+       version: schemaData.metadata.version
+     };
+   }
+ }
+
+ const actions = {
+   // Helper function to safely extract properties from msg object
+   getMsgProperty: (msg, path) => {
+     if (!path || typeof path !== 'string') {
+       return undefined;
+     }
+
+     const parts = path.split('.');
+     let current = msg;
+
+     for (const part of parts) {
+       if (current && typeof current === 'object' && part in current) {
+         current = current[part];
+       } else {
+         return undefined;
+       }
+     }
+
+     return current;
+   },
+   readFile: async (RED, node, msg) => {
+     const filePath = msg.payload && msg.payload.filePath ? msg.payload.filePath : node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     return await SimpleColumnarStore.readRecords(filePath);
+   },
+
+   writeFile: async (RED, node, msg) => {
+     if (!msg.payload || !msg.payload.records || !Array.isArray(msg.payload.records)) {
+       throw new Error("msg.payload must contain records array");
+     }
+
+     const filePath = msg.payload.filePath || node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     // Ensure directory exists
+     const dir = path.dirname(filePath);
+     if (!fs.existsSync(dir)) {
+       fs.mkdirSync(dir, { recursive: true });
+     }
+
+     return await SimpleColumnarStore.writeRecords(msg.payload.records, filePath);
+   },
+
+   appendFile: async (RED, node, msg) => {
+     if (!msg.payload || !msg.payload.records || !Array.isArray(msg.payload.records)) {
+       throw new Error("msg.payload must contain records array");
+     }
+
+     const filePath = msg.payload.filePath || node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     // Ensure directory exists
+     const dir = path.dirname(filePath);
+     if (!fs.existsSync(dir)) {
+       fs.mkdirSync(dir, { recursive: true });
+     }
+
+     return await SimpleColumnarStore.appendRecords(msg.payload.records, filePath);
+   },
+
+   queryFile: async (RED, node, msg) => {
+     const filePath = msg.payload && msg.payload.filePath ? msg.payload.filePath : node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     const options = {
+       limit: msg.payload && msg.payload.limit,
+       filter: msg.payload && msg.payload.filter
+     };
+
+     return await SimpleColumnarStore.queryRecords(filePath, options);
+   },
+
+   sqlQuery: async (RED, node, msg) => {
+     const filePath = msg.payload && msg.payload.filePath ? msg.payload.filePath : node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+     const sql = (msg.payload && msg.payload.sql) || node.sqlQuery;
+     if (!sql) {
+       throw new Error("SQL query must be provided in msg.payload.sql or configured in node");
+     }
+
+     // Prepare context for parameter extraction
+     const context = {
+       msg,
+       flow: (typeof node.context === 'function' && node.context().flow) ? node.context().flow : {},
+       global: (typeof node.context === 'function' && node.context().global) ? node.context().global : {}
+     };
+
+     // Use cached tokens if SQL matches the cached query
+     const useCachedTokens = !msg.payload?.sql && node.cachedSqlTokens && sql === node.cachedSql;
+
+     return await SimpleColumnarStore.sqlQuery(filePath, sql, useCachedTokens ? node.cachedSqlTokens : null, context);
+   },
+
+   getSchema: async (RED, node, msg) => {
+     const filePath = msg.payload && msg.payload.filePath ? msg.payload.filePath : node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     return await SimpleColumnarStore.getSchema(filePath);
+   },
+
+   getMetadata: async (RED, node, msg) => {
+     const filePath = msg.payload && msg.payload.filePath ? msg.payload.filePath : node.filePath;
+     if (!filePath) {
+       throw new Error("filePath must be provided in msg.payload or configured in node");
+     }
+
+     return await SimpleColumnarStore.getMetadata(filePath);
+   }
+ };
+
+ module.exports = function (RED) {
+   function ColumnarNode(config) {
+     RED.nodes.createNode(this, config);
+     const node = Object.assign(this, config, {
+       filePath: config.filePath || ''
+     });
+
+     // Cache parsed SQL tokens for hardcoded queries to improve performance
+     if (config.action === 'sqlQuery' && config.sqlQuery && config.sqlQuery.trim()) {
+       try {
+         node.cachedSqlTokens = SimpleColumnarStore._parseSql(config.sqlQuery);
+         node.cachedSql = config.sqlQuery;
+       } catch (error) {
+         node.error("Failed to parse hardcoded SQL: " + error.message);
+         node.status({ fill: "red", shape: "ring", text: "SQL parse error" });
+         return;
+       }
+     }
+
+     node.callFunction = actions[config.action];
+     if (!node.callFunction) {
+       node.error("Unknown action: " + config.action);
+       node.status({ fill: "red", shape: "ring", text: "Unknown action: " + config.action });
+       return;
+     }
+
+     node.status({ fill: "green", shape: "dot", text: "Ready" });
+
+     node.on('input', async function (msg) {
+       try {
+         const result = await node.callFunction(RED, node, msg);
+         const outputProperty = node.outputProperty || 'result';
+         msg[outputProperty] = result;
+         node.send(msg);
+         node.status({ fill: "green", shape: "dot", text: "Success" });
+       } catch (error) {
+         node.error(error.message, msg);
+         node.status({ fill: "red", shape: "ring", text: error.message.substring(0, 20) });
+       }
+     });
+   }
+   RED.nodes.registerType("columnar", ColumnarNode);
+ };
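Within a flow, the registered columnar node is driven entirely by messages; a sketch of an input that exercises the sqlQuery action (file path, query and values are illustrative) looks like:

// input message for a columnar node configured with action "sqlQuery"
msg.payload = {
  filePath: "/data/users.parq",
  sql: "SELECT name FROM ? WHERE age > :msg.payload.minAge",
  minAge: 30
};
// the node resolves :msg.payload.minAge from the incoming message and places the
// matching rows on msg.result (or the node's configured output property)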
+
+ // export helper class for external use/testing
+ module.exports.SimpleColumnarStore = SimpleColumnarStore;
+ module.exports.actions = actions;
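These trailing exports let the store be exercised outside the Node-RED runtime, for example from tests; a rough sketch with an illustrative require path, inside an async function:

const { SimpleColumnarStore } = require("node-red-contrib-prib-functions/columnar/columnar.js");
const { records, count, schema } = await SimpleColumnarStore.readRecords("/tmp/example.parq");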