@transitive-sdk/clickhouse 0.3.8 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -26,14 +26,17 @@ const MULTI_TENANT_SCHEMA = {
26
26
  * WHERE clause. */
27
27
  const path2where = (path) => {
28
28
  const where = [];
29
+ const wildIndices = [];
29
30
  _.forEach(path, (value, i) => {
30
31
  if (!['+','#'].includes(value[0])) {
31
32
  // it's a constant, filter by it
32
33
  where.push(`TopicParts[${i + 1}] = '${value}'`);
33
34
  // Note that ClickHouse/SQL indices start at 1, not 0
35
+ } else {
36
+ wildIndices.push(i);
34
37
  }
35
38
  });
36
- return where;
39
+ return {where, wildIndices};
37
40
  };
38
41
 
39
42
 
@@ -297,7 +300,7 @@ class ClickHouse {
297
300
  // Set/update TTL for this capability and sub-topic
298
301
 
299
302
  // Derive WHERE conditions for TTL expression from non-wildcards
300
- const where = path2where(path);
303
+ const { where } = path2where(path);
301
304
 
302
305
  if (where.length == 0) {
303
306
  // underspecified, don't set TTL
@@ -348,23 +351,83 @@ class ClickHouse {
348
351
  topicSelector,
349
352
  since = undefined,
350
353
  until = undefined,
351
- orderBy = 'Timestamp DESC',
352
- limit = 1000
354
+ // if provided, extract this sub-value of the payload-json, requires type
355
+ path = undefined,
356
+ // type of element to extract using `path`: for available types, see https://clickhouse.com/docs/sql-reference/data-types
357
+ type = 'String',
358
+ orderBy = 'time DESC',
359
+ limit = 1000, // end result limit (i.e., after grouping)
360
+
361
+ bins = undefined, // into how many bins to aggregate (if given, requires since)
362
+ // Aggregation function to use (if aggSeconds or bins is given)
363
+ // if `bins` or `aggSeconds` is given, which operator to use to compute
364
+ // aggregate value. Default is `count` (which works for any data type).
365
+ // See https://clickhouse.com/docs/sql-reference/aggregate-functions/reference.
366
+ agg = 'count',
367
+ } = options;
368
+
369
+ let {
370
+ // how many seconds to group together (alternative to bins + time interval)
371
+ aggSeconds
353
372
  } = options;
354
373
 
355
- const path = topicToPath(topicSelector);
374
+ /* some useful queries we'd like to support:
375
+
376
+ # get avg `i` value for each minute of the last hour (limit: 60)
377
+ ```sql
378
+ select toStartOfInterval(Timestamp, INTERVAL 60 SECOND) as time,
379
+ avg(JSONExtractInt(Payload,'i')) as agg
380
+ from mqtt_history_tests
381
+ GROUP BY (time)
382
+ ORDER BY time
383
+ LIMIT 60
384
+ ```
385
+ ->
386
+ ```js
387
+ { aggSeconds: 60, path: ['i'], type: 'Int', agg: 'avg', limit: 60 }
388
+ ```
389
+ */
390
+
391
+ const pathSelector = topicToPath(topicSelector);
356
392
 
357
393
  // interpret wildcards
358
- const where = path2where(path);
394
+ const { where } = path2where(pathSelector);
359
395
  since && where.push(`Timestamp >= fromUnixTimestamp64Milli(${since.getTime()})`);
360
396
  until && where.push(`Timestamp <= fromUnixTimestamp64Milli(${until.getTime()})`);
361
397
  const whereStatement = where.length > 0 ? `WHERE ${where.join(' AND ')}` : '';
362
398
 
363
- const result = await this.client.query({
364
- query: `SELECT Payload,TopicParts,Timestamp FROM default.${this.mqttHistoryTable} ${
365
- whereStatement} ORDER BY ${orderBy} ${limit ? ` LIMIT ${limit}` : ''}`,
366
- format: 'JSONEachRow'
367
- });
399
+ const extractValue = path && type
400
+ ? `JSONExtract(Payload, ${path.map(s => `'${s}'`).join(', ')}, '${type}')`
401
+ : 'Payload';
402
+
403
+ let select = [`${extractValue} as value`, 'Payload', 'TopicParts',
404
+ 'Timestamp', 'Timestamp as time'];
405
+
406
+ let group = '';
407
+ if (bins > 1 && since) {
408
+ // compute aggSeconds from desired number of bins and `since`
409
+ const duration = (until || Date.now()) - since.getTime();
410
+ aggSeconds = Math.floor((duration/1000)/(bins - 1));
411
+ }
412
+
413
+ // if aggregation is requested, build the GROUP BY expression and update SELECT
414
+ if (aggSeconds) {
415
+ // SQL sub-string to extract the desired value from the JSON payload
416
+ // const wildParts = wildIndices.map(i => `TopicParts[${i + 1}]`);
417
+ // update SELECT statement with aggregations
418
+ select = [`${agg}(${extractValue}) as aggValue`,
419
+ // ...wildParts,
420
+ 'TopicParts',
421
+ `toStartOfInterval(Timestamp, INTERVAL ${aggSeconds} SECOND) as time`
422
+ ];
423
+ // group = `GROUP BY (time,${wildParts.join(',')})`
424
+ group = `GROUP BY (time,TopicParts)`
425
+ }
426
+
427
+ const query = `SELECT ${select.join(',')} FROM default.${this.mqttHistoryTable} ${
428
+ whereStatement} ${group} ORDER BY ${orderBy} ${limit ? ` LIMIT ${limit}` : ''}`;
429
+ // console.log(query);
430
+ const result = await this.client.query({ query, format: 'JSONEachRow' });
368
431
 
369
432
  const rows = await result.json();
370
433
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@transitive-sdk/clickhouse",
3
- "version": "0.3.8",
3
+ "version": "0.4.1",
4
4
  "description": "A tiny ClickHouse utility class for use in the Transitive framework.",
5
5
  "homepage": "https://transitiverobotics.com",
6
6
  "repository": {
@@ -301,6 +301,16 @@ describe('ClickHouse', function() {
301
301
  topicSelector: `/${org}/+/+/+/+/willBeNull` });
302
302
  assert.strictEqual(rows.at(-1).Payload, null);
303
303
  });
304
+
305
+ it('extracts sub-values', async () => {
306
+ const rows = await clickhouse.queryMQTTHistory({
307
+ orderBy: 'Timestamp ASC',
308
+ topicSelector: `/${org}/device1/@myscope/cap/+/sub1/sub2/sub3.2`,
309
+ path: ['data', 'aNumber'],
310
+ type: 'int'
311
+ });
312
+ assert.strictEqual(rows[0].value, 1234);
313
+ });
304
314
  });
305
315
 
306
316
  /** Test performance of the table (index). */
@@ -323,7 +333,7 @@ describe('ClickHouse', function() {
323
333
  rows.push({
324
334
  Timestamp: new Date(now + i * GAP), // use current date to avoid immediate TTL cleanup
325
335
  TopicParts: [`org${i % 50}`, `device${i % 1000}`, '@myscope',
326
- `cap${i % 100}`, `1.${i % 100}.0`, `data_${i % 1000}`, i],
336
+ `cap${i % 100}`, `1.${i % 100}.0`, `data_${i % 1000}`],
327
337
  Payload: { i },
328
338
  })
329
339
  }
@@ -423,6 +433,45 @@ describe('ClickHouse', function() {
423
433
  assert.equal(rows.length, 8);
424
434
  assertTimelimit(ROWS / 10000);
425
435
  });
426
- });
427
436
 
437
+ it('quickly filters and aggregates by time', async () => {
438
+ const rows = await clickhouse.queryMQTTHistory({
439
+ topicSelector: `/org0/device0/@myscope/cap0/1.0.0/data_0`,
440
+ since: new Date(now),
441
+ until: new Date(now + ROWS * GAP),
442
+ bins: 60,
443
+ limit: 2 * ROWS,
444
+ });
445
+ // there can be one-off errors due to rounding down to start of interval:
446
+ assert(Math.abs(rows.length - 60) < 2);
447
+ assertTimelimit(ROWS / 10000);
448
+ });
449
+
450
+ it('quickly filters, aggregates by time, extracts value, and averages', async () => {
451
+ const aggSeconds = 1000;
452
+ const rows = await clickhouse.queryMQTTHistory({
453
+ topicSelector: `/org0/device0/@myscope/cap0/1.0.0/data_0`,
454
+ aggSeconds,
455
+ path: ['i'],
456
+ type: 'int',
457
+ agg: 'avg',
458
+ limit: 2 * ROWS,
459
+ });
460
+ assert.equal(rows.length, ROWS / aggSeconds);
461
+ assertTimelimit(ROWS / 1000);
462
+ });
463
+
464
+ it('quickly filters, aggregates by time, extracts value, and averages, per device and sub-value', async () => {
465
+ const rows = await clickhouse.queryMQTTHistory({
466
+ topicSelector: `/org0/+/@myscope/cap0/1.0.0/+`,
467
+ aggSeconds: 1000,
468
+ path: ['i'],
469
+ type: 'int',
470
+ agg: 'avg',
471
+ limit: 2 * ROWS,
472
+ });
473
+ assert.equal(rows.length, 10000);
474
+ assertTimelimit(ROWS / 1000);
475
+ });
476
+ });
428
477
  });