@perspective-dev/client 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,18 @@
10
10
  // ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11
11
  // ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
12
 
13
- import type {
14
- VirtualDataSlice,
15
- VirtualServerHandler,
16
- } from "@perspective-dev/client";
13
+ /**
14
+ * An implementation of a Perspective Virtual Server for DuckDB.
15
+ *
16
+ * This import is optional, and so must be imported manually from either
17
+ * `@perspective-dev/client/dist/esm/virtual_servers/duckdb.js` or
18
+ * `@perspective-dev/client/src/ts/virtual_servers/duckdb.ts`; it is not
19
+ * exported from the package root `@perspective-dev/client`.
20
+ *
21
+ * @module
22
+ */
23
+
24
+ import type * as perspective from "@perspective-dev/client";
17
25
  import type { ColumnType } from "@perspective-dev/client/dist/esm/ts-rs/ColumnType.d.ts";
18
26
  import type { ViewConfig } from "@perspective-dev/client/dist/esm/ts-rs/ViewConfig.d.ts";
19
27
  import type { ViewWindow } from "@perspective-dev/client/dist/esm/ts-rs/ViewWindow.d.ts";
@@ -68,27 +76,45 @@ const FILTER_OPS = [
68
76
  ];
69
77
 
70
78
  function duckdbTypeToPsp(name: string): ColumnType {
71
- if (name === "VARCHAR") return "string";
72
- if (name === "DOUBLE" || name === "BIGINT" || name === "HUGEINT")
79
+ if (name === "VARCHAR" || name == "Utf8") {
80
+ return "string";
81
+ }
82
+
83
+ if (
84
+ name === "DOUBLE" ||
85
+ name === "BIGINT" ||
86
+ name === "HUGEINT" ||
87
+ name === "Float64" ||
88
+ name.startsWith("Decimal")
89
+ ) {
73
90
  return "float";
74
- if (name.startsWith("Decimal")) return "float";
75
- if (name.startsWith("Int")) return "integer";
76
- if (name === "INTEGER") return "integer";
77
- if (name === "Utf8") return "string";
78
- if (name === "Date32<DAY>") return "date";
79
- if (name === "Float64") return "float";
80
- if (name === "DATE") return "date";
81
- if (name === "BOOLEAN") return "boolean";
82
- if (name === "TIMESTAMP") return "datetime";
91
+ }
92
+
93
+ if (name.startsWith("Int") || name == "INTEGER") {
94
+ return "integer";
95
+ }
96
+
97
+ if (name === "INTEGER") {
98
+ return "integer";
99
+ }
100
+
101
+ if (name === "DATE" || name.startsWith("Date")) {
102
+ return "date";
103
+ }
104
+
105
+ if (name === "BOOLEAN" || name === "Bool") {
106
+ return "boolean";
107
+ }
108
+
109
+ if (name === "TIMESTAMP" || name.startsWith("Timestamp")) {
110
+ return "datetime";
111
+ }
112
+
83
113
  throw new Error(`Unknown type '${name}'`);
84
114
  }
85
115
 
86
116
  function convertDecimalToNumber(value: any, dtypeString: string) {
87
- if (
88
- value === null ||
89
- value === undefined ||
90
- !(value instanceof Uint32Array || value instanceof Int32Array)
91
- ) {
117
+ if (!(value instanceof Uint32Array || value instanceof Int32Array)) {
92
118
  return value;
93
119
  }
94
120
 
@@ -97,15 +123,9 @@ function convertDecimalToNumber(value: any, dtypeString: string) {
97
123
  bigIntValue |= BigInt(value[i]) << BigInt(i * 32);
98
124
  }
99
125
 
100
- const maxInt128 = BigInt(2) ** BigInt(127);
101
- if (bigIntValue >= maxInt128) {
102
- bigIntValue -= BigInt(2) ** BigInt(128);
103
- }
104
-
105
126
  const scaleMatch = dtypeString.match(/Decimal\[\d+e(\d+)\]/);
106
- const scale = scaleMatch ? parseInt(scaleMatch[1]) : 0;
107
-
108
- if (scale > 0) {
127
+ if (scaleMatch) {
128
+ const scale = parseInt(scaleMatch[1]);
109
129
  return Number(bigIntValue) / Math.pow(10, scale);
110
130
  } else {
111
131
  return Number(bigIntValue);
@@ -125,7 +145,7 @@ async function runQuery(
125
145
  async function runQuery(
126
146
  db: duckdb.AsyncDuckDBConnection,
127
147
  query: string,
128
- options?: { columns: boolean },
148
+ options?: { columns: false },
129
149
  ): Promise<any[]>;
130
150
 
131
151
  async function runQuery(
@@ -134,7 +154,6 @@ async function runQuery(
134
154
  options: { columns?: boolean } = {},
135
155
  ) {
136
156
  query = query.replace(/\s+/g, " ").trim();
137
- // console.log("Query:", query);
138
157
  try {
139
158
  const result = await db.query(query);
140
159
  if (options.columns) {
@@ -153,11 +172,28 @@ async function runQuery(
153
172
  }
154
173
  }
155
174
 
156
- export class DuckDBHandler implements VirtualServerHandler {
175
+ /**
176
+ * An implementation of Perspective's Virtual Server for `@duckdb/duckdb-wasm`.
177
+ */
178
+ export class DuckDBHandler implements perspective.VirtualServerHandler {
157
179
  private db: duckdb.AsyncDuckDBConnection;
180
+ private sqlBuilder: perspective.GenericSQLVirtualServerModel;
181
+ constructor(db: duckdb.AsyncDuckDBConnection, mod?: typeof perspective) {
182
+ if (!mod) {
183
+ if (customElements) {
184
+ const viewer_class: any =
185
+ customElements.get("perspective-viewer");
186
+ if (viewer_class) {
187
+ mod = viewer_class.__wasm_module__;
188
+ } else {
189
+ throw new Error("Missing perspective-client.wasm");
190
+ }
191
+ } else {
192
+ }
193
+ }
158
194
 
159
- constructor(db: duckdb.AsyncDuckDBConnection) {
160
195
  this.db = db;
196
+ this.sqlBuilder = new mod!.GenericSQLVirtualServerModel();
161
197
  }
162
198
 
163
199
  getFeatures() {
@@ -186,20 +222,25 @@ export class DuckDBHandler implements VirtualServerHandler {
186
222
  }
187
223
 
188
224
  async getHostedTables() {
189
- const results = await runQuery(this.db, "SHOW ALL TABLES");
190
- return results.map((row) => row.toJSON().name);
225
+ const query = this.sqlBuilder.getHostedTables();
226
+ const results = await runQuery(this.db, query);
227
+ return results.map((row) => {
228
+ const json = row.toJSON();
229
+ return `${json.database || "memory"}.${json.name}`;
230
+ });
191
231
  }
192
232
 
193
- async tableSchema(tableId: string) {
194
- const query = `DESCRIBE ${tableId}`;
233
+ async tableSchema(tableId: string, config?: ViewConfig) {
234
+ const query = this.sqlBuilder.tableSchema(tableId);
195
235
  const results = await runQuery(this.db, query);
196
236
  const schema = {} as Record<string, ColumnType>;
197
237
  for (const result of results) {
198
238
  const res = result.toJSON();
199
239
  const colName = res.column_name;
200
- if (!colName.startsWith("__") || !colName.endsWith("__")) {
201
- const cleanName = colName.split("_").slice(-1)[0] as string;
202
- schema[cleanName] = duckdbTypeToPsp(res.column_type);
240
+ if (!colName.startsWith("__")) {
241
+ schema[colName] = duckdbTypeToPsp(
242
+ res.column_type,
243
+ ) as ColumnType;
203
244
  }
204
245
  }
205
246
 
@@ -207,7 +248,7 @@ export class DuckDBHandler implements VirtualServerHandler {
207
248
  }
208
249
 
209
250
  async viewColumnSize(viewId: string, config: ViewConfig) {
210
- const query = `SELECT COUNT(*) FROM (DESCRIBE ${viewId})`;
251
+ const query = this.sqlBuilder.viewColumnSize(viewId);
211
252
  const results = await runQuery(this.db, query);
212
253
  const gs = config.group_by?.length || 0;
213
254
  const count = Number(Object.values(results[0].toJSON())[0]);
@@ -218,293 +259,79 @@ export class DuckDBHandler implements VirtualServerHandler {
218
259
  }
219
260
 
220
261
  async tableSize(tableId: string) {
221
- const query = `SELECT COUNT(*) FROM ${tableId}`;
262
+ const query = this.sqlBuilder.tableSize(tableId);
222
263
  const results = await runQuery(this.db, query);
223
264
  return Number(results[0].toJSON()["count_star()"]);
224
265
  }
225
266
 
226
- // async viewSchema(viewId: string, config: ViewConfig) {
227
- // return this.tableSchema(viewId);
228
- // }
229
-
230
- // async viewSize(viewId: string) {
231
- // return this.tableSize(viewId);
232
- // }
233
-
234
267
  async tableMakeView(tableId: string, viewId: string, config: ViewConfig) {
235
- const columns = config.columns || [];
236
- const group_by = config.group_by || [];
237
- const split_by = config.split_by || [];
238
- const aggregates = config.aggregates || {};
239
- const sort = config.sort || [];
240
- const expressions = config.expressions || {};
241
- const filter = config.filter || [];
242
-
243
- const colName = (col: string) => {
244
- const expr = expressions[col];
245
- return expr || `"${col}"`;
246
- };
247
-
248
- const getAggregate = (col: string) => aggregates[col] || null;
249
-
250
- const generateSelectClauses = () => {
251
- const clauses = [];
252
- if (group_by.length > 0) {
253
- for (const col of columns) {
254
- if (col !== null) {
255
- // TODO texodus
256
- const agg = getAggregate(col) || "any_value";
257
- clauses.push(`${agg}(${colName(col)}) as "${col}"`);
258
- }
259
- }
260
-
261
- if (split_by.length === 0) {
262
- for (let idx = 0; idx < group_by.length; idx++) {
263
- clauses.push(
264
- `${colName(group_by[idx])} as __ROW_PATH_${idx}__`,
265
- );
266
- }
267
-
268
- const groups = group_by.map(colName).join(", ");
269
- clauses.push(`GROUPING_ID(${groups}) AS __GROUPING_ID__`);
270
- }
271
- } else if (columns.length > 0) {
272
- for (const col of columns) {
273
- if (col !== null) {
274
- // TODO texodus
275
- clauses.push(
276
- `${colName(col)} as "${col.replace(/"/g, '""')}"`,
277
- );
278
- }
279
- }
280
- }
281
-
282
- return clauses;
283
- };
284
-
285
- const orderByClauses = [];
286
- const windowClauses = [];
287
- const whereClauses = [];
288
-
289
- if (group_by.length > 0) {
290
- for (let gidx = 0; gidx < group_by.length; gidx++) {
291
- const groups = group_by
292
- .slice(0, gidx + 1)
293
- .map(colName)
294
- .join(", ");
295
- if (split_by.length === 0) {
296
- orderByClauses.push(`GROUPING_ID(${groups}) DESC`);
297
- }
298
-
299
- for (const [sort_col, sort_dir] of sort) {
300
- if (sort_dir !== "none") {
301
- const agg = getAggregate(sort_col) || "any_value";
302
- if (gidx >= group_by.length - 1) {
303
- orderByClauses.push(
304
- `${agg}(${colName(sort_col)}) ${sort_dir}`,
305
- );
306
- } else {
307
- orderByClauses.push(
308
- `first(${agg}(${colName(sort_col)})) OVER __WINDOW_${gidx}__ ${sort_dir}`,
309
- );
310
- }
311
- }
312
- }
313
-
314
- orderByClauses.push(`__ROW_PATH_${gidx}__ ASC`);
315
- }
316
- } else {
317
- for (const [sort_col, sort_dir] of sort) {
318
- if (sort_dir) {
319
- orderByClauses.push(`${colName(sort_col)} ${sort_dir}`);
320
- }
321
- }
322
- }
323
-
324
- if (sort.length > 0 && group_by.length > 1) {
325
- for (let gidx = 0; gidx < group_by.length - 1; gidx++) {
326
- const partition = Array.from(
327
- { length: gidx + 1 },
328
- (_, i) => `__ROW_PATH_${i}__`,
329
- ).join(", ");
330
- const sub_groups = group_by
331
- .slice(0, gidx + 1)
332
- .map(colName)
333
- .join(", ");
334
- const groups = group_by.map(colName).join(", ");
335
- windowClauses.push(
336
- `__WINDOW_${gidx}__ AS (PARTITION BY GROUPING_ID(${sub_groups}), ${partition} ORDER BY ${groups})`,
337
- );
338
- }
339
- }
340
-
341
- for (const [name, op, value] of filter) {
342
- if (value !== null && value !== undefined) {
343
- const term_lit =
344
- typeof value === "string" ? `'${value}'` : String(value);
345
- whereClauses.push(`${colName(name)} ${op} ${term_lit}`);
346
- }
347
- }
348
-
349
- let query;
350
- if (split_by.length > 0) {
351
- query = `SELECT * FROM ${tableId}`;
352
- } else {
353
- const selectClauses = generateSelectClauses();
354
- query = `SELECT ${selectClauses.join(", ")} FROM ${tableId}`;
355
- }
356
-
357
- if (whereClauses.length > 0) {
358
- query = `${query} WHERE ${whereClauses.join(" AND ")}`;
359
- }
360
-
361
- if (split_by.length > 0) {
362
- const groups = group_by.map(colName).join(", ");
363
- const group_aliases = group_by
364
- .map((x, i) => `${colName(x)} AS __ROW_PATH_${i}__`)
365
- .join(", ");
366
- const pivotOn = split_by.map((c) => `"${c}"`).join(", ");
367
- const pivotUsing = generateSelectClauses().join(", ");
368
-
369
- query = `
370
- SELECT * EXCLUDE (${groups}), ${group_aliases} FROM (
371
- PIVOT (${query})
372
- ON ${pivotOn}
373
- USING ${pivotUsing}
374
- GROUP BY ${groups}
375
- )
376
- `;
377
- } else if (group_by.length > 0) {
378
- const groups = group_by.map(colName).join(", ");
379
- query = `${query} GROUP BY ROLLUP(${groups})`;
380
- }
381
-
382
- if (windowClauses.length > 0) {
383
- query = `${query} WINDOW ${windowClauses.join(", ")}`;
384
- }
385
-
386
- if (orderByClauses.length > 0) {
387
- query = `${query} ORDER BY ${orderByClauses.join(", ")}`;
388
- }
389
-
390
- query = `CREATE TABLE ${viewId} AS (${query})`;
268
+ const query = this.sqlBuilder.tableMakeView(tableId, viewId, config);
391
269
  await runQuery(this.db, query);
392
270
  }
393
271
 
394
272
  async tableValidateExpression(tableId: string, expression: string) {
395
- const query = `DESCRIBE (select ${expression} from ${tableId})`;
273
+ const query = this.sqlBuilder.tableValidateExpression(
274
+ tableId,
275
+ expression,
276
+ );
396
277
  const results = await runQuery(this.db, query);
397
- return duckdbTypeToPsp(results[0].toJSON()["column_type"]);
278
+ return duckdbTypeToPsp(
279
+ results[0].toJSON()["column_type"],
280
+ ) as ColumnType;
398
281
  }
399
282
 
400
283
  async viewDelete(viewId: string) {
401
- const query = `DROP TABLE IF EXISTS ${viewId}`;
284
+ const query = this.sqlBuilder.viewDelete(viewId);
402
285
  await runQuery(this.db, query);
403
286
  }
404
287
 
405
288
  async viewGetData(
406
289
  viewId: string,
407
290
  config: ViewConfig,
291
+ schema: Record<string, ColumnType>,
408
292
  viewport: ViewWindow,
409
- dataSlice: VirtualDataSlice,
293
+ dataSlice: perspective.VirtualDataSlice,
410
294
  ) {
411
- const group_by = config.group_by || [];
412
- const split_by = config.split_by || [];
413
- const start_col = viewport.start_col;
414
- const end_col = viewport.end_col;
415
- const start_row = viewport.start_row || 0;
416
- const end_row = viewport.end_row;
417
-
418
- let limit = "";
419
- if (end_row !== null && end_row !== undefined) {
420
- limit = `LIMIT ${end_row - start_row} OFFSET ${start_row}`;
421
- }
422
-
423
- const schemaQuery = `DESCRIBE ${viewId}`;
424
- const schemaResults = await runQuery(this.db, schemaQuery);
425
- const columnTypes = new Map();
426
- for (const result of schemaResults) {
427
- const res = result.toJSON();
428
- columnTypes.set(res.column_name, res.column_type);
429
- }
430
-
431
- const dataColumns = Array.from(columnTypes.entries())
432
- .filter(([colName]) => !colName.startsWith("__"))
433
- .slice(start_col, end_col);
434
-
435
- const groupByColsList = [];
436
- if (group_by.length > 0) {
437
- if (split_by.length === 0) {
438
- groupByColsList.push("__GROUPING_ID__");
439
- }
440
- for (let idx = 0; idx < group_by.length; idx++) {
441
- groupByColsList.push(`__ROW_PATH_${idx}__`);
442
- }
443
- }
444
-
445
- const allColumns = [
446
- ...groupByColsList.map((col) => `"${col}"`),
447
- ...dataColumns.map(([colName]) => `"${colName}"`),
448
- ];
449
-
450
- const query = `
451
- SELECT ${allColumns.join(", ")}
452
- FROM ${viewId} ${limit}
453
- `;
295
+ const is_group_by = config.group_by?.length > 0;
296
+ const is_split_by = config.split_by?.length > 0;
297
+ const query = this.sqlBuilder.viewGetData(
298
+ viewId,
299
+ config,
300
+ viewport,
301
+ schema,
302
+ );
454
303
 
455
304
  const { rows, columns, dtypes } = await runQuery(this.db, query, {
456
305
  columns: true,
457
306
  });
458
307
 
459
308
  for (let cidx = 0; cidx < columns.length; cidx++) {
460
- const col = columns[cidx];
461
-
462
- if (cidx === 0 && group_by.length > 0 && split_by.length === 0) {
309
+ if (cidx === 0 && is_group_by && !is_split_by) {
310
+ // This is the grouping_id column, skip it
463
311
  continue;
464
312
  }
465
313
 
466
- let group_by_index = null;
467
- let max_grouping_id = null;
468
- const row_path_match = col.match(/__ROW_PATH_(\d+)__/);
469
- if (row_path_match) {
470
- group_by_index = parseInt(row_path_match[1]);
471
- max_grouping_id = 2 ** (group_by.length - group_by_index) - 1;
314
+ let col = columns[cidx];
315
+ if (is_split_by && !col.startsWith("__ROW_PATH_")) {
316
+ col = col.replaceAll("_", "|");
472
317
  }
473
318
 
474
- const dtype = duckdbTypeToPsp(dtypes[cidx]);
475
- const isDecimal = dtypes[cidx].startsWith("Decimal");
476
- const colName =
477
- group_by_index !== null
478
- ? "__ROW_PATH__"
479
- : col.replace(/_/g, "|");
319
+ const dtype = duckdbTypeToPsp(dtypes[cidx]) as ColumnType;
480
320
 
321
+ const isDecimal = dtypes[cidx].startsWith("Decimal");
481
322
  for (let ridx = 0; ridx < rows.length; ridx++) {
482
- const row = rows[ridx];
483
- const rowArray = row.toArray();
484
- const shouldSet =
485
- split_by.length > 0 ||
486
- max_grouping_id === null ||
487
- rowArray[0] < max_grouping_id;
488
-
489
- if (shouldSet) {
490
- let value = rowArray[cidx];
491
-
492
- if (isDecimal) {
493
- value = convertDecimalToNumber(value, dtypes[cidx]);
494
- }
495
-
496
- if (typeof value === "bigint") {
497
- value = Number(value);
498
- }
499
-
500
- dataSlice.setCol(
501
- dtype,
502
- colName,
503
- ridx,
504
- value,
505
- group_by_index,
506
- );
323
+ const rowArray = rows[ridx].toArray();
324
+ const grouping_id = Number(rowArray[0]);
325
+ let value = rowArray[cidx];
326
+ if (isDecimal) {
327
+ value = convertDecimalToNumber(value, dtypes[cidx]);
507
328
  }
329
+
330
+ if (typeof value === "bigint") {
331
+ value = Number(value);
332
+ }
333
+
334
+ dataSlice.setCol(dtype, col, ridx, value, grouping_id);
508
335
  }
509
336
  }
510
337
  }