@perspective-dev/client 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,18 @@
10
10
  // ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11
11
  // ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
12
 
13
- import type {
14
- VirtualDataSlice,
15
- VirtualServerHandler,
16
- } from "@perspective-dev/client";
13
+ /**
14
+ * An implementation of a Perspective Virtual Server for DuckDB.
15
+ *
16
+ * This import is optional, and so must be imported manually from either
17
+ * `@perspective-dev/client/dist/esm/virtual_servers/duckdb.js` or
18
+ * `@perspective-dev/client/src/ts/virtual_servers/duckdb.ts`; it is not
19
+ * exported from the package root `@perspective-dev/client`.
20
+ *
21
+ * @module
22
+ */
23
+
24
+ import type * as perspective from "@perspective-dev/client";
17
25
  import type { ColumnType } from "@perspective-dev/client/dist/esm/ts-rs/ColumnType.d.ts";
18
26
  import type { ViewConfig } from "@perspective-dev/client/dist/esm/ts-rs/ViewConfig.d.ts";
19
27
  import type { ViewWindow } from "@perspective-dev/client/dist/esm/ts-rs/ViewWindow.d.ts";
@@ -68,32 +76,45 @@ const FILTER_OPS = [
68
76
  ];
69
77
 
70
78
  function duckdbTypeToPsp(name: string): ColumnType {
71
- if (name === "VARCHAR") return "string";
79
+ if (name === "VARCHAR" || name == "Utf8") {
80
+ return "string";
81
+ }
82
+
72
83
  if (
73
84
  name === "DOUBLE" ||
74
85
  name === "BIGINT" ||
75
86
  name === "HUGEINT" ||
87
+ name === "Float64" ||
76
88
  name.startsWith("Decimal")
77
- )
89
+ ) {
78
90
  return "float";
79
- if (name.startsWith("Decimal")) return "float";
80
- if (name.startsWith("Int")) return "integer";
81
- if (name === "INTEGER") return "integer";
82
- if (name === "Utf8") return "string";
83
- if (name === "Date32<DAY>") return "date";
84
- if (name === "Float64") return "float";
85
- if (name === "DATE") return "date";
86
- if (name === "BOOLEAN") return "boolean";
87
- if (name === "TIMESTAMP" || name.startsWith("Timestamp")) return "datetime";
91
+ }
92
+
93
+ if (name.startsWith("Int") || name == "INTEGER") {
94
+ return "integer";
95
+ }
96
+
97
+ if (name === "INTEGER") {
98
+ return "integer";
99
+ }
100
+
101
+ if (name === "DATE" || name.startsWith("Date")) {
102
+ return "date";
103
+ }
104
+
105
+ if (name === "BOOLEAN" || name === "Bool") {
106
+ return "boolean";
107
+ }
108
+
109
+ if (name === "TIMESTAMP" || name.startsWith("Timestamp")) {
110
+ return "datetime";
111
+ }
112
+
88
113
  throw new Error(`Unknown type '${name}'`);
89
114
  }
90
115
 
91
116
  function convertDecimalToNumber(value: any, dtypeString: string) {
92
- if (
93
- value === null ||
94
- value === undefined ||
95
- !(value instanceof Uint32Array || value instanceof Int32Array)
96
- ) {
117
+ if (!(value instanceof Uint32Array || value instanceof Int32Array)) {
97
118
  return value;
98
119
  }
99
120
 
@@ -102,15 +123,9 @@ function convertDecimalToNumber(value: any, dtypeString: string) {
102
123
  bigIntValue |= BigInt(value[i]) << BigInt(i * 32);
103
124
  }
104
125
 
105
- const maxInt128 = BigInt(2) ** BigInt(127);
106
- if (bigIntValue >= maxInt128) {
107
- bigIntValue -= BigInt(2) ** BigInt(128);
108
- }
109
-
110
126
  const scaleMatch = dtypeString.match(/Decimal\[\d+e(\d+)\]/);
111
- const scale = scaleMatch ? parseInt(scaleMatch[1]) : 0;
112
-
113
- if (scale > 0) {
127
+ if (scaleMatch) {
128
+ const scale = parseInt(scaleMatch[1]);
114
129
  return Number(bigIntValue) / Math.pow(10, scale);
115
130
  } else {
116
131
  return Number(bigIntValue);
@@ -130,7 +145,7 @@ async function runQuery(
130
145
  async function runQuery(
131
146
  db: duckdb.AsyncDuckDBConnection,
132
147
  query: string,
133
- options?: { columns: boolean },
148
+ options?: { columns: false },
134
149
  ): Promise<any[]>;
135
150
 
136
151
  async function runQuery(
@@ -139,7 +154,6 @@ async function runQuery(
139
154
  options: { columns?: boolean } = {},
140
155
  ) {
141
156
  query = query.replace(/\s+/g, " ").trim();
142
- // console.log("Query:", query);
143
157
  try {
144
158
  const result = await db.query(query);
145
159
  if (options.columns) {
@@ -158,11 +172,28 @@ async function runQuery(
158
172
  }
159
173
  }
160
174
 
161
- export class DuckDBHandler implements VirtualServerHandler {
175
+ /**
176
+ * An implementation of Perspective's Virtual Server for `@duckdb/duckdb-wasm`.
177
+ */
178
+ export class DuckDBHandler implements perspective.VirtualServerHandler {
162
179
  private db: duckdb.AsyncDuckDBConnection;
180
+ private sqlBuilder: perspective.GenericSQLVirtualServerModel;
181
/**
 * Create a handler that issues its queries on a DuckDB connection.
 *
 * @param db - The DuckDB connection that queries are run on.
 * @param mod - The Perspective client module that provides
 *     `GenericSQLVirtualServerModel`. When omitted, the module is read
 *     from the `__wasm_module__` property of the class registered for the
 *     `perspective-viewer` custom element.
 * @throws Error if `mod` is omitted and no module can be found that way.
 */
constructor(db: duckdb.AsyncDuckDBConnection, mod?: typeof perspective) {
    if (!mod) {
        // A bare reference to `customElements` throws a ReferenceError
        // where that global does not exist, so probe it with `typeof`.
        const viewer_class: any =
            typeof customElements !== "undefined"
                ? customElements.get("perspective-viewer")
                : undefined;

        mod = viewer_class?.__wasm_module__;
    }

    // Fail with one explicit message for every way the lookup can come up
    // empty, instead of dereferencing an undefined module below.
    if (!mod) {
        throw new Error("Missing perspective-client.wasm");
    }

    this.db = db;
    this.sqlBuilder = new mod.GenericSQLVirtualServerModel();
}
167
198
 
168
199
  getFeatures() {
@@ -191,20 +222,25 @@ export class DuckDBHandler implements VirtualServerHandler {
191
222
  }
192
223
 
193
224
  async getHostedTables() {
194
- const results = await runQuery(this.db, "SHOW ALL TABLES");
195
- return results.map((row) => row.toJSON().name);
225
+ const query = this.sqlBuilder.getHostedTables();
226
+ const results = await runQuery(this.db, query);
227
+ return results.map((row) => {
228
+ const json = row.toJSON();
229
+ return `${json.database || "memory"}.${json.name}`;
230
+ });
196
231
  }
197
232
 
198
- async tableSchema(tableId: string) {
199
- const query = `DESCRIBE ${tableId}`;
233
+ async tableSchema(tableId: string, config?: ViewConfig) {
234
+ const query = this.sqlBuilder.tableSchema(tableId);
200
235
  const results = await runQuery(this.db, query);
201
236
  const schema = {} as Record<string, ColumnType>;
202
237
  for (const result of results) {
203
238
  const res = result.toJSON();
204
239
  const colName = res.column_name;
205
- if (!colName.startsWith("__") || !colName.endsWith("__")) {
206
- const cleanName = colName.split("_").slice(-1)[0] as string;
207
- schema[cleanName] = duckdbTypeToPsp(res.column_type);
240
+ if (!colName.startsWith("__")) {
241
+ schema[colName] = duckdbTypeToPsp(
242
+ res.column_type,
243
+ ) as ColumnType;
208
244
  }
209
245
  }
210
246
 
@@ -212,7 +248,7 @@ export class DuckDBHandler implements VirtualServerHandler {
212
248
  }
213
249
 
214
250
  async viewColumnSize(viewId: string, config: ViewConfig) {
215
- const query = `SELECT COUNT(*) FROM (DESCRIBE ${viewId})`;
251
+ const query = this.sqlBuilder.viewColumnSize(viewId);
216
252
  const results = await runQuery(this.db, query);
217
253
  const gs = config.group_by?.length || 0;
218
254
  const count = Number(Object.values(results[0].toJSON())[0]);
@@ -223,293 +259,79 @@ export class DuckDBHandler implements VirtualServerHandler {
223
259
  }
224
260
 
225
261
  async tableSize(tableId: string) {
226
- const query = `SELECT COUNT(*) FROM ${tableId}`;
262
+ const query = this.sqlBuilder.tableSize(tableId);
227
263
  const results = await runQuery(this.db, query);
228
264
  return Number(results[0].toJSON()["count_star()"]);
229
265
  }
230
266
 
231
- // async viewSchema(viewId: string, config: ViewConfig) {
232
- // return this.tableSchema(viewId);
233
- // }
234
-
235
- // async viewSize(viewId: string) {
236
- // return this.tableSize(viewId);
237
- // }
238
-
239
267
  async tableMakeView(tableId: string, viewId: string, config: ViewConfig) {
240
- const columns = config.columns || [];
241
- const group_by = config.group_by || [];
242
- const split_by = config.split_by || [];
243
- const aggregates = config.aggregates || {};
244
- const sort = config.sort || [];
245
- const expressions = config.expressions || {};
246
- const filter = config.filter || [];
247
-
248
- const colName = (col: string) => {
249
- const expr = expressions[col];
250
- return expr || `"${col}"`;
251
- };
252
-
253
- const getAggregate = (col: string) => aggregates[col] || null;
254
-
255
- const generateSelectClauses = () => {
256
- const clauses = [];
257
- if (group_by.length > 0) {
258
- for (const col of columns) {
259
- if (col !== null) {
260
- // TODO texodus
261
- const agg = getAggregate(col) || "any_value";
262
- clauses.push(`${agg}(${colName(col)}) as "${col}"`);
263
- }
264
- }
265
-
266
- if (split_by.length === 0) {
267
- for (let idx = 0; idx < group_by.length; idx++) {
268
- clauses.push(
269
- `${colName(group_by[idx])} as __ROW_PATH_${idx}__`,
270
- );
271
- }
272
-
273
- const groups = group_by.map(colName).join(", ");
274
- clauses.push(`GROUPING_ID(${groups}) AS __GROUPING_ID__`);
275
- }
276
- } else if (columns.length > 0) {
277
- for (const col of columns) {
278
- if (col !== null) {
279
- // TODO texodus
280
- clauses.push(
281
- `${colName(col)} as "${col.replace(/"/g, '""')}"`,
282
- );
283
- }
284
- }
285
- }
286
-
287
- return clauses;
288
- };
289
-
290
- const orderByClauses = [];
291
- const windowClauses = [];
292
- const whereClauses = [];
293
-
294
- if (group_by.length > 0) {
295
- for (let gidx = 0; gidx < group_by.length; gidx++) {
296
- const groups = group_by
297
- .slice(0, gidx + 1)
298
- .map(colName)
299
- .join(", ");
300
- if (split_by.length === 0) {
301
- orderByClauses.push(`GROUPING_ID(${groups}) DESC`);
302
- }
303
-
304
- for (const [sort_col, sort_dir] of sort) {
305
- if (sort_dir !== "none") {
306
- const agg = getAggregate(sort_col) || "any_value";
307
- if (gidx >= group_by.length - 1) {
308
- orderByClauses.push(
309
- `${agg}(${colName(sort_col)}) ${sort_dir}`,
310
- );
311
- } else {
312
- orderByClauses.push(
313
- `first(${agg}(${colName(sort_col)})) OVER __WINDOW_${gidx}__ ${sort_dir}`,
314
- );
315
- }
316
- }
317
- }
318
-
319
- orderByClauses.push(`__ROW_PATH_${gidx}__ ASC`);
320
- }
321
- } else {
322
- for (const [sort_col, sort_dir] of sort) {
323
- if (sort_dir) {
324
- orderByClauses.push(`${colName(sort_col)} ${sort_dir}`);
325
- }
326
- }
327
- }
328
-
329
- if (sort.length > 0 && group_by.length > 1) {
330
- for (let gidx = 0; gidx < group_by.length - 1; gidx++) {
331
- const partition = Array.from(
332
- { length: gidx + 1 },
333
- (_, i) => `__ROW_PATH_${i}__`,
334
- ).join(", ");
335
- const sub_groups = group_by
336
- .slice(0, gidx + 1)
337
- .map(colName)
338
- .join(", ");
339
- const groups = group_by.map(colName).join(", ");
340
- windowClauses.push(
341
- `__WINDOW_${gidx}__ AS (PARTITION BY GROUPING_ID(${sub_groups}), ${partition} ORDER BY ${groups})`,
342
- );
343
- }
344
- }
345
-
346
- for (const [name, op, value] of filter) {
347
- if (value !== null && value !== undefined) {
348
- const term_lit =
349
- typeof value === "string" ? `'${value}'` : String(value);
350
- whereClauses.push(`${colName(name)} ${op} ${term_lit}`);
351
- }
352
- }
353
-
354
- let query;
355
- if (split_by.length > 0) {
356
- query = `SELECT * FROM ${tableId}`;
357
- } else {
358
- const selectClauses = generateSelectClauses();
359
- query = `SELECT ${selectClauses.join(", ")} FROM ${tableId}`;
360
- }
361
-
362
- if (whereClauses.length > 0) {
363
- query = `${query} WHERE ${whereClauses.join(" AND ")}`;
364
- }
365
-
366
- if (split_by.length > 0) {
367
- const groups = group_by.map(colName).join(", ");
368
- const group_aliases = group_by
369
- .map((x, i) => `${colName(x)} AS __ROW_PATH_${i}__`)
370
- .join(", ");
371
- const pivotOn = split_by.map((c) => `"${c}"`).join(", ");
372
- const pivotUsing = generateSelectClauses().join(", ");
373
-
374
- query = `
375
- SELECT * EXCLUDE (${groups}), ${group_aliases} FROM (
376
- PIVOT (${query})
377
- ON ${pivotOn}
378
- USING ${pivotUsing}
379
- GROUP BY ${groups}
380
- )
381
- `;
382
- } else if (group_by.length > 0) {
383
- const groups = group_by.map(colName).join(", ");
384
- query = `${query} GROUP BY ROLLUP(${groups})`;
385
- }
386
-
387
- if (windowClauses.length > 0) {
388
- query = `${query} WINDOW ${windowClauses.join(", ")}`;
389
- }
390
-
391
- if (orderByClauses.length > 0) {
392
- query = `${query} ORDER BY ${orderByClauses.join(", ")}`;
393
- }
394
-
395
- query = `CREATE TABLE ${viewId} AS (${query})`;
268
+ const query = this.sqlBuilder.tableMakeView(tableId, viewId, config);
396
269
  await runQuery(this.db, query);
397
270
  }
398
271
 
399
272
  async tableValidateExpression(tableId: string, expression: string) {
400
- const query = `DESCRIBE (select ${expression} from ${tableId})`;
273
+ const query = this.sqlBuilder.tableValidateExpression(
274
+ tableId,
275
+ expression,
276
+ );
401
277
  const results = await runQuery(this.db, query);
402
- return duckdbTypeToPsp(results[0].toJSON()["column_type"]);
278
+ return duckdbTypeToPsp(
279
+ results[0].toJSON()["column_type"],
280
+ ) as ColumnType;
403
281
  }
404
282
 
405
283
  async viewDelete(viewId: string) {
406
- const query = `DROP TABLE IF EXISTS ${viewId}`;
284
+ const query = this.sqlBuilder.viewDelete(viewId);
407
285
  await runQuery(this.db, query);
408
286
  }
409
287
 
410
288
  async viewGetData(
411
289
  viewId: string,
412
290
  config: ViewConfig,
291
+ schema: Record<string, ColumnType>,
413
292
  viewport: ViewWindow,
414
- dataSlice: VirtualDataSlice,
293
+ dataSlice: perspective.VirtualDataSlice,
415
294
  ) {
416
- const group_by = config.group_by || [];
417
- const split_by = config.split_by || [];
418
- const start_col = viewport.start_col;
419
- const end_col = viewport.end_col;
420
- const start_row = viewport.start_row || 0;
421
- const end_row = viewport.end_row;
422
-
423
- let limit = "";
424
- if (end_row !== null && end_row !== undefined) {
425
- limit = `LIMIT ${end_row - start_row} OFFSET ${start_row}`;
426
- }
427
-
428
- const schemaQuery = `DESCRIBE ${viewId}`;
429
- const schemaResults = await runQuery(this.db, schemaQuery);
430
- const columnTypes = new Map();
431
- for (const result of schemaResults) {
432
- const res = result.toJSON();
433
- columnTypes.set(res.column_name, res.column_type);
434
- }
435
-
436
- const dataColumns = Array.from(columnTypes.entries())
437
- .filter(([colName]) => !colName.startsWith("__"))
438
- .slice(start_col, end_col);
439
-
440
- const groupByColsList = [];
441
- if (group_by.length > 0) {
442
- if (split_by.length === 0) {
443
- groupByColsList.push("__GROUPING_ID__");
444
- }
445
- for (let idx = 0; idx < group_by.length; idx++) {
446
- groupByColsList.push(`__ROW_PATH_${idx}__`);
447
- }
448
- }
449
-
450
- const allColumns = [
451
- ...groupByColsList.map((col) => `"${col}"`),
452
- ...dataColumns.map(([colName]) => `"${colName}"`),
453
- ];
454
-
455
- const query = `
456
- SELECT ${allColumns.join(", ")}
457
- FROM ${viewId} ${limit}
458
- `;
295
+ const is_group_by = config.group_by?.length > 0;
296
+ const is_split_by = config.split_by?.length > 0;
297
+ const query = this.sqlBuilder.viewGetData(
298
+ viewId,
299
+ config,
300
+ viewport,
301
+ schema,
302
+ );
459
303
 
460
304
  const { rows, columns, dtypes } = await runQuery(this.db, query, {
461
305
  columns: true,
462
306
  });
463
307
 
464
308
  for (let cidx = 0; cidx < columns.length; cidx++) {
465
- const col = columns[cidx];
466
-
467
- if (cidx === 0 && group_by.length > 0 && split_by.length === 0) {
309
+ if (cidx === 0 && is_group_by && !is_split_by) {
310
+ // This is the grouping_id column, skip it
468
311
  continue;
469
312
  }
470
313
 
471
- let group_by_index = null;
472
- let max_grouping_id = null;
473
- const row_path_match = col.match(/__ROW_PATH_(\d+)__/);
474
- if (row_path_match) {
475
- group_by_index = parseInt(row_path_match[1]);
476
- max_grouping_id = 2 ** (group_by.length - group_by_index) - 1;
314
+ let col = columns[cidx];
315
+ if (is_split_by && !col.startsWith("__ROW_PATH_")) {
316
+ col = col.replaceAll("_", "|");
477
317
  }
478
318
 
479
- const dtype = duckdbTypeToPsp(dtypes[cidx]);
480
- const isDecimal = dtypes[cidx].startsWith("Decimal");
481
- const colName =
482
- group_by_index !== null
483
- ? "__ROW_PATH__"
484
- : col.replace(/_/g, "|");
319
+ const dtype = duckdbTypeToPsp(dtypes[cidx]) as ColumnType;
485
320
 
321
+ const isDecimal = dtypes[cidx].startsWith("Decimal");
486
322
  for (let ridx = 0; ridx < rows.length; ridx++) {
487
- const row = rows[ridx];
488
- const rowArray = row.toArray();
489
- const shouldSet =
490
- split_by.length > 0 ||
491
- max_grouping_id === null ||
492
- rowArray[0] < max_grouping_id;
493
-
494
- if (shouldSet) {
495
- let value = rowArray[cidx];
496
-
497
- if (isDecimal) {
498
- value = convertDecimalToNumber(value, dtypes[cidx]);
499
- }
500
-
501
- if (typeof value === "bigint") {
502
- value = Number(value);
503
- }
504
-
505
- dataSlice.setCol(
506
- dtype,
507
- colName,
508
- ridx,
509
- value,
510
- group_by_index,
511
- );
323
+ const rowArray = rows[ridx].toArray();
324
+ const grouping_id = Number(rowArray[0]);
325
+ let value = rowArray[cidx];
326
+ if (isDecimal) {
327
+ value = convertDecimalToNumber(value, dtypes[cidx]);
512
328
  }
329
+
330
+ if (typeof value === "bigint") {
331
+ value = Number(value);
332
+ }
333
+
334
+ dataSlice.setCol(dtype, col, ridx, value, grouping_id);
513
335
  }
514
336
  }
515
337
  }