duckdb 0.5.2-dev1745.0 → 0.5.2-dev1806.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/duckdb.d.ts CHANGED
@@ -20,13 +20,14 @@ export class Connection {
20
20
  constructor(db: Database, callback?: Callback<any>);
21
21
 
22
22
  all(sql: string, ...args: [...any, Callback<TableData>] | []): void;
23
+ arrowIPCAll(sql: string, ...args: [...any, Callback<TableData>] | []): void;
23
24
  each(sql: string, ...args: [...any, Callback<RowData>] | []): void;
24
25
  exec(sql: string, ...args: [...any, Callback<void>] | []): void;
25
26
 
26
27
  prepare(sql: string, ...args: [...any, Callback<Statement>] | []): Statement;
27
28
  run(sql: string, ...args: [...any, Callback<void>] | []): Statement;
28
29
 
29
- register(
30
+ register_udf(
30
31
  name: string,
31
32
  return_type: string,
32
33
  fun: (...args: any[]) => any
@@ -37,9 +38,10 @@ export class Connection {
37
38
  return_type: string,
38
39
  fun: (...args: any[]) => any
39
40
  ): void;
40
- unregister(name: string, callback: Callback<any>): void;
41
+ unregister_udf(name: string, callback: Callback<any>): void;
41
42
 
42
43
  stream(sql: any, ...args: any[]): QueryResult;
44
+ arrowIPCStream(sql: any, ...args: any[]): QueryResult;
43
45
  }
44
46
 
45
47
  export class QueryResult {
@@ -54,24 +56,31 @@ export class Database {
54
56
  connect(): Connection;
55
57
 
56
58
  all(sql: string, ...args: [...any, Callback<TableData>] | []): void;
59
+ arrowIPCAll(sql: string, ...args: [...any, Callback<TableData>] | []): void;
57
60
  each(sql: string, ...args: [...any, Callback<RowData>] | []): void;
58
61
  exec(sql: string, ...args: [...any, Callback<void>] | []): void;
59
62
 
60
63
  prepare(sql: string, ...args: [...any, Callback<Statement>] | []): Statement;
61
64
  run(sql: string, ...args: [...any, Callback<void>] | []): Statement;
62
65
 
63
- register(
66
+ register_udf(
64
67
  name: string,
65
68
  return_type: string,
66
69
  fun: (...args: any[]) => any
67
70
  ): void;
68
- unregister(name: string, callback: Callback<any>): void;
71
+ unregister_udf(name: string, callback: Callback<any>): void;
72
+
73
+ stream(sql: any, ...args: any[]): QueryResult;
74
+ arrowIPCStream(sql: any, ...args: any[]): QueryResult;
69
75
  }
70
76
 
71
77
  export class Statement {
72
78
  constructor();
73
79
 
74
80
  all(...args: [...any, Callback<TableData>] | []): void;
81
+
82
+ arrowIPCAll(...args: [...any, Callback<TableData>] | []): void;
83
+
75
84
  each(...args: [...any, Callback<RowData>] | []): void;
76
85
 
77
86
  finalize(callback?: Callback<void>): void;
package/lib/duckdb.js CHANGED
@@ -67,6 +67,15 @@ var QueryResult = duckdb.QueryResult;
67
67
  */
68
68
  QueryResult.prototype.nextChunk;
69
69
 
70
+ /**
71
+ * Function to fetch the next result blob of an Arrow IPC Stream in a zero-copy way.
72
+ * (requires arrow extension to be loaded)
73
+ *
74
+ * @method
75
+ * @return data chunk
76
+ */
77
+ QueryResult.prototype.nextIpcBuffer;
78
+
70
79
  /**
71
80
  * @name asyncIterator
72
81
  * @memberof module:duckdb~QueryResult
@@ -115,6 +124,75 @@ Connection.prototype.all = function (sql) {
115
124
  return statement.all.apply(statement, arguments);
116
125
  }
117
126
 
127
+ // Utility class for streaming Apache Arrow IPC
128
+ class IpcResultStreamIterator {
129
+ constructor(stream_result_p) {
130
+ this._depleted = false;
131
+ this.stream_result = stream_result_p;
132
+ }
133
+
134
+ async next() {
135
+ if (this._depleted) {
136
+ return { done: true, value: null };
137
+ }
138
+
139
+ const ipc_raw = await this.stream_result.nextIpcBuffer();
140
+ const res = new Uint8Array(ipc_raw);
141
+
142
+ this._depleted = res.length == 0;
143
+ return {
144
+ done: this._depleted,
145
+ value: res,
146
+ };
147
+ }
148
+
149
+ [Symbol.asyncIterator]() {
150
+ return this;
151
+ }
152
+
153
+ // Materialize the IPC stream into a list of Uint8Arrays
154
+ async toArray () {
155
+ const retval = []
156
+
157
+ for await (const ipc_buf of this) {
158
+ retval.push(ipc_buf);
159
+ }
160
+
161
+ // Push EOS message containing 4 bytes of 0
162
+ retval.push(new Uint8Array([0,0,0,0]));
163
+
164
+ return retval;
165
+ }
166
+ }
167
+
168
+ /**
169
+ * Run a SQL query and serialize the result into the Apache Arrow IPC format (requires arrow extension to be loaded)
170
+ * @arg sql
171
+ * @param {...*} params
172
+ * @param callback
173
+ * @return {void}
174
+ */
175
+ Connection.prototype.arrowIPCAll = function (sql) {
176
+ const query = "SELECT * FROM to_arrow_ipc((" + sql + "));";
177
+ var statement = new Statement(this, query);
178
+ return statement.arrowIPCAll.apply(statement, arguments);
179
+ }
180
+
181
+ /**
182
+ * Run a SQL query and return an IpcResultStreamIterator that allows streaming the result in the Apache Arrow IPC format
183
+ * (requires arrow extension to be loaded)
184
+ *
185
+ * @arg sql
186
+ * @param {...*} params
187
+ * @param callback
188
+ * @return IpcResultStreamIterator
189
+ */
190
+ Connection.prototype.arrowIPCStream = async function (sql) {
191
+ const query = "SELECT * FROM to_arrow_ipc((" + sql + "));";
192
+ const statement = new Statement(this, query);
193
+ return new IpcResultStreamIterator(await statement.stream.apply(statement, arguments));
194
+ }
195
+
118
196
  /**
119
197
  * Runs a SQL query and triggers the callback for each result row
120
198
  * @arg sql
@@ -149,9 +227,9 @@ Connection.prototype.stream = async function* (sql) {
149
227
  * @return {void}
150
228
  * @note this follows the wasm udfs somewhat but is simpler because we can pass data much more cleanly
151
229
  */
152
- Connection.prototype.register = function (name, return_type, fun) {
230
+ Connection.prototype.register_udf = function (name, return_type, fun) {
153
231
  // TODO what if this throws an error somewhere? do we need a try/catch?
154
- return this.register_bulk(name, return_type, function (desc) {
232
+ return this.register_udf_bulk(name, return_type, function (desc) {
155
233
  try {
156
234
  // Build an argument resolver
157
235
  const buildResolver = (arg) => {
@@ -289,7 +367,7 @@ Connection.prototype.exec;
289
367
  * @param callback
290
368
  * @return {void}
291
369
  */
292
- Connection.prototype.register_bulk;
370
+ Connection.prototype.register_udf_bulk;
293
371
  /**
294
372
  * Unregister a User Defined Function
295
373
  *
@@ -299,7 +377,7 @@ Connection.prototype.register_bulk;
299
377
  * @param callback
300
378
  * @return {void}
301
379
  */
302
- Connection.prototype.unregister;
380
+ Connection.prototype.unregister_udf;
303
381
 
304
382
  var default_connection = function (o) {
305
383
  if (o.default_connection == undefined) {
@@ -308,6 +386,29 @@ var default_connection = function (o) {
308
386
  return o.default_connection;
309
387
  }
310
388
 
389
+ /**
390
+ * Register a Buffer to be scanned using the Apache Arrow IPC scanner
391
+ * (requires arrow extension to be loaded)
392
+ *
393
+ * @method
394
+ * @arg name
395
+ * @arg array
396
+ * @arg force
397
+ * @param callback
398
+ * @return {void}
399
+ */
400
+ Connection.prototype.register_buffer;
401
+
402
+ /**
403
+ * Unregister the Buffer
404
+ *
405
+ * @method
406
+ * @arg name
407
+ * @param callback
408
+ * @return {void}
409
+ */
410
+ Connection.prototype.unregister_buffer;
411
+
311
412
 
312
413
  /**
313
414
  * Closes database instance
@@ -390,7 +491,18 @@ Database.prototype.run = function () {
390
491
  }
391
492
 
392
493
  /**
393
- * Convenience method for Connection#each using a built-in default connection
494
+ * Convenience method for Connection#scanArrowIpc using a built-in default connection
495
+ * @arg sql
496
+ * @param {...*} params
497
+ * @param callback
498
+ * @return {this}
499
+ */
500
+ Database.prototype.scanArrowIpc = function () {
501
+ default_connection(this).scanArrowIpc.apply(this.default_connection, arguments);
502
+ return this;
503
+ }
504
+
505
+ /**
394
506
  * @arg sql
395
507
  * @param {...*} params
396
508
  * @param callback
@@ -414,7 +526,29 @@ Database.prototype.all = function () {
414
526
  }
415
527
 
416
528
  /**
417
- * Convenience method for Connection#exec using a built-in default connection
529
+ * Convenience method for Connection#arrowIPCAll using a built-in default connection
530
+ * @arg sql
531
+ * @param {...*} params
532
+ * @param callback
533
+ * @return {this}
534
+ */
535
+ Database.prototype.arrowIPCAll = function () {
536
+ default_connection(this).arrowIPCAll.apply(this.default_connection, arguments);
537
+ return this;
538
+ }
539
+
540
+ /**
541
+ * Convenience method for Connection#arrowIPCStream using a built-in default connection
542
+ * @arg sql
543
+ * @param {...*} params
544
+ * @param callback
545
+ * @return {Promise<IpcResultStreamIterator>}
546
+ */
547
+ Database.prototype.arrowIPCStream = function () {
548
+ return default_connection(this).arrowIPCStream.apply(this.default_connection, arguments);
549
+ }
550
+
551
+ /**
418
552
  * @arg sql
419
553
  * @param {...*} params
420
554
  * @param callback
@@ -426,24 +560,52 @@ Database.prototype.exec = function () {
426
560
  }
427
561
 
428
562
  /**
429
- * Convenience method for Connection#register using a built-in default connection
563
+ * Register a User Defined Function
564
+ *
565
+ * Convenience method for Connection#register_udf
430
566
  * @arg name
431
567
  * @arg return_type
432
568
  * @arg fun
433
569
  * @return {this}
434
570
  */
435
- Database.prototype.register = function () {
436
- default_connection(this).register.apply(this.default_connection, arguments);
571
+ Database.prototype.register_udf = function () {
572
+ default_connection(this).register_udf.apply(this.default_connection, arguments);
573
+ return this;
574
+ }
575
+
576
+ /**
577
+ * Register a buffer containing serialized data to be scanned from DuckDB.
578
+ *
579
+ * Convenience method for Connection#register_buffer
580
+ * @arg name
581
+ * @return {this}
582
+ */
583
+ Database.prototype.register_buffer = function () {
584
+ default_connection(this).register_buffer.apply(this.default_connection, arguments);
585
+ return this;
586
+ }
587
+
588
+ /**
589
+ * Unregister a Buffer
590
+ *
591
+ * Convenience method for Connection#unregister_buffer
592
+ * @arg name
593
+ * @return {this}
594
+ */
595
+ Database.prototype.unregister_buffer = function () {
596
+ default_connection(this).unregister_buffer.apply(this.default_connection, arguments);
437
597
  return this;
438
598
  }
439
599
 
440
600
  /**
441
- * Convenience method for Connection#unregister using a built-in default connection
601
+ * Unregister a UDF
602
+ *
603
+ * Convenience method for Connection#unregister_udf
442
604
  * @arg name
443
605
  * @return {this}
444
606
  */
445
- Database.prototype.unregister = function () {
446
- default_connection(this).unregister.apply(this.default_connection, arguments);
607
+ Database.prototype.unregister_udf = function () {
608
+ default_connection(this).unregister_udf.apply(this.default_connection, arguments);
447
609
  return this;
448
610
  }
449
611
 
@@ -477,6 +639,14 @@ Statement.prototype.run;
477
639
  * @return {void}
478
640
  */
479
641
  Statement.prototype.all;
642
+ /**
643
+ * @method
644
+ * @arg sql
645
+ * @param {...*} params
646
+ * @param callback
647
+ * @return {void}
648
+ */
649
+ Statement.prototype.arrowIPCAll;
480
650
  /**
481
651
  * @method
482
652
  * @arg sql
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1745.0",
5
+ "version": "0.5.2-dev1806.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -18,7 +18,7 @@
18
18
  "scripts": {
19
19
  "install": "node-pre-gyp install --fallback-to-build",
20
20
  "pretest": "node test/support/createdb.js",
21
- "test": "mocha -R spec --timeout 480000",
21
+ "test": "mocha -R spec --timeout 480000 --expose-gc",
22
22
  "pack": "node-pre-gyp package"
23
23
  },
24
24
  "directories": {
@@ -28,6 +28,7 @@
28
28
  "devDependencies": {
29
29
  "@types/mocha": "^10.0.0",
30
30
  "@types/node": "^18.11.0",
31
+ "apache-arrow": "^9.0.0",
31
32
  "aws-sdk": "^2.790.0",
32
33
  "chai": "^4.3.6",
33
34
  "jsdoc3-parser": "^2.0.0",
@@ -15,8 +15,10 @@ Napi::Object Connection::Init(Napi::Env env, Napi::Object exports) {
15
15
  Napi::Function t =
16
16
  DefineClass(env, "Connection",
17
17
  {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
18
- InstanceMethod("register_bulk", &Connection::Register),
19
- InstanceMethod("unregister", &Connection::Unregister)});
18
+ InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
19
+ InstanceMethod("register_buffer", &Connection::RegisterBuffer),
20
+ InstanceMethod("unregister_udf", &Connection::UnregisterUdf),
21
+ InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
20
22
 
21
23
  constructor = Napi::Persistent(t);
22
24
  constructor.SuppressDestruct();
@@ -56,6 +58,46 @@ struct ConnectTask : public Task {
56
58
  bool success = false;
57
59
  };
58
60
 
61
+ struct NodeReplacementScanData : duckdb::ReplacementScanData {
62
+ NodeReplacementScanData(Connection *con_p) : connection_ref(con_p) {};
63
+ Connection *connection_ref;
64
+ };
65
+
66
+ static duckdb::unique_ptr<duckdb::TableFunctionRef>
67
+ ScanReplacement(duckdb::ClientContext &context, const std::string &table_name, duckdb::ReplacementScanData *data) {
68
+ auto &buffers = ((NodeReplacementScanData *)data)->connection_ref->buffers;
69
+ // Lookup buffer
70
+ auto lookup = buffers.find(table_name);
71
+ if (lookup == buffers.end()) {
72
+ return nullptr;
73
+ }
74
+
75
+ // Create table scan on ipc buffers
76
+ auto name = lookup->first;
77
+ auto ipc_buffer_array = lookup->second;
78
+
79
+ auto table_function = duckdb::make_unique<duckdb::TableFunctionRef>();
80
+ std::vector<duckdb::unique_ptr<duckdb::ParsedExpression>> table_fun_children;
81
+
82
+ duckdb::vector<duckdb::Value> list_children;
83
+
84
+ for (uint64_t ipc_idx = 0; ipc_idx < ipc_buffer_array.size(); ipc_idx++) {
85
+ auto &v = ipc_buffer_array[ipc_idx];
86
+ duckdb::child_list_t<duckdb::Value> struct_children;
87
+ struct_children.push_back(make_pair("ptr", duckdb::Value::UBIGINT(v.first)));
88
+ struct_children.push_back(make_pair("size", duckdb::Value::UBIGINT(v.second)));
89
+
90
+ // Push struct into table fun
91
+ list_children.push_back(duckdb::Value::STRUCT(move(struct_children)));
92
+ }
93
+
94
+ table_fun_children.push_back(
95
+ duckdb::make_unique<duckdb::ConstantExpression>(duckdb::Value::LIST(move(list_children))));
96
+ table_function->function =
97
+ duckdb::make_unique<duckdb::FunctionExpression>("scan_arrow_ipc", move(table_fun_children));
98
+ return table_function;
99
+ }
100
+
59
101
  Connection::Connection(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Connection>(info) {
60
102
  Napi::Env env = info.Env();
61
103
  int length = info.Length();
@@ -68,6 +110,15 @@ Connection::Connection(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Connec
68
110
  database_ref = Napi::ObjectWrap<Database>::Unwrap(info[0].As<Napi::Object>());
69
111
  database_ref->Ref();
70
112
 
113
+ if (!database_ref->database) {
114
+ Napi::Error::New(env, "Connection created on database that was not yet initialized")
115
+ .ThrowAsJavaScriptException();
116
+ return;
117
+ }
118
+ // Register replacement scan
119
+ database_ref->database->instance->config.replacement_scans.emplace_back(
120
+ ScanReplacement, duckdb::make_unique<NodeReplacementScanData>(this));
121
+
71
122
  Napi::Function callback;
72
123
  if (info.Length() > 0 && info[1].IsFunction()) {
73
124
  callback = info[1].As<Napi::Function>();
@@ -217,8 +268,8 @@ void DuckDBNodeUDFLauncher(Napi::Env env, Napi::Function jsudf, std::nullptr_t *
217
268
  jsargs->done = true;
218
269
  }
219
270
 
220
- struct RegisterTask : public Task {
221
- RegisterTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
271
+ struct RegisterUdfTask : public Task {
272
+ RegisterUdfTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
222
273
  : Task(connection, callback), name(std::move(name)), return_type_name(std::move(return_type_name)) {
223
274
  }
224
275
 
@@ -258,7 +309,7 @@ struct RegisterTask : public Task {
258
309
  std::string return_type_name;
259
310
  };
260
311
 
261
- Napi::Value Connection::Register(const Napi::CallbackInfo &info) {
312
+ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
262
313
  auto env = info.Env();
263
314
  if (info.Length() < 3 || !info[0].IsString() || !info[1].IsString() || !info[2].IsFunction()) {
264
315
  Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
@@ -287,13 +338,69 @@ Napi::Value Connection::Register(const Napi::CallbackInfo &info) {
287
338
  udfs[name] = udf;
288
339
 
289
340
  database_ref->Schedule(info.Env(),
290
- duckdb::make_unique<RegisterTask>(*this, name, return_type_name, completion_callback));
341
+ duckdb::make_unique<RegisterUdfTask>(*this, name, return_type_name, completion_callback));
342
+
343
+ return Value();
344
+ }
345
+
346
+ // Register Arrow IPC buffers for scanning from DuckDB
347
+ Napi::Value Connection::RegisterBuffer(const Napi::CallbackInfo &info) {
348
+ auto env = info.Env();
349
+ if (info.Length() < 2 || !info[0].IsString() || !info[1].IsObject()) {
350
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
351
+ return env.Null();
352
+ }
291
353
 
354
+ std::string name = info[0].As<Napi::String>();
355
+ Napi::Array array = info[1].As<Napi::Array>();
356
+ bool force_register = false;
357
+
358
+ if (info.Length() > 2) {
359
+ if (!info[2].IsBoolean()) {
360
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
361
+ return env.Null();
362
+ }
363
+ force_register = info[2].As<Napi::Boolean>().Value();
364
+ }
365
+
366
+ array_references[name] = Napi::Persistent(array);
367
+
368
+ if (!force_register && buffers.find(name) != buffers.end()) {
369
+ Napi::TypeError::New(env, "Buffer with this name already exists").ThrowAsJavaScriptException();
370
+ return env.Null();
371
+ }
372
+
373
+ buffers[name] = std::vector<std::pair<uint64_t, uint64_t>>();
374
+
375
+ for (uint64_t ipc_idx = 0; ipc_idx < array.Length(); ipc_idx++) {
376
+ Napi::Value v = array[ipc_idx];
377
+ if (!v.IsObject()) {
378
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
379
+ return env.Null();
380
+ }
381
+ Napi::Uint8Array arr = v.As<Napi::Uint8Array>();
382
+ auto raw_ptr = reinterpret_cast<uint64_t>(arr.ArrayBuffer().Data());
383
+ auto length = (uint64_t)arr.ElementLength();
384
+
385
+ buffers[name].push_back(std::pair<uint64_t, uint64_t>({raw_ptr, length}));
386
+ }
387
+
388
+ return Value();
389
+ }
390
+
391
+ Napi::Value Connection::UnRegisterBuffer(const Napi::CallbackInfo &info) {
392
+ auto env = info.Env();
393
+ if (info.Length() != 1 || !info[0].IsString()) {
394
+ Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
395
+ return env.Null();
396
+ }
397
+ std::string name = info[0].As<Napi::String>();
398
+ buffers.erase(name);
292
399
  return Value();
293
400
  }
294
401
 
295
- struct UnregisterTask : public Task {
296
- UnregisterTask(Connection &connection, std::string name, Napi::Function callback)
402
+ struct UnregisterUdfTask : public Task {
403
+ UnregisterUdfTask(Connection &connection, std::string name, Napi::Function callback)
297
404
  : Task(connection, callback), name(std::move(name)) {
298
405
  }
299
406
 
@@ -318,7 +425,7 @@ struct UnregisterTask : public Task {
318
425
  std::string name;
319
426
  };
320
427
 
321
- Napi::Value Connection::Unregister(const Napi::CallbackInfo &info) {
428
+ Napi::Value Connection::UnregisterUdf(const Napi::CallbackInfo &info) {
322
429
  auto env = info.Env();
323
430
  if (info.Length() < 1 || !info[0].IsString()) {
324
431
  Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
@@ -331,7 +438,7 @@ Napi::Value Connection::Unregister(const Napi::CallbackInfo &info) {
331
438
  callback = info[1].As<Napi::Function>();
332
439
  }
333
440
 
334
- database_ref->Schedule(info.Env(), duckdb::make_unique<UnregisterTask>(*this, name, callback));
441
+ database_ref->Schedule(info.Env(), duckdb::make_unique<UnregisterUdfTask>(*this, name, callback));
335
442
  return Value();
336
443
  }
337
444