duckdb 0.5.2-dev880.0 → 0.6.1-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/duckdb.d.ts CHANGED
@@ -20,13 +20,14 @@ export class Connection {
20
20
  constructor(db: Database, callback?: Callback<any>);
21
21
 
22
22
  all(sql: string, ...args: [...any, Callback<TableData>] | []): void;
23
+ arrowIPCAll(sql: string, ...args: [...any, Callback<TableData>] | []): void;
23
24
  each(sql: string, ...args: [...any, Callback<RowData>] | []): void;
24
25
  exec(sql: string, ...args: [...any, Callback<void>] | []): void;
25
26
 
26
27
  prepare(sql: string, ...args: [...any, Callback<Statement>] | []): Statement;
27
28
  run(sql: string, ...args: [...any, Callback<void>] | []): Statement;
28
29
 
29
- register(
30
+ register_udf(
30
31
  name: string,
31
32
  return_type: string,
32
33
  fun: (...args: any[]) => any
@@ -37,9 +38,10 @@ export class Connection {
37
38
  return_type: string,
38
39
  fun: (...args: any[]) => any
39
40
  ): void;
40
- unregister(name: string, callback: Callback<any>): void;
41
+ unregister_udf(name: string, callback: Callback<any>): void;
41
42
 
42
43
  stream(sql: any, ...args: any[]): QueryResult;
44
+ arrowIPCStream(sql: any, ...args: any[]): QueryResult;
43
45
  }
44
46
 
45
47
  export class QueryResult {
@@ -47,31 +49,38 @@ export class QueryResult {
47
49
  }
48
50
 
49
51
  export class Database {
50
- constructor(path: string, callback?: Callback<any>);
52
+ constructor(path: string, accessMode?: number, callback?: Callback<any>);
51
53
 
52
54
  close(callback: Callback<void>): void;
53
55
 
54
56
  connect(): Connection;
55
57
 
56
58
  all(sql: string, ...args: [...any, Callback<TableData>] | []): void;
59
+ arrowIPCAll(sql: string, ...args: [...any, Callback<TableData>] | []): void;
57
60
  each(sql: string, ...args: [...any, Callback<RowData>] | []): void;
58
61
  exec(sql: string, ...args: [...any, Callback<void>] | []): void;
59
62
 
60
63
  prepare(sql: string, ...args: [...any, Callback<Statement>] | []): Statement;
61
64
  run(sql: string, ...args: [...any, Callback<void>] | []): Statement;
62
65
 
63
- register(
66
+ register_udf(
64
67
  name: string,
65
68
  return_type: string,
66
69
  fun: (...args: any[]) => any
67
70
  ): void;
68
- unregister(name: string, callback: Callback<any>): void;
71
+ unregister_udf(name: string, callback: Callback<any>): void;
72
+
73
+ stream(sql: any, ...args: any[]): QueryResult;
74
+ arrowIPCStream(sql: any, ...args: any[]): QueryResult;
69
75
  }
70
76
 
71
77
  export class Statement {
72
78
  constructor();
73
79
 
74
80
  all(...args: [...any, Callback<TableData>] | []): void;
81
+
82
+ arrowIPCAll(...args: [...any, Callback<TableData>] | []): void;
83
+
75
84
  each(...args: [...any, Callback<RowData>] | []): void;
76
85
 
77
86
  finalize(callback?: Callback<void>): void;
package/lib/duckdb.js CHANGED
@@ -67,6 +67,15 @@ var QueryResult = duckdb.QueryResult;
67
67
  */
68
68
  QueryResult.prototype.nextChunk;
69
69
 
70
+ /**
71
+ * Function to fetch the next result blob of an Arrow IPC Stream in a zero-copy way.
72
+ * (requires arrow extension to be loaded)
73
+ *
74
+ * @method
75
+ * @return data chunk
76
+ */
77
+ QueryResult.prototype.nextIpcBuffer;
78
+
70
79
  /**
71
80
  * @name asyncIterator
72
81
  * @memberof module:duckdb~QueryResult
@@ -115,6 +124,75 @@ Connection.prototype.all = function (sql) {
115
124
  return statement.all.apply(statement, arguments);
116
125
  }
117
126
 
127
+ // Utility class for streaming Apache Arrow IPC
128
+ class IpcResultStreamIterator {
129
+ constructor(stream_result_p) {
130
+ this._depleted = false;
131
+ this.stream_result = stream_result_p;
132
+ }
133
+
134
+ async next() {
135
+ if (this._depleted) {
136
+ return { done: true, value: null };
137
+ }
138
+
139
+ const ipc_raw = await this.stream_result.nextIpcBuffer();
140
+ const res = new Uint8Array(ipc_raw);
141
+
142
+ this._depleted = res.length == 0;
143
+ return {
144
+ done: this._depleted,
145
+ value: res,
146
+ };
147
+ }
148
+
149
+ [Symbol.asyncIterator]() {
150
+ return this;
151
+ }
152
+
153
+ // Materialize the IPC stream into a list of Uint8Arrays
154
+ async toArray () {
155
+ const retval = []
156
+
157
+ for await (const ipc_buf of this) {
158
+ retval.push(ipc_buf);
159
+ }
160
+
161
+ // Push EOS message containing 4 bytes of 0
162
+ retval.push(new Uint8Array([0,0,0,0]));
163
+
164
+ return retval;
165
+ }
166
+ }
167
+
168
+ /**
169
+ * Run a SQL query and serialize the result into the Apache Arrow IPC format (requires arrow extension to be loaded)
170
+ * @arg sql
171
+ * @param {...*} params
172
+ * @param callback
173
+ * @return {void}
174
+ */
175
+ Connection.prototype.arrowIPCAll = function (sql) {
176
+ const query = "SELECT * FROM to_arrow_ipc((" + sql + "));";
177
+ var statement = new Statement(this, query);
178
+ return statement.arrowIPCAll.apply(statement, arguments);
179
+ }
180
+
181
+ /**
182
+ * Run a SQL query and return an IpcResultStreamIterator that streams the result in the Apache Arrow IPC format
183
+ * (requires arrow extension to be loaded)
184
+ *
185
+ * @arg sql
186
+ * @param {...*} params
187
+ * @param callback
188
+ * @return {Promise<IpcResultStreamIterator>}
189
+ */
190
+ Connection.prototype.arrowIPCStream = async function (sql) {
191
+ const query = "SELECT * FROM to_arrow_ipc((" + sql + "));";
192
+ const statement = new Statement(this, query);
193
+ return new IpcResultStreamIterator(await statement.stream.apply(statement, arguments));
194
+ }
195
+
118
196
  /**
119
197
  * Runs a SQL query and triggers the callback for each result row
120
198
  * @arg sql
@@ -149,9 +227,9 @@ Connection.prototype.stream = async function* (sql) {
149
227
  * @return {void}
150
228
  * @note this follows the wasm udfs somewhat but is simpler because we can pass data much more cleanly
151
229
  */
152
- Connection.prototype.register = function (name, return_type, fun) {
230
+ Connection.prototype.register_udf = function (name, return_type, fun) {
153
231
  // TODO what if this throws an error somewhere? do we need a try/catch?
154
- return this.register_bulk(name, return_type, function (desc) {
232
+ return this.register_udf_bulk(name, return_type, function (desc) {
155
233
  try {
156
234
  // Build an argument resolver
157
235
  const buildResolver = (arg) => {
@@ -289,7 +367,7 @@ Connection.prototype.exec;
289
367
  * @param callback
290
368
  * @return {void}
291
369
  */
292
- Connection.prototype.register_bulk;
370
+ Connection.prototype.register_udf_bulk;
293
371
  /**
294
372
  * Unregister a User Defined Function
295
373
  *
@@ -299,7 +377,7 @@ Connection.prototype.register_bulk;
299
377
  * @param callback
300
378
  * @return {void}
301
379
  */
302
- Connection.prototype.unregister;
380
+ Connection.prototype.unregister_udf;
303
381
 
304
382
  var default_connection = function (o) {
305
383
  if (o.default_connection == undefined) {
@@ -308,6 +386,29 @@ var default_connection = function (o) {
308
386
  return o.default_connection;
309
387
  }
310
388
 
389
+ /**
390
+ * Register a Buffer to be scanned using the Apache Arrow IPC scanner
391
+ * (requires arrow extension to be loaded)
392
+ *
393
+ * @method
394
+ * @arg name
395
+ * @arg array
396
+ * @arg force
397
+ * @param callback
398
+ * @return {void}
399
+ */
400
+ Connection.prototype.register_buffer;
401
+
402
+ /**
403
+ * Unregister the Buffer
404
+ *
405
+ * @method
406
+ * @arg name
407
+ * @param callback
408
+ * @return {void}
409
+ */
410
+ Connection.prototype.unregister_buffer;
411
+
311
412
 
312
413
  /**
313
414
  * Closes database instance
@@ -390,7 +491,18 @@ Database.prototype.run = function () {
390
491
  }
391
492
 
392
493
  /**
393
- * Convenience method for Connection#each using a built-in default connection
494
+ * Convenience method for Connection#scanArrowIpc using a built-in default connection
495
+ * @arg sql
496
+ * @param {...*} params
497
+ * @param callback
498
+ * @return {this}
499
+ */
500
+ Database.prototype.scanArrowIpc = function () {
501
+ default_connection(this).scanArrowIpc.apply(this.default_connection, arguments);
502
+ return this;
503
+ }
504
+
505
+ /**
394
506
  * @arg sql
395
507
  * @param {...*} params
396
508
  * @param callback
@@ -414,7 +526,29 @@ Database.prototype.all = function () {
414
526
  }
415
527
 
416
528
  /**
417
- * Convenience method for Connection#exec using a built-in default connection
529
+ * Convenience method for Connection#arrowIPCAll using a built-in default connection
530
+ * @arg sql
531
+ * @param {...*} params
532
+ * @param callback
533
+ * @return {this}
534
+ */
535
+ Database.prototype.arrowIPCAll = function () {
536
+ default_connection(this).arrowIPCAll.apply(this.default_connection, arguments);
537
+ return this;
538
+ }
539
+
540
+ /**
541
+ * Convenience method for Connection#arrowIPCStream using a built-in default connection
542
+ * @arg sql
543
+ * @param {...*} params
544
+ * @param callback
545
+ * @return {Promise<IpcResultStreamIterator>}
546
+ */
547
+ Database.prototype.arrowIPCStream = function () {
548
+ return default_connection(this).arrowIPCStream.apply(this.default_connection, arguments);
549
+ }
550
+
551
+ /**
418
552
  * @arg sql
419
553
  * @param {...*} params
420
554
  * @param callback
@@ -426,24 +560,52 @@ Database.prototype.exec = function () {
426
560
  }
427
561
 
428
562
  /**
429
- * Convenience method for Connection#register using a built-in default connection
563
+ * Register a User Defined Function
564
+ *
565
+ * Convenience method for Connection#register_udf
430
566
  * @arg name
431
567
  * @arg return_type
432
568
  * @arg fun
433
569
  * @return {this}
434
570
  */
435
- Database.prototype.register = function () {
436
- default_connection(this).register.apply(this.default_connection, arguments);
571
+ Database.prototype.register_udf = function () {
572
+ default_connection(this).register_udf.apply(this.default_connection, arguments);
573
+ return this;
574
+ }
575
+
576
+ /**
577
+ * Register a buffer containing serialized data to be scanned from DuckDB.
578
+ *
579
+ * Convenience method for Connection#register_buffer
580
+ * @arg name
581
+ * @return {this}
582
+ */
583
+ Database.prototype.register_buffer = function () {
584
+ default_connection(this).register_buffer.apply(this.default_connection, arguments);
585
+ return this;
586
+ }
587
+
588
+ /**
589
+ * Unregister a Buffer
590
+ *
591
+ * Convenience method for Connection#unregister_buffer
592
+ * @arg name
593
+ * @return {this}
594
+ */
595
+ Database.prototype.unregister_buffer = function () {
596
+ default_connection(this).unregister_buffer.apply(this.default_connection, arguments);
437
597
  return this;
438
598
  }
439
599
 
440
600
  /**
441
- * Convenience method for Connection#unregister using a built-in default connection
601
+ * Unregister a UDF
602
+ *
603
+ * Convenience method for Connection#unregister_udf
442
604
  * @arg name
443
605
  * @return {this}
444
606
  */
445
- Database.prototype.unregister = function () {
446
- default_connection(this).unregister.apply(this.default_connection, arguments);
607
+ Database.prototype.unregister_udf = function () {
608
+ default_connection(this).unregister_udf.apply(this.default_connection, arguments);
447
609
  return this;
448
610
  }
449
611
 
@@ -477,6 +639,14 @@ Statement.prototype.run;
477
639
  * @return {void}
478
640
  */
479
641
  Statement.prototype.all;
642
+ /**
643
+ * @method
644
+ * @arg sql
645
+ * @param {...*} params
646
+ * @param callback
647
+ * @return {void}
648
+ */
649
+ Statement.prototype.arrowIPCAll;
480
650
  /**
481
651
  * @method
482
652
  * @arg sql
package/package.json CHANGED
@@ -2,13 +2,13 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev880.0",
5
+ "version": "0.6.1-dev0.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
9
9
  "@mapbox/node-pre-gyp": "^1.0.0",
10
10
  "node-addon-api": "*",
11
- "node-gyp": "^7.1.2"
11
+ "node-gyp": "^9.3.0"
12
12
  },
13
13
  "binary": {
14
14
  "module_name": "duckdb",
@@ -18,7 +18,7 @@
18
18
  "scripts": {
19
19
  "install": "node-pre-gyp install --fallback-to-build",
20
20
  "pretest": "node test/support/createdb.js",
21
- "test": "mocha -R spec --timeout 480000",
21
+ "test": "mocha -R spec --timeout 480000 --expose-gc",
22
22
  "pack": "node-pre-gyp package"
23
23
  },
24
24
  "directories": {
@@ -28,6 +28,7 @@
28
28
  "devDependencies": {
29
29
  "@types/mocha": "^10.0.0",
30
30
  "@types/node": "^18.11.0",
31
+ "apache-arrow": "^9.0.0",
31
32
  "aws-sdk": "^2.790.0",
32
33
  "chai": "^4.3.6",
33
34
  "jsdoc3-parser": "^2.0.0",
@@ -15,8 +15,10 @@ Napi::Object Connection::Init(Napi::Env env, Napi::Object exports) {
15
15
  Napi::Function t =
16
16
  DefineClass(env, "Connection",
17
17
  {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
18
- InstanceMethod("register_bulk", &Connection::Register),
19
- InstanceMethod("unregister", &Connection::Unregister)});
18
+ InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
19
+ InstanceMethod("register_buffer", &Connection::RegisterBuffer),
20
+ InstanceMethod("unregister_udf", &Connection::UnregisterUdf),
21
+ InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
20
22
 
21
23
  constructor = Napi::Persistent(t);
22
24
  constructor.SuppressDestruct();
@@ -56,6 +58,46 @@ struct ConnectTask : public Task {
56
58
  bool success = false;
57
59
  };
58
60
 
61
+ struct NodeReplacementScanData : duckdb::ReplacementScanData {
62
+ NodeReplacementScanData(Connection *con_p) : connection_ref(con_p) {};
63
+ Connection *connection_ref;
64
+ };
65
+
66
+ static duckdb::unique_ptr<duckdb::TableFunctionRef>
67
+ ScanReplacement(duckdb::ClientContext &context, const std::string &table_name, duckdb::ReplacementScanData *data) {
68
+ auto &buffers = ((NodeReplacementScanData *)data)->connection_ref->buffers;
69
+ // Lookup buffer
70
+ auto lookup = buffers.find(table_name);
71
+ if (lookup == buffers.end()) {
72
+ return nullptr;
73
+ }
74
+
75
+ // Create table scan on ipc buffers
76
+ auto name = lookup->first;
77
+ auto ipc_buffer_array = lookup->second;
78
+
79
+ auto table_function = duckdb::make_unique<duckdb::TableFunctionRef>();
80
+ std::vector<duckdb::unique_ptr<duckdb::ParsedExpression>> table_fun_children;
81
+
82
+ duckdb::vector<duckdb::Value> list_children;
83
+
84
+ for (uint64_t ipc_idx = 0; ipc_idx < ipc_buffer_array.size(); ipc_idx++) {
85
+ auto &v = ipc_buffer_array[ipc_idx];
86
+ duckdb::child_list_t<duckdb::Value> struct_children;
87
+ struct_children.push_back(make_pair("ptr", duckdb::Value::UBIGINT(v.first)));
88
+ struct_children.push_back(make_pair("size", duckdb::Value::UBIGINT(v.second)));
89
+
90
+ // Push struct into table fun
91
+ list_children.push_back(duckdb::Value::STRUCT(move(struct_children)));
92
+ }
93
+
94
+ table_fun_children.push_back(
95
+ duckdb::make_unique<duckdb::ConstantExpression>(duckdb::Value::LIST(move(list_children))));
96
+ table_function->function =
97
+ duckdb::make_unique<duckdb::FunctionExpression>("scan_arrow_ipc", move(table_fun_children));
98
+ return table_function;
99
+ }
100
+
59
101
  Connection::Connection(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Connection>(info) {
60
102
  Napi::Env env = info.Env();
61
103
  int length = info.Length();
@@ -68,6 +110,11 @@ Connection::Connection(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Connec
68
110
  database_ref = Napi::ObjectWrap<Database>::Unwrap(info[0].As<Napi::Object>());
69
111
  database_ref->Ref();
70
112
 
113
+ // Register replacement scan
114
+ // TODO: disabled currently, either fix or remove.
115
+ // database_ref->database->instance->config.replacement_scans.emplace_back(
116
+ // ScanReplacement, duckdb::make_unique<NodeReplacementScanData>(this));
117
+
71
118
  Napi::Function callback;
72
119
  if (info.Length() > 0 && info[1].IsFunction()) {
73
120
  callback = info[1].As<Napi::Function>();
@@ -217,8 +264,8 @@ void DuckDBNodeUDFLauncher(Napi::Env env, Napi::Function jsudf, std::nullptr_t *
217
264
  jsargs->done = true;
218
265
  }
219
266
 
220
- struct RegisterTask : public Task {
221
- RegisterTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
267
+ struct RegisterUdfTask : public Task {
268
+ RegisterUdfTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
222
269
  : Task(connection, callback), name(std::move(name)), return_type_name(std::move(return_type_name)) {
223
270
  }
224
271
 
@@ -258,7 +305,7 @@ struct RegisterTask : public Task {
258
305
  std::string return_type_name;
259
306
  };
260
307
 
261
- Napi::Value Connection::Register(const Napi::CallbackInfo &info) {
308
+ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
262
309
  auto env = info.Env();
263
310
  if (info.Length() < 3 || !info[0].IsString() || !info[1].IsString() || !info[2].IsFunction()) {
264
311
  Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
@@ -287,13 +334,77 @@ Napi::Value Connection::Register(const Napi::CallbackInfo &info) {
287
334
  udfs[name] = udf;
288
335
 
289
336
  database_ref->Schedule(info.Env(),
290
- duckdb::make_unique<RegisterTask>(*this, name, return_type_name, completion_callback));
337
+ duckdb::make_unique<RegisterUdfTask>(*this, name, return_type_name, completion_callback));
338
+
339
+ return Value();
340
+ }
341
+
342
+ // Register Arrow IPC buffers for scanning from DuckDB
343
+ Napi::Value Connection::RegisterBuffer(const Napi::CallbackInfo &info) {
344
+ auto env = info.Env();
345
+
346
+ Napi::TypeError::New(env, "Register buffer currently not implemented").ThrowAsJavaScriptException();
347
+ return env.Null();
348
+
349
+ if (info.Length() < 2 || !info[0].IsString() || !info[1].IsObject()) {
350
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
351
+ return env.Null();
352
+ }
353
+
354
+ std::string name = info[0].As<Napi::String>();
355
+ Napi::Array array = info[1].As<Napi::Array>();
356
+ bool force_register = false;
357
+
358
+ if (info.Length() > 2) {
359
+ if (!info[2].IsBoolean()) {
360
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
361
+ return env.Null();
362
+ }
363
+ force_register = info[2].As<Napi::Boolean>().Value();
364
+ }
365
+
366
+ array_references[name] = Napi::Persistent(array);
367
+
368
+ if (!force_register && buffers.find(name) != buffers.end()) {
369
+ Napi::TypeError::New(env, "Buffer with this name already exists").ThrowAsJavaScriptException();
370
+ return env.Null();
371
+ }
372
+
373
+ buffers[name] = std::vector<std::pair<uint64_t, uint64_t>>();
291
374
 
375
+ for (uint64_t ipc_idx = 0; ipc_idx < array.Length(); ipc_idx++) {
376
+ Napi::Value v = array[ipc_idx];
377
+ if (!v.IsObject()) {
378
+ Napi::TypeError::New(env, "Incorrect params").ThrowAsJavaScriptException();
379
+ return env.Null();
380
+ }
381
+ Napi::Uint8Array arr = v.As<Napi::Uint8Array>();
382
+ auto raw_ptr = reinterpret_cast<uint64_t>(arr.ArrayBuffer().Data());
383
+ auto length = (uint64_t)arr.ElementLength();
384
+
385
+ buffers[name].push_back(std::pair<uint64_t, uint64_t>({raw_ptr, length}));
386
+ }
387
+
388
+ return Value();
389
+ }
390
+
391
+ Napi::Value Connection::UnRegisterBuffer(const Napi::CallbackInfo &info) {
392
+ auto env = info.Env();
393
+
394
+ Napi::TypeError::New(env, "Register buffer currently not implemented").ThrowAsJavaScriptException();
395
+ return env.Null();
396
+
397
+ if (info.Length() != 1 || !info[0].IsString()) {
398
+ Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
399
+ return env.Null();
400
+ }
401
+ std::string name = info[0].As<Napi::String>();
402
+ buffers.erase(name);
292
403
  return Value();
293
404
  }
294
405
 
295
- struct UnregisterTask : public Task {
296
- UnregisterTask(Connection &connection, std::string name, Napi::Function callback)
406
+ struct UnregisterUdfTask : public Task {
407
+ UnregisterUdfTask(Connection &connection, std::string name, Napi::Function callback)
297
408
  : Task(connection, callback), name(std::move(name)) {
298
409
  }
299
410
 
@@ -318,7 +429,7 @@ struct UnregisterTask : public Task {
318
429
  std::string name;
319
430
  };
320
431
 
321
- Napi::Value Connection::Unregister(const Napi::CallbackInfo &info) {
432
+ Napi::Value Connection::UnregisterUdf(const Napi::CallbackInfo &info) {
322
433
  auto env = info.Env();
323
434
  if (info.Length() < 1 || !info[0].IsString()) {
324
435
  Napi::TypeError::New(env, "Holding it wrong").ThrowAsJavaScriptException();
@@ -331,7 +442,7 @@ Napi::Value Connection::Unregister(const Napi::CallbackInfo &info) {
331
442
  callback = info[1].As<Napi::Function>();
332
443
  }
333
444
 
334
- database_ref->Schedule(info.Env(), duckdb::make_unique<UnregisterTask>(*this, name, callback));
445
+ database_ref->Schedule(info.Env(), duckdb::make_unique<UnregisterUdfTask>(*this, name, callback));
335
446
  return Value();
336
447
  }
337
448