duckdb 0.5.2-dev878.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7559,7 +7559,7 @@ public:
7559
7559
  static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
7560
7560
  const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define,
7561
7561
  idx_t max_repeat);
7562
- virtual void InitializeRead(const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7562
+ virtual void InitializeRead(idx_t row_group_index, const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7563
7563
  virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out,
7564
7564
  Vector &result_out);
7565
7565
 
@@ -7579,7 +7579,7 @@ public:
7579
7579
  // register the range this reader will touch for prefetching
7580
7580
  virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
7581
7581
 
7582
- virtual unique_ptr<BaseStatistics> Stats(const std::vector<ColumnChunk> &columns);
7582
+ virtual unique_ptr<BaseStatistics> Stats(idx_t row_group_idx_p, const std::vector<ColumnChunk> &columns);
7583
7583
 
7584
7584
  protected:
7585
7585
  // readers that use the default Read() need to implement those
@@ -7742,6 +7742,7 @@ struct ParquetOptions {
7742
7742
 
7743
7743
  bool binary_as_string = false;
7744
7744
  bool filename = false;
7745
+ bool file_row_number = false;
7745
7746
  bool hive_partitioning = false;
7746
7747
 
7747
7748
  public:
package/src/statement.cpp CHANGED
@@ -4,6 +4,9 @@
4
4
 
5
5
  #include <algorithm>
6
6
  #include <cassert>
7
+ #include <iostream>
8
+ #include <string>
9
+ #include <regex>
7
10
 
8
11
  namespace node_duckdb {
9
12
 
@@ -15,8 +18,8 @@ Napi::Object Statement::Init(Napi::Env env, Napi::Object exports) {
15
18
  Napi::Function t =
16
19
  DefineClass(env, "Statement",
17
20
  {InstanceMethod("run", &Statement::Run), InstanceMethod("all", &Statement::All),
18
- InstanceMethod("each", &Statement::Each), InstanceMethod("finalize", &Statement::Finish),
19
- InstanceMethod("stream", &Statement::Stream)});
21
+ InstanceMethod("arrowIPCAll", &Statement::ArrowIPCAll), InstanceMethod("each", &Statement::Each),
22
+ InstanceMethod("finalize", &Statement::Finish), InstanceMethod("stream", &Statement::Stream)});
20
23
 
21
24
  constructor = Napi::Persistent(t);
22
25
  constructor.SuppressDestruct();
@@ -225,8 +228,7 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
225
228
  value = object_value;
226
229
  } break;
227
230
  default:
228
- Napi::Error::New(env, "Data type is not supported " + dval.type().ToString()).ThrowAsJavaScriptException();
229
- return env.Null();
231
+ value = Napi::String::New(env, dval.ToString());
230
232
  }
231
233
 
232
234
  return value;
@@ -255,7 +257,7 @@ static Napi::Value convert_chunk(Napi::Env &env, std::vector<std::string> names,
255
257
  return scope.Escape(result);
256
258
  }
257
259
 
258
- enum RunType { RUN, EACH, ALL };
260
+ enum RunType { RUN, EACH, ALL, ARROW_ALL };
259
261
 
260
262
  struct StatementParam {
261
263
  std::vector<duckdb::Value> params;
@@ -275,7 +277,8 @@ struct RunPreparedTask : public Task {
275
277
  return;
276
278
  }
277
279
 
278
- result = statement.statement->Execute(params->params, run_type != RunType::ALL);
280
+ result =
281
+ statement.statement->Execute(params->params, run_type != RunType::ALL && run_type != RunType::ARROW_ALL);
279
282
  }
280
283
 
281
284
  void Callback() override {
@@ -350,6 +353,62 @@ struct RunPreparedTask : public Task {
350
353
 
351
354
  cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
352
355
  } break;
356
+ case RunType::ARROW_ALL: {
357
+ auto materialized_result = (duckdb::MaterializedQueryResult *)result.get();
358
+ // +1 is for null bytes at end of stream
359
+ Napi::Array result_arr(Napi::Array::New(env, materialized_result->RowCount() + 1));
360
+
361
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
362
+ delete static_cast<std::shared_ptr<duckdb::QueryResult> *>(hint);
363
+ };
364
+
365
+ std::shared_ptr<duckdb::QueryResult> result_ptr = move(result);
366
+
367
+ duckdb::idx_t out_idx = 1;
368
+ while (true) {
369
+ auto chunk = result_ptr->Fetch();
370
+
371
+ if (!chunk || chunk->size() == 0) {
372
+ break;
373
+ }
374
+
375
+ D_ASSERT(chunk->ColumnCount() == 2);
376
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
377
+ D_ASSERT(chunk->data[1].GetType() == duckdb::LogicalType::BOOLEAN);
378
+
379
+ for (duckdb::idx_t row_idx = 0; row_idx < chunk->size(); row_idx++) {
380
+ duckdb::string_t blob = ((duckdb::string_t *)(chunk->data[0].GetData()))[row_idx];
381
+ bool is_header = chunk->data[1].GetData()[row_idx];
382
+
383
+ // Create shared pointer to give (shared) ownership to ArrayBuffer, note that for these materialized
384
+ // query results, the string data is owned by the QueryResult
385
+ auto result_ref_ptr = new std::shared_ptr<duckdb::QueryResult>(result_ptr);
386
+
387
+ auto array_buffer = Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(),
388
+ deleter, result_ref_ptr);
389
+
390
+ auto typed_array = Napi::Uint8Array::New(env, blob.GetSize(), array_buffer, 0);
391
+
392
+ // TODO we should handle this in duckdb probably
393
+ if (is_header) {
394
+ result_arr.Set((uint32_t)0, typed_array);
395
+ } else {
396
+ D_ASSERT(out_idx < materialized_result->RowCount());
397
+ result_arr.Set(out_idx++, typed_array);
398
+ }
399
+ }
400
+ }
401
+
402
+ // TODO we should handle this in duckdb probably
403
+ auto null_arr = Napi::Uint8Array::New(env, 4);
404
+ memset(null_arr.Data(), '\0', 4);
405
+ result_arr.Set(out_idx++, null_arr);
406
+
407
+ // Confirm all rows are set
408
+ D_ASSERT(out_idx == materialized_result->RowCount() + 1);
409
+
410
+ cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
411
+ } break;
353
412
  }
354
413
  }
355
414
  std::unique_ptr<duckdb::QueryResult> result;
@@ -424,6 +483,12 @@ Napi::Value Statement::All(const Napi::CallbackInfo &info) {
424
483
  return info.This();
425
484
  }
426
485
 
486
+ Napi::Value Statement::ArrowIPCAll(const Napi::CallbackInfo &info) {
487
+ connection_ref->database_ref->Schedule(
488
+ info.Env(), duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::ARROW_ALL));
489
+ return info.This();
490
+ }
491
+
427
492
  Napi::Value Statement::Run(const Napi::CallbackInfo &info) {
428
493
  connection_ref->database_ref->Schedule(info.Env(),
429
494
  duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::RUN));
@@ -472,7 +537,9 @@ Napi::FunctionReference QueryResult::constructor;
472
537
  Napi::Object QueryResult::Init(Napi::Env env, Napi::Object exports) {
473
538
  Napi::HandleScope scope(env);
474
539
 
475
- Napi::Function t = DefineClass(env, "QueryResult", {InstanceMethod("nextChunk", &QueryResult::NextChunk)});
540
+ Napi::Function t = DefineClass(env, "QueryResult",
541
+ {InstanceMethod("nextChunk", &QueryResult::NextChunk),
542
+ InstanceMethod("nextIpcBuffer", &QueryResult::NextIpcBuffer)});
476
543
 
477
544
  constructor = Napi::Persistent(t);
478
545
  constructor.SuppressDestruct();
@@ -522,6 +589,48 @@ struct GetChunkTask : public Task {
522
589
  std::unique_ptr<duckdb::DataChunk> chunk;
523
590
  };
524
591
 
592
+ struct GetNextArrowIpcTask : public Task {
593
+ GetNextArrowIpcTask(QueryResult &query_result, Napi::Promise::Deferred deferred)
594
+ : Task(query_result), deferred(deferred) {
595
+ }
596
+
597
+ void DoWork() override {
598
+ auto &query_result = Get<QueryResult>();
599
+ chunk = query_result.result->Fetch();
600
+ }
601
+
602
+ void DoCallback() override {
603
+ auto &query_result = Get<QueryResult>();
604
+ Napi::Env env = query_result.Env();
605
+ Napi::HandleScope scope(env);
606
+
607
+ if (chunk == nullptr || chunk->size() == 0) {
608
+ deferred.Resolve(env.Null());
609
+ return;
610
+ }
611
+
612
+ // Each fetched Arrow IPC chunk should be a single row with two columns, the first of which is a blob
613
+ D_ASSERT(chunk->size() == 1 && chunk->ColumnCount() == 2);
614
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
615
+
616
+ duckdb::string_t blob = *(duckdb::string_t *)(chunk->data[0].GetData());
617
+
618
+ // Transfer ownership and Construct ArrayBuffer
619
+ auto data_chunk_ptr = new std::unique_ptr<duckdb::DataChunk>();
620
+ *data_chunk_ptr = std::move(chunk);
621
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
622
+ delete static_cast<std::unique_ptr<duckdb::DataChunk> *>(hint);
623
+ };
624
+ auto array_buffer =
625
+ Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(), deleter, data_chunk_ptr);
626
+
627
+ deferred.Resolve(array_buffer);
628
+ }
629
+
630
+ Napi::Promise::Deferred deferred;
631
+ std::unique_ptr<duckdb::DataChunk> chunk;
632
+ };
633
+
525
634
  Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
526
635
  auto env = info.Env();
527
636
  auto deferred = Napi::Promise::Deferred::New(env);
@@ -530,4 +639,12 @@ Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
530
639
  return deferred.Promise();
531
640
  }
532
641
 
642
+ // Should only be called on an arrow ipc query
643
+ Napi::Value QueryResult::NextIpcBuffer(const Napi::CallbackInfo &info) {
644
+ auto env = info.Env();
645
+ auto deferred = Napi::Promise::Deferred::New(env);
646
+ database_ref->Schedule(env, duckdb::make_unique<GetNextArrowIpcTask>(*this, deferred));
647
+ return deferred.Promise();
648
+ }
649
+
533
650
  } // namespace node_duckdb
@@ -0,0 +1,61 @@
1
+ var sqlite3 = require('..');
2
+ var assert = require('assert');
3
+ var fs = require('fs');
4
+
5
+ describe('exec', function() {
6
+ var db;
7
+ before(function(done) {
8
+ db = new sqlite3.Database(':memory:', done);
9
+ });
10
+
11
+ // Note: the Arrow IPC API requires the arrow extension to be loaded. The tests for this functionality reside in:
12
+ // https://github.com/duckdblabs/arrow
13
+ describe(`Arrow IPC API fails neatly when extension not loaded`, () => {
14
+ let db;
15
+ let conn;
16
+ before((done) => {
17
+ db = new sqlite3.Database(':memory:', {"allow_unsigned_extensions": "true"}, () => {
18
+ done();
19
+ });
20
+ });
21
+
22
+ it(`Basic examples`, async () => {
23
+ const range_size = 130000;
24
+ const query = `SELECT * FROM range(0,${range_size}) tbl(i)`;
25
+
26
+ db.arrowIPCStream(query).then(
27
+ () => Promise.reject(new Error('Expected method to reject.')),
28
+ err => {
29
+ assert(err.message.includes("Catalog Error: Function with name to_arrow_ipc is not on the catalog, but it exists in the arrow extension. To Install and Load the extension, run: INSTALL arrow; LOAD arrow;"))
30
+ }
31
+ );
32
+
33
+ db.arrowIPCAll(`SELECT * FROM ipc_table`, function (err, result) {
34
+ if (err) {
35
+ assert(err.message.includes("Catalog Error: Function with name to_arrow_ipc is not on the catalog, but it exists in the arrow extension. To Install and Load the extension, run: INSTALL arrow; LOAD arrow;"))
36
+ } else {
37
+ assert.fail("Expected error");
38
+ }
39
+ });
40
+
41
+ assert.throws(() => db.register_buffer("ipc_table", [1,'a',1], true), TypeError, "Incorrect parameters");
42
+ });
43
+
44
+ it('Register buffer should be disabled currently', function(done) {
45
+ try {
46
+ db.register_buffer();
47
+ assert(0);
48
+ } catch (error) {
49
+ assert(error.message.includes('Register buffer currently not implemented'))
50
+ }
51
+
52
+ try {
53
+ db.unregister_buffer();
54
+ assert(0);
55
+ } catch (error) {
56
+ assert(error.message.includes('Register buffer currently not implemented'))
57
+ }
58
+ done()
59
+ });
60
+ });
61
+ });
@@ -188,4 +188,10 @@ describe("data type support", function () {
188
188
  done();
189
189
  });
190
190
  });
191
+ it("converts unsupported data types to strings", function(done) {
192
+ db.all("SELECT CAST('11:10:10' AS TIME) as time", function(err, rows) {
193
+ assert.equal(rows[0].time, '11:10:10');
194
+ done();
195
+ });
196
+ });
191
197
  });
@@ -150,13 +150,13 @@ describe('open/close', function() {
150
150
  });
151
151
  });
152
152
 
153
- describe('closing with unfinalized statements', function(done) {
153
+ describe('closing with unfinalized statements', function() {
154
154
  var completed = false;
155
155
  var completedSecond = false;
156
156
  var closed = false;
157
157
 
158
158
  var db;
159
- before(function() {
159
+ before(function(done) {
160
160
  db = new sqlite3.Database(':memory:', done);
161
161
  });
162
162
 
@@ -24,17 +24,19 @@ function createdb(callback) {
24
24
  if (callback) callback();
25
25
  } else {
26
26
  console.log("Creating test database... This may take several minutes.");
27
- var db = new sqlite3.Database(db_path);
28
- db.serialize(function() {
29
- db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
- db.run("BEGIN TRANSACTION");
31
- var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
- for (var i = 0; i < count; i++) {
33
- stmt.run(i, randomString());
34
- }
35
- stmt.finalize();
36
- db.run("COMMIT TRANSACTION", function () {
37
- db.close(callback);
27
+ var db = new sqlite3.Database(db_path, () => {
28
+ db.serialize(function() {
29
+ db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
+ db.run("BEGIN TRANSACTION");
31
+ var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
+ for (var i = 0; i < count; i++) {
33
+ stmt.run(i, randomString());
34
+ }
35
+
36
+ stmt.finalize();
37
+ db.run("COMMIT TRANSACTION", function () {
38
+ db.close(callback);
39
+ });
38
40
  });
39
41
  });
40
42
  }
@@ -5,7 +5,7 @@ import fs from "fs";
5
5
  describe("TypeScript declarataions", function () {
6
6
  var db: duckdb.Database;
7
7
  before(function (done) {
8
- db = new duckdb.Database(":memory:", done);
8
+ db = new duckdb.Database(":memory:", duckdb.OPEN_READWRITE, done);
9
9
  });
10
10
 
11
11
  it("typescript: Database constructor no callback", (done) => {
@@ -13,17 +13,39 @@ describe("TypeScript declarataions", function () {
13
13
  done();
14
14
  });
15
15
 
16
+ it("Database.create -- read only flag", (done) => {
17
+ const roDb = new duckdb.Database(
18
+ ":memory:",
19
+ duckdb.OPEN_READONLY,
20
+ (err: duckdb.DuckDbError | null, res: any) => {
21
+ assert.equal(err?.code, "DUCKDB_NODEJS_ERROR");
22
+ assert.equal(err?.errno, -1);
23
+ const errMessage: string = err?.message ?? "";
24
+ assert(
25
+ errMessage.includes(
26
+ "Cannot launch in-memory database in read-only mode"
27
+ )
28
+ );
29
+ done();
30
+ }
31
+ );
32
+ });
33
+
16
34
  it("typescript: Database constructor path error", (done) => {
17
- const tdb0 = new duckdb.Database("./bogusPath.db", (err, res) => {
18
- // Issue: I'm a little surprised that specifying an invalid file path
19
- // doesn't seem to immediately signal an error here, but it doesn't.
20
- tdb0.all(
21
- "PRAGMA show_tables",
22
- (err: duckdb.DuckDbError | null, res: any) => {
23
- done();
24
- }
25
- );
26
- });
35
+ const tdb0 = new duckdb.Database(
36
+ "./bogusPath.db",
37
+ duckdb.OPEN_READWRITE,
38
+ (err, res) => {
39
+ // Issue: I'm a little surprised that specifying an invalid file path
40
+ // doesn't seem to immediately signal an error here, but it doesn't.
41
+ tdb0.all(
42
+ "PRAGMA show_tables",
43
+ (err: duckdb.DuckDbError | null, res: any) => {
44
+ done();
45
+ }
46
+ );
47
+ }
48
+ );
27
49
  });
28
50
 
29
51
  it("typescript: query with error", (done) => {
@@ -95,7 +117,7 @@ describe("TypeScript declarataions", function () {
95
117
  });
96
118
 
97
119
  it("typescript: ternary int udf", function (done) {
98
- db.register(
120
+ db.register_udf(
99
121
  "udf",
100
122
  "integer",
101
123
  (x: number, y: number, z: number) => x + y + z
@@ -107,7 +129,7 @@ describe("TypeScript declarataions", function () {
107
129
  assert.equal(rows[0].v, 42);
108
130
  }
109
131
  );
110
- db.unregister("udf", done);
132
+ db.unregister_udf("udf", done);
111
133
  });
112
134
  it("typescript: retrieve 100,000 rows with Statement#each", function (done) {
113
135
  var total = 100000;
@@ -136,7 +158,7 @@ describe("TypeScript declarataions", function () {
136
158
  describe("typescript: simple prepared statement", function () {
137
159
  var db: duckdb.Database;
138
160
  before(function (done) {
139
- db = new duckdb.Database(":memory:", done);
161
+ db = new duckdb.Database(":memory:", duckdb.OPEN_READWRITE, done);
140
162
  });
141
163
 
142
164
  it("should prepare, run and finalize the statement", function (done) {
@@ -151,7 +173,7 @@ describe("typescript: simple prepared statement", function () {
151
173
  describe("typescript: prepared statements", function () {
152
174
  var db: duckdb.Database;
153
175
  before(function (done) {
154
- db = new duckdb.Database(":memory:", done);
176
+ db = new duckdb.Database(":memory:", duckdb.OPEN_READWRITE, done);
155
177
  });
156
178
 
157
179
  var inserted = 0;
@@ -193,7 +215,7 @@ describe("typescript: stream and QueryResult", function () {
193
215
  let db: duckdb.Database;
194
216
  let conn: duckdb.Connection;
195
217
  before((done) => {
196
- db = new duckdb.Database(":memory:", () => {
218
+ db = new duckdb.Database(":memory:", duckdb.OPEN_READWRITE, () => {
197
219
  conn = new duckdb.Connection(db, done);
198
220
  });
199
221
  });