duckdb 0.5.2-dev194.0 → 0.5.2-dev1940.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7559,7 +7559,7 @@ public:
7559
7559
  static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
7560
7560
  const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define,
7561
7561
  idx_t max_repeat);
7562
- virtual void InitializeRead(const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7562
+ virtual void InitializeRead(idx_t row_group_index, const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7563
7563
  virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out,
7564
7564
  Vector &result_out);
7565
7565
 
@@ -7579,7 +7579,7 @@ public:
7579
7579
  // register the range this reader will touch for prefetching
7580
7580
  virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
7581
7581
 
7582
- virtual unique_ptr<BaseStatistics> Stats(const std::vector<ColumnChunk> &columns);
7582
+ virtual unique_ptr<BaseStatistics> Stats(idx_t row_group_idx_p, const std::vector<ColumnChunk> &columns);
7583
7583
 
7584
7584
  protected:
7585
7585
  // readers that use the default Read() need to implement those
@@ -7742,6 +7742,7 @@ struct ParquetOptions {
7742
7742
 
7743
7743
  bool binary_as_string = false;
7744
7744
  bool filename = false;
7745
+ bool file_row_number = false;
7745
7746
  bool hive_partitioning = false;
7746
7747
 
7747
7748
  public:
package/src/statement.cpp CHANGED
@@ -4,6 +4,9 @@
4
4
 
5
5
  #include <algorithm>
6
6
  #include <cassert>
7
+ #include <iostream>
8
+ #include <string>
9
+ #include <regex>
7
10
 
8
11
  namespace node_duckdb {
9
12
 
@@ -15,8 +18,8 @@ Napi::Object Statement::Init(Napi::Env env, Napi::Object exports) {
15
18
  Napi::Function t =
16
19
  DefineClass(env, "Statement",
17
20
  {InstanceMethod("run", &Statement::Run), InstanceMethod("all", &Statement::All),
18
- InstanceMethod("each", &Statement::Each), InstanceMethod("finalize", &Statement::Finish),
19
- InstanceMethod("stream", &Statement::Stream)});
21
+ InstanceMethod("arrowIPCAll", &Statement::ArrowIPCAll), InstanceMethod("each", &Statement::Each),
22
+ InstanceMethod("finalize", &Statement::Finish), InstanceMethod("stream", &Statement::Stream)});
20
23
 
21
24
  constructor = Napi::Persistent(t);
22
25
  constructor.SuppressDestruct();
@@ -225,8 +228,7 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
225
228
  value = object_value;
226
229
  } break;
227
230
  default:
228
- Napi::Error::New(env, "Data type is not supported " + dval.type().ToString()).ThrowAsJavaScriptException();
229
- return env.Null();
231
+ value = Napi::String::New(env, dval.ToString());
230
232
  }
231
233
 
232
234
  return value;
@@ -255,7 +257,7 @@ static Napi::Value convert_chunk(Napi::Env &env, std::vector<std::string> names,
255
257
  return scope.Escape(result);
256
258
  }
257
259
 
258
- enum RunType { RUN, EACH, ALL };
260
+ enum RunType { RUN, EACH, ALL, ARROW_ALL };
259
261
 
260
262
  struct StatementParam {
261
263
  std::vector<duckdb::Value> params;
@@ -275,7 +277,8 @@ struct RunPreparedTask : public Task {
275
277
  return;
276
278
  }
277
279
 
278
- result = statement.statement->Execute(params->params, run_type != RunType::ALL);
280
+ result =
281
+ statement.statement->Execute(params->params, run_type != RunType::ALL && run_type != RunType::ARROW_ALL);
279
282
  }
280
283
 
281
284
  void Callback() override {
@@ -350,6 +353,62 @@ struct RunPreparedTask : public Task {
350
353
 
351
354
  cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
352
355
  } break;
356
+ case RunType::ARROW_ALL: {
357
+ auto materialized_result = (duckdb::MaterializedQueryResult *)result.get();
358
+ // +1 is for null bytes at end of stream
359
+ Napi::Array result_arr(Napi::Array::New(env, materialized_result->RowCount() + 1));
360
+
361
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
362
+ delete static_cast<std::shared_ptr<duckdb::QueryResult> *>(hint);
363
+ };
364
+
365
+ std::shared_ptr<duckdb::QueryResult> result_ptr = move(result);
366
+
367
+ duckdb::idx_t out_idx = 1;
368
+ while (true) {
369
+ auto chunk = result_ptr->Fetch();
370
+
371
+ if (!chunk || chunk->size() == 0) {
372
+ break;
373
+ }
374
+
375
+ D_ASSERT(chunk->ColumnCount() == 2);
376
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
377
+ D_ASSERT(chunk->data[1].GetType() == duckdb::LogicalType::BOOLEAN);
378
+
379
+ for (duckdb::idx_t row_idx = 0; row_idx < chunk->size(); row_idx++) {
380
+ duckdb::string_t blob = ((duckdb::string_t *)(chunk->data[0].GetData()))[row_idx];
381
+ bool is_header = chunk->data[1].GetData()[row_idx];
382
+
383
+ // Create shared pointer to give (shared) ownership to ArrayBuffer, not that for these materialized
384
+ // query results, the string data is owned by the QueryResult
385
+ auto result_ref_ptr = new std::shared_ptr<duckdb::QueryResult>(result_ptr);
386
+
387
+ auto array_buffer = Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(),
388
+ deleter, result_ref_ptr);
389
+
390
+ auto typed_array = Napi::Uint8Array::New(env, blob.GetSize(), array_buffer, 0);
391
+
392
+ // TODO we should handle this in duckdb probably
393
+ if (is_header) {
394
+ result_arr.Set((uint32_t)0, typed_array);
395
+ } else {
396
+ D_ASSERT(out_idx < materialized_result->RowCount());
397
+ result_arr.Set(out_idx++, typed_array);
398
+ }
399
+ }
400
+ }
401
+
402
+ // TODO we should handle this in duckdb probably
403
+ auto null_arr = Napi::Uint8Array::New(env, 4);
404
+ memset(null_arr.Data(), '\0', 4);
405
+ result_arr.Set(out_idx++, null_arr);
406
+
407
+ // Confirm all rows are set
408
+ D_ASSERT(out_idx == materialized_result->RowCount() + 1);
409
+
410
+ cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
411
+ } break;
353
412
  }
354
413
  }
355
414
  std::unique_ptr<duckdb::QueryResult> result;
@@ -424,6 +483,12 @@ Napi::Value Statement::All(const Napi::CallbackInfo &info) {
424
483
  return info.This();
425
484
  }
426
485
 
486
+ Napi::Value Statement::ArrowIPCAll(const Napi::CallbackInfo &info) {
487
+ connection_ref->database_ref->Schedule(
488
+ info.Env(), duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::ARROW_ALL));
489
+ return info.This();
490
+ }
491
+
427
492
  Napi::Value Statement::Run(const Napi::CallbackInfo &info) {
428
493
  connection_ref->database_ref->Schedule(info.Env(),
429
494
  duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::RUN));
@@ -472,7 +537,9 @@ Napi::FunctionReference QueryResult::constructor;
472
537
  Napi::Object QueryResult::Init(Napi::Env env, Napi::Object exports) {
473
538
  Napi::HandleScope scope(env);
474
539
 
475
- Napi::Function t = DefineClass(env, "QueryResult", {InstanceMethod("nextChunk", &QueryResult::NextChunk)});
540
+ Napi::Function t = DefineClass(env, "QueryResult",
541
+ {InstanceMethod("nextChunk", &QueryResult::NextChunk),
542
+ InstanceMethod("nextIpcBuffer", &QueryResult::NextIpcBuffer)});
476
543
 
477
544
  constructor = Napi::Persistent(t);
478
545
  constructor.SuppressDestruct();
@@ -522,6 +589,48 @@ struct GetChunkTask : public Task {
522
589
  std::unique_ptr<duckdb::DataChunk> chunk;
523
590
  };
524
591
 
592
+ struct GetNextArrowIpcTask : public Task {
593
+ GetNextArrowIpcTask(QueryResult &query_result, Napi::Promise::Deferred deferred)
594
+ : Task(query_result), deferred(deferred) {
595
+ }
596
+
597
+ void DoWork() override {
598
+ auto &query_result = Get<QueryResult>();
599
+ chunk = query_result.result->Fetch();
600
+ }
601
+
602
+ void DoCallback() override {
603
+ auto &query_result = Get<QueryResult>();
604
+ Napi::Env env = query_result.Env();
605
+ Napi::HandleScope scope(env);
606
+
607
+ if (chunk == nullptr || chunk->size() == 0) {
608
+ deferred.Resolve(env.Null());
609
+ return;
610
+ }
611
+
612
+ // Each Arrow IPC chunk should be a single row with two columns (blob payload + header flag)
613
+ D_ASSERT(chunk->size() == 1 && chunk->ColumnCount() == 2);
614
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
615
+
616
+ duckdb::string_t blob = *(duckdb::string_t *)(chunk->data[0].GetData());
617
+
618
+ // Transfer ownership and Construct ArrayBuffer
619
+ auto data_chunk_ptr = new std::unique_ptr<duckdb::DataChunk>();
620
+ *data_chunk_ptr = std::move(chunk);
621
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
622
+ delete static_cast<std::unique_ptr<duckdb::DataChunk> *>(hint);
623
+ };
624
+ auto array_buffer =
625
+ Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(), deleter, data_chunk_ptr);
626
+
627
+ deferred.Resolve(array_buffer);
628
+ }
629
+
630
+ Napi::Promise::Deferred deferred;
631
+ std::unique_ptr<duckdb::DataChunk> chunk;
632
+ };
633
+
525
634
  Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
526
635
  auto env = info.Env();
527
636
  auto deferred = Napi::Promise::Deferred::New(env);
@@ -530,4 +639,12 @@ Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
530
639
  return deferred.Promise();
531
640
  }
532
641
 
642
+ // Should only be called on an arrow ipc query
643
+ Napi::Value QueryResult::NextIpcBuffer(const Napi::CallbackInfo &info) {
644
+ auto env = info.Env();
645
+ auto deferred = Napi::Promise::Deferred::New(env);
646
+ database_ref->Schedule(env, duckdb::make_unique<GetNextArrowIpcTask>(*this, deferred));
647
+ return deferred.Promise();
648
+ }
649
+
533
650
  } // namespace node_duckdb
@@ -188,4 +188,10 @@ describe("data type support", function () {
188
188
  done();
189
189
  });
190
190
  });
191
+ it("converts unsupported data types to strings", function(done) {
192
+ db.all("SELECT CAST('11:10:10' AS TIME) as time", function(err, rows) {
193
+ assert.equal(rows[0].time, '11:10:10');
194
+ done();
195
+ });
196
+ });
191
197
  });
@@ -150,13 +150,13 @@ describe('open/close', function() {
150
150
  });
151
151
  });
152
152
 
153
- describe('closing with unfinalized statements', function(done) {
153
+ describe('closing with unfinalized statements', function() {
154
154
  var completed = false;
155
155
  var completedSecond = false;
156
156
  var closed = false;
157
157
 
158
158
  var db;
159
- before(function() {
159
+ before(function(done) {
160
160
  db = new sqlite3.Database(':memory:', done);
161
161
  });
162
162
 
@@ -24,17 +24,19 @@ function createdb(callback) {
24
24
  if (callback) callback();
25
25
  } else {
26
26
  console.log("Creating test database... This may take several minutes.");
27
- var db = new sqlite3.Database(db_path);
28
- db.serialize(function() {
29
- db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
- db.run("BEGIN TRANSACTION");
31
- var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
- for (var i = 0; i < count; i++) {
33
- stmt.run(i, randomString());
34
- }
35
- stmt.finalize();
36
- db.run("COMMIT TRANSACTION", function () {
37
- db.close(callback);
27
+ var db = new sqlite3.Database(db_path, () => {
28
+ db.serialize(function() {
29
+ db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
+ db.run("BEGIN TRANSACTION");
31
+ var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
+ for (var i = 0; i < count; i++) {
33
+ stmt.run(i, randomString());
34
+ }
35
+
36
+ stmt.finalize();
37
+ db.run("COMMIT TRANSACTION", function () {
38
+ db.close(callback);
39
+ });
38
40
  });
39
41
  });
40
42
  }
@@ -0,0 +1,209 @@
1
+ import * as duckdb from "..";
2
+ import assert from "assert";
3
+ import fs from "fs";
4
+
5
+ describe("TypeScript declarations", function () {
6
+ var db: duckdb.Database;
7
+ before(function (done) {
8
+ db = new duckdb.Database(":memory:", done);
9
+ });
10
+
11
+ it("typescript: Database constructor no callback", (done) => {
12
+ const tdb0 = new duckdb.Database(":memory:"); // no callback argument
13
+ done();
14
+ });
15
+
16
+ it("typescript: Database constructor path error", (done) => {
17
+ const tdb0 = new duckdb.Database("./bogusPath.db", (err, res) => {
18
+ // Issue: I'm a little surprised that specifying an invalid file path
19
+ // doesn't seem to immediately signal an error here, but it doesn't.
20
+ tdb0.all(
21
+ "PRAGMA show_tables",
22
+ (err: duckdb.DuckDbError | null, res: any) => {
23
+ done();
24
+ }
25
+ );
26
+ });
27
+ });
28
+
29
+ it("typescript: query with error", (done) => {
30
+ // query with an error:
31
+ db.all(
32
+ "SELECT * FROM sometable",
33
+ (err: duckdb.DuckDbError | null, res: any) => {
34
+ assert.equal(err?.code, "DUCKDB_NODEJS_ERROR");
35
+ assert.equal(err?.errno, -1);
36
+ done();
37
+ }
38
+ );
39
+ });
40
+
41
+ it("typescript: Database#exec", function (done) {
42
+ var sql = fs.readFileSync("test/support/script.sql", "utf8");
43
+ db.exec(sql, function (err: duckdb.DuckDbError | null) {
44
+ if (err) throw err;
45
+ done();
46
+ });
47
+ });
48
+
49
+ it("typescript: retrieve database structure", function (done) {
50
+ db.all(
51
+ "SELECT type, name FROM sqlite_master ORDER BY type, name",
52
+ function (err: duckdb.DuckDbError | null, rows: duckdb.TableData) {
53
+ if (err) throw err;
54
+ assert.deepEqual(rows, [
55
+ { type: "table", name: "grid_key" },
56
+ { type: "table", name: "grid_utfgrid" },
57
+ { type: "table", name: "images" },
58
+ { type: "table", name: "keymap" },
59
+ { type: "table", name: "map" },
60
+ { type: "table", name: "metadata" },
61
+ { type: "view", name: "grid_data" },
62
+ { type: "view", name: "grids" },
63
+ { type: "view", name: "tiles" },
64
+ ]);
65
+ done();
66
+ }
67
+ );
68
+ });
69
+
70
+ it("typescript: database#all with no callback", (done) => {
71
+ db.all("select 42 as x");
72
+ done();
73
+ });
74
+
75
+ it("typescript: database#connect", (done) => {
76
+ const conn = db.connect();
77
+ assert(conn instanceof duckdb.Connection);
78
+ done();
79
+ });
80
+
81
+ it("typescript: ensure empty results work ok", (done) => {
82
+ db.all(
83
+ "create table test_table as select 42 as x",
84
+ (err: duckdb.DuckDbError | null, res: duckdb.TableData) => {
85
+ db.all(
86
+ "drop table test_table",
87
+ (err: duckdb.DuckDbError | null, res: duckdb.TableData) => {
88
+ console.log("drop table results: ", err, res);
89
+ assert.deepEqual(res, []);
90
+ done();
91
+ }
92
+ );
93
+ }
94
+ );
95
+ });
96
+
97
+ it("typescript: ternary int udf", function (done) {
98
+ db.register_udf(
99
+ "udf",
100
+ "integer",
101
+ (x: number, y: number, z: number) => x + y + z
102
+ );
103
+ db.all(
104
+ "select udf(21, 20, 1) v",
105
+ function (err: duckdb.DuckDbError | null, rows: duckdb.TableData) {
106
+ if (err) throw err;
107
+ assert.equal(rows[0].v, 42);
108
+ }
109
+ );
110
+ db.unregister_udf("udf", done);
111
+ });
112
+ it("typescript: retrieve 100,000 rows with Statement#each", function (done) {
113
+ var total = 100000;
114
+ var retrieved = 0;
115
+
116
+ db.each(
117
+ "SELECT * FROM range(0, ?)",
118
+ total,
119
+ function (err: duckdb.DuckDbError | null, row: any) {
120
+ if (err) throw err;
121
+ retrieved++;
122
+
123
+ if (retrieved === total) {
124
+ assert.equal(
125
+ retrieved,
126
+ total,
127
+ "Only retrieved " + retrieved + " out of " + total + " rows."
128
+ );
129
+ done();
130
+ }
131
+ }
132
+ );
133
+ });
134
+ });
135
+
136
+ describe("typescript: simple prepared statement", function () {
137
+ var db: duckdb.Database;
138
+ before(function (done) {
139
+ db = new duckdb.Database(":memory:", done);
140
+ });
141
+
142
+ it("should prepare, run and finalize the statement", function (done) {
143
+ db.prepare("CREATE TABLE foo (bar text)").run().finalize(done);
144
+ });
145
+
146
+ after(function (done) {
147
+ db.close(done);
148
+ });
149
+ });
150
+
151
+ describe("typescript: prepared statements", function () {
152
+ var db: duckdb.Database;
153
+ before(function (done) {
154
+ db = new duckdb.Database(":memory:", done);
155
+ });
156
+
157
+ var inserted = 0;
158
+ var retrieved = 0;
159
+
160
+ // We insert and retrieve that many rows.
161
+ var count = 1000;
162
+
163
+ it("typescript: should create the table", function (done) {
164
+ db.prepare("CREATE TABLE foo (txt text, num int, flt double, blb blob)")
165
+ .run()
166
+ .finalize(done);
167
+ });
168
+
169
+ it("typescript: should insert " + count + " rows", function (done) {
170
+ for (var i = 0; i < count; i++) {
171
+ db.prepare("INSERT INTO foo VALUES(?, ?, ?, ?)")
172
+ .run(
173
+ "String " + i,
174
+ i,
175
+ i * Math.PI,
176
+ null,
177
+ function (err: duckdb.DuckDbError | null) {
178
+ if (err) throw err;
179
+ inserted++;
180
+ }
181
+ )
182
+ .finalize(function (err) {
183
+ if (err) throw err;
184
+ if (inserted == count) done();
185
+ });
186
+ }
187
+ });
188
+ });
189
+
190
+ describe("typescript: stream and QueryResult", function () {
191
+ const total = 1000;
192
+
193
+ let db: duckdb.Database;
194
+ let conn: duckdb.Connection;
195
+ before((done) => {
196
+ db = new duckdb.Database(":memory:", () => {
197
+ conn = new duckdb.Connection(db, done);
198
+ });
199
+ });
200
+
201
+ it("streams results", async () => {
202
+ let retrieved = 0;
203
+ const stream = conn.stream("SELECT * FROM range(0, ?)", total);
204
+ for await (const row of stream) {
205
+ retrieved++;
206
+ }
207
+ assert.equal(total, retrieved);
208
+ });
209
+ });