duckdb 0.5.2-dev2.0 → 0.5.2-dev2001.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7541,6 +7541,7 @@ class ParquetReader;
7541
7541
  using duckdb_apache::thrift::protocol::TProtocol;
7542
7542
 
7543
7543
  using duckdb_parquet::format::ColumnChunk;
7544
+ using duckdb_parquet::format::CompressionCodec;
7544
7545
  using duckdb_parquet::format::FieldRepetitionType;
7545
7546
  using duckdb_parquet::format::PageHeader;
7546
7547
  using duckdb_parquet::format::SchemaElement;
@@ -7558,7 +7559,7 @@ public:
7558
7559
  static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
7559
7560
  const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define,
7560
7561
  idx_t max_repeat);
7561
- virtual void InitializeRead(const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7562
+ virtual void InitializeRead(idx_t row_group_index, const std::vector<ColumnChunk> &columns, TProtocol &protocol_p);
7562
7563
  virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, uint8_t *define_out, uint8_t *repeat_out,
7563
7564
  Vector &result_out);
7564
7565
 
@@ -7578,7 +7579,7 @@ public:
7578
7579
  // register the range this reader will touch for prefetching
7579
7580
  virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
7580
7581
 
7581
- virtual unique_ptr<BaseStatistics> Stats(const std::vector<ColumnChunk> &columns);
7582
+ virtual unique_ptr<BaseStatistics> Stats(idx_t row_group_idx_p, const std::vector<ColumnChunk> &columns);
7582
7583
 
7583
7584
  protected:
7584
7585
  // readers that use the default Read() need to implement those
@@ -7617,9 +7618,10 @@ protected:
7617
7618
 
7618
7619
  private:
7619
7620
  void PrepareRead(parquet_filter_t &filter);
7620
- void PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size);
7621
+ void PreparePage(PageHeader &page_hdr);
7621
7622
  void PrepareDataPage(PageHeader &page_hdr);
7622
7623
  void PreparePageV2(PageHeader &page_hdr);
7624
+ void DecompressInternal(CompressionCodec::type codec, const char *src, idx_t src_size, char *dst, idx_t dst_size);
7623
7625
 
7624
7626
  const duckdb_parquet::format::ColumnChunk *chunk = nullptr;
7625
7627
 
@@ -7740,6 +7742,7 @@ struct ParquetOptions {
7740
7742
 
7741
7743
  bool binary_as_string = false;
7742
7744
  bool filename = false;
7745
+ bool file_row_number = false;
7743
7746
  bool hive_partitioning = false;
7744
7747
 
7745
7748
  public:
package/src/statement.cpp CHANGED
@@ -4,6 +4,9 @@
4
4
 
5
5
  #include <algorithm>
6
6
  #include <cassert>
7
+ #include <iostream>
8
+ #include <string>
9
+ #include <regex>
7
10
 
8
11
  namespace node_duckdb {
9
12
 
@@ -15,8 +18,8 @@ Napi::Object Statement::Init(Napi::Env env, Napi::Object exports) {
15
18
  Napi::Function t =
16
19
  DefineClass(env, "Statement",
17
20
  {InstanceMethod("run", &Statement::Run), InstanceMethod("all", &Statement::All),
18
- InstanceMethod("each", &Statement::Each), InstanceMethod("finalize", &Statement::Finish),
19
- InstanceMethod("stream", &Statement::Stream)});
21
+ InstanceMethod("arrowIPCAll", &Statement::ArrowIPCAll), InstanceMethod("each", &Statement::Each),
22
+ InstanceMethod("finalize", &Statement::Finish), InstanceMethod("stream", &Statement::Stream)});
20
23
 
21
24
  constructor = Napi::Persistent(t);
22
25
  constructor.SuppressDestruct();
@@ -225,8 +228,7 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
225
228
  value = object_value;
226
229
  } break;
227
230
  default:
228
- Napi::Error::New(env, "Data type is not supported " + dval.type().ToString()).ThrowAsJavaScriptException();
229
- return env.Null();
231
+ value = Napi::String::New(env, dval.ToString());
230
232
  }
231
233
 
232
234
  return value;
@@ -255,7 +257,7 @@ static Napi::Value convert_chunk(Napi::Env &env, std::vector<std::string> names,
255
257
  return scope.Escape(result);
256
258
  }
257
259
 
258
- enum RunType { RUN, EACH, ALL };
260
+ enum RunType { RUN, EACH, ALL, ARROW_ALL };
259
261
 
260
262
  struct StatementParam {
261
263
  std::vector<duckdb::Value> params;
@@ -275,7 +277,8 @@ struct RunPreparedTask : public Task {
275
277
  return;
276
278
  }
277
279
 
278
- result = statement.statement->Execute(params->params, run_type != RunType::ALL);
280
+ result =
281
+ statement.statement->Execute(params->params, run_type != RunType::ALL && run_type != RunType::ARROW_ALL);
279
282
  }
280
283
 
281
284
  void Callback() override {
@@ -350,6 +353,62 @@ struct RunPreparedTask : public Task {
350
353
 
351
354
  cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
352
355
  } break;
356
+ case RunType::ARROW_ALL: {
357
+ auto materialized_result = (duckdb::MaterializedQueryResult *)result.get();
358
+ // +1 leaves room for the 4 null bytes appended at the end of the stream
359
+ Napi::Array result_arr(Napi::Array::New(env, materialized_result->RowCount() + 1));
360
+
361
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
362
+ delete static_cast<std::shared_ptr<duckdb::QueryResult> *>(hint);
363
+ };
364
+
365
+ std::shared_ptr<duckdb::QueryResult> result_ptr = move(result);
366
+
367
+ duckdb::idx_t out_idx = 1;
368
+ while (true) {
369
+ auto chunk = result_ptr->Fetch();
370
+
371
+ if (!chunk || chunk->size() == 0) {
372
+ break;
373
+ }
374
+
375
+ D_ASSERT(chunk->ColumnCount() == 2);
376
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
377
+ D_ASSERT(chunk->data[1].GetType() == duckdb::LogicalType::BOOLEAN);
378
+
379
+ for (duckdb::idx_t row_idx = 0; row_idx < chunk->size(); row_idx++) {
380
+ duckdb::string_t blob = ((duckdb::string_t *)(chunk->data[0].GetData()))[row_idx];
381
+ bool is_header = chunk->data[1].GetData()[row_idx];
382
+
383
+ // Create shared pointer to give (shared) ownership to ArrayBuffer; note that for these materialized
384
+ // query results, the string data is owned by the QueryResult
385
+ auto result_ref_ptr = new std::shared_ptr<duckdb::QueryResult>(result_ptr);
386
+
387
+ auto array_buffer = Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(),
388
+ deleter, result_ref_ptr);
389
+
390
+ auto typed_array = Napi::Uint8Array::New(env, blob.GetSize(), array_buffer, 0);
391
+
392
+ // TODO we should handle this in duckdb probably
393
+ if (is_header) {
394
+ result_arr.Set((uint32_t)0, typed_array);
395
+ } else {
396
+ D_ASSERT(out_idx < materialized_result->RowCount());
397
+ result_arr.Set(out_idx++, typed_array);
398
+ }
399
+ }
400
+ }
401
+
402
+ // TODO we should handle this in duckdb probably
403
+ auto null_arr = Napi::Uint8Array::New(env, 4);
404
+ memset(null_arr.Data(), '\0', 4);
405
+ result_arr.Set(out_idx++, null_arr);
406
+
407
+ // Confirm all rows are set
408
+ D_ASSERT(out_idx == materialized_result->RowCount() + 1);
409
+
410
+ cb.MakeCallback(statement.Value(), {env.Null(), result_arr});
411
+ } break;
353
412
  }
354
413
  }
355
414
  std::unique_ptr<duckdb::QueryResult> result;
@@ -424,6 +483,12 @@ Napi::Value Statement::All(const Napi::CallbackInfo &info) {
424
483
  return info.This();
425
484
  }
426
485
 
486
+ Napi::Value Statement::ArrowIPCAll(const Napi::CallbackInfo &info) {
487
+ connection_ref->database_ref->Schedule(
488
+ info.Env(), duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::ARROW_ALL));
489
+ return info.This();
490
+ }
491
+
427
492
  Napi::Value Statement::Run(const Napi::CallbackInfo &info) {
428
493
  connection_ref->database_ref->Schedule(info.Env(),
429
494
  duckdb::make_unique<RunPreparedTask>(*this, HandleArgs(info), RunType::RUN));
@@ -472,7 +537,9 @@ Napi::FunctionReference QueryResult::constructor;
472
537
  Napi::Object QueryResult::Init(Napi::Env env, Napi::Object exports) {
473
538
  Napi::HandleScope scope(env);
474
539
 
475
- Napi::Function t = DefineClass(env, "QueryResult", {InstanceMethod("nextChunk", &QueryResult::NextChunk)});
540
+ Napi::Function t = DefineClass(env, "QueryResult",
541
+ {InstanceMethod("nextChunk", &QueryResult::NextChunk),
542
+ InstanceMethod("nextIpcBuffer", &QueryResult::NextIpcBuffer)});
476
543
 
477
544
  constructor = Napi::Persistent(t);
478
545
  constructor.SuppressDestruct();
@@ -522,6 +589,48 @@ struct GetChunkTask : public Task {
522
589
  std::unique_ptr<duckdb::DataChunk> chunk;
523
590
  };
524
591
 
592
+ struct GetNextArrowIpcTask : public Task {
593
+ GetNextArrowIpcTask(QueryResult &query_result, Napi::Promise::Deferred deferred)
594
+ : Task(query_result), deferred(deferred) {
595
+ }
596
+
597
+ void DoWork() override {
598
+ auto &query_result = Get<QueryResult>();
599
+ chunk = query_result.result->Fetch();
600
+ }
601
+
602
+ void DoCallback() override {
603
+ auto &query_result = Get<QueryResult>();
604
+ Napi::Env env = query_result.Env();
605
+ Napi::HandleScope scope(env);
606
+
607
+ if (chunk == nullptr || chunk->size() == 0) {
608
+ deferred.Resolve(env.Null());
609
+ return;
610
+ }
611
+
612
+ // Each Arrow IPC chunk should hold exactly one row with two columns; column 0 is the blob
613
+ D_ASSERT(chunk->size() == 1 && chunk->ColumnCount() == 2);
614
+ D_ASSERT(chunk->data[0].GetType() == duckdb::LogicalType::BLOB);
615
+
616
+ duckdb::string_t blob = *(duckdb::string_t *)(chunk->data[0].GetData());
617
+
618
+ // Transfer ownership and Construct ArrayBuffer
619
+ auto data_chunk_ptr = new std::unique_ptr<duckdb::DataChunk>();
620
+ *data_chunk_ptr = std::move(chunk);
621
+ auto deleter = [](Napi::Env, void *finalizeData, void *hint) {
622
+ delete static_cast<std::unique_ptr<duckdb::DataChunk> *>(hint);
623
+ };
624
+ auto array_buffer =
625
+ Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(), deleter, data_chunk_ptr);
626
+
627
+ deferred.Resolve(array_buffer);
628
+ }
629
+
630
+ Napi::Promise::Deferred deferred;
631
+ std::unique_ptr<duckdb::DataChunk> chunk;
632
+ };
633
+
525
634
  Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
526
635
  auto env = info.Env();
527
636
  auto deferred = Napi::Promise::Deferred::New(env);
@@ -530,4 +639,12 @@ Napi::Value QueryResult::NextChunk(const Napi::CallbackInfo &info) {
530
639
  return deferred.Promise();
531
640
  }
532
641
 
642
+ // Should only be called on an arrow ipc query
643
+ Napi::Value QueryResult::NextIpcBuffer(const Napi::CallbackInfo &info) {
644
+ auto env = info.Env();
645
+ auto deferred = Napi::Promise::Deferred::New(env);
646
+ database_ref->Schedule(env, duckdb::make_unique<GetNextArrowIpcTask>(*this, deferred));
647
+ return deferred.Promise();
648
+ }
649
+
533
650
  } // namespace node_duckdb
@@ -188,4 +188,10 @@ describe("data type support", function () {
188
188
  done();
189
189
  });
190
190
  });
191
+ it("converts unsupported data types to strings", function(done) {
192
+ db.all("SELECT CAST('11:10:10' AS TIME) as time", function(err, rows) {
193
+ assert.equal(rows[0].time, '11:10:10');
194
+ done();
195
+ });
196
+ });
191
197
  });
@@ -150,13 +150,13 @@ describe('open/close', function() {
150
150
  });
151
151
  });
152
152
 
153
- describe('closing with unfinalized statements', function(done) {
153
+ describe('closing with unfinalized statements', function() {
154
154
  var completed = false;
155
155
  var completedSecond = false;
156
156
  var closed = false;
157
157
 
158
158
  var db;
159
- before(function() {
159
+ before(function(done) {
160
160
  db = new sqlite3.Database(':memory:', done);
161
161
  });
162
162
 
@@ -24,17 +24,19 @@ function createdb(callback) {
24
24
  if (callback) callback();
25
25
  } else {
26
26
  console.log("Creating test database... This may take several minutes.");
27
- var db = new sqlite3.Database(db_path);
28
- db.serialize(function() {
29
- db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
- db.run("BEGIN TRANSACTION");
31
- var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
- for (var i = 0; i < count; i++) {
33
- stmt.run(i, randomString());
34
- }
35
- stmt.finalize();
36
- db.run("COMMIT TRANSACTION", function () {
37
- db.close(callback);
27
+ var db = new sqlite3.Database(db_path, () => {
28
+ db.serialize(function() {
29
+ db.run("CREATE TABLE foo (id INT, txt TEXT)");
30
+ db.run("BEGIN TRANSACTION");
31
+ var stmt = db.prepare("INSERT INTO foo VALUES(?, ?)");
32
+ for (var i = 0; i < count; i++) {
33
+ stmt.run(i, randomString());
34
+ }
35
+
36
+ stmt.finalize();
37
+ db.run("COMMIT TRANSACTION", function () {
38
+ db.close(callback);
39
+ });
38
40
  });
39
41
  });
40
42
  }
@@ -0,0 +1,209 @@
1
+ import * as duckdb from "..";
2
+ import assert from "assert";
3
+ import fs from "fs";
4
+
5
+ describe("TypeScript declarataions", function () {
6
+ var db: duckdb.Database;
7
+ before(function (done) {
8
+ db = new duckdb.Database(":memory:", done);
9
+ });
10
+
11
+ it("typescript: Database constructor no callback", (done) => {
12
+ const tdb0 = new duckdb.Database(":memory:"); // no callback argument
13
+ done();
14
+ });
15
+
16
+ it("typescript: Database constructor path error", (done) => {
17
+ const tdb0 = new duckdb.Database("./bogusPath.db", (err, res) => {
18
+ // Issue: I'm a little surprised that specifying an invalid file path
19
+ // doesn't seem to immediately signal an error here, but it doesn't.
20
+ tdb0.all(
21
+ "PRAGMA show_tables",
22
+ (err: duckdb.DuckDbError | null, res: any) => {
23
+ done();
24
+ }
25
+ );
26
+ });
27
+ });
28
+
29
+ it("typescript: query with error", (done) => {
30
+ // query with an error:
31
+ db.all(
32
+ "SELECT * FROM sometable",
33
+ (err: duckdb.DuckDbError | null, res: any) => {
34
+ assert.equal(err?.code, "DUCKDB_NODEJS_ERROR");
35
+ assert.equal(err?.errno, -1);
36
+ done();
37
+ }
38
+ );
39
+ });
40
+
41
+ it("typescript: Database#exec", function (done) {
42
+ var sql = fs.readFileSync("test/support/script.sql", "utf8");
43
+ db.exec(sql, function (err: duckdb.DuckDbError | null) {
44
+ if (err) throw err;
45
+ done();
46
+ });
47
+ });
48
+
49
+ it("typescript: retrieve database structure", function (done) {
50
+ db.all(
51
+ "SELECT type, name FROM sqlite_master ORDER BY type, name",
52
+ function (err: duckdb.DuckDbError | null, rows: duckdb.TableData) {
53
+ if (err) throw err;
54
+ assert.deepEqual(rows, [
55
+ { type: "table", name: "grid_key" },
56
+ { type: "table", name: "grid_utfgrid" },
57
+ { type: "table", name: "images" },
58
+ { type: "table", name: "keymap" },
59
+ { type: "table", name: "map" },
60
+ { type: "table", name: "metadata" },
61
+ { type: "view", name: "grid_data" },
62
+ { type: "view", name: "grids" },
63
+ { type: "view", name: "tiles" },
64
+ ]);
65
+ done();
66
+ }
67
+ );
68
+ });
69
+
70
+ it("typescript: database#all with no callback", (done) => {
71
+ db.all("select 42 as x");
72
+ done();
73
+ });
74
+
75
+ it("typescript: database#connect", (done) => {
76
+ const conn = db.connect();
77
+ assert(conn instanceof duckdb.Connection);
78
+ done();
79
+ });
80
+
81
+ it("typescript: ensure empty results work ok", (done) => {
82
+ db.all(
83
+ "create table test_table as select 42 as x",
84
+ (err: duckdb.DuckDbError | null, res: duckdb.TableData) => {
85
+ db.all(
86
+ "drop table test_table",
87
+ (err: duckdb.DuckDbError | null, res: duckdb.TableData) => {
88
+ console.log("drop table results: ", err, res);
89
+ assert.deepEqual(res, []);
90
+ done();
91
+ }
92
+ );
93
+ }
94
+ );
95
+ });
96
+
97
+ it("typescript: ternary int udf", function (done) {
98
+ db.register_udf(
99
+ "udf",
100
+ "integer",
101
+ (x: number, y: number, z: number) => x + y + z
102
+ );
103
+ db.all(
104
+ "select udf(21, 20, 1) v",
105
+ function (err: duckdb.DuckDbError | null, rows: duckdb.TableData) {
106
+ if (err) throw err;
107
+ assert.equal(rows[0].v, 42);
108
+ }
109
+ );
110
+ db.unregister_udf("udf", done);
111
+ });
112
+ it("typescript: retrieve 100,000 rows with Statement#each", function (done) {
113
+ var total = 100000;
114
+ var retrieved = 0;
115
+
116
+ db.each(
117
+ "SELECT * FROM range(0, ?)",
118
+ total,
119
+ function (err: duckdb.DuckDbError | null, row: any) {
120
+ if (err) throw err;
121
+ retrieved++;
122
+
123
+ if (retrieved === total) {
124
+ assert.equal(
125
+ retrieved,
126
+ total,
127
+ "Only retrieved " + retrieved + " out of " + total + " rows."
128
+ );
129
+ done();
130
+ }
131
+ }
132
+ );
133
+ });
134
+ });
135
+
136
+ describe("typescript: simple prepared statement", function () {
137
+ var db: duckdb.Database;
138
+ before(function (done) {
139
+ db = new duckdb.Database(":memory:", done);
140
+ });
141
+
142
+ it("should prepare, run and finalize the statement", function (done) {
143
+ db.prepare("CREATE TABLE foo (bar text)").run().finalize(done);
144
+ });
145
+
146
+ after(function (done) {
147
+ db.close(done);
148
+ });
149
+ });
150
+
151
+ describe("typescript: prepared statements", function () {
152
+ var db: duckdb.Database;
153
+ before(function (done) {
154
+ db = new duckdb.Database(":memory:", done);
155
+ });
156
+
157
+ var inserted = 0;
158
+ var retrieved = 0;
159
+
160
+ // We insert and retrieve that many rows.
161
+ var count = 1000;
162
+
163
+ it("typescript: should create the table", function (done) {
164
+ db.prepare("CREATE TABLE foo (txt text, num int, flt double, blb blob)")
165
+ .run()
166
+ .finalize(done);
167
+ });
168
+
169
+ it("typescript: should insert " + count + " rows", function (done) {
170
+ for (var i = 0; i < count; i++) {
171
+ db.prepare("INSERT INTO foo VALUES(?, ?, ?, ?)")
172
+ .run(
173
+ "String " + i,
174
+ i,
175
+ i * Math.PI,
176
+ null,
177
+ function (err: duckdb.DuckDbError | null) {
178
+ if (err) throw err;
179
+ inserted++;
180
+ }
181
+ )
182
+ .finalize(function (err) {
183
+ if (err) throw err;
184
+ if (inserted == count) done();
185
+ });
186
+ }
187
+ });
188
+ });
189
+
190
+ describe("typescript: stream and QueryResult", function () {
191
+ const total = 1000;
192
+
193
+ let db: duckdb.Database;
194
+ let conn: duckdb.Connection;
195
+ before((done) => {
196
+ db = new duckdb.Database(":memory:", () => {
197
+ conn = new duckdb.Connection(db, done);
198
+ });
199
+ });
200
+
201
+ it("streams results", async () => {
202
+ let retrieved = 0;
203
+ const stream = conn.stream("SELECT * FROM range(0, ?)", total);
204
+ for await (const row of stream) {
205
+ retrieved++;
206
+ }
207
+ assert.equal(total, retrieved);
208
+ });
209
+ });