duckdb 0.4.1-dev11.0 → 0.4.1-dev1101.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
1
+ #include "duckdb.hpp"
2
+ #include "duckdb_node.hpp"
3
+ #include "napi.h"
4
+
5
+ #include <thread>
6
+
7
+ namespace node_duckdb {
8
+
9
+ Napi::Array EncodeDataChunk(Napi::Env env, duckdb::DataChunk &chunk, bool with_types, bool with_data) {
10
+ Napi::Array col_descs(Napi::Array::New(env, chunk.ColumnCount()));
11
+ for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
12
+ auto col_desc = Napi::Object::New(env);
13
+
14
+ // Make sure we only have flat vectors hereafter (for now)
15
+ auto &chunk_vec = chunk.data[col_idx];
16
+ if (with_data) {
17
+ chunk_vec.Flatten(chunk.size());
18
+ }
19
+
20
+ // Do a post-order DFS traversal
21
+ std::vector<std::tuple<bool, duckdb::Vector *, Napi::Object, size_t, size_t>> pending;
22
+ pending.emplace_back(false, &chunk_vec, Napi::Object::New(env), 0, 0);
23
+
24
+ while (!pending.empty()) {
25
+ // Unpack DFS node
26
+ auto &back = pending.back();
27
+ auto &visited = std::get<0>(back);
28
+ auto &vec = std::get<1>(back);
29
+ auto &desc = std::get<2>(back);
30
+ auto &parent_idx = std::get<3>(back);
31
+ auto &idx_in_parent = std::get<4>(back);
32
+
33
+ // Already visited?
34
+ if (visited) {
35
+ if (pending.size() == 1) {
36
+ col_desc = desc;
37
+ break;
38
+ }
39
+ std::get<2>(pending[parent_idx]).Get("children").As<Napi::Array>().Set(idx_in_parent, desc);
40
+ pending.pop_back();
41
+ continue;
42
+ }
43
+ visited = true;
44
+ auto current_idx = pending.size() - 1;
45
+
46
+ // Store types
47
+ auto &vec_type = vec->GetType();
48
+ if (with_types) {
49
+ desc.Set("sqlType", vec_type.ToString());
50
+ desc.Set("physicalType", TypeIdToString(vec_type.InternalType()));
51
+ }
52
+
53
+ // Create validity vector
54
+ if (with_data) {
55
+ vec->Flatten(chunk.size());
56
+ auto &validity = duckdb::FlatVector::Validity(*vec);
57
+ auto validity_buffer = Napi::Uint8Array::New(env, chunk.size());
58
+ for (idx_t row_idx = 0; row_idx < chunk.size(); row_idx++) {
59
+ validity_buffer[row_idx] = validity.RowIsValid(row_idx);
60
+ }
61
+ desc.Set("validity", validity_buffer);
62
+ }
63
+
64
+ // Create data buffer
65
+ switch (vec_type.id()) {
66
+ case duckdb::LogicalTypeId::TINYINT: {
67
+ if (with_data) {
68
+ auto array = Napi::Int8Array::New(env, chunk.size());
69
+ auto data = duckdb::FlatVector::GetData<int8_t>(*vec);
70
+ for (size_t i = 0; i < chunk.size(); ++i) {
71
+ array[i] = data[i];
72
+ }
73
+ desc.Set("data", array);
74
+ }
75
+ break;
76
+ }
77
+ case duckdb::LogicalTypeId::SMALLINT: {
78
+ if (with_data) {
79
+ auto array = Napi::Int16Array::New(env, chunk.size());
80
+ auto data = duckdb::FlatVector::GetData<int16_t>(*vec);
81
+ for (size_t i = 0; i < chunk.size(); ++i) {
82
+ array[i] = data[i];
83
+ }
84
+ desc.Set("data", array);
85
+ }
86
+ break;
87
+ }
88
+ case duckdb::LogicalTypeId::INTEGER: {
89
+ if (with_data) {
90
+ auto array = Napi::Int32Array::New(env, chunk.size());
91
+ auto data = duckdb::FlatVector::GetData<int32_t>(*vec);
92
+ for (size_t i = 0; i < chunk.size(); ++i) {
93
+ array[i] = data[i];
94
+ }
95
+ desc.Set("data", array);
96
+ }
97
+ break;
98
+ }
99
+ case duckdb::LogicalTypeId::DOUBLE: {
100
+ if (with_data) {
101
+ auto array = Napi::Float64Array::New(env, chunk.size());
102
+ auto data = duckdb::FlatVector::GetData<double>(*vec);
103
+ for (size_t i = 0; i < chunk.size(); ++i) {
104
+ array[i] = data[i];
105
+ }
106
+ desc.Set("data", array);
107
+ }
108
+ break;
109
+ }
110
+ case duckdb::LogicalTypeId::BIGINT:
111
+ case duckdb::LogicalTypeId::TIME:
112
+ case duckdb::LogicalTypeId::TIME_TZ:
113
+ case duckdb::LogicalTypeId::TIMESTAMP_MS:
114
+ case duckdb::LogicalTypeId::TIMESTAMP_NS:
115
+ case duckdb::LogicalTypeId::TIMESTAMP_SEC:
116
+ case duckdb::LogicalTypeId::TIMESTAMP: {
117
+ if (with_data) {
118
+ #if NAPI_VERSION > 5
119
+ auto array = Napi::BigInt64Array::New(env, chunk.size());
120
+ auto data = duckdb::FlatVector::GetData<int64_t>(*vec);
121
+ #else
122
+ auto array = Napi::Float64Array::New(env, chunk.size());
123
+ auto data = duckdb::FlatVector::GetData<int64_t>(*vec);
124
+ #endif
125
+ for (size_t i = 0; i < chunk.size(); ++i) {
126
+ array[i] = data[i];
127
+ }
128
+ desc.Set("data", array);
129
+ }
130
+ break;
131
+ }
132
+ case duckdb::LogicalTypeId::UBIGINT: {
133
+ if (with_data) {
134
+ #if NAPI_VERSION > 5
135
+ auto array = Napi::BigUint64Array::New(env, chunk.size());
136
+ auto data = duckdb::FlatVector::GetData<uint64_t>(*vec);
137
+ #else
138
+ auto array = Napi::Float64Array::New(env, chunk.size());
139
+ auto data = duckdb::FlatVector::GetData<int64_t>(*vec);
140
+ #endif
141
+ for (size_t i = 0; i < chunk.size(); ++i) {
142
+ array[i] = data[i];
143
+ }
144
+ desc.Set("data", array);
145
+ }
146
+ break;
147
+ }
148
+ case duckdb::LogicalTypeId::BLOB:
149
+ case duckdb::LogicalTypeId::VARCHAR: {
150
+ if (with_data) {
151
+ auto array = Napi::Array::New(env, chunk.size());
152
+ auto data = duckdb::FlatVector::GetData<duckdb::string_t>(*vec);
153
+ for (size_t i = 0; i < chunk.size(); ++i) {
154
+ array.Set(i, data[i].GetString());
155
+ }
156
+ desc.Set("data", array);
157
+ }
158
+ break;
159
+ }
160
+ case duckdb::LogicalTypeId::STRUCT: {
161
+ auto child_count = duckdb::StructType::GetChildCount(vec_type);
162
+ auto &entries = duckdb::StructVector::GetEntries(*vec);
163
+ desc.Set("children", Napi::Array::New(env, child_count));
164
+ for (size_t i = 0; i < child_count; ++i) {
165
+ auto c = child_count - 1 - i;
166
+ auto &entry = entries[c];
167
+ auto desc = Napi::Object::New(env);
168
+ auto name = duckdb::StructType::GetChildName(vec_type, c);
169
+ desc.Set("name", name);
170
+ pending.emplace_back(false, entry.get(), desc, current_idx, i);
171
+ }
172
+ break;
173
+ }
174
+ default:
175
+ Napi::TypeError::New(env, "Unsupported UDF argument type " + vec->GetType().ToString())
176
+ .ThrowAsJavaScriptException();
177
+ break;
178
+ }
179
+ }
180
+ col_descs.Set(col_idx, col_desc);
181
+ }
182
+ return col_descs;
183
+ }
184
+
185
+ } // namespace node_duckdb
package/src/database.cpp CHANGED
@@ -1,4 +1,5 @@
1
1
  #include "duckdb_node.hpp"
2
+ #include "napi.h"
2
3
  #include "parquet-amalgamation.hpp"
3
4
 
4
5
  namespace node_duckdb {
@@ -22,17 +23,39 @@ Napi::Object Database::Init(Napi::Env env, Napi::Object exports) {
22
23
  }
23
24
 
24
25
  struct OpenTask : public Task {
25
- OpenTask(Database &database_, std::string filename_, bool read_only_, Napi::Function callback_)
26
- : Task(database_, callback_), filename(filename_), read_only(read_only_) {
26
+ OpenTask(Database &database_, std::string filename_, duckdb::AccessMode access_mode_, Napi::Object config_,
27
+ Napi::Function callback_)
28
+ : Task(database_, callback_), filename(filename_) {
29
+
30
+ duckdb_config.options.access_mode = access_mode_;
31
+ Napi::Env env = database_.Env();
32
+ Napi::HandleScope scope(env);
33
+
34
+ if (!config_.IsUndefined()) {
35
+ const Napi::Array config_names = config_.GetPropertyNames();
36
+
37
+ for (duckdb::idx_t config_idx = 0; config_idx < config_names.Length(); config_idx++) {
38
+ std::string key = config_names.Get(config_idx).As<Napi::String>();
39
+ std::string val = config_.Get(key).As<Napi::String>();
40
+ auto config_property = duckdb::DBConfig::GetOptionByName(key);
41
+ if (!config_property) {
42
+ Napi::TypeError::New(env, "Unrecognized configuration property" + key).ThrowAsJavaScriptException();
43
+ return;
44
+ }
45
+ try {
46
+ duckdb_config.SetOption(*config_property, duckdb::Value(val));
47
+ } catch (std::exception &e) {
48
+ Napi::TypeError::New(env, "Failed to set configuration option " + key + ": " + e.what())
49
+ .ThrowAsJavaScriptException();
50
+ return;
51
+ }
52
+ }
53
+ }
27
54
  }
28
55
 
29
56
  void DoWork() override {
30
57
  try {
31
- duckdb::DBConfig config;
32
- if (read_only) {
33
- config.access_mode = duckdb::AccessMode::READ_ONLY;
34
- }
35
- Get<Database>().database = duckdb::make_unique<duckdb::DuckDB>(filename, &config);
58
+ Get<Database>().database = duckdb::make_unique<duckdb::DuckDB>(filename, &duckdb_config);
36
59
  duckdb::ParquetExtension extension;
37
60
  extension.Load(*Get<Database>().database);
38
61
  success = true;
@@ -59,22 +82,35 @@ struct OpenTask : public Task {
59
82
  }
60
83
 
61
84
  std::string filename;
62
- bool read_only = false;
85
+ duckdb::DBConfig duckdb_config;
63
86
  std::string error = "";
64
87
  bool success = false;
65
88
  };
66
89
 
67
- Database::Database(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Database>(info), task_inflight(false) {
90
+ Database::Database(const Napi::CallbackInfo &info)
91
+ : Napi::ObjectWrap<Database>(info), task_inflight(false), env(info.Env()) {
68
92
  auto env = info.Env();
93
+
69
94
  if (info.Length() < 1 || !info[0].IsString()) {
70
95
  Napi::TypeError::New(env, "Database location expected").ThrowAsJavaScriptException();
71
96
  return;
72
97
  }
73
98
  std::string filename = info[0].As<Napi::String>();
74
99
  unsigned int pos = 1;
100
+
101
+ duckdb::AccessMode access_mode = duckdb::AccessMode::AUTOMATIC;
102
+
75
103
  int mode = 0;
76
104
  if (info.Length() >= pos && info[pos].IsNumber() && Utils::OtherIsInt(info[pos].As<Napi::Number>())) {
77
105
  mode = info[pos++].As<Napi::Number>().Int32Value();
106
+ if (mode == DUCKDB_NODEJS_READONLY) {
107
+ access_mode = duckdb::AccessMode::READ_ONLY;
108
+ }
109
+ }
110
+
111
+ Napi::Object config;
112
+ if (info.Length() >= pos && info[pos].IsObject() && !info[pos].IsFunction()) {
113
+ config = info[pos++].As<Napi::Object>();
78
114
  }
79
115
 
80
116
  Napi::Function callback;
@@ -82,7 +118,11 @@ Database::Database(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Database>(
82
118
  callback = info[pos++].As<Napi::Function>();
83
119
  }
84
120
 
85
- Schedule(env, duckdb::make_unique<OpenTask>(*this, filename, mode == DUCKDB_NODEJS_READONLY, callback));
121
+ Schedule(env, duckdb::make_unique<OpenTask>(*this, filename, access_mode, config, callback));
122
+ }
123
+
124
+ Database::~Database() {
125
+ Napi::MemoryManagement::AdjustExternalMemory(env, -bytes_allocated);
86
126
  }
87
127
 
88
128
  void Database::Schedule(Napi::Env env, std::unique_ptr<Task> task) {
@@ -93,17 +133,15 @@ void Database::Schedule(Napi::Env env, std::unique_ptr<Task> task) {
93
133
  Process(env);
94
134
  }
95
135
 
96
- static void task_execute(napi_env e, void *data) {
136
+ static void TaskExecuteCallback(napi_env e, void *data) {
97
137
  auto holder = (TaskHolder *)data;
98
138
  holder->task->DoWork();
99
139
  }
100
140
 
101
- static void task_complete(napi_env e, napi_status status, void *data) {
141
+ static void TaskCompleteCallback(napi_env e, napi_status status, void *data) {
102
142
  std::unique_ptr<TaskHolder> holder((TaskHolder *)data);
103
143
  holder->db->TaskComplete(e);
104
- if (holder->task->callback.Value().IsFunction()) {
105
- holder->task->Callback();
106
- }
144
+ holder->task->DoCallback();
107
145
  }
108
146
 
109
147
  void Database::TaskComplete(Napi::Env env) {
@@ -112,6 +150,16 @@ void Database::TaskComplete(Napi::Env env) {
112
150
  task_inflight = false;
113
151
  }
114
152
  Process(env);
153
+
154
+ if (database) {
155
+ // Bookkeeping: tell node (and the node GC in particular) how much
156
+ // memory we're using, such that it can make better decisions on when to
157
+ // trigger collections.
158
+ auto &buffer_manager = duckdb::BufferManager::GetBufferManager(*database->instance);
159
+ auto current_bytes = buffer_manager.GetUsedMemory();
160
+ Napi::MemoryManagement::AdjustExternalMemory(env, current_bytes - bytes_allocated);
161
+ bytes_allocated = current_bytes;
162
+ }
115
163
  }
116
164
 
117
165
  void Database::Process(Napi::Env env) {
@@ -131,8 +179,8 @@ void Database::Process(Napi::Env env) {
131
179
  holder->task = move(task);
132
180
  holder->db = this;
133
181
 
134
- napi_create_async_work(env, NULL, Napi::String::New(env, "duckdb.Database.Task"), task_execute, task_complete,
135
- holder, &holder->request);
182
+ napi_create_async_work(env, nullptr, Napi::String::New(env, "duckdb.Database.Task"), TaskExecuteCallback,
183
+ TaskCompleteCallback, holder, &holder->request);
136
184
 
137
185
  napi_queue_async_work(env, holder->request);
138
186
  }
@@ -157,7 +205,7 @@ Napi::Value Database::Serialize(const Napi::CallbackInfo &info) {
157
205
  }
158
206
 
159
207
  struct WaitTask : public Task {
160
- WaitTask(Database &database_, Napi::Function callback_) : Task(database_, callback_) {
208
+ WaitTask(Database &database, Napi::Function callback) : Task(database, callback) {
161
209
  }
162
210
 
163
211
  void DoWork() override {
@@ -171,7 +219,7 @@ Napi::Value Database::Wait(const Napi::CallbackInfo &info) {
171
219
  }
172
220
 
173
221
  struct CloseTask : public Task {
174
- CloseTask(Database &database_, Napi::Function callback_) : Task(database_, callback_) {
222
+ CloseTask(Database &database, Napi::Function callback) : Task(database, callback) {
175
223
  }
176
224
 
177
225
  void DoWork() override {