duckdb 0.8.2-dev2283.0 → 0.8.2-dev2356.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -0
  4. package/src/duckdb/src/common/adbc/adbc.cpp +8 -3
  5. package/src/duckdb/src/common/arrow/arrow_appender.cpp +4 -4
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +27 -26
  7. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -43
  8. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  9. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +3 -3
  10. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +1 -1
  11. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +4 -3
  12. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -3
  13. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
  14. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
  15. package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
  16. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +1 -2
  17. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
  18. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +45 -0
  19. package/src/duckdb/src/include/duckdb/main/client_config.hpp +0 -2
  20. package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -0
  21. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  22. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -1
  23. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
  24. package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
  25. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -15
  26. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
  27. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
  28. package/src/duckdb/src/main/capi/arrow-c.cpp +1 -7
  29. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
  30. package/src/duckdb/src/main/chunk_scan_state.cpp +42 -0
  31. package/src/duckdb/src/main/client_context.cpp +15 -2
  32. package/src/duckdb/src/main/database.cpp +0 -9
  33. package/src/duckdb/src/main/query_result.cpp +0 -21
  34. package/src/duckdb/src/parser/transformer.cpp +2 -16
  35. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
  36. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +0 -28
  37. package/src/duckdb/src/planner/expression_binder.cpp +20 -0
  38. package/src/duckdb/ub_src_main.cpp +2 -0
  39. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
package/binding.gyp CHANGED
@@ -93,6 +93,7 @@
  "src/duckdb/ub_src_main.cpp",
  "src/duckdb/ub_src_main_capi.cpp",
  "src/duckdb/ub_src_main_capi_cast.cpp",
+ "src/duckdb/ub_src_main_chunk_scan_state.cpp",
  "src/duckdb/ub_src_main_extension.cpp",
  "src/duckdb/ub_src_main_relation.cpp",
  "src/duckdb/ub_src_main_settings.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev2283.0",
+ "version": "0.8.2-dev2356.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
package/src/duckdb/extension/parquet/parquet_writer.cpp CHANGED
@@ -297,6 +297,7 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro
  // set up a new row group for this chunk collection
  auto &row_group = result.row_group;
  row_group.num_rows = buffer.Count();
+ row_group.total_byte_size = buffer.SizeInBytes();
  row_group.__isset.file_offset = true;

  auto &states = result.states;
package/src/duckdb/src/common/adbc/adbc.cpp CHANGED
@@ -202,14 +202,15 @@ AdbcStatusCode ConnectionGetTableSchema(struct AdbcConnection *connection, const
  SetError(error, "Connection is not set");
  return ADBC_STATUS_INVALID_ARGUMENT;
  }
+ if (db_schema == nullptr) {
+ // if schema is not set, we use the default schema
+ db_schema = "main";
+ }
  if (catalog != nullptr && strlen(catalog) > 0) {
  // In DuckDB this is the name of the database, not sure what's the expected functionality here, so for now,
  // scream.
  SetError(error, "Catalog Name is not used in DuckDB. It must be set to nullptr or an empty string");
  return ADBC_STATUS_NOT_IMPLEMENTED;
- } else if (db_schema == nullptr) {
- SetError(error, "AdbcConnectionGetTableSchema: must provide db_schema");
- return ADBC_STATUS_INVALID_ARGUMENT;
  } else if (table_name == nullptr) {
  SetError(error, "AdbcConnectionGetTableSchema: must provide table_name");
  return ADBC_STATUS_INVALID_ARGUMENT;
@@ -686,6 +687,10 @@ AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArra
  AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog,
  const char *db_schema, const char *table_name, const char **table_type,
  const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) {
+ if (depth != 0) {
+ SetError(error, "Depth parameter not yet supported");
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+ }
  if (catalog != nullptr) {
  if (strcmp(catalog, "duckdb") == 0) {
  SetError(error, "catalog must be NULL or 'duckdb'");
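In the first hunk above, a NULL db_schema no longer makes AdbcConnectionGetTableSchema fail; it now falls back to DuckDB's default "main" schema. A rough caller-side sketch of the new behaviour (a fragment, assuming the ADBC header is included; "my_table" and the helper name are placeholders):

    // Fragment: assumes the ADBC C header (adbc.h) and an initialized connection.
    static AdbcStatusCode GetMainSchemaTable(struct AdbcConnection *connection,
                                             struct ArrowSchema *schema,
                                             struct AdbcError *error) {
        // With the change above, a NULL db_schema resolves against DuckDB's
        // default "main" schema instead of ADBC_STATUS_INVALID_ARGUMENT.
        return AdbcConnectionGetTableSchema(connection,
                                            NULL,       // catalog: NULL or empty for DuckDB
                                            NULL,       // db_schema: now defaults to "main"
                                            "my_table", // placeholder table name
                                            schema, error);
    }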
package/src/duckdb/src/common/arrow/arrow_appender.cpp CHANGED
@@ -14,7 +14,7 @@ namespace duckdb {
  // ArrowAppender
  //===--------------------------------------------------------------------===//

- ArrowAppender::ArrowAppender(vector<LogicalType> types_p, idx_t initial_capacity, ArrowOptions options)
+ ArrowAppender::ArrowAppender(vector<LogicalType> types_p, idx_t initial_capacity, ClientProperties options)
  : types(std::move(types_p)) {
  for (auto &type : types) {
  auto entry = ArrowAppender::InitializeChild(type, initial_capacity, options);
@@ -179,14 +179,14 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
  case LogicalTypeId::VARCHAR:
  case LogicalTypeId::BLOB:
  case LogicalTypeId::BIT:
- if (append_data.options.offset_size == ArrowOffsetSize::LARGE) {
+ if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
  InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
  } else {
  InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, uint32_t>>(append_data);
  }
  break;
  case LogicalTypeId::UUID:
- if (append_data.options.offset_size == ArrowOffsetSize::LARGE) {
+ if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
  InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
  } else {
  InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, uint32_t>>(append_data);
@@ -228,7 +228,7 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
  }

  unique_ptr<ArrowAppendData> ArrowAppender::InitializeChild(const LogicalType &type, idx_t capacity,
- ArrowOptions &options) {
+ ClientProperties &options) {
  auto result = make_uniq<ArrowAppendData>(options);
  InitializeFunctionPointers(*result, type);

package/src/duckdb/src/common/arrow/arrow_converter.cpp CHANGED
@@ -15,12 +15,21 @@

  namespace duckdb {

- void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options) {
+ void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array, ClientProperties options) {
  ArrowAppender appender(input.GetTypes(), input.size(), std::move(options));
  appender.Append(input, 0, input.size(), input.size());
  *out_array = appender.Finalize();
  }

+ unsafe_unique_array<char> AddName(const string &name) {
+ auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
+ for (size_t i = 0; i < name.size(); i++) {
+ name_ptr[i] = name[i];
+ }
+ name_ptr[name.size()] = '\0';
+ return name_ptr;
+ }
+
  //===--------------------------------------------------------------------===//
  // Arrow Schema
  //===--------------------------------------------------------------------===//
@@ -45,24 +54,26 @@ static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) {
  delete holder;
  }

- void InitializeChild(ArrowSchema &child, const string &name = "") {
+ void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, const string &name = "") {
  //! Child is cleaned up by parent
  child.private_data = nullptr;
  child.release = ReleaseDuckDBArrowSchema;

  //! Store the child schema
  child.flags = ARROW_FLAG_NULLABLE;
- child.name = name.c_str();
+ root_holder.owned_type_names.push_back(AddName(name));
+
+ child.name = root_holder.owned_type_names.back().get();
  child.n_children = 0;
  child.children = nullptr;
  child.metadata = nullptr;
  child.dictionary = nullptr;
  }
  void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const ArrowOptions &options);
+ const ClientProperties &options);

  void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const ArrowOptions &options) {
+ const ClientProperties &options) {
  child.format = "+m";
  //! Map has one child which is a struct
  child.n_children = 1;
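The two hunks above also change name ownership: InitializeChild used to point child.name at the c_str() of a caller-owned string, which dangles once that string is destroyed; names are now copied through AddName into root_holder.owned_type_names, and the child only borrows a pointer into that owned storage. A stand-alone sketch of the same pattern (NameHolder and Own are illustrative names, not DuckDB API):

    #include <cstring>
    #include <memory>
    #include <string>
    #include <vector>

    // Illustrative holder mirroring owned_type_names: it keeps a NUL-terminated
    // copy of every name alive, and callers only borrow stable char pointers.
    struct NameHolder {
        std::vector<std::unique_ptr<char[]>> owned_names;

        const char *Own(const std::string &name) {
            auto copy = std::make_unique<char[]>(name.size() + 1);
            std::memcpy(copy.get(), name.c_str(), name.size() + 1); // include the NUL
            owned_names.push_back(std::move(copy));
            return owned_names.back().get(); // valid for the holder's lifetime
        }
    };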
@@ -70,23 +81,14 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
  root_holder.nested_children.back().resize(1);
  root_holder.nested_children_ptr.emplace_back();
  root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
- InitializeChild(root_holder.nested_children.back()[0]);
+ InitializeChild(root_holder.nested_children.back()[0], root_holder);
  child.children = &root_holder.nested_children_ptr.back()[0];
  child.children[0]->name = "entries";
  SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
  }

- unsafe_unique_array<char> AddName(const string &name) {
- auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
- for (size_t i = 0; i < name.size(); i++) {
- name_ptr[i] = name[i];
- }
- name_ptr[name.size()] = '\0';
- return name_ptr;
- }
-
  void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const ArrowOptions &options) {
+ const ClientProperties &options) {
  switch (type.id()) {
  case LogicalTypeId::BOOLEAN:
  child.format = "b";
@@ -126,7 +128,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  break;
  case LogicalTypeId::UUID:
  case LogicalTypeId::VARCHAR:
- if (options.offset_size == ArrowOffsetSize::LARGE) {
+ if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
  child.format = "U";
  } else {
  child.format = "u";
@@ -136,7 +138,6 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  child.format = "tdD";
  break;
  case LogicalTypeId::TIME:
- case LogicalTypeId::TIME_TZ:
  child.format = "ttu";
  break;
  case LogicalTypeId::TIMESTAMP:
@@ -174,7 +175,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  }
  case LogicalTypeId::BLOB:
  case LogicalTypeId::BIT: {
- if (options.offset_size == ArrowOffsetSize::LARGE) {
+ if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
  child.format = "Z";
  } else {
  child.format = "z";
@@ -188,7 +189,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  root_holder.nested_children.back().resize(1);
  root_holder.nested_children_ptr.emplace_back();
  root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
- InitializeChild(root_holder.nested_children.back()[0]);
+ InitializeChild(root_holder.nested_children.back()[0], root_holder);
  child.children = &root_holder.nested_children_ptr.back()[0];
  child.children[0]->name = "l";
  SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
@@ -208,7 +209,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  child.children = &root_holder.nested_children_ptr.back()[0];
  for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {

- InitializeChild(*child.children[type_idx]);
+ InitializeChild(*child.children[type_idx], root_holder);

  root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));

@@ -236,7 +237,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  child.children = &root_holder.nested_children_ptr.back()[0];
  for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {

- InitializeChild(*child.children[type_idx]);
+ InitializeChild(*child.children[type_idx], root_holder);

  root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));

@@ -272,18 +273,18 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
  root_holder.nested_children.back().resize(1);
  root_holder.nested_children_ptr.emplace_back();
  root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
- InitializeChild(root_holder.nested_children.back()[0]);
+ InitializeChild(root_holder.nested_children.back()[0], root_holder);
  child.dictionary = root_holder.nested_children_ptr.back()[0];
  child.dictionary->format = "u";
  break;
  }
  default:
- throw InternalException("Unsupported Arrow type " + type.ToString());
+ throw NotImplementedException("Unsupported Arrow type " + type.ToString());
  }
  }

  void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
- const vector<string> &names, const ArrowOptions &options) {
+ const vector<string> &names, const ClientProperties &options) {
  D_ASSERT(out_schema);
  D_ASSERT(types.size() == names.size());
  idx_t column_count = types.size();
@@ -310,7 +311,7 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<Logical
  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {

  auto &child = root_holder->children[col_idx];
- InitializeChild(child, names[col_idx]);
+ InitializeChild(child, *root_holder, names[col_idx]);
  SetArrowFormat(*root_holder, child, types[col_idx], options);
  }

package/src/duckdb/src/common/arrow/arrow_wrapper.cpp CHANGED
@@ -9,6 +9,7 @@
  #include "duckdb/common/arrow/result_arrow_wrapper.hpp"
  #include "duckdb/common/arrow/arrow_appender.hpp"
  #include "duckdb/main/query_result.hpp"
+ #include "duckdb/main/chunk_scan_state/query_result.hpp"

  namespace duckdb {

@@ -68,7 +69,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
  auto my_stream = reinterpret_cast<ResultArrowArrayStreamWrapper *>(stream->private_data);
  if (!my_stream->column_types.empty()) {
  ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
- QueryResult::GetArrowOptions(*my_stream->result));
+ my_stream->result->client_properties);
  return 0;
  }

@@ -89,7 +90,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
  my_stream->column_names = result.names;
  }
  ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
- QueryResult::GetArrowOptions(*my_stream->result));
+ my_stream->result->client_properties);
  return 0;
  }

@@ -99,6 +100,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stre
  }
  auto my_stream = reinterpret_cast<ResultArrowArrayStreamWrapper *>(stream->private_data);
  auto &result = *my_stream->result;
+ auto &scan_state = *my_stream->scan_state;
  if (result.HasError()) {
  my_stream->last_error = result.GetErrorObject();
  return -1;
@@ -117,7 +119,8 @@ int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stre
  }
  idx_t result_count;
  PreservedError error;
- if (!ArrowUtil::TryFetchChunk(&result, my_stream->batch_size, out, result_count, error)) {
+ if (!ArrowUtil::TryFetchChunk(scan_state, result.client_properties, my_stream->batch_size, out, result_count,
+ error)) {
  D_ASSERT(error);
  my_stream->last_error = error;
  return -1;
@@ -147,7 +150,7 @@ const char *ResultArrowArrayStreamWrapper::MyStreamGetLastError(struct ArrowArra
  }

  ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr<QueryResult> result_p, idx_t batch_size_p)
- : result(std::move(result_p)) {
+ : result(std::move(result_p)), scan_state(make_uniq<QueryResultChunkScanState>(*result)) {
  //! We first initialize the private data of the stream
  stream.private_data = this;
  //! Ceil Approx_Batch_Size/STANDARD_VECTOR_SIZE
@@ -162,52 +165,43 @@ ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr<QueryRes
  stream.get_last_error = ResultArrowArrayStreamWrapper::MyStreamGetLastError;
  }

- bool ArrowUtil::TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &chunk, PreservedError &error) {
- if (result.type == QueryResultType::STREAM_RESULT) {
- auto &stream_result = result.Cast<StreamQueryResult>();
- if (!stream_result.IsOpen()) {
- return true;
- }
- }
- return result.TryFetch(chunk, error);
- }
-
- bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out, idx_t &count,
- PreservedError &error) {
+ bool ArrowUtil::TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t batch_size, ArrowArray *out,
+ idx_t &count, PreservedError &error) {
  count = 0;
- ArrowAppender appender(result->types, chunk_size, QueryResult::GetArrowOptions(*result));
- auto &current_chunk = result->current_chunk;
- if (current_chunk.Valid()) {
+ ArrowAppender appender(scan_state.Types(), batch_size, std::move(options));
+ auto remaining_tuples_in_chunk = scan_state.RemainingInChunk();
+ if (remaining_tuples_in_chunk) {
  // We start by scanning the non-finished current chunk
- // Limit the amount we're fetching to the chunk_size
- idx_t cur_consumption = MinValue<idx_t>(current_chunk.RemainingSize(), chunk_size);
+ idx_t cur_consumption = MinValue(remaining_tuples_in_chunk, batch_size);
  count += cur_consumption;
- appender.Append(*current_chunk.data_chunk, current_chunk.position, current_chunk.position + cur_consumption,
- current_chunk.data_chunk->size());
- current_chunk.position += cur_consumption;
- }
- while (count < chunk_size) {
- unique_ptr<DataChunk> data_chunk;
- if (!TryFetchNext(*result, data_chunk, error)) {
- if (result->HasError()) {
- error = result->GetErrorObject();
+ auto &current_chunk = scan_state.CurrentChunk();
+ appender.Append(current_chunk, scan_state.CurrentOffset(), scan_state.CurrentOffset() + cur_consumption,
+ current_chunk.size());
+ scan_state.IncreaseOffset(cur_consumption);
+ }
+ while (count < batch_size) {
+ if (!scan_state.LoadNextChunk(error)) {
+ if (scan_state.HasError()) {
+ error = scan_state.GetError();
  }
  return false;
  }
- if (!data_chunk || data_chunk->size() == 0) {
+ if (scan_state.ChunkIsEmpty()) {
+ // The scan was successful, but an empty chunk was returned
  break;
  }
- if (count + data_chunk->size() > chunk_size) {
- // We have to split the chunk between this and the next batch
- idx_t available_space = chunk_size - count;
- appender.Append(*data_chunk, 0, available_space, data_chunk->size());
- count += available_space;
- current_chunk.data_chunk = std::move(data_chunk);
- current_chunk.position = available_space;
- } else {
- count += data_chunk->size();
- appender.Append(*data_chunk, 0, data_chunk->size(), data_chunk->size());
+ auto &current_chunk = scan_state.CurrentChunk();
+ if (scan_state.Finished() || current_chunk.size() == 0) {
+ break;
  }
+ // The amount we still need to append into this chunk
+ auto remaining = batch_size - count;
+
+ // The amount remaining, capped by the amount left in the current chunk
+ auto to_append_to_batch = MinValue(remaining, scan_state.RemainingInChunk());
+ appender.Append(current_chunk, 0, to_append_to_batch, current_chunk.size());
+ count += to_append_to_batch;
+ scan_state.IncreaseOffset(to_append_to_batch);
  }
  if (count > 0) {
  *out = appender.Finalize();
@@ -215,10 +209,10 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
  return true;
  }

- idx_t ArrowUtil::FetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out) {
+ idx_t ArrowUtil::FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out) {
  PreservedError error;
  idx_t result_count;
- if (!TryFetchChunk(result, chunk_size, out, result_count, error)) {
+ if (!TryFetchChunk(scan_state, std::move(options), chunk_size, out, result_count, error)) {
  error.Throw();
  }
  return result_count;
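ArrowUtil::TryFetchChunk and FetchChunk are now driven by a ChunkScanState plus the ClientProperties carried on the query result, instead of reaching into the QueryResult directly. A rough usage sketch based only on the signatures above (the query text and batch size are placeholders, and releasing the resulting ArrowArray is left to the caller):

    #include "duckdb.hpp"
    #include "duckdb/common/arrow/arrow_wrapper.hpp"
    #include "duckdb/main/chunk_scan_state/query_result.hpp"

    // Sketch: pull up to 2048 rows out of a query result as a single ArrowArray
    // through the scan-state based ArrowUtil API shown above.
    static duckdb::idx_t FetchOneArrowBatch(duckdb::Connection &con) {
        auto result = con.Query("SELECT 42 AS answer");        // placeholder query
        duckdb::QueryResultChunkScanState scan_state(*result); // wraps the result
        ArrowArray array;
        // FetchChunk appends from the scan state until the batch is full and
        // finalizes the appender into `array`; the caller owns releasing it.
        return duckdb::ArrowUtil::FetchChunk(scan_state, result->client_properties,
                                             2048 /* batch size */, &array);
    }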
package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.2-dev2283"
+ #define DUCKDB_VERSION "0.8.2-dev2356"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "e89665e802"
+ #define DUCKDB_SOURCE_ID "ef2efd1b9d"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp CHANGED
@@ -4,7 +4,7 @@
  #include "duckdb/common/types/vector.hpp"
  #include "duckdb/common/arrow/arrow.hpp"
  #include "duckdb/common/arrow/arrow_buffer.hpp"
- #include "duckdb/common/arrow/arrow_options.hpp"
+ #include "duckdb/main/client_properties.hpp"
  #include "duckdb/common/array.hpp"

  namespace duckdb {
@@ -26,7 +26,7 @@ typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type
  // FIXME: we should separate the append state variables from the variables required by the ArrowArray into
  // ArrowAppendState
  struct ArrowAppendData {
- explicit ArrowAppendData(ArrowOptions &options_p) : options(options_p) {
+ explicit ArrowAppendData(ClientProperties &options_p) : options(options_p) {
  }
  // the buffers of the arrow vector
  ArrowBuffer validity;
@@ -49,7 +49,7 @@ struct ArrowAppendData {
  duckdb::array<const void *, 3> buffers = {{nullptr, nullptr, nullptr}};
  vector<ArrowArray *> child_pointers;

- ArrowOptions options;
+ ClientProperties options;
  };

  //===--------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp CHANGED
@@ -62,7 +62,7 @@ struct ArrowVarcharData {
  auto last_offset = offset_data[append_data.row_count];
  idx_t max_offset = append_data.row_count + to - from;
  if (max_offset > NumericLimits<uint32_t>::Maximum() &&
- append_data.options.offset_size == ArrowOffsetSize::REGULAR) {
+ append_data.options.arrow_offset_size == ArrowOffsetSize::REGULAR) {
  throw InvalidInputException("Arrow Appender: The maximum total string size for regular string buffers is "
  "%u but the offset of %lu exceeds this.",
  NumericLimits<uint32_t>::Maximum(), max_offset);
package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp CHANGED
@@ -19,7 +19,7 @@ struct ArrowAppendData;
  //! The ArrowAppender class can be used to incrementally construct an arrow array by appending data chunks into it
  class ArrowAppender {
  public:
- DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ArrowOptions options);
+ DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ClientProperties options);
  DUCKDB_API ~ArrowAppender();

  //! Append a data chunk to the underlying arrow array
@@ -30,7 +30,8 @@ public:
  public:
  static void ReleaseArray(ArrowArray *array);
  static ArrowArray *FinalizeChild(const LogicalType &type, ArrowAppendData &append_data);
- static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity, ArrowOptions &options);
+ static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity,
+ ClientProperties &options);

  private:
  //! The types of the chunks that will be appended in
@@ -40,7 +41,7 @@ private:
  //! The total row count that has been appended
  idx_t row_count = 0;

- ArrowOptions options;
+ ClientProperties options;
  };

  } // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp CHANGED
@@ -10,7 +10,7 @@

  #include "duckdb/common/types/data_chunk.hpp"
  #include "duckdb/common/arrow/arrow.hpp"
- #include "duckdb/common/arrow/arrow_options.hpp"
+ #include "duckdb/main/client_properties.hpp"

  struct ArrowSchema;

@@ -18,8 +18,8 @@ namespace duckdb {

  struct ArrowConverter {
  DUCKDB_API static void ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
- const vector<string> &names, const ArrowOptions &options);
- DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options);
+ const vector<string> &names, const ClientProperties &options);
+ DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ClientProperties options);
  };

  } // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp CHANGED
@@ -10,6 +10,8 @@
  #include "duckdb/common/arrow/arrow.hpp"
  #include "duckdb/common/helper.hpp"
  #include "duckdb/common/preserved_error.hpp"
+ #include "duckdb/main/chunk_scan_state.hpp"
+ #include "duckdb/main/client_properties.hpp"

  //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp)
  namespace duckdb {
@@ -56,9 +58,9 @@ public:

  class ArrowUtil {
  public:
- static bool TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out, idx_t &result_count,
- PreservedError &error);
- static idx_t FetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out);
+ static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
+ idx_t &result_count, PreservedError &error);
+ static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);

  private:
  static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, PreservedError &error);
package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp CHANGED
@@ -10,17 +10,21 @@

  #include "duckdb/main/query_result.hpp"
  #include "duckdb/common/arrow/arrow_wrapper.hpp"
+ #include "duckdb/main/chunk_scan_state.hpp"

  namespace duckdb {
  class ResultArrowArrayStreamWrapper {
  public:
  explicit ResultArrowArrayStreamWrapper(unique_ptr<QueryResult> result, idx_t batch_size);
+
+ public:
  ArrowArrayStream stream;
  unique_ptr<QueryResult> result;
  PreservedError last_error;
  idx_t batch_size;
  vector<LogicalType> column_types;
  vector<string> column_names;
+ unique_ptr<ChunkScanState> scan_state;

  private:
  static int MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
package/src/duckdb/src/include/duckdb/common/stack_checker.hpp ADDED
@@ -0,0 +1,34 @@
+ //===----------------------------------------------------------------------===//
+ // DuckDB
+ //
+ // duckdb/common/stack_checker.hpp
+ //
+ //
+ //===----------------------------------------------------------------------===//
+
+ #pragma once
+
+ namespace duckdb {
+
+ template <class RECURSIVE_CLASS>
+ class StackChecker {
+ public:
+ StackChecker(RECURSIVE_CLASS &recursive_class_p, idx_t stack_usage_p)
+ : recursive_class(recursive_class_p), stack_usage(stack_usage_p) {
+ recursive_class.stack_depth += stack_usage;
+ }
+ ~StackChecker() {
+ recursive_class.stack_depth -= stack_usage;
+ }
+ StackChecker(StackChecker &&other) noexcept
+ : recursive_class(other.recursive_class), stack_usage(other.stack_usage) {
+ other.stack_usage = 0;
+ }
+ StackChecker(const StackChecker &) = delete;
+
+ private:
+ RECURSIVE_CLASS &recursive_class;
+ idx_t stack_usage;
+ };
+
+ } // namespace duckdb
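StackChecker is an RAII guard: its constructor adds stack_usage to the owning object's stack_depth and its destructor subtracts it again, so the counter unwinds on every return path. It appears to be split into its own header so it can be shared beyond the parser (the transformer.hpp/transformer.cpp and expression_binder.hpp entries in the file list change accordingly). A hypothetical example of the pattern, not DuckDB code (RecursiveThing, CheckStack, and the depth limit are illustrative):

    #include "duckdb/common/types.hpp"         // assumed to provide duckdb::idx_t
    #include "duckdb/common/stack_checker.hpp"
    #include <stdexcept>

    // Hypothetical recursive class: every level of recursion holds a guard,
    // so stack_depth is decremented even on early returns or exceptions.
    class RecursiveThing {
    public:
        // public in this sketch so StackChecker can touch it; DuckDB classes
        // instead declare StackChecker as a friend
        duckdb::idx_t stack_depth = 0;

        void Recurse(int n) {
            auto guard = CheckStack(1); // stack_depth += 1 now, -= 1 at scope exit
            if (n > 0) {
                Recurse(n - 1);
            }
        }

    private:
        duckdb::StackChecker<RecursiveThing> CheckStack(duckdb::idx_t usage) {
            if (stack_depth + usage > kMaxDepth) {
                throw std::runtime_error("maximum recursion depth exceeded");
            }
            return duckdb::StackChecker<RecursiveThing>(*this, usage);
        }

        static constexpr duckdb::idx_t kMaxDepth = 1000; // illustrative limit
    };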
package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp CHANGED
@@ -13,7 +13,6 @@
  #include "duckdb/common/types.hpp"
  #include "duckdb/common/types/data_chunk.hpp"
  #include "duckdb/main/appender.hpp"
- #include "duckdb/common/arrow/arrow_options.hpp"

  #include <cstring>
  #include <cassert>
@@ -48,7 +47,7 @@ struct PendingStatementWrapper {
  struct ArrowResultWrapper {
  unique_ptr<MaterializedQueryResult> result;
  unique_ptr<DataChunk> current_chunk;
- ArrowOptions options;
+ ClientProperties options;
  };

  struct AppenderWrapper {
package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp ADDED
@@ -0,0 +1,29 @@
+ #pragma once
+
+ #include "duckdb/main/chunk_scan_state.hpp"
+ #include "duckdb/common/preserved_error.hpp"
+
+ namespace duckdb {
+
+ class QueryResult;
+
+ class QueryResultChunkScanState : public ChunkScanState {
+ public:
+ QueryResultChunkScanState(QueryResult &result);
+ ~QueryResultChunkScanState();
+
+ public:
+ bool LoadNextChunk(PreservedError &error) override;
+ bool HasError() const override;
+ PreservedError &GetError() override;
+ const vector<LogicalType> &Types() const override;
+ const vector<string> &Names() const override;
+
+ private:
+ bool InternalLoad(PreservedError &error);
+
+ private:
+ QueryResult &result;
+ };
+
+ } // namespace duckdb
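QueryResultChunkScanState adapts a QueryResult (materialized or streaming) to the generic ChunkScanState interface that the Arrow fetch path above consumes. A rough sketch of scanning a result through that interface (the query text and loop structure are illustrative; the end-of-stream and error semantics are inferred from the TryFetchChunk hunk above):

    #include "duckdb.hpp"
    #include "duckdb/main/chunk_scan_state/query_result.hpp"

    // Sketch: count the rows of a result by walking it chunk by chunk
    // through the ChunkScanState interface.
    static duckdb::idx_t CountRows(duckdb::Connection &con) {
        auto result = con.Query("SELECT * FROM range(10000)"); // placeholder query
        duckdb::QueryResultChunkScanState scan_state(*result);

        duckdb::PreservedError error;
        duckdb::idx_t total = 0;
        while (scan_state.LoadNextChunk(error)) { // false on failure (see HasError)
            if (scan_state.ChunkIsEmpty()) {
                break;                            // scan succeeded, nothing left
            }
            total += scan_state.CurrentChunk().size();
            scan_state.IncreaseOffset(scan_state.RemainingInChunk());
        }
        return total;
    }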