duckdb 0.8.2-dev2283.0 → 0.8.2-dev2356.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +8 -3
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +4 -4
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +27 -26
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -43
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +4 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +45 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -15
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
- package/src/duckdb/src/main/capi/arrow-c.cpp +1 -7
- package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
- package/src/duckdb/src/main/chunk_scan_state.cpp +42 -0
- package/src/duckdb/src/main/client_context.cpp +15 -2
- package/src/duckdb/src/main/database.cpp +0 -9
- package/src/duckdb/src/main/query_result.cpp +0 -21
- package/src/duckdb/src/parser/transformer.cpp +2 -16
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +0 -28
- package/src/duckdb/src/planner/expression_binder.cpp +20 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
package/binding.gyp
CHANGED
@@ -93,6 +93,7 @@
         "src/duckdb/ub_src_main.cpp",
         "src/duckdb/ub_src_main_capi.cpp",
         "src/duckdb/ub_src_main_capi_cast.cpp",
+        "src/duckdb/ub_src_main_chunk_scan_state.cpp",
         "src/duckdb/ub_src_main_extension.cpp",
         "src/duckdb/ub_src_main_relation.cpp",
         "src/duckdb/ub_src_main_settings.cpp",
package/package.json
CHANGED
package/src/duckdb/extension/parquet/parquet_writer.cpp
CHANGED
@@ -297,6 +297,7 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro
 	// set up a new row group for this chunk collection
 	auto &row_group = result.row_group;
 	row_group.num_rows = buffer.Count();
+	row_group.total_byte_size = buffer.SizeInBytes();
 	row_group.__isset.file_offset = true;
 
 	auto &states = result.states;
package/src/duckdb/src/common/adbc/adbc.cpp
CHANGED
@@ -202,14 +202,15 @@ AdbcStatusCode ConnectionGetTableSchema(struct AdbcConnection *connection, const
 		SetError(error, "Connection is not set");
 		return ADBC_STATUS_INVALID_ARGUMENT;
 	}
+	if (db_schema == nullptr) {
+		// if schema is not set, we use the default schema
+		db_schema = "main";
+	}
 	if (catalog != nullptr && strlen(catalog) > 0) {
 		// In DuckDB this is the name of the database, not sure what's the expected functionality here, so for now,
 		// scream.
 		SetError(error, "Catalog Name is not used in DuckDB. It must be set to nullptr or an empty string");
 		return ADBC_STATUS_NOT_IMPLEMENTED;
-	} else if (db_schema == nullptr) {
-		SetError(error, "AdbcConnectionGetTableSchema: must provide db_schema");
-		return ADBC_STATUS_INVALID_ARGUMENT;
 	} else if (table_name == nullptr) {
 		SetError(error, "AdbcConnectionGetTableSchema: must provide table_name");
 		return ADBC_STATUS_INVALID_ARGUMENT;
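
Note: with this hunk, a NULL db_schema now falls back to the "main" schema instead of returning ADBC_STATUS_INVALID_ARGUMENT. A minimal usage sketch through the standard ADBC entry point; the already-initialized AdbcConnection named connection and the table name are illustrative, not taken from this diff:

struct ArrowSchema schema;
struct AdbcError error = {};
// db_schema == nullptr now resolves to the default "main" schema
AdbcStatusCode status = AdbcConnectionGetTableSchema(&connection, /*catalog=*/nullptr,
                                                     /*db_schema=*/nullptr, "my_table",
                                                     &schema, &error);
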
@@ -686,6 +687,10 @@ AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArra
 AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog,
                                     const char *db_schema, const char *table_name, const char **table_type,
                                     const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) {
+	if (depth != 0) {
+		SetError(error, "Depth parameter not yet supported");
+		return ADBC_STATUS_NOT_IMPLEMENTED;
+	}
 	if (catalog != nullptr) {
 		if (strcmp(catalog, "duckdb") == 0) {
 			SetError(error, "catalog must be NULL or 'duckdb'");
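
Note: the new check only accepts depth == 0, which in adbc.h corresponds to ADBC_OBJECT_DEPTH_ALL; narrower depths (catalogs, schemas, tables, columns) now return ADBC_STATUS_NOT_IMPLEMENTED. A hedged sketch of the only accepted call shape (connection and stream are illustrative):

struct ArrowArrayStream stream;
struct AdbcError error = {};
// Only ADBC_OBJECT_DEPTH_ALL (0) is accepted after this change
AdbcStatusCode status = AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_ALL,
                                                 nullptr, nullptr, nullptr, nullptr, nullptr,
                                                 &stream, &error);
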
package/src/duckdb/src/common/arrow/arrow_appender.cpp
CHANGED
@@ -14,7 +14,7 @@ namespace duckdb {
 // ArrowAppender
 //===--------------------------------------------------------------------===//
 
-ArrowAppender::ArrowAppender(vector<LogicalType> types_p, idx_t initial_capacity,
+ArrowAppender::ArrowAppender(vector<LogicalType> types_p, idx_t initial_capacity, ClientProperties options)
     : types(std::move(types_p)) {
 	for (auto &type : types) {
 		auto entry = ArrowAppender::InitializeChild(type, initial_capacity, options);
@@ -179,14 +179,14 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 	case LogicalTypeId::VARCHAR:
 	case LogicalTypeId::BLOB:
 	case LogicalTypeId::BIT:
-		if (append_data.options.
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
 		} else {
 			InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, uint32_t>>(append_data);
 		}
 		break;
 	case LogicalTypeId::UUID:
-		if (append_data.options.
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
 		} else {
 			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, uint32_t>>(append_data);
@@ -228,7 +228,7 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 }
 
 unique_ptr<ArrowAppendData> ArrowAppender::InitializeChild(const LogicalType &type, idx_t capacity,
-
+                                                           ClientProperties &options) {
 	auto result = make_uniq<ArrowAppendData>(options);
 	InitializeFunctionPointers(*result, type);
 
package/src/duckdb/src/common/arrow/arrow_converter.cpp
CHANGED
@@ -15,12 +15,21 @@
 
 namespace duckdb {
 
-void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array,
+void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array, ClientProperties options) {
 	ArrowAppender appender(input.GetTypes(), input.size(), std::move(options));
 	appender.Append(input, 0, input.size(), input.size());
 	*out_array = appender.Finalize();
 }
 
+unsafe_unique_array<char> AddName(const string &name) {
+	auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
+	for (size_t i = 0; i < name.size(); i++) {
+		name_ptr[i] = name[i];
+	}
+	name_ptr[name.size()] = '\0';
+	return name_ptr;
+}
+
 //===--------------------------------------------------------------------===//
 // Arrow Schema
 //===--------------------------------------------------------------------===//
@@ -45,24 +54,26 @@ static void ReleaseDuckDBArrowSchema(ArrowSchema *schema) {
 	delete holder;
 }
 
-void InitializeChild(ArrowSchema &child, const string &name = "") {
+void InitializeChild(ArrowSchema &child, DuckDBArrowSchemaHolder &root_holder, const string &name = "") {
 	//! Child is cleaned up by parent
 	child.private_data = nullptr;
 	child.release = ReleaseDuckDBArrowSchema;
 
 	//! Store the child schema
 	child.flags = ARROW_FLAG_NULLABLE;
-
+	root_holder.owned_type_names.push_back(AddName(name));
+
+	child.name = root_holder.owned_type_names.back().get();
 	child.n_children = 0;
 	child.children = nullptr;
 	child.metadata = nullptr;
 	child.dictionary = nullptr;
 }
 void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
-                    const
+                    const ClientProperties &options);
 
 void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
-                       const
+                       const ClientProperties &options) {
 	child.format = "+m";
 	//! Map has one child which is a struct
 	child.n_children = 1;
@@ -70,23 +81,14 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
 	root_holder.nested_children.back().resize(1);
 	root_holder.nested_children_ptr.emplace_back();
 	root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
-	InitializeChild(root_holder.nested_children.back()[0]);
+	InitializeChild(root_holder.nested_children.back()[0], root_holder);
 	child.children = &root_holder.nested_children_ptr.back()[0];
 	child.children[0]->name = "entries";
 	SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
 }
 
-unsafe_unique_array<char> AddName(const string &name) {
-	auto name_ptr = make_unsafe_uniq_array<char>(name.size() + 1);
-	for (size_t i = 0; i < name.size(); i++) {
-		name_ptr[i] = name[i];
-	}
-	name_ptr[name.size()] = '\0';
-	return name_ptr;
-}
-
 void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
-                    const
+                    const ClientProperties &options) {
 	switch (type.id()) {
 	case LogicalTypeId::BOOLEAN:
 		child.format = "b";
@@ -126,7 +128,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		break;
 	case LogicalTypeId::UUID:
 	case LogicalTypeId::VARCHAR:
-		if (options.
+		if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			child.format = "U";
 		} else {
 			child.format = "u";
@@ -136,7 +138,6 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		child.format = "tdD";
 		break;
 	case LogicalTypeId::TIME:
-	case LogicalTypeId::TIME_TZ:
 		child.format = "ttu";
 		break;
 	case LogicalTypeId::TIMESTAMP:
@@ -174,7 +175,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 	}
 	case LogicalTypeId::BLOB:
 	case LogicalTypeId::BIT: {
-		if (options.
+		if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			child.format = "Z";
 		} else {
 			child.format = "z";
@@ -188,7 +189,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		root_holder.nested_children.back().resize(1);
 		root_holder.nested_children_ptr.emplace_back();
 		root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
-		InitializeChild(root_holder.nested_children.back()[0]);
+		InitializeChild(root_holder.nested_children.back()[0], root_holder);
 		child.children = &root_holder.nested_children_ptr.back()[0];
 		child.children[0]->name = "l";
 		SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
@@ -208,7 +209,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		child.children = &root_holder.nested_children_ptr.back()[0];
 		for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
 
-			InitializeChild(*child.children[type_idx]);
+			InitializeChild(*child.children[type_idx], root_holder);
 
 			root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
 
@@ -236,7 +237,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		child.children = &root_holder.nested_children_ptr.back()[0];
 		for (size_t type_idx = 0; type_idx < child_types.size(); type_idx++) {
 
-			InitializeChild(*child.children[type_idx]);
+			InitializeChild(*child.children[type_idx], root_holder);
 
 			root_holder.owned_type_names.push_back(AddName(child_types[type_idx].first));
 
@@ -272,18 +273,18 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		root_holder.nested_children.back().resize(1);
 		root_holder.nested_children_ptr.emplace_back();
 		root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]);
-		InitializeChild(root_holder.nested_children.back()[0]);
+		InitializeChild(root_holder.nested_children.back()[0], root_holder);
 		child.dictionary = root_holder.nested_children_ptr.back()[0];
 		child.dictionary->format = "u";
 		break;
 	}
 	default:
-		throw
+		throw NotImplementedException("Unsupported Arrow type " + type.ToString());
 	}
 }
 
 void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
-                                   const vector<string> &names, const
+                                   const vector<string> &names, const ClientProperties &options) {
 	D_ASSERT(out_schema);
 	D_ASSERT(types.size() == names.size());
 	idx_t column_count = types.size();
@@ -310,7 +311,7 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<Logical
 	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
 
 		auto &child = root_holder->children[col_idx];
-		InitializeChild(child, names[col_idx]);
+		InitializeChild(child, *root_holder, names[col_idx]);
 		SetArrowFormat(*root_holder, child, types[col_idx], options);
 	}
 
package/src/duckdb/src/common/arrow/arrow_wrapper.cpp
CHANGED
@@ -9,6 +9,7 @@
 #include "duckdb/common/arrow/result_arrow_wrapper.hpp"
 #include "duckdb/common/arrow/arrow_appender.hpp"
 #include "duckdb/main/query_result.hpp"
+#include "duckdb/main/chunk_scan_state/query_result.hpp"
 
 namespace duckdb {
 
@@ -68,7 +69,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
 	auto my_stream = reinterpret_cast<ResultArrowArrayStreamWrapper *>(stream->private_data);
 	if (!my_stream->column_types.empty()) {
 		ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
-
+		                              my_stream->result->client_properties);
 		return 0;
 	}
 
@@ -89,7 +90,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
 		my_stream->column_names = result.names;
 	}
 	ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
-
+	                              my_stream->result->client_properties);
 	return 0;
 }
 
@@ -99,6 +100,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stre
 	}
 	auto my_stream = reinterpret_cast<ResultArrowArrayStreamWrapper *>(stream->private_data);
 	auto &result = *my_stream->result;
+	auto &scan_state = *my_stream->scan_state;
 	if (result.HasError()) {
 		my_stream->last_error = result.GetErrorObject();
 		return -1;
@@ -117,7 +119,8 @@ int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stre
 	}
 	idx_t result_count;
 	PreservedError error;
-	if (!ArrowUtil::TryFetchChunk(
+	if (!ArrowUtil::TryFetchChunk(scan_state, result.client_properties, my_stream->batch_size, out, result_count,
+	                              error)) {
 		D_ASSERT(error);
 		my_stream->last_error = error;
 		return -1;
@@ -147,7 +150,7 @@ const char *ResultArrowArrayStreamWrapper::MyStreamGetLastError(struct ArrowArra
 }
 
 ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr<QueryResult> result_p, idx_t batch_size_p)
-    : result(std::move(result_p)) {
+    : result(std::move(result_p)), scan_state(make_uniq<QueryResultChunkScanState>(*result)) {
 	//! We first initialize the private data of the stream
 	stream.private_data = this;
 	//! Ceil Approx_Batch_Size/STANDARD_VECTOR_SIZE
@@ -162,52 +165,43 @@ ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr<QueryRes
 	stream.get_last_error = ResultArrowArrayStreamWrapper::MyStreamGetLastError;
 }
 
-bool ArrowUtil::
-
-	auto &stream_result = result.Cast<StreamQueryResult>();
-	if (!stream_result.IsOpen()) {
-		return true;
-	}
-	}
-	return result.TryFetch(chunk, error);
-}
-
-bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out, idx_t &count,
-                              PreservedError &error) {
+bool ArrowUtil::TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t batch_size, ArrowArray *out,
+                              idx_t &count, PreservedError &error) {
 	count = 0;
-	ArrowAppender appender(
-	auto
-	if (
+	ArrowAppender appender(scan_state.Types(), batch_size, std::move(options));
+	auto remaining_tuples_in_chunk = scan_state.RemainingInChunk();
+	if (remaining_tuples_in_chunk) {
 		// We start by scanning the non-finished current chunk
-
-		idx_t cur_consumption = MinValue<idx_t>(current_chunk.RemainingSize(), chunk_size);
+		idx_t cur_consumption = MinValue(remaining_tuples_in_chunk, batch_size);
 		count += cur_consumption;
-
-
-
-
-
-
-		if (!
-		if (
-			error =
+		auto &current_chunk = scan_state.CurrentChunk();
+		appender.Append(current_chunk, scan_state.CurrentOffset(), scan_state.CurrentOffset() + cur_consumption,
+		                current_chunk.size());
+		scan_state.IncreaseOffset(cur_consumption);
+	}
+	while (count < batch_size) {
+		if (!scan_state.LoadNextChunk(error)) {
+			if (scan_state.HasError()) {
+				error = scan_state.GetError();
 			}
 			return false;
 		}
-		if (
+		if (scan_state.ChunkIsEmpty()) {
+			// The scan was successful, but an empty chunk was returned
 			break;
 		}
-
-
-
-		appender.Append(*data_chunk, 0, available_space, data_chunk->size());
-		count += available_space;
-		current_chunk.data_chunk = std::move(data_chunk);
-		current_chunk.position = available_space;
-		} else {
-		count += data_chunk->size();
-		appender.Append(*data_chunk, 0, data_chunk->size(), data_chunk->size());
+		auto &current_chunk = scan_state.CurrentChunk();
+		if (scan_state.Finished() || current_chunk.size() == 0) {
+			break;
 		}
+		// The amount we still need to append into this chunk
+		auto remaining = batch_size - count;
+
+		// The amount remaining, capped by the amount left in the current chunk
+		auto to_append_to_batch = MinValue(remaining, scan_state.RemainingInChunk());
+		appender.Append(current_chunk, 0, to_append_to_batch, current_chunk.size());
+		count += to_append_to_batch;
+		scan_state.IncreaseOffset(to_append_to_batch);
 	}
 	if (count > 0) {
 		*out = appender.Finalize();
@@ -215,10 +209,10 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
 	return true;
 }
 
-idx_t ArrowUtil::FetchChunk(
+idx_t ArrowUtil::FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out) {
 	PreservedError error;
 	idx_t result_count;
-	if (!TryFetchChunk(
+	if (!TryFetchChunk(scan_state, std::move(options), chunk_size, out, result_count, error)) {
 		error.Throw();
 	}
 	return result_count;
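
Note: ArrowUtil now pulls rows through a ChunkScanState rather than directly from a QueryResult, so callers create a QueryResultChunkScanState and pass the result's ClientProperties along. A minimal sketch of the new call pattern, assuming an existing duckdb::Connection named con (variable names are illustrative, not from the diff):

auto result = con.Query("SELECT 42 AS answer");
QueryResultChunkScanState scan_state(*result);
ArrowArray arrow_array;
// Fetch up to one batch of rows into an ArrowArray; throws on error
idx_t rows = ArrowUtil::FetchChunk(scan_state, result->client_properties, STANDARD_VECTOR_SIZE, &arrow_array);
// When rows > 0, release the array after use: arrow_array.release(&arrow_array);
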
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-
+#define DUCKDB_VERSION "0.8.2-dev2356"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "ef2efd1b9d"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp
CHANGED
@@ -4,7 +4,7 @@
 #include "duckdb/common/types/vector.hpp"
 #include "duckdb/common/arrow/arrow.hpp"
 #include "duckdb/common/arrow/arrow_buffer.hpp"
-#include "duckdb/
+#include "duckdb/main/client_properties.hpp"
 #include "duckdb/common/array.hpp"
 
 namespace duckdb {
@@ -26,7 +26,7 @@ typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type
 // FIXME: we should separate the append state variables from the variables required by the ArrowArray into
 // ArrowAppendState
 struct ArrowAppendData {
-	explicit ArrowAppendData(
+	explicit ArrowAppendData(ClientProperties &options_p) : options(options_p) {
 	}
 	// the buffers of the arrow vector
 	ArrowBuffer validity;
@@ -49,7 +49,7 @@ struct ArrowAppendData {
 	duckdb::array<const void *, 3> buffers = {{nullptr, nullptr, nullptr}};
 	vector<ArrowArray *> child_pointers;
 
-
+	ClientProperties options;
 };
 
 //===--------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp
CHANGED
@@ -62,7 +62,7 @@ struct ArrowVarcharData {
 		auto last_offset = offset_data[append_data.row_count];
 		idx_t max_offset = append_data.row_count + to - from;
 		if (max_offset > NumericLimits<uint32_t>::Maximum() &&
-		    append_data.options.
+		    append_data.options.arrow_offset_size == ArrowOffsetSize::REGULAR) {
 			throw InvalidInputException("Arrow Appender: The maximum total string size for regular string buffers is "
 			                            "%u but the offset of %lu exceeds this.",
 			                            NumericLimits<uint32_t>::Maximum(), max_offset);
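
Note: the overflow check now reads arrow_offset_size from ClientProperties: with REGULAR (32-bit) offsets, a string/blob export whose total data exceeds the uint32_t maximum raises the InvalidInputException above. Switching the export to 64-bit offsets (ArrowOffsetSize::LARGE) avoids this; to the best of my recollection this is exposed as the arrow_large_buffer_size setting, shown here as an assumption:

// Assumed setting name; requests large (64-bit) string/blob offsets for Arrow exports
con.Query("SET arrow_large_buffer_size = true;");
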
package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp
CHANGED
@@ -19,7 +19,7 @@ struct ArrowAppendData;
 //! The ArrowAppender class can be used to incrementally construct an arrow array by appending data chunks into it
 class ArrowAppender {
 public:
-	DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity,
+	DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ClientProperties options);
 	DUCKDB_API ~ArrowAppender();
 
 	//! Append a data chunk to the underlying arrow array
@@ -30,7 +30,8 @@ public:
 public:
 	static void ReleaseArray(ArrowArray *array);
 	static ArrowArray *FinalizeChild(const LogicalType &type, ArrowAppendData &append_data);
-	static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity,
+	static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity,
+	                                                   ClientProperties &options);
 
 private:
 	//! The types of the chunks that will be appended in
@@ -40,7 +41,7 @@ private:
 	//! The total row count that has been appended
 	idx_t row_count = 0;
 
-
+	ClientProperties options;
 };
 
 } // namespace duckdb
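
Note: the appender constructor now takes ClientProperties. A hedged sketch mirroring how ArrowConverter::ToArrowArray drives it, where chunk is an existing DataChunk and props a ClientProperties (both illustrative):

ArrowAppender appender(chunk.GetTypes(), chunk.size(), props);
appender.Append(chunk, 0, chunk.size(), chunk.size());
ArrowArray array = appender.Finalize();
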
package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp
CHANGED
@@ -10,7 +10,7 @@
 
 #include "duckdb/common/types/data_chunk.hpp"
 #include "duckdb/common/arrow/arrow.hpp"
-#include "duckdb/
+#include "duckdb/main/client_properties.hpp"
 
 struct ArrowSchema;
 
@@ -18,8 +18,8 @@ namespace duckdb {
 
 struct ArrowConverter {
 	DUCKDB_API static void ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
-	                                     const vector<string> &names, const
-	DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array,
+	                                     const vector<string> &names, const ClientProperties &options);
+	DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ClientProperties options);
 };
 
 } // namespace duckdb
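
Note: both conversion entry points now take a ClientProperties instead of the removed ArrowOptions. A hedged sketch of direct use; the chunk, column_names, and default-constructed properties are assumptions for illustration (inside the engine these properties normally come from the client context, as QueryResult::client_properties):

ClientProperties props;
props.arrow_offset_size = ArrowOffsetSize::LARGE; // request "U"/"Z" (64-bit offset) formats

ArrowSchema schema;
ArrowArray array;
ArrowConverter::ToArrowSchema(&schema, chunk.GetTypes(), column_names, props);
ArrowConverter::ToArrowArray(chunk, &array, props);
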
package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp
CHANGED
@@ -10,6 +10,8 @@
 #include "duckdb/common/arrow/arrow.hpp"
 #include "duckdb/common/helper.hpp"
 #include "duckdb/common/preserved_error.hpp"
+#include "duckdb/main/chunk_scan_state.hpp"
+#include "duckdb/main/client_properties.hpp"
 
 //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp)
 namespace duckdb {
@@ -56,9 +58,9 @@ public:
 
 class ArrowUtil {
 public:
-	static bool TryFetchChunk(
-	                          PreservedError &error);
-	static idx_t FetchChunk(
+	static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
+	                          idx_t &result_count, PreservedError &error);
+	static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
 
 private:
 	static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, PreservedError &error);
package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp
CHANGED
@@ -10,17 +10,21 @@
 
 #include "duckdb/main/query_result.hpp"
 #include "duckdb/common/arrow/arrow_wrapper.hpp"
+#include "duckdb/main/chunk_scan_state.hpp"
 
 namespace duckdb {
 class ResultArrowArrayStreamWrapper {
 public:
 	explicit ResultArrowArrayStreamWrapper(unique_ptr<QueryResult> result, idx_t batch_size);
+
+public:
 	ArrowArrayStream stream;
 	unique_ptr<QueryResult> result;
 	PreservedError last_error;
 	idx_t batch_size;
 	vector<LogicalType> column_types;
 	vector<string> column_names;
+	unique_ptr<ChunkScanState> scan_state;
 
 private:
 	static int MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
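
Note: the stream wrapper now owns a ChunkScanState next to the QueryResult; the constructor (see arrow_wrapper.cpp above) fills it with a QueryResultChunkScanState. A brief consumption sketch through the standard Arrow C stream interface, given a unique_ptr<QueryResult> named result (illustrative):

ResultArrowArrayStreamWrapper wrapper(std::move(result), 2048 /* approximate batch size */);
ArrowSchema schema;
ArrowArray batch;
wrapper.stream.get_schema(&wrapper.stream, &schema);
// get_next returns 0 on success; end of stream is signaled by a released (null) batch
while (wrapper.stream.get_next(&wrapper.stream, &batch) == 0 && batch.release) {
	// ... consume batch, then release it
	batch.release(&batch);
}
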
package/src/duckdb/src/include/duckdb/common/stack_checker.hpp
ADDED
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/stack_checker.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+template <class RECURSIVE_CLASS>
+class StackChecker {
+public:
+	StackChecker(RECURSIVE_CLASS &recursive_class_p, idx_t stack_usage_p)
+	    : recursive_class(recursive_class_p), stack_usage(stack_usage_p) {
+		recursive_class.stack_depth += stack_usage;
+	}
+	~StackChecker() {
+		recursive_class.stack_depth -= stack_usage;
+	}
+	StackChecker(StackChecker &&other) noexcept
+	    : recursive_class(other.recursive_class), stack_usage(other.stack_usage) {
+		other.stack_usage = 0;
+	}
+	StackChecker(const StackChecker &) = delete;
+
+private:
+	RECURSIVE_CLASS &recursive_class;
+	idx_t stack_usage;
+};
+
+} // namespace duckdb
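
Note: StackChecker is an RAII guard that adds stack_usage to the owning class's stack_depth on construction and subtracts it on destruction, letting recursive components (the parser transformer and expression binder touched elsewhere in this release) bound their recursion. A minimal, self-contained sketch of the intended pattern; the RecursionDemo class, its limit, and the StackCheck helper are illustrative, not part of DuckDB:

#include "duckdb/common/constants.hpp"      // assumed location of idx_t
#include "duckdb/common/stack_checker.hpp"  // the header added above
#include <stdexcept>

namespace duckdb {

class RecursionDemo {
	friend class StackChecker<RecursionDemo>;

public:
	idx_t CountDepth(idx_t n) {
		// Guard lives for the duration of this frame and releases its budget on return
		auto guard = StackCheck();
		return n == 0 ? stack_depth : CountDepth(n - 1);
	}

private:
	StackChecker<RecursionDemo> StackCheck(idx_t extra = 1) {
		if (stack_depth + extra > MAX_DEPTH) {
			throw std::runtime_error("max recursion depth exceeded");
		}
		return StackChecker<RecursionDemo>(*this, extra);
	}

	static constexpr idx_t MAX_DEPTH = 1000;
	idx_t stack_depth = 0;
};

} // namespace duckdb
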
package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp
CHANGED
@@ -13,7 +13,6 @@
 #include "duckdb/common/types.hpp"
 #include "duckdb/common/types/data_chunk.hpp"
 #include "duckdb/main/appender.hpp"
-#include "duckdb/common/arrow/arrow_options.hpp"
 
 #include <cstring>
 #include <cassert>
@@ -48,7 +47,7 @@ struct PendingStatementWrapper {
 struct ArrowResultWrapper {
 	unique_ptr<MaterializedQueryResult> result;
 	unique_ptr<DataChunk> current_chunk;
-
+	ClientProperties options;
 };
 
 struct AppenderWrapper {
package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp
ADDED
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "duckdb/main/chunk_scan_state.hpp"
+#include "duckdb/common/preserved_error.hpp"
+
+namespace duckdb {
+
+class QueryResult;
+
+class QueryResultChunkScanState : public ChunkScanState {
+public:
+	QueryResultChunkScanState(QueryResult &result);
+	~QueryResultChunkScanState();
+
+public:
+	bool LoadNextChunk(PreservedError &error) override;
+	bool HasError() const override;
+	PreservedError &GetError() override;
+	const vector<LogicalType> &Types() const override;
+	const vector<string> &Names() const override;
+
+private:
+	bool InternalLoad(PreservedError &error);
+
+private:
+	QueryResult &result;
+};
+
+} // namespace duckdb