duckdb 0.8.2-dev2509.0 → 0.8.2-dev2673.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +5 -0
- package/src/duckdb/src/common/enum_util.cpp +35 -1
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +6 -1
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +43 -31
- package/src/duckdb/src/execution/index/art/leaf.cpp +47 -33
- package/src/duckdb/src/execution/index/art/node.cpp +31 -24
- package/src/duckdb/src/execution/index/art/prefix.cpp +100 -16
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +54 -31
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +32 -15
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
- package/src/duckdb/src/function/table/arrow.cpp +95 -92
- package/src/duckdb/src/function/table/arrow_conversion.cpp +45 -68
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +7 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +9 -11
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +8 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +15 -14
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +17 -1
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +20 -5
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +16 -0
- package/src/duckdb/src/main/capi/pending-c.cpp +6 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +52 -4
- package/src/duckdb/src/main/client_context.cpp +27 -17
- package/src/duckdb/src/main/client_verify.cpp +17 -0
- package/src/duckdb/src/main/prepared_statement.cpp +38 -11
- package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
- package/src/duckdb/src/parallel/executor.cpp +3 -0
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -7
- package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
- package/src/duckdb/src/parser/transformer.cpp +27 -9
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +13 -13
- package/src/duckdb/src/planner/planner.cpp +7 -6
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +2 -2
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +9652 -9482
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
@@ -9,41 +9,37 @@
|
|
9
9
|
#include "duckdb/function/table/arrow.hpp"
|
10
10
|
#include "duckdb/function/table_function.hpp"
|
11
11
|
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
|
12
|
+
#include "duckdb/function/table/arrow/arrow_duck_schema.hpp"
|
12
13
|
#include "utf8proc_wrapper.hpp"
|
13
14
|
|
14
15
|
namespace duckdb {
|
15
16
|
|
16
|
-
|
17
|
-
ArrowSchema &schema, std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx) {
|
17
|
+
unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schema) {
|
18
18
|
auto format = string(schema.format);
|
19
|
-
if (arrow_convert_data.find(col_idx) == arrow_convert_data.end()) {
|
20
|
-
arrow_convert_data[col_idx] = make_uniq<ArrowConvertData>();
|
21
|
-
}
|
22
|
-
auto &convert_data = *arrow_convert_data[col_idx];
|
23
19
|
if (format == "n") {
|
24
|
-
return LogicalType::SQLNULL;
|
20
|
+
return make_uniq<ArrowType>(LogicalType::SQLNULL);
|
25
21
|
} else if (format == "b") {
|
26
|
-
return LogicalType::BOOLEAN;
|
22
|
+
return make_uniq<ArrowType>(LogicalType::BOOLEAN);
|
27
23
|
} else if (format == "c") {
|
28
|
-
return LogicalType::TINYINT;
|
24
|
+
return make_uniq<ArrowType>(LogicalType::TINYINT);
|
29
25
|
} else if (format == "s") {
|
30
|
-
return LogicalType::SMALLINT;
|
26
|
+
return make_uniq<ArrowType>(LogicalType::SMALLINT);
|
31
27
|
} else if (format == "i") {
|
32
|
-
return LogicalType::INTEGER;
|
28
|
+
return make_uniq<ArrowType>(LogicalType::INTEGER);
|
33
29
|
} else if (format == "l") {
|
34
|
-
return LogicalType::BIGINT;
|
30
|
+
return make_uniq<ArrowType>(LogicalType::BIGINT);
|
35
31
|
} else if (format == "C") {
|
36
|
-
return LogicalType::UTINYINT;
|
32
|
+
return make_uniq<ArrowType>(LogicalType::UTINYINT);
|
37
33
|
} else if (format == "S") {
|
38
|
-
return LogicalType::USMALLINT;
|
34
|
+
return make_uniq<ArrowType>(LogicalType::USMALLINT);
|
39
35
|
} else if (format == "I") {
|
40
|
-
return LogicalType::UINTEGER;
|
36
|
+
return make_uniq<ArrowType>(LogicalType::UINTEGER);
|
41
37
|
} else if (format == "L") {
|
42
|
-
return LogicalType::UBIGINT;
|
38
|
+
return make_uniq<ArrowType>(LogicalType::UBIGINT);
|
43
39
|
} else if (format == "f") {
|
44
|
-
return LogicalType::FLOAT;
|
40
|
+
return make_uniq<ArrowType>(LogicalType::FLOAT);
|
45
41
|
} else if (format == "g") {
|
46
|
-
return LogicalType::DOUBLE;
|
42
|
+
return make_uniq<ArrowType>(LogicalType::DOUBLE);
|
47
43
|
} else if (format[0] == 'd') { //! this can be either decimal128 or decimal 256 (e.g., d:38,0)
|
48
44
|
std::string parameters = format.substr(format.find(':'));
|
49
45
|
uint8_t width = std::stoi(parameters.substr(1, parameters.find(',')));
|
@@ -51,82 +47,74 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
51
47
|
if (width > 38) {
|
52
48
|
throw NotImplementedException("Unsupported Internal Arrow Type for Decimal %s", format);
|
53
49
|
}
|
54
|
-
return LogicalType::DECIMAL(width, scale);
|
50
|
+
return make_uniq<ArrowType>(LogicalType::DECIMAL(width, scale));
|
55
51
|
} else if (format == "u") {
|
56
|
-
|
57
|
-
return LogicalType::VARCHAR;
|
52
|
+
return make_uniq<ArrowType>(LogicalType::VARCHAR, ArrowVariableSizeType::NORMAL);
|
58
53
|
} else if (format == "U") {
|
59
|
-
|
60
|
-
return LogicalType::VARCHAR;
|
54
|
+
return make_uniq<ArrowType>(LogicalType::VARCHAR, ArrowVariableSizeType::SUPER_SIZE);
|
61
55
|
} else if (format == "tsn:") {
|
62
|
-
return LogicalTypeId::TIMESTAMP_NS;
|
56
|
+
return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_NS);
|
63
57
|
} else if (format == "tsu:") {
|
64
|
-
return LogicalTypeId::TIMESTAMP;
|
58
|
+
return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP);
|
65
59
|
} else if (format == "tsm:") {
|
66
|
-
return LogicalTypeId::TIMESTAMP_MS;
|
60
|
+
return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_MS);
|
67
61
|
} else if (format == "tss:") {
|
68
|
-
return LogicalTypeId::TIMESTAMP_SEC;
|
62
|
+
return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_SEC);
|
69
63
|
} else if (format == "tdD") {
|
70
|
-
|
71
|
-
return LogicalType::DATE;
|
64
|
+
return make_uniq<ArrowType>(LogicalType::DATE, ArrowDateTimeType::DAYS);
|
72
65
|
} else if (format == "tdm") {
|
73
|
-
|
74
|
-
return LogicalType::DATE;
|
66
|
+
return make_uniq<ArrowType>(LogicalType::DATE, ArrowDateTimeType::MILLISECONDS);
|
75
67
|
} else if (format == "tts") {
|
76
|
-
|
77
|
-
return LogicalType::TIME;
|
68
|
+
return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::SECONDS);
|
78
69
|
} else if (format == "ttm") {
|
79
|
-
|
80
|
-
return LogicalType::TIME;
|
70
|
+
return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::MILLISECONDS);
|
81
71
|
} else if (format == "ttu") {
|
82
|
-
|
83
|
-
return LogicalType::TIME;
|
72
|
+
return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::MICROSECONDS);
|
84
73
|
} else if (format == "ttn") {
|
85
|
-
|
86
|
-
return LogicalType::TIME;
|
74
|
+
return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::NANOSECONDS);
|
87
75
|
} else if (format == "tDs") {
|
88
|
-
|
89
|
-
return LogicalType::INTERVAL;
|
76
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::SECONDS);
|
90
77
|
} else if (format == "tDm") {
|
91
|
-
|
92
|
-
return LogicalType::INTERVAL;
|
78
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MILLISECONDS);
|
93
79
|
} else if (format == "tDu") {
|
94
|
-
|
95
|
-
return LogicalType::INTERVAL;
|
80
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MICROSECONDS);
|
96
81
|
} else if (format == "tDn") {
|
97
|
-
|
98
|
-
return LogicalType::INTERVAL;
|
82
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::NANOSECONDS);
|
99
83
|
} else if (format == "tiD") {
|
100
|
-
|
101
|
-
return LogicalType::INTERVAL;
|
84
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::DAYS);
|
102
85
|
} else if (format == "tiM") {
|
103
|
-
|
104
|
-
return LogicalType::INTERVAL;
|
86
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MONTHS);
|
105
87
|
} else if (format == "tin") {
|
106
|
-
|
107
|
-
return LogicalType::INTERVAL;
|
88
|
+
return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MONTH_DAY_NANO);
|
108
89
|
} else if (format == "+l") {
|
109
|
-
|
110
|
-
auto
|
111
|
-
|
90
|
+
auto child_type = GetArrowLogicalType(*schema.children[0]);
|
91
|
+
auto list_type =
|
92
|
+
make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::NORMAL);
|
93
|
+
list_type->AddChild(std::move(child_type));
|
94
|
+
return list_type;
|
112
95
|
} else if (format == "+L") {
|
113
|
-
|
114
|
-
auto
|
115
|
-
|
96
|
+
auto child_type = GetArrowLogicalType(*schema.children[0]);
|
97
|
+
auto list_type =
|
98
|
+
make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::SUPER_SIZE);
|
99
|
+
list_type->AddChild(std::move(child_type));
|
100
|
+
return list_type;
|
116
101
|
} else if (format[0] == '+' && format[1] == 'w') {
|
117
102
|
std::string parameters = format.substr(format.find(':') + 1);
|
118
103
|
idx_t fixed_size = std::stoi(parameters);
|
119
|
-
|
120
|
-
auto
|
121
|
-
|
104
|
+
auto child_type = GetArrowLogicalType(*schema.children[0]);
|
105
|
+
auto list_type = make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), fixed_size);
|
106
|
+
list_type->AddChild(std::move(child_type));
|
107
|
+
return list_type;
|
122
108
|
} else if (format == "+s") {
|
123
109
|
child_list_t<LogicalType> child_types;
|
110
|
+
vector<unique_ptr<ArrowType>> children;
|
124
111
|
for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
|
125
|
-
|
126
|
-
child_types.emplace_back(schema.children[type_idx]->name,
|
112
|
+
children.emplace_back(GetArrowLogicalType(*schema.children[type_idx]));
|
113
|
+
child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType());
|
127
114
|
}
|
128
|
-
|
129
|
-
|
115
|
+
auto struct_type = make_uniq<ArrowType>(LogicalType::STRUCT(std::move(child_types)));
|
116
|
+
struct_type->AssignChildren(std::move(children));
|
117
|
+
return struct_type;
|
130
118
|
} else if (format[0] == '+' && format[1] == 'u') {
|
131
119
|
if (format[2] != 's') {
|
132
120
|
throw NotImplementedException("Unsupported Internal Arrow Type: \"%c\" Union", format[2]);
|
@@ -138,48 +126,59 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
138
126
|
auto type_ids = StringUtil::Split(format.substr(prefix.size()), ',');
|
139
127
|
|
140
128
|
child_list_t<LogicalType> members;
|
129
|
+
vector<unique_ptr<ArrowType>> children;
|
141
130
|
for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
|
142
131
|
auto type = schema.children[type_idx];
|
143
132
|
|
144
|
-
|
133
|
+
children.emplace_back(GetArrowLogicalType(*type));
|
134
|
+
members.emplace_back(type->name, children.back()->GetDuckType());
|
145
135
|
}
|
146
136
|
|
147
|
-
|
148
|
-
|
137
|
+
auto union_type = make_uniq<ArrowType>(LogicalType::UNION(members));
|
138
|
+
union_type->AssignChildren(std::move(children));
|
139
|
+
return union_type;
|
149
140
|
} else if (format == "+m") {
|
150
|
-
convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
151
|
-
|
152
141
|
auto &arrow_struct_type = *schema.children[0];
|
153
142
|
D_ASSERT(arrow_struct_type.n_children == 2);
|
154
|
-
auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0]
|
155
|
-
auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1]
|
156
|
-
|
143
|
+
auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0]);
|
144
|
+
auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1]);
|
145
|
+
auto map_type = make_uniq<ArrowType>(LogicalType::MAP(key_type->GetDuckType(), value_type->GetDuckType()),
|
146
|
+
ArrowVariableSizeType::NORMAL);
|
147
|
+
child_list_t<LogicalType> key_value;
|
148
|
+
key_value.emplace_back(std::make_pair("key", key_type->GetDuckType()));
|
149
|
+
key_value.emplace_back(std::make_pair("value", value_type->GetDuckType()));
|
150
|
+
|
151
|
+
auto inner_struct =
|
152
|
+
make_uniq<ArrowType>(LogicalType::STRUCT(std::move(key_value)), ArrowVariableSizeType::NORMAL);
|
153
|
+
vector<unique_ptr<ArrowType>> children;
|
154
|
+
children.reserve(2);
|
155
|
+
children.push_back(std::move(key_type));
|
156
|
+
children.push_back(std::move(value_type));
|
157
|
+
inner_struct->AssignChildren(std::move(children));
|
158
|
+
map_type->AddChild(std::move(inner_struct));
|
159
|
+
return map_type;
|
157
160
|
} else if (format == "z") {
|
158
|
-
|
159
|
-
return LogicalType::BLOB;
|
161
|
+
return make_uniq<ArrowType>(LogicalType::BLOB, ArrowVariableSizeType::NORMAL);
|
160
162
|
} else if (format == "Z") {
|
161
|
-
|
162
|
-
return LogicalType::BLOB;
|
163
|
+
return make_uniq<ArrowType>(LogicalType::BLOB, ArrowVariableSizeType::SUPER_SIZE);
|
163
164
|
} else if (format[0] == 'w') {
|
164
165
|
std::string parameters = format.substr(format.find(':') + 1);
|
165
166
|
idx_t fixed_size = std::stoi(parameters);
|
166
|
-
|
167
|
-
return LogicalType::BLOB;
|
167
|
+
return make_uniq<ArrowType>(LogicalType::BLOB, fixed_size);
|
168
168
|
} else if (format[0] == 't' && format[1] == 's') {
|
169
169
|
// Timestamp with Timezone
|
170
|
+
// TODO right now we just get the UTC value. We probably want to support this properly in the future
|
170
171
|
if (format[2] == 'n') {
|
171
|
-
|
172
|
+
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::NANOSECONDS);
|
172
173
|
} else if (format[2] == 'u') {
|
173
|
-
|
174
|
+
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::MICROSECONDS);
|
174
175
|
} else if (format[2] == 'm') {
|
175
|
-
|
176
|
+
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::MILLISECONDS);
|
176
177
|
} else if (format[2] == 's') {
|
177
|
-
|
178
|
+
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::SECONDS);
|
178
179
|
} else {
|
179
180
|
throw NotImplementedException(" Timestamptz precision of not accepted");
|
180
181
|
}
|
181
|
-
// TODO right now we just get the UTC value. We probably want to support this properly in the future
|
182
|
-
return LogicalType::TIMESTAMP_TZ;
|
183
182
|
} else {
|
184
183
|
throw NotImplementedException("Unsupported Internal Arrow Type %s", format);
|
185
184
|
}
|
@@ -224,13 +223,17 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
|
|
224
223
|
if (!schema.release) {
|
225
224
|
throw InvalidInputException("arrow_scan: released schema passed");
|
226
225
|
}
|
226
|
+
auto arrow_type = GetArrowLogicalType(schema);
|
227
227
|
if (schema.dictionary) {
|
228
|
-
auto logical_type =
|
229
|
-
|
230
|
-
return_types.emplace_back(
|
228
|
+
auto logical_type = arrow_type->GetDuckType();
|
229
|
+
auto dictionary = GetArrowLogicalType(*schema.dictionary);
|
230
|
+
return_types.emplace_back(dictionary->GetDuckType());
|
231
|
+
// The dictionary might have different attributes (size type, datetime precision, etc..)
|
232
|
+
arrow_type->SetDictionary(std::move(dictionary));
|
231
233
|
} else {
|
232
|
-
return_types.emplace_back(
|
234
|
+
return_types.emplace_back(arrow_type->GetDuckType());
|
233
235
|
}
|
236
|
+
res->arrow_table.AddColumn(col_idx, std::move(arrow_type));
|
234
237
|
auto format = string(schema.format);
|
235
238
|
auto name = string(schema.name);
|
236
239
|
if (name.empty()) {
|
@@ -348,11 +351,11 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
|
|
348
351
|
if (global_state.CanRemoveFilterColumns()) {
|
349
352
|
state.all_columns.Reset();
|
350
353
|
state.all_columns.SetCardinality(output_size);
|
351
|
-
ArrowToDuckDB(state, data.
|
354
|
+
ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, data.lines_read - output_size);
|
352
355
|
output.ReferenceColumns(state.all_columns, global_state.projection_ids);
|
353
356
|
} else {
|
354
357
|
output.SetCardinality(output_size);
|
355
|
-
ArrowToDuckDB(state, data.
|
358
|
+
ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size);
|
356
359
|
}
|
357
360
|
|
358
361
|
output.Verify();
|
@@ -5,16 +5,6 @@
|
|
5
5
|
#include "duckdb/common/types/arrow_aux_data.hpp"
|
6
6
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
7
7
|
|
8
|
-
namespace {
|
9
|
-
using duckdb::idx_t;
|
10
|
-
struct ArrowConvertDataIndices {
|
11
|
-
//! The index that refers to 'variable_sz_type' in ArrowConvertData
|
12
|
-
idx_t variable_sized_index;
|
13
|
-
//! The index that refers to 'date_time_precision' in ArrowConvertData
|
14
|
-
idx_t datetime_precision_index;
|
15
|
-
};
|
16
|
-
} // namespace
|
17
|
-
|
18
8
|
namespace duckdb {
|
19
9
|
|
20
10
|
static void ShiftRight(unsigned char *ar, int size, int shift) {
|
@@ -91,35 +81,33 @@ static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalSta
|
|
91
81
|
}
|
92
82
|
|
93
83
|
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
94
|
-
|
95
|
-
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset = -1,
|
84
|
+
const ArrowType &arrow_type, int64_t nested_offset = -1,
|
96
85
|
ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
|
97
86
|
|
98
87
|
static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
99
|
-
|
100
|
-
|
101
|
-
ValidityMask *parent_mask) {
|
102
|
-
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
|
88
|
+
const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask) {
|
89
|
+
auto size_type = arrow_type.GetSizeType();
|
103
90
|
idx_t list_size = 0;
|
104
91
|
SetValidityMask(vector, array, scan_state, size, nested_offset);
|
105
92
|
idx_t start_offset = 0;
|
106
93
|
idx_t cur_offset = 0;
|
107
|
-
if (
|
94
|
+
if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
|
95
|
+
auto fixed_size = arrow_type.FixedSize();
|
108
96
|
//! Have to check validity mask before setting this up
|
109
|
-
idx_t offset = (scan_state.chunk_offset + array.offset) *
|
97
|
+
idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
|
110
98
|
if (nested_offset != -1) {
|
111
|
-
offset =
|
99
|
+
offset = fixed_size * nested_offset;
|
112
100
|
}
|
113
101
|
start_offset = offset;
|
114
102
|
auto list_data = FlatVector::GetData<list_entry_t>(vector);
|
115
103
|
for (idx_t i = 0; i < size; i++) {
|
116
104
|
auto &le = list_data[i];
|
117
105
|
le.offset = cur_offset;
|
118
|
-
le.length =
|
119
|
-
cur_offset +=
|
106
|
+
le.length = fixed_size;
|
107
|
+
cur_offset += fixed_size;
|
120
108
|
}
|
121
109
|
list_size = start_offset + cur_offset;
|
122
|
-
} else if (
|
110
|
+
} else if (size_type == ArrowVariableSizeType::NORMAL) {
|
123
111
|
auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
|
124
112
|
if (nested_offset != -1) {
|
125
113
|
offsets = ArrowBufferData<uint32_t>(array, 1) + nested_offset;
|
@@ -165,24 +153,22 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalS
|
|
165
153
|
}
|
166
154
|
}
|
167
155
|
if (list_size == 0 && start_offset == 0) {
|
168
|
-
ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size,
|
169
|
-
arrow_convert_idx, -1);
|
156
|
+
ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], -1);
|
170
157
|
} else {
|
171
|
-
ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size,
|
172
|
-
arrow_convert_idx, start_offset);
|
158
|
+
ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], start_offset);
|
173
159
|
}
|
174
160
|
}
|
175
161
|
|
176
162
|
static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
177
|
-
|
178
|
-
|
179
|
-
auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
|
163
|
+
const ArrowType &arrow_type, int64_t nested_offset) {
|
164
|
+
auto size_type = arrow_type.GetSizeType();
|
180
165
|
SetValidityMask(vector, array, scan_state, size, nested_offset);
|
181
|
-
if (
|
166
|
+
if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
|
167
|
+
auto fixed_size = arrow_type.FixedSize();
|
182
168
|
//! Have to check validity mask before setting this up
|
183
|
-
idx_t offset = (scan_state.chunk_offset + array.offset) *
|
169
|
+
idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
|
184
170
|
if (nested_offset != -1) {
|
185
|
-
offset =
|
171
|
+
offset = fixed_size * nested_offset;
|
186
172
|
}
|
187
173
|
auto cdata = ArrowBufferData<char>(array, 1);
|
188
174
|
for (idx_t row_idx = 0; row_idx < size; row_idx++) {
|
@@ -190,11 +176,11 @@ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalS
|
|
190
176
|
continue;
|
191
177
|
}
|
192
178
|
auto bptr = cdata + offset;
|
193
|
-
auto blob_len =
|
179
|
+
auto blob_len = fixed_size;
|
194
180
|
FlatVector::GetData<string_t>(vector)[row_idx] = StringVector::AddStringOrBlob(vector, bptr, blob_len);
|
195
181
|
offset += blob_len;
|
196
182
|
}
|
197
|
-
} else if (
|
183
|
+
} else if (size_type == ArrowVariableSizeType::NORMAL) {
|
198
184
|
auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
|
199
185
|
if (nested_offset != -1) {
|
200
186
|
offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + nested_offset;
|
@@ -358,9 +344,8 @@ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, A
|
|
358
344
|
}
|
359
345
|
|
360
346
|
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
361
|
-
|
362
|
-
|
363
|
-
ValidityMask *parent_mask, uint64_t parent_offset) {
|
347
|
+
const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
|
348
|
+
uint64_t parent_offset) {
|
364
349
|
switch (vector.GetType().id()) {
|
365
350
|
case LogicalTypeId::SQLNULL:
|
366
351
|
vector.Reference(Value());
|
@@ -412,9 +397,9 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
412
397
|
break;
|
413
398
|
}
|
414
399
|
case LogicalTypeId::VARCHAR: {
|
415
|
-
auto
|
400
|
+
auto size_type = arrow_type.GetSizeType();
|
416
401
|
auto cdata = ArrowBufferData<char>(array, 2);
|
417
|
-
if (
|
402
|
+
if (size_type == ArrowVariableSizeType::SUPER_SIZE) {
|
418
403
|
auto offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + scan_state.chunk_offset;
|
419
404
|
if (nested_offset != -1) {
|
420
405
|
offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + nested_offset;
|
@@ -430,7 +415,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
430
415
|
break;
|
431
416
|
}
|
432
417
|
case LogicalTypeId::DATE: {
|
433
|
-
|
418
|
+
|
419
|
+
auto precision = arrow_type.GetDateTimeType();
|
434
420
|
switch (precision) {
|
435
421
|
case ArrowDateTimeType::DAYS: {
|
436
422
|
DirectConversion(vector, array, scan_state, nested_offset, parent_offset);
|
@@ -454,7 +440,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
454
440
|
break;
|
455
441
|
}
|
456
442
|
case LogicalTypeId::TIME: {
|
457
|
-
auto precision =
|
443
|
+
auto precision = arrow_type.GetDateTimeType();
|
458
444
|
switch (precision) {
|
459
445
|
case ArrowDateTimeType::SECONDS: {
|
460
446
|
TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -485,7 +471,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
485
471
|
break;
|
486
472
|
}
|
487
473
|
case LogicalTypeId::TIMESTAMP_TZ: {
|
488
|
-
auto precision =
|
474
|
+
auto precision = arrow_type.GetDateTimeType();
|
489
475
|
switch (precision) {
|
490
476
|
case ArrowDateTimeType::SECONDS: {
|
491
477
|
TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -516,7 +502,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
516
502
|
break;
|
517
503
|
}
|
518
504
|
case LogicalTypeId::INTERVAL: {
|
519
|
-
auto precision =
|
505
|
+
auto precision = arrow_type.GetDateTimeType();
|
520
506
|
switch (precision) {
|
521
507
|
case ArrowDateTimeType::SECONDS: {
|
522
508
|
IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000);
|
@@ -611,18 +597,15 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
611
597
|
break;
|
612
598
|
}
|
613
599
|
case LogicalTypeId::BLOB: {
|
614
|
-
ArrowToDuckDBBlob(vector, array, scan_state, size,
|
615
|
-
nested_offset);
|
600
|
+
ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_type, nested_offset);
|
616
601
|
break;
|
617
602
|
}
|
618
603
|
case LogicalTypeId::LIST: {
|
619
|
-
ArrowToDuckDBList(vector, array, scan_state, size,
|
620
|
-
nested_offset, parent_mask);
|
604
|
+
ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
|
621
605
|
break;
|
622
606
|
}
|
623
607
|
case LogicalTypeId::MAP: {
|
624
|
-
ArrowToDuckDBList(vector, array, scan_state, size,
|
625
|
-
nested_offset, parent_mask);
|
608
|
+
ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
|
626
609
|
ArrowToDuckDBMapVerify(vector, size);
|
627
610
|
break;
|
628
611
|
}
|
@@ -630,7 +613,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
630
613
|
//! Fill the children
|
631
614
|
auto &child_entries = StructVector::GetEntries(vector);
|
632
615
|
auto &struct_validity_mask = FlatVector::Validity(vector);
|
633
|
-
for (idx_t type_idx = 0; type_idx < (
|
616
|
+
for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
|
634
617
|
SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset);
|
635
618
|
if (!struct_validity_mask.AllValid()) {
|
636
619
|
auto &child_validity_mark = FlatVector::Validity(*child_entries[type_idx]);
|
@@ -641,8 +624,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
641
624
|
}
|
642
625
|
}
|
643
626
|
ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size,
|
644
|
-
|
645
|
-
array.offset);
|
627
|
+
arrow_type[type_idx], nested_offset, &struct_validity_mask, array.offset);
|
646
628
|
}
|
647
629
|
break;
|
648
630
|
}
|
@@ -654,14 +636,13 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
|
|
654
636
|
auto &validity_mask = FlatVector::Validity(vector);
|
655
637
|
|
656
638
|
duckdb::vector<Vector> children;
|
657
|
-
for (idx_t type_idx = 0; type_idx < (
|
639
|
+
for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
|
658
640
|
Vector child(members[type_idx].second);
|
659
641
|
auto arrow_array = array.children[type_idx];
|
660
642
|
|
661
643
|
SetValidityMask(child, *arrow_array, scan_state, size, nested_offset);
|
662
644
|
|
663
|
-
ColumnArrowToDuckDB(child, *arrow_array, scan_state, size,
|
664
|
-
nested_offset, &validity_mask);
|
645
|
+
ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, arrow_type, nested_offset, &validity_mask);
|
665
646
|
|
666
647
|
children.push_back(std::move(child));
|
667
648
|
}
|
@@ -809,20 +790,18 @@ static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, Logic
|
|
809
790
|
}
|
810
791
|
|
811
792
|
static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
|
812
|
-
idx_t size,
|
813
|
-
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
814
|
-
idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx) {
|
793
|
+
idx_t size, const ArrowType &arrow_type, idx_t col_idx) {
|
815
794
|
SelectionVector sel;
|
816
795
|
auto &dict_vectors = scan_state.arrow_dictionary_vectors;
|
817
796
|
if (!dict_vectors.count(col_idx)) {
|
818
797
|
//! We need to set the dictionary data for this column
|
819
798
|
auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
|
820
799
|
SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0);
|
821
|
-
ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length,
|
822
|
-
|
800
|
+
ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length,
|
801
|
+
arrow_type.GetDictionary());
|
823
802
|
dict_vectors[col_idx] = std::move(base_vector);
|
824
803
|
}
|
825
|
-
auto dictionary_type =
|
804
|
+
auto dictionary_type = arrow_type.GetDuckType();
|
826
805
|
//! Get Pointer to Indices of Dictionary
|
827
806
|
auto indices = ArrowBufferData<data_t>(array, 1) +
|
828
807
|
GetTypeIdSize(dictionary_type.InternalType()) * (scan_state.chunk_offset + array.offset);
|
@@ -836,8 +815,7 @@ static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, Arr
|
|
836
815
|
vector.Slice(*dict_vectors[col_idx], sel, size);
|
837
816
|
}
|
838
817
|
|
839
|
-
void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
840
|
-
unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
818
|
+
void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const arrow_column_map_t &arrow_convert_data,
|
841
819
|
DataChunk &output, idx_t start, bool arrow_scan_is_projected) {
|
842
820
|
for (idx_t idx = 0; idx < output.ColumnCount(); idx++) {
|
843
821
|
auto col_idx = scan_state.column_ids[idx];
|
@@ -851,7 +829,6 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
|
851
829
|
continue;
|
852
830
|
}
|
853
831
|
|
854
|
-
ArrowConvertDataIndices arrow_convert_idx {0, 0};
|
855
832
|
auto &array = *scan_state.chunk->arrow_array.children[arrow_array_idx];
|
856
833
|
if (!array.release) {
|
857
834
|
throw InvalidInputException("arrow_scan: released array passed");
|
@@ -861,13 +838,13 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
|
861
838
|
}
|
862
839
|
// Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
|
863
840
|
output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
|
841
|
+
D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
|
842
|
+
auto &arrow_type = *arrow_convert_data.at(col_idx);
|
864
843
|
if (array.dictionary) {
|
865
|
-
ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(),
|
866
|
-
col_idx, arrow_convert_idx);
|
844
|
+
ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_type, col_idx);
|
867
845
|
} else {
|
868
846
|
SetValidityMask(output.data[idx], array, scan_state, output.size(), -1);
|
869
|
-
ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(),
|
870
|
-
arrow_convert_idx);
|
847
|
+
ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(), arrow_type);
|
871
848
|
}
|
872
849
|
}
|
873
850
|
}
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev2673"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "fd683b9fb2"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -200,6 +200,8 @@ enum class PhysicalType : uint8_t;
|
|
200
200
|
|
201
201
|
enum class PragmaType : uint8_t;
|
202
202
|
|
203
|
+
enum class PreparedParamType : uint8_t;
|
204
|
+
|
203
205
|
enum class ProfilerPrintFormat : uint8_t;
|
204
206
|
|
205
207
|
enum class QueryNodeType : uint8_t;
|
@@ -533,6 +535,9 @@ const char* EnumUtil::ToChars<PhysicalType>(PhysicalType value);
|
|
533
535
|
template<>
|
534
536
|
const char* EnumUtil::ToChars<PragmaType>(PragmaType value);
|
535
537
|
|
538
|
+
template<>
|
539
|
+
const char* EnumUtil::ToChars<PreparedParamType>(PreparedParamType value);
|
540
|
+
|
536
541
|
template<>
|
537
542
|
const char* EnumUtil::ToChars<ProfilerPrintFormat>(ProfilerPrintFormat value);
|
538
543
|
|
@@ -906,6 +911,9 @@ PhysicalType EnumUtil::FromString<PhysicalType>(const char *value);
|
|
906
911
|
template<>
|
907
912
|
PragmaType EnumUtil::FromString<PragmaType>(const char *value);
|
908
913
|
|
914
|
+
template<>
|
915
|
+
PreparedParamType EnumUtil::FromString<PreparedParamType>(const char *value);
|
916
|
+
|
909
917
|
template<>
|
910
918
|
ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value);
|
911
919
|
|