duckdb 0.8.2-dev2399.0 → 0.8.2-dev2669.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +3 -3
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +5 -0
  6. package/src/duckdb/src/common/enum_util.cpp +35 -1
  7. package/src/duckdb/src/common/http_state.cpp +78 -0
  8. package/src/duckdb/src/core_functions/function_list.cpp +2 -2
  9. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  10. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  11. package/src/duckdb/src/execution/index/art/art.cpp +43 -31
  12. package/src/duckdb/src/execution/index/art/leaf.cpp +47 -33
  13. package/src/duckdb/src/execution/index/art/node.cpp +31 -24
  14. package/src/duckdb/src/execution/index/art/prefix.cpp +100 -16
  15. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +54 -31
  16. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +32 -15
  17. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  18. package/src/duckdb/src/function/table/arrow.cpp +95 -92
  19. package/src/duckdb/src/function/table/arrow_conversion.cpp +45 -68
  20. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  21. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  22. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  23. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  24. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  25. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -1
  26. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
  27. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +7 -5
  28. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -6
  29. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -0
  30. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +9 -11
  31. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +8 -1
  32. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  33. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  34. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
  35. package/src/duckdb/src/include/duckdb/main/client_context.hpp +15 -14
  36. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  37. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  38. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
  39. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +17 -1
  40. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
  42. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  43. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +20 -5
  44. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  46. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb.h +16 -0
  49. package/src/duckdb/src/main/capi/pending-c.cpp +6 -0
  50. package/src/duckdb/src/main/capi/prepared-c.cpp +52 -4
  51. package/src/duckdb/src/main/client_context.cpp +27 -17
  52. package/src/duckdb/src/main/client_verify.cpp +17 -0
  53. package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
  54. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  55. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  56. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -7
  57. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  58. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  59. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  60. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  61. package/src/duckdb/src/parser/transformer.cpp +27 -9
  62. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  63. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  64. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +13 -13
  65. package/src/duckdb/src/planner/planner.cpp +7 -6
  66. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  67. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
  68. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +2 -2
  69. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  70. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  71. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12855 -12282
  72. package/src/duckdb/ub_src_common.cpp +2 -0
  73. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
@@ -9,41 +9,37 @@
9
9
  #include "duckdb/function/table/arrow.hpp"
10
10
  #include "duckdb/function/table_function.hpp"
11
11
  #include "duckdb/parser/parsed_data/create_table_function_info.hpp"
12
+ #include "duckdb/function/table/arrow/arrow_duck_schema.hpp"
12
13
  #include "utf8proc_wrapper.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
16
- LogicalType ArrowTableFunction::GetArrowLogicalType(
17
- ArrowSchema &schema, std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx) {
17
+ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schema) {
18
18
  auto format = string(schema.format);
19
- if (arrow_convert_data.find(col_idx) == arrow_convert_data.end()) {
20
- arrow_convert_data[col_idx] = make_uniq<ArrowConvertData>();
21
- }
22
- auto &convert_data = *arrow_convert_data[col_idx];
23
19
  if (format == "n") {
24
- return LogicalType::SQLNULL;
20
+ return make_uniq<ArrowType>(LogicalType::SQLNULL);
25
21
  } else if (format == "b") {
26
- return LogicalType::BOOLEAN;
22
+ return make_uniq<ArrowType>(LogicalType::BOOLEAN);
27
23
  } else if (format == "c") {
28
- return LogicalType::TINYINT;
24
+ return make_uniq<ArrowType>(LogicalType::TINYINT);
29
25
  } else if (format == "s") {
30
- return LogicalType::SMALLINT;
26
+ return make_uniq<ArrowType>(LogicalType::SMALLINT);
31
27
  } else if (format == "i") {
32
- return LogicalType::INTEGER;
28
+ return make_uniq<ArrowType>(LogicalType::INTEGER);
33
29
  } else if (format == "l") {
34
- return LogicalType::BIGINT;
30
+ return make_uniq<ArrowType>(LogicalType::BIGINT);
35
31
  } else if (format == "C") {
36
- return LogicalType::UTINYINT;
32
+ return make_uniq<ArrowType>(LogicalType::UTINYINT);
37
33
  } else if (format == "S") {
38
- return LogicalType::USMALLINT;
34
+ return make_uniq<ArrowType>(LogicalType::USMALLINT);
39
35
  } else if (format == "I") {
40
- return LogicalType::UINTEGER;
36
+ return make_uniq<ArrowType>(LogicalType::UINTEGER);
41
37
  } else if (format == "L") {
42
- return LogicalType::UBIGINT;
38
+ return make_uniq<ArrowType>(LogicalType::UBIGINT);
43
39
  } else if (format == "f") {
44
- return LogicalType::FLOAT;
40
+ return make_uniq<ArrowType>(LogicalType::FLOAT);
45
41
  } else if (format == "g") {
46
- return LogicalType::DOUBLE;
42
+ return make_uniq<ArrowType>(LogicalType::DOUBLE);
47
43
  } else if (format[0] == 'd') { //! this can be either decimal128 or decimal 256 (e.g., d:38,0)
48
44
  std::string parameters = format.substr(format.find(':'));
49
45
  uint8_t width = std::stoi(parameters.substr(1, parameters.find(',')));
@@ -51,82 +47,74 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
51
47
  if (width > 38) {
52
48
  throw NotImplementedException("Unsupported Internal Arrow Type for Decimal %s", format);
53
49
  }
54
- return LogicalType::DECIMAL(width, scale);
50
+ return make_uniq<ArrowType>(LogicalType::DECIMAL(width, scale));
55
51
  } else if (format == "u") {
56
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
57
- return LogicalType::VARCHAR;
52
+ return make_uniq<ArrowType>(LogicalType::VARCHAR, ArrowVariableSizeType::NORMAL);
58
53
  } else if (format == "U") {
59
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
60
- return LogicalType::VARCHAR;
54
+ return make_uniq<ArrowType>(LogicalType::VARCHAR, ArrowVariableSizeType::SUPER_SIZE);
61
55
  } else if (format == "tsn:") {
62
- return LogicalTypeId::TIMESTAMP_NS;
56
+ return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_NS);
63
57
  } else if (format == "tsu:") {
64
- return LogicalTypeId::TIMESTAMP;
58
+ return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP);
65
59
  } else if (format == "tsm:") {
66
- return LogicalTypeId::TIMESTAMP_MS;
60
+ return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_MS);
67
61
  } else if (format == "tss:") {
68
- return LogicalTypeId::TIMESTAMP_SEC;
62
+ return make_uniq<ArrowType>(LogicalTypeId::TIMESTAMP_SEC);
69
63
  } else if (format == "tdD") {
70
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
71
- return LogicalType::DATE;
64
+ return make_uniq<ArrowType>(LogicalType::DATE, ArrowDateTimeType::DAYS);
72
65
  } else if (format == "tdm") {
73
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
74
- return LogicalType::DATE;
66
+ return make_uniq<ArrowType>(LogicalType::DATE, ArrowDateTimeType::MILLISECONDS);
75
67
  } else if (format == "tts") {
76
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
77
- return LogicalType::TIME;
68
+ return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::SECONDS);
78
69
  } else if (format == "ttm") {
79
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
80
- return LogicalType::TIME;
70
+ return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::MILLISECONDS);
81
71
  } else if (format == "ttu") {
82
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
83
- return LogicalType::TIME;
72
+ return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::MICROSECONDS);
84
73
  } else if (format == "ttn") {
85
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
86
- return LogicalType::TIME;
74
+ return make_uniq<ArrowType>(LogicalType::TIME, ArrowDateTimeType::NANOSECONDS);
87
75
  } else if (format == "tDs") {
88
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
89
- return LogicalType::INTERVAL;
76
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::SECONDS);
90
77
  } else if (format == "tDm") {
91
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
92
- return LogicalType::INTERVAL;
78
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MILLISECONDS);
93
79
  } else if (format == "tDu") {
94
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
95
- return LogicalType::INTERVAL;
80
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MICROSECONDS);
96
81
  } else if (format == "tDn") {
97
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
98
- return LogicalType::INTERVAL;
82
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::NANOSECONDS);
99
83
  } else if (format == "tiD") {
100
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::DAYS);
101
- return LogicalType::INTERVAL;
84
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::DAYS);
102
85
  } else if (format == "tiM") {
103
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTHS);
104
- return LogicalType::INTERVAL;
86
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MONTHS);
105
87
  } else if (format == "tin") {
106
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MONTH_DAY_NANO);
107
- return LogicalType::INTERVAL;
88
+ return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MONTH_DAY_NANO);
108
89
  } else if (format == "+l") {
109
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
110
- auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
111
- return LogicalType::LIST(child_type);
90
+ auto child_type = GetArrowLogicalType(*schema.children[0]);
91
+ auto list_type =
92
+ make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::NORMAL);
93
+ list_type->AddChild(std::move(child_type));
94
+ return list_type;
112
95
  } else if (format == "+L") {
113
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
114
- auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
115
- return LogicalType::LIST(child_type);
96
+ auto child_type = GetArrowLogicalType(*schema.children[0]);
97
+ auto list_type =
98
+ make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::SUPER_SIZE);
99
+ list_type->AddChild(std::move(child_type));
100
+ return list_type;
116
101
  } else if (format[0] == '+' && format[1] == 'w') {
117
102
  std::string parameters = format.substr(format.find(':') + 1);
118
103
  idx_t fixed_size = std::stoi(parameters);
119
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
120
- auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
121
- return LogicalType::LIST(child_type);
104
+ auto child_type = GetArrowLogicalType(*schema.children[0]);
105
+ auto list_type = make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), fixed_size);
106
+ list_type->AddChild(std::move(child_type));
107
+ return list_type;
122
108
  } else if (format == "+s") {
123
109
  child_list_t<LogicalType> child_types;
110
+ vector<unique_ptr<ArrowType>> children;
124
111
  for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
125
- auto child_type = GetArrowLogicalType(*schema.children[type_idx], arrow_convert_data, col_idx);
126
- child_types.emplace_back(schema.children[type_idx]->name, child_type);
112
+ children.emplace_back(GetArrowLogicalType(*schema.children[type_idx]));
113
+ child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType());
127
114
  }
128
- return LogicalType::STRUCT(child_types);
129
-
115
+ auto struct_type = make_uniq<ArrowType>(LogicalType::STRUCT(std::move(child_types)));
116
+ struct_type->AssignChildren(std::move(children));
117
+ return struct_type;
130
118
  } else if (format[0] == '+' && format[1] == 'u') {
131
119
  if (format[2] != 's') {
132
120
  throw NotImplementedException("Unsupported Internal Arrow Type: \"%c\" Union", format[2]);
@@ -138,48 +126,59 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
138
126
  auto type_ids = StringUtil::Split(format.substr(prefix.size()), ',');
139
127
 
140
128
  child_list_t<LogicalType> members;
129
+ vector<unique_ptr<ArrowType>> children;
141
130
  for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
142
131
  auto type = schema.children[type_idx];
143
132
 
144
- members.emplace_back(type->name, GetArrowLogicalType(*type, arrow_convert_data, col_idx));
133
+ children.emplace_back(GetArrowLogicalType(*type));
134
+ members.emplace_back(type->name, children.back()->GetDuckType());
145
135
  }
146
136
 
147
- return LogicalType::UNION(members);
148
-
137
+ auto union_type = make_uniq<ArrowType>(LogicalType::UNION(members));
138
+ union_type->AssignChildren(std::move(children));
139
+ return union_type;
149
140
  } else if (format == "+m") {
150
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
151
-
152
141
  auto &arrow_struct_type = *schema.children[0];
153
142
  D_ASSERT(arrow_struct_type.n_children == 2);
154
- auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0], arrow_convert_data, col_idx);
155
- auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
156
- return LogicalType::MAP(key_type, value_type);
143
+ auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0]);
144
+ auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1]);
145
+ auto map_type = make_uniq<ArrowType>(LogicalType::MAP(key_type->GetDuckType(), value_type->GetDuckType()),
146
+ ArrowVariableSizeType::NORMAL);
147
+ child_list_t<LogicalType> key_value;
148
+ key_value.emplace_back(std::make_pair("key", key_type->GetDuckType()));
149
+ key_value.emplace_back(std::make_pair("value", value_type->GetDuckType()));
150
+
151
+ auto inner_struct =
152
+ make_uniq<ArrowType>(LogicalType::STRUCT(std::move(key_value)), ArrowVariableSizeType::NORMAL);
153
+ vector<unique_ptr<ArrowType>> children;
154
+ children.reserve(2);
155
+ children.push_back(std::move(key_type));
156
+ children.push_back(std::move(value_type));
157
+ inner_struct->AssignChildren(std::move(children));
158
+ map_type->AddChild(std::move(inner_struct));
159
+ return map_type;
157
160
  } else if (format == "z") {
158
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
159
- return LogicalType::BLOB;
161
+ return make_uniq<ArrowType>(LogicalType::BLOB, ArrowVariableSizeType::NORMAL);
160
162
  } else if (format == "Z") {
161
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::SUPER_SIZE, 0);
162
- return LogicalType::BLOB;
163
+ return make_uniq<ArrowType>(LogicalType::BLOB, ArrowVariableSizeType::SUPER_SIZE);
163
164
  } else if (format[0] == 'w') {
164
165
  std::string parameters = format.substr(format.find(':') + 1);
165
166
  idx_t fixed_size = std::stoi(parameters);
166
- convert_data.variable_sz_type.emplace_back(ArrowVariableSizeType::FIXED_SIZE, fixed_size);
167
- return LogicalType::BLOB;
167
+ return make_uniq<ArrowType>(LogicalType::BLOB, fixed_size);
168
168
  } else if (format[0] == 't' && format[1] == 's') {
169
169
  // Timestamp with Timezone
170
+ // TODO right now we just get the UTC value. We probably want to support this properly in the future
170
171
  if (format[2] == 'n') {
171
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::NANOSECONDS);
172
+ return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::NANOSECONDS);
172
173
  } else if (format[2] == 'u') {
173
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MICROSECONDS);
174
+ return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::MICROSECONDS);
174
175
  } else if (format[2] == 'm') {
175
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::MILLISECONDS);
176
+ return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::MILLISECONDS);
176
177
  } else if (format[2] == 's') {
177
- convert_data.date_time_precision.emplace_back(ArrowDateTimeType::SECONDS);
178
+ return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, ArrowDateTimeType::SECONDS);
178
179
  } else {
179
180
  throw NotImplementedException(" Timestamptz precision of not accepted");
180
181
  }
181
- // TODO right now we just get the UTC value. We probably want to support this properly in the future
182
- return LogicalType::TIMESTAMP_TZ;
183
182
  } else {
184
183
  throw NotImplementedException("Unsupported Internal Arrow Type %s", format);
185
184
  }
@@ -224,13 +223,17 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
224
223
  if (!schema.release) {
225
224
  throw InvalidInputException("arrow_scan: released schema passed");
226
225
  }
226
+ auto arrow_type = GetArrowLogicalType(schema);
227
227
  if (schema.dictionary) {
228
- auto logical_type = GetArrowLogicalType(schema, res->arrow_convert_data, col_idx);
229
- res->arrow_convert_data[col_idx] = make_uniq<ArrowConvertData>(std::move(logical_type));
230
- return_types.emplace_back(GetArrowLogicalType(*schema.dictionary, res->arrow_convert_data, col_idx));
228
+ auto logical_type = arrow_type->GetDuckType();
229
+ auto dictionary = GetArrowLogicalType(*schema.dictionary);
230
+ return_types.emplace_back(dictionary->GetDuckType());
231
+ // The dictionary might have different attributes (size type, datetime precision, etc..)
232
+ arrow_type->SetDictionary(std::move(dictionary));
231
233
  } else {
232
- return_types.emplace_back(GetArrowLogicalType(schema, res->arrow_convert_data, col_idx));
234
+ return_types.emplace_back(arrow_type->GetDuckType());
233
235
  }
236
+ res->arrow_table.AddColumn(col_idx, std::move(arrow_type));
234
237
  auto format = string(schema.format);
235
238
  auto name = string(schema.name);
236
239
  if (name.empty()) {
@@ -348,11 +351,11 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
348
351
  if (global_state.CanRemoveFilterColumns()) {
349
352
  state.all_columns.Reset();
350
353
  state.all_columns.SetCardinality(output_size);
351
- ArrowToDuckDB(state, data.arrow_convert_data, state.all_columns, data.lines_read - output_size);
354
+ ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, data.lines_read - output_size);
352
355
  output.ReferenceColumns(state.all_columns, global_state.projection_ids);
353
356
  } else {
354
357
  output.SetCardinality(output_size);
355
- ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
358
+ ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, data.lines_read - output_size);
356
359
  }
357
360
 
358
361
  output.Verify();
@@ -5,16 +5,6 @@
5
5
  #include "duckdb/common/types/arrow_aux_data.hpp"
6
6
  #include "duckdb/function/scalar/nested_functions.hpp"
7
7
 
8
- namespace {
9
- using duckdb::idx_t;
10
- struct ArrowConvertDataIndices {
11
- //! The index that refers to 'variable_sz_type' in ArrowConvertData
12
- idx_t variable_sized_index;
13
- //! The index that refers to 'date_time_precision' in ArrowConvertData
14
- idx_t datetime_precision_index;
15
- };
16
- } // namespace
17
-
18
8
  namespace duckdb {
19
9
 
20
10
  static void ShiftRight(unsigned char *ar, int size, int shift) {
@@ -91,35 +81,33 @@ static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalSta
91
81
  }
92
82
 
93
83
  static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
94
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
95
- idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset = -1,
84
+ const ArrowType &arrow_type, int64_t nested_offset = -1,
96
85
  ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
97
86
 
98
87
  static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
99
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
100
- idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
101
- ValidityMask *parent_mask) {
102
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
88
+ const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask) {
89
+ auto size_type = arrow_type.GetSizeType();
103
90
  idx_t list_size = 0;
104
91
  SetValidityMask(vector, array, scan_state, size, nested_offset);
105
92
  idx_t start_offset = 0;
106
93
  idx_t cur_offset = 0;
107
- if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) {
94
+ if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
95
+ auto fixed_size = arrow_type.FixedSize();
108
96
  //! Have to check validity mask before setting this up
109
- idx_t offset = (scan_state.chunk_offset + array.offset) * original_type.second;
97
+ idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
110
98
  if (nested_offset != -1) {
111
- offset = original_type.second * nested_offset;
99
+ offset = fixed_size * nested_offset;
112
100
  }
113
101
  start_offset = offset;
114
102
  auto list_data = FlatVector::GetData<list_entry_t>(vector);
115
103
  for (idx_t i = 0; i < size; i++) {
116
104
  auto &le = list_data[i];
117
105
  le.offset = cur_offset;
118
- le.length = original_type.second;
119
- cur_offset += original_type.second;
106
+ le.length = fixed_size;
107
+ cur_offset += fixed_size;
120
108
  }
121
109
  list_size = start_offset + cur_offset;
122
- } else if (original_type.first == ArrowVariableSizeType::NORMAL) {
110
+ } else if (size_type == ArrowVariableSizeType::NORMAL) {
123
111
  auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
124
112
  if (nested_offset != -1) {
125
113
  offsets = ArrowBufferData<uint32_t>(array, 1) + nested_offset;
@@ -165,24 +153,22 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalS
165
153
  }
166
154
  }
167
155
  if (list_size == 0 && start_offset == 0) {
168
- ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_convert_data, col_idx,
169
- arrow_convert_idx, -1);
156
+ ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], -1);
170
157
  } else {
171
- ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_convert_data, col_idx,
172
- arrow_convert_idx, start_offset);
158
+ ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], start_offset);
173
159
  }
174
160
  }
175
161
 
176
162
  static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
177
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
178
- idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset) {
179
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
163
+ const ArrowType &arrow_type, int64_t nested_offset) {
164
+ auto size_type = arrow_type.GetSizeType();
180
165
  SetValidityMask(vector, array, scan_state, size, nested_offset);
181
- if (original_type.first == ArrowVariableSizeType::FIXED_SIZE) {
166
+ if (size_type == ArrowVariableSizeType::FIXED_SIZE) {
167
+ auto fixed_size = arrow_type.FixedSize();
182
168
  //! Have to check validity mask before setting this up
183
- idx_t offset = (scan_state.chunk_offset + array.offset) * original_type.second;
169
+ idx_t offset = (scan_state.chunk_offset + array.offset) * fixed_size;
184
170
  if (nested_offset != -1) {
185
- offset = original_type.second * nested_offset;
171
+ offset = fixed_size * nested_offset;
186
172
  }
187
173
  auto cdata = ArrowBufferData<char>(array, 1);
188
174
  for (idx_t row_idx = 0; row_idx < size; row_idx++) {
@@ -190,11 +176,11 @@ static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalS
190
176
  continue;
191
177
  }
192
178
  auto bptr = cdata + offset;
193
- auto blob_len = original_type.second;
179
+ auto blob_len = fixed_size;
194
180
  FlatVector::GetData<string_t>(vector)[row_idx] = StringVector::AddStringOrBlob(vector, bptr, blob_len);
195
181
  offset += blob_len;
196
182
  }
197
- } else if (original_type.first == ArrowVariableSizeType::NORMAL) {
183
+ } else if (size_type == ArrowVariableSizeType::NORMAL) {
198
184
  auto offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + scan_state.chunk_offset;
199
185
  if (nested_offset != -1) {
200
186
  offsets = ArrowBufferData<uint32_t>(array, 1) + array.offset + nested_offset;
@@ -358,9 +344,8 @@ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, A
358
344
  }
359
345
 
360
346
  static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
361
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
362
- idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx, int64_t nested_offset,
363
- ValidityMask *parent_mask, uint64_t parent_offset) {
347
+ const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
348
+ uint64_t parent_offset) {
364
349
  switch (vector.GetType().id()) {
365
350
  case LogicalTypeId::SQLNULL:
366
351
  vector.Reference(Value());
@@ -412,9 +397,9 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
412
397
  break;
413
398
  }
414
399
  case LogicalTypeId::VARCHAR: {
415
- auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
400
+ auto size_type = arrow_type.GetSizeType();
416
401
  auto cdata = ArrowBufferData<char>(array, 2);
417
- if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) {
402
+ if (size_type == ArrowVariableSizeType::SUPER_SIZE) {
418
403
  auto offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + scan_state.chunk_offset;
419
404
  if (nested_offset != -1) {
420
405
  offsets = ArrowBufferData<uint64_t>(array, 1) + array.offset + nested_offset;
@@ -430,7 +415,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
430
415
  break;
431
416
  }
432
417
  case LogicalTypeId::DATE: {
433
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
418
+
419
+ auto precision = arrow_type.GetDateTimeType();
434
420
  switch (precision) {
435
421
  case ArrowDateTimeType::DAYS: {
436
422
  DirectConversion(vector, array, scan_state, nested_offset, parent_offset);
@@ -454,7 +440,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
454
440
  break;
455
441
  }
456
442
  case LogicalTypeId::TIME: {
457
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
443
+ auto precision = arrow_type.GetDateTimeType();
458
444
  switch (precision) {
459
445
  case ArrowDateTimeType::SECONDS: {
460
446
  TimeConversion<int32_t>(vector, array, scan_state, nested_offset, size, 1000000);
@@ -485,7 +471,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
485
471
  break;
486
472
  }
487
473
  case LogicalTypeId::TIMESTAMP_TZ: {
488
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
474
+ auto precision = arrow_type.GetDateTimeType();
489
475
  switch (precision) {
490
476
  case ArrowDateTimeType::SECONDS: {
491
477
  TimestampTZConversion(vector, array, scan_state, nested_offset, size, 1000000);
@@ -516,7 +502,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
516
502
  break;
517
503
  }
518
504
  case LogicalTypeId::INTERVAL: {
519
- auto precision = arrow_convert_data[col_idx]->date_time_precision[arrow_convert_idx.datetime_precision_index++];
505
+ auto precision = arrow_type.GetDateTimeType();
520
506
  switch (precision) {
521
507
  case ArrowDateTimeType::SECONDS: {
522
508
  IntervalConversionUs(vector, array, scan_state, nested_offset, size, 1000000);
@@ -611,18 +597,15 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
611
597
  break;
612
598
  }
613
599
  case LogicalTypeId::BLOB: {
614
- ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
615
- nested_offset);
600
+ ArrowToDuckDBBlob(vector, array, scan_state, size, arrow_type, nested_offset);
616
601
  break;
617
602
  }
618
603
  case LogicalTypeId::LIST: {
619
- ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
620
- nested_offset, parent_mask);
604
+ ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
621
605
  break;
622
606
  }
623
607
  case LogicalTypeId::MAP: {
624
- ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
625
- nested_offset, parent_mask);
608
+ ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
626
609
  ArrowToDuckDBMapVerify(vector, size);
627
610
  break;
628
611
  }
@@ -630,7 +613,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
630
613
  //! Fill the children
631
614
  auto &child_entries = StructVector::GetEntries(vector);
632
615
  auto &struct_validity_mask = FlatVector::Validity(vector);
633
- for (idx_t type_idx = 0; type_idx < (idx_t)array.n_children; type_idx++) {
616
+ for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
634
617
  SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset);
635
618
  if (!struct_validity_mask.AllValid()) {
636
619
  auto &child_validity_mark = FlatVector::Validity(*child_entries[type_idx]);
@@ -641,8 +624,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
641
624
  }
642
625
  }
643
626
  ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size,
644
- arrow_convert_data, col_idx, arrow_convert_idx, nested_offset, &struct_validity_mask,
645
- array.offset);
627
+ arrow_type[type_idx], nested_offset, &struct_validity_mask, array.offset);
646
628
  }
647
629
  break;
648
630
  }
@@ -654,14 +636,13 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
654
636
  auto &validity_mask = FlatVector::Validity(vector);
655
637
 
656
638
  duckdb::vector<Vector> children;
657
- for (idx_t type_idx = 0; type_idx < (::idx_t)array.n_children; type_idx++) {
639
+ for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
658
640
  Vector child(members[type_idx].second);
659
641
  auto arrow_array = array.children[type_idx];
660
642
 
661
643
  SetValidityMask(child, *arrow_array, scan_state, size, nested_offset);
662
644
 
663
- ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
664
- nested_offset, &validity_mask);
645
+ ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, arrow_type, nested_offset, &validity_mask);
665
646
 
666
647
  children.push_back(std::move(child));
667
648
  }
@@ -809,20 +790,18 @@ static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, Logic
809
790
  }
810
791
 
811
792
  static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
812
- idx_t size,
813
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
814
- idx_t col_idx, ArrowConvertDataIndices &arrow_convert_idx) {
793
+ idx_t size, const ArrowType &arrow_type, idx_t col_idx) {
815
794
  SelectionVector sel;
816
795
  auto &dict_vectors = scan_state.arrow_dictionary_vectors;
817
796
  if (!dict_vectors.count(col_idx)) {
818
797
  //! We need to set the dictionary data for this column
819
798
  auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
820
799
  SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0, array.null_count > 0);
821
- ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length, arrow_convert_data,
822
- col_idx, arrow_convert_idx);
800
+ ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length,
801
+ arrow_type.GetDictionary());
823
802
  dict_vectors[col_idx] = std::move(base_vector);
824
803
  }
825
- auto dictionary_type = arrow_convert_data[col_idx]->dictionary_type;
804
+ auto dictionary_type = arrow_type.GetDuckType();
826
805
  //! Get Pointer to Indices of Dictionary
827
806
  auto indices = ArrowBufferData<data_t>(array, 1) +
828
807
  GetTypeIdSize(dictionary_type.InternalType()) * (scan_state.chunk_offset + array.offset);
@@ -836,8 +815,7 @@ static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, Arr
836
815
  vector.Slice(*dict_vectors[col_idx], sel, size);
837
816
  }
838
817
 
839
- void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
840
- unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
818
+ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const arrow_column_map_t &arrow_convert_data,
841
819
  DataChunk &output, idx_t start, bool arrow_scan_is_projected) {
842
820
  for (idx_t idx = 0; idx < output.ColumnCount(); idx++) {
843
821
  auto col_idx = scan_state.column_ids[idx];
@@ -851,7 +829,6 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
851
829
  continue;
852
830
  }
853
831
 
854
- ArrowConvertDataIndices arrow_convert_idx {0, 0};
855
832
  auto &array = *scan_state.chunk->arrow_array.children[arrow_array_idx];
856
833
  if (!array.release) {
857
834
  throw InvalidInputException("arrow_scan: released array passed");
@@ -861,13 +838,13 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
861
838
  }
862
839
  // Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
863
840
  output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
841
+ D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
842
+ auto &arrow_type = *arrow_convert_data.at(col_idx);
864
843
  if (array.dictionary) {
865
- ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_convert_data,
866
- col_idx, arrow_convert_idx);
844
+ ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_type, col_idx);
867
845
  } else {
868
846
  SetValidityMask(output.data[idx], array, scan_state, output.size(), -1);
869
- ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(), arrow_convert_data, col_idx,
870
- arrow_convert_idx);
847
+ ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(), arrow_type);
871
848
  }
872
849
  }
873
850
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev2399"
2
+ #define DUCKDB_VERSION "0.8.2-dev2669"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "20ad35b3fa"
5
+ #define DUCKDB_SOURCE_ID "c6f10389c3"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/common/unordered_set.hpp"
13
13
  #include "duckdb/common/string.hpp"
14
14
  #include "duckdb/common/string_util.hpp"
15
+ #include "duckdb/common/helper.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
 
@@ -200,6 +200,8 @@ enum class PhysicalType : uint8_t;
200
200
 
201
201
  enum class PragmaType : uint8_t;
202
202
 
203
+ enum class PreparedParamType : uint8_t;
204
+
203
205
  enum class ProfilerPrintFormat : uint8_t;
204
206
 
205
207
  enum class QueryNodeType : uint8_t;
@@ -533,6 +535,9 @@ const char* EnumUtil::ToChars<PhysicalType>(PhysicalType value);
533
535
  template<>
534
536
  const char* EnumUtil::ToChars<PragmaType>(PragmaType value);
535
537
 
538
+ template<>
539
+ const char* EnumUtil::ToChars<PreparedParamType>(PreparedParamType value);
540
+
536
541
  template<>
537
542
  const char* EnumUtil::ToChars<ProfilerPrintFormat>(ProfilerPrintFormat value);
538
543
 
@@ -906,6 +911,9 @@ PhysicalType EnumUtil::FromString<PhysicalType>(const char *value);
906
911
  template<>
907
912
  PragmaType EnumUtil::FromString<PragmaType>(const char *value);
908
913
 
914
+ template<>
915
+ PreparedParamType EnumUtil::FromString<PreparedParamType>(const char *value);
916
+
909
917
  template<>
910
918
  ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value);
911
919