duckdb 0.9.2-dev22.0 → 0.9.2-dev26.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-timebucket.cpp +7 -10
  3. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -0
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +11 -18
  5. package/src/duckdb/extension/json/json_scan.cpp +10 -5
  6. package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -2
  7. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +6 -12
  8. package/src/duckdb/src/catalog/catalog_set.cpp +3 -4
  9. package/src/duckdb/src/common/arrow/appender/union_data.cpp +1 -1
  10. package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -8
  11. package/src/duckdb/src/common/arrow/arrow_converter.cpp +5 -1
  12. package/src/duckdb/src/common/enum_util.cpp +67 -0
  13. package/src/duckdb/src/common/file_system.cpp +5 -1
  14. package/src/duckdb/src/common/hive_partitioning.cpp +6 -3
  15. package/src/duckdb/src/common/multi_file_reader.cpp +4 -2
  16. package/src/duckdb/src/common/types/list_segment.cpp +4 -0
  17. package/src/duckdb/src/common/types/vector.cpp +66 -34
  18. package/src/duckdb/src/common/types.cpp +3 -1
  19. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +84 -25
  20. package/src/duckdb/src/core_functions/function_list.cpp +2 -1
  21. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -1
  22. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +23 -0
  23. package/src/duckdb/src/core_functions/scalar/string/jaccard.cpp +16 -23
  24. package/src/duckdb/src/execution/aggregate_hashtable.cpp +27 -18
  25. package/src/duckdb/src/execution/index/art/art_key.cpp +4 -4
  26. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
  27. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +5 -2
  28. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +4 -3
  29. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +25 -4
  30. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +5 -2
  31. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -1
  32. package/src/duckdb/src/execution/physical_operator.cpp +2 -1
  33. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +5 -0
  34. package/src/duckdb/src/execution/window_executor.cpp +13 -1
  35. package/src/duckdb/src/function/cast/union/from_struct.cpp +24 -7
  36. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
  37. package/src/duckdb/src/function/function_set.cpp +1 -1
  38. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  39. package/src/duckdb/src/function/scalar/string/concat.cpp +4 -1
  40. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +32 -0
  41. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +46 -2
  42. package/src/duckdb/src/function/table/arrow.cpp +19 -17
  43. package/src/duckdb/src/function/table/arrow_conversion.cpp +67 -31
  44. package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
  45. package/src/duckdb/src/function/table/system/pragma_user_agent.cpp +50 -0
  46. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  47. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  48. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +8 -2
  50. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +74 -4
  51. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +82 -3
  52. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +19 -9
  53. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +16 -0
  54. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +6 -0
  55. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +7 -0
  57. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +3 -0
  59. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +14 -2
  60. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +6 -0
  61. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_enum.hpp +21 -0
  62. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +5 -2
  63. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +6 -3
  64. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +1 -1
  66. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -0
  69. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +3 -1
  71. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +37 -2
  72. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  73. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +5 -4
  76. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +12 -0
  77. package/src/duckdb/src/include/duckdb/main/settings.hpp +18 -0
  78. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +0 -2
  80. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +8 -0
  81. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -0
  82. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -1
  83. package/src/duckdb/src/include/duckdb/parser/statement/create_statement.hpp +1 -0
  84. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  85. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +5 -7
  86. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -1
  87. package/src/duckdb/src/include/duckdb.h +1 -1
  88. package/src/duckdb/src/main/capi/config-c.cpp +1 -0
  89. package/src/duckdb/src/main/capi/duckdb-c.cpp +9 -1
  90. package/src/duckdb/src/main/config.cpp +18 -0
  91. package/src/duckdb/src/main/database.cpp +1 -0
  92. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  93. package/src/duckdb/src/main/extension/extension_helper.cpp +5 -4
  94. package/src/duckdb/src/main/settings/settings.cpp +49 -0
  95. package/src/duckdb/src/optimizer/expression_rewriter.cpp +0 -8
  96. package/src/duckdb/src/optimizer/filter_combiner.cpp +37 -23
  97. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +7 -4
  98. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +5 -4
  99. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +15 -4
  100. package/src/duckdb/src/parallel/pipeline_executor.cpp +81 -40
  101. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +27 -0
  102. package/src/duckdb/src/parser/statement/create_statement.cpp +4 -0
  103. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +16 -3
  104. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +7 -0
  105. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -0
  107. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
  108. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +76 -2
  109. package/src/duckdb/src/storage/data_table.cpp +7 -1
  110. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +14 -0
  111. package/src/duckdb/src/storage/storage_info.cpp +2 -1
  112. package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -3
  113. package/src/duckdb/src/transaction/commit_state.cpp +1 -0
  114. package/src/duckdb/third_party/parquet/parquet_types.cpp +224 -221
  115. package/src/duckdb/third_party/parquet/parquet_types.h +0 -14
  116. package/src/duckdb/ub_src_common_arrow_appender.cpp +0 -4
  117. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  118. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  119. package/test/columns.test.ts +1 -1
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.9.2-dev22.0",
5
+ "version": "0.9.2-dev26.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -76,24 +76,21 @@ struct ICUTimeBucket : public ICUDateFunc {
76
76
 
77
77
  static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
78
78
  const timestamp_t origin, icu::Calendar *calendar) {
79
- const auto trunc_days = TruncationFactory(DatePartSpecifier::DAY);
80
79
  const auto sub_days = SubtractFactory(DatePartSpecifier::DAY);
81
80
 
82
- uint64_t tmp_micros = SetTime(calendar, ts);
83
- trunc_days(calendar, tmp_micros);
84
- timestamp_t truncated_ts = GetTimeUnsafe(calendar, tmp_micros);
85
-
86
- int64_t ts_days = sub_days(calendar, origin, truncated_ts);
81
+ int64_t ts_days = sub_days(calendar, origin, ts);
87
82
  int64_t result_days = (ts_days / bucket_width_days) * bucket_width_days;
88
83
  if (result_days < NumericLimits<int32_t>::Minimum() || result_days > NumericLimits<int32_t>::Maximum()) {
89
84
  throw OutOfRangeException("Timestamp out of range");
90
85
  }
91
- if (ts_days < 0 && ts_days % bucket_width_days != 0) {
92
- result_days =
93
- SubtractOperatorOverflowCheck::Operation<int32_t, int32_t, int32_t>(result_days, bucket_width_days);
86
+ timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
87
+ if (ts < bucket) {
88
+ D_ASSERT(ts < origin);
89
+ bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
90
+ D_ASSERT(ts > bucket);
94
91
  }
95
92
 
96
- return Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
93
+ return bucket;
97
94
  }
98
95
 
99
96
  static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
@@ -81,6 +81,9 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data
81
81
  break;
82
82
  }
83
83
 
84
+ // What PG reports is the total offset for today,
85
+ // which is the ICU total offset (i.e., "raw") plus the DST offset.
86
+ raw_offset_ms += dst_offset_ms;
84
87
  output.SetValue(2, index, Value::INTERVAL(Interval::FromMicro(raw_offset_ms * Interval::MICROS_PER_MSEC)));
85
88
  output.SetValue(3, index, Value(dst_offset_ms != 0));
86
89
  ++index;
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
23
23
  }
24
24
 
25
25
  void JSONFileHandle::Close() {
26
- if (IsOpen() && file_handle->OnDiskFile()) {
26
+ if (IsOpen() && !file_handle->IsPipe()) {
27
27
  file_handle->Close();
28
28
  file_handle = nullptr;
29
29
  }
@@ -72,30 +72,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
72
72
  D_ASSERT(size != 0);
73
73
  if (plain_file_source) {
74
74
  file_handle->Read(pointer, size, position);
75
- actual_reads++;
76
-
77
- return;
78
- }
79
-
80
- if (sample_run) { // Cache the buffer
75
+ } else if (sample_run) { // Cache the buffer
81
76
  file_handle->Read(pointer, size, position);
82
- actual_reads++;
83
77
 
84
78
  cached_buffers.emplace_back(allocator.Allocate(size));
85
79
  memcpy(cached_buffers.back().get(), pointer, size);
86
80
  cached_size += size;
81
+ } else {
82
+ if (!cached_buffers.empty() || position < cached_size) {
83
+ ReadFromCache(pointer, size, position);
84
+ }
87
85
 
88
- return;
89
- }
90
-
91
- if (!cached_buffers.empty() || position < cached_size) {
92
- ReadFromCache(pointer, size, position);
93
- actual_reads++;
86
+ if (size != 0) {
87
+ file_handle->Read(pointer, size, position);
88
+ }
94
89
  }
95
-
96
- if (size != 0) {
97
- file_handle->Read(pointer, size, position);
98
- actual_reads++;
90
+ if (++actual_reads > requested_reads) {
91
+ throw InternalException("JSONFileHandle performed more actual reads than requested reads");
99
92
  }
100
93
  }
101
94
 
@@ -214,17 +214,22 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
214
214
 
215
215
  idx_t JSONGlobalTableFunctionState::MaxThreads() const {
216
216
  auto &bind_data = state.bind_data;
217
- if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
218
- return state.system_threads;
219
- }
220
217
 
221
218
  if (!state.json_readers.empty() && state.json_readers[0]->HasFileHandle()) {
219
+ // We opened and auto-detected a file, so we can get a better estimate
222
220
  auto &reader = *state.json_readers[0];
223
- if (reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) { // Auto-detected NDJSON
224
- return state.system_threads;
221
+ if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED ||
222
+ reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
223
+ return MaxValue<idx_t>(state.json_readers[0]->GetFileHandle().FileSize() / bind_data.maximum_object_size,
224
+ 1);
225
225
  }
226
226
  }
227
227
 
228
+ if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
229
+ // We haven't opened any files, so this is our best bet
230
+ return state.system_threads;
231
+ }
232
+
228
233
  // One reader per file
229
234
  return bind_data.files.size();
230
235
  }
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
740
740
  }
741
741
  }
742
742
 
743
- unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
744
- vector<LogicalType> &sql_types) {
743
+ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
744
+ const vector<LogicalType> &sql_types) {
745
745
  D_ASSERT(names.size() == sql_types.size());
746
746
  bool row_group_size_bytes_set = false;
747
747
  auto bind_data = make_uniq<ParquetWriteBindData>();
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
32
32
  result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
33
33
  result->aliases = aliases;
34
34
  result->types = types;
35
+ result->temporary = temporary;
35
36
  return std::move(result);
36
37
  }
37
38
 
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
58
59
  //! Return empty sql with view name so pragma view_tables don't complain
59
60
  return sql;
60
61
  }
61
- return sql + "\n;";
62
+ auto info = GetInfo();
63
+ auto result = info->ToString();
64
+ return result + ";\n";
62
65
  }
63
66
 
64
67
  unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
65
68
  D_ASSERT(!internal);
66
- CreateViewInfo create_info(schema, name);
67
- create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
68
- for (idx_t i = 0; i < aliases.size(); i++) {
69
- create_info.aliases.push_back(aliases[i]);
70
- }
71
- for (idx_t i = 0; i < types.size(); i++) {
72
- create_info.types.push_back(types[i]);
73
- }
74
- create_info.temporary = temporary;
75
- create_info.sql = sql;
69
+ auto create_info = GetInfo();
76
70
 
77
- return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
71
+ return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
78
72
  }
79
73
 
80
74
  } // namespace duckdb
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
199
199
  bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
200
200
  // lock the catalog for writing
201
201
  lock_guard<mutex> write_lock(catalog.GetWriteLock());
202
+ // lock this catalog set to disallow reading
203
+ lock_guard<mutex> read_lock(catalog_lock);
202
204
 
203
205
  // first check if the entry exists in the unordered set
204
206
  EntryIndex entry_index;
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
210
212
  throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
211
213
  }
212
214
 
213
- // lock this catalog set to disallow reading
214
- lock_guard<mutex> read_lock(catalog_lock);
215
-
216
215
  // create a new entry and replace the currently stored one
217
216
  // set the timestamp to the timestamp of the current transaction
218
217
  // and point it to the updated table node
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
316
315
  bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
317
316
  // lock the catalog for writing
318
317
  lock_guard<mutex> write_lock(catalog.GetWriteLock());
318
+ lock_guard<mutex> read_lock(catalog_lock);
319
319
  // we can only delete an entry that exists
320
320
  EntryIndex entry_index;
321
321
  auto entry = GetEntryInternal(transaction, name, &entry_index);
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
326
326
  throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
327
327
  }
328
328
 
329
- lock_guard<mutex> read_lock(catalog_lock);
330
329
  DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
331
330
  return true;
332
331
  }
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
24
24
 
25
25
  duckdb::vector<Vector> child_vectors;
26
26
  for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
27
- child_vectors.emplace_back(child.second);
27
+ child_vectors.emplace_back(child.second, size);
28
28
  }
29
29
 
30
30
  for (idx_t input_idx = from; input_idx < to; input_idx++) {
@@ -193,26 +193,26 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
193
193
  if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
194
194
  InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
195
195
  } else {
196
- InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, uint32_t>>(append_data);
196
+ InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, int32_t>>(append_data);
197
197
  }
198
198
  break;
199
199
  case LogicalTypeId::UUID:
200
200
  if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
201
201
  InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
202
202
  } else {
203
- InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, uint32_t>>(append_data);
203
+ InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
204
204
  }
205
205
  break;
206
206
  case LogicalTypeId::ENUM:
207
207
  switch (type.InternalType()) {
208
208
  case PhysicalType::UINT8:
209
- InitializeAppenderForType<ArrowEnumData<uint8_t>>(append_data);
209
+ InitializeAppenderForType<ArrowEnumData<int8_t>>(append_data);
210
210
  break;
211
211
  case PhysicalType::UINT16:
212
- InitializeAppenderForType<ArrowEnumData<uint16_t>>(append_data);
212
+ InitializeAppenderForType<ArrowEnumData<int16_t>>(append_data);
213
213
  break;
214
214
  case PhysicalType::UINT32:
215
- InitializeAppenderForType<ArrowEnumData<uint32_t>>(append_data);
215
+ InitializeAppenderForType<ArrowEnumData<int32_t>>(append_data);
216
216
  break;
217
217
  default:
218
218
  throw InternalException("Unsupported internal enum type");
@@ -227,11 +227,20 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
227
227
  case LogicalTypeId::STRUCT:
228
228
  InitializeAppenderForType<ArrowStructData>(append_data);
229
229
  break;
230
- case LogicalTypeId::LIST:
231
- InitializeAppenderForType<ArrowListData>(append_data);
230
+ case LogicalTypeId::LIST: {
231
+ if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
232
+ InitializeAppenderForType<ArrowListData<int64_t>>(append_data);
233
+ } else {
234
+ InitializeAppenderForType<ArrowListData<int32_t>>(append_data);
235
+ }
232
236
  break;
237
+ }
233
238
  case LogicalTypeId::MAP:
234
- InitializeAppenderForType<ArrowMapData>(append_data);
239
+ if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
240
+ InitializeAppenderForType<ArrowMapData<int64_t>>(append_data);
241
+ } else {
242
+ InitializeAppenderForType<ArrowMapData<int32_t>>(append_data);
243
+ }
235
244
  break;
236
245
  default:
237
246
  throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
@@ -187,7 +187,11 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
187
187
  break;
188
188
  }
189
189
  case LogicalTypeId::LIST: {
190
- child.format = "+l";
190
+ if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
191
+ child.format = "+L";
192
+ } else {
193
+ child.format = "+l";
194
+ }
191
195
  child.n_children = 1;
192
196
  root_holder.nested_children.emplace_back();
193
197
  root_holder.nested_children.back().resize(1);
@@ -64,6 +64,7 @@
64
64
  #include "duckdb/common/types/timestamp.hpp"
65
65
  #include "duckdb/common/types/vector.hpp"
66
66
  #include "duckdb/common/types/vector_buffer.hpp"
67
+ #include "duckdb/core_functions/aggregate/quantile_enum.hpp"
67
68
  #include "duckdb/execution/index/art/art.hpp"
68
69
  #include "duckdb/execution/index/art/node.hpp"
69
70
  #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
@@ -4571,6 +4572,44 @@ ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value)
4571
4572
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4572
4573
  }
4573
4574
 
4575
+ template<>
4576
+ const char* EnumUtil::ToChars<QuantileSerializationType>(QuantileSerializationType value) {
4577
+ switch(value) {
4578
+ case QuantileSerializationType::NON_DECIMAL:
4579
+ return "NON_DECIMAL";
4580
+ case QuantileSerializationType::DECIMAL_DISCRETE:
4581
+ return "DECIMAL_DISCRETE";
4582
+ case QuantileSerializationType::DECIMAL_DISCRETE_LIST:
4583
+ return "DECIMAL_DISCRETE_LIST";
4584
+ case QuantileSerializationType::DECIMAL_CONTINUOUS:
4585
+ return "DECIMAL_CONTINUOUS";
4586
+ case QuantileSerializationType::DECIMAL_CONTINUOUS_LIST:
4587
+ return "DECIMAL_CONTINUOUS_LIST";
4588
+ default:
4589
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
4590
+ }
4591
+ }
4592
+
4593
+ template<>
4594
+ QuantileSerializationType EnumUtil::FromString<QuantileSerializationType>(const char *value) {
4595
+ if (StringUtil::Equals(value, "NON_DECIMAL")) {
4596
+ return QuantileSerializationType::NON_DECIMAL;
4597
+ }
4598
+ if (StringUtil::Equals(value, "DECIMAL_DISCRETE")) {
4599
+ return QuantileSerializationType::DECIMAL_DISCRETE;
4600
+ }
4601
+ if (StringUtil::Equals(value, "DECIMAL_DISCRETE_LIST")) {
4602
+ return QuantileSerializationType::DECIMAL_DISCRETE_LIST;
4603
+ }
4604
+ if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS")) {
4605
+ return QuantileSerializationType::DECIMAL_CONTINUOUS;
4606
+ }
4607
+ if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS_LIST")) {
4608
+ return QuantileSerializationType::DECIMAL_CONTINUOUS_LIST;
4609
+ }
4610
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4611
+ }
4612
+
4574
4613
  template<>
4575
4614
  const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value) {
4576
4615
  switch(value) {
@@ -5118,6 +5157,29 @@ SinkFinalizeType EnumUtil::FromString<SinkFinalizeType>(const char *value) {
5118
5157
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
5119
5158
  }
5120
5159
 
5160
+ template<>
5161
+ const char* EnumUtil::ToChars<SinkNextBatchType>(SinkNextBatchType value) {
5162
+ switch(value) {
5163
+ case SinkNextBatchType::READY:
5164
+ return "READY";
5165
+ case SinkNextBatchType::BLOCKED:
5166
+ return "BLOCKED";
5167
+ default:
5168
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
5169
+ }
5170
+ }
5171
+
5172
+ template<>
5173
+ SinkNextBatchType EnumUtil::FromString<SinkNextBatchType>(const char *value) {
5174
+ if (StringUtil::Equals(value, "READY")) {
5175
+ return SinkNextBatchType::READY;
5176
+ }
5177
+ if (StringUtil::Equals(value, "BLOCKED")) {
5178
+ return SinkNextBatchType::BLOCKED;
5179
+ }
5180
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
5181
+ }
5182
+
5121
5183
  template<>
5122
5184
  const char* EnumUtil::ToChars<SinkResultType>(SinkResultType value) {
5123
5185
  switch(value) {
@@ -6010,6 +6072,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
6010
6072
  return "VALIDITY_OVERLAP";
6011
6073
  case UnionInvalidReason::TAG_MISMATCH:
6012
6074
  return "TAG_MISMATCH";
6075
+ case UnionInvalidReason::NULL_TAG:
6076
+ return "NULL_TAG";
6013
6077
  default:
6014
6078
  throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
6015
6079
  }
@@ -6032,6 +6096,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
6032
6096
  if (StringUtil::Equals(value, "TAG_MISMATCH")) {
6033
6097
  return UnionInvalidReason::TAG_MISMATCH;
6034
6098
  }
6099
+ if (StringUtil::Equals(value, "NULL_TAG")) {
6100
+ return UnionInvalidReason::NULL_TAG;
6101
+ }
6035
6102
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
6036
6103
  }
6037
6104
 
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
344
344
  }
345
345
 
346
346
  bool FileSystem::IsPipe(const string &filename) {
347
- throw NotImplementedException("%s: IsPipe is not implemented!", GetName());
347
+ return false;
348
348
  }
349
349
 
350
350
  void FileSystem::RemoveFile(const string &filename) {
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
500
500
  return file_system.CanSeek();
501
501
  }
502
502
 
503
+ bool FileHandle::IsPipe() {
504
+ return file_system.IsPipe(path);
505
+ }
506
+
503
507
  string FileHandle::ReadLine() {
504
508
  string result;
505
509
  char buffer[1];
@@ -64,7 +64,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
64
64
  // - s3://bucket/var1=value1/bla/bla/var2=value2
65
65
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
66
66
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
67
- const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
67
+ const string &HivePartitioning::RegexString() {
68
+ static string REGEX = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
69
+ return REGEX;
70
+ }
68
71
 
69
72
  std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_re2::RE2 &regex) {
70
73
  std::map<string, string> result;
@@ -79,7 +82,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_
79
82
  }
80
83
 
81
84
  std::map<string, string> HivePartitioning::Parse(const string &filename) {
82
- duckdb_re2::RE2 regex(REGEX_STRING);
85
+ duckdb_re2::RE2 regex(RegexString());
83
86
  return Parse(filename, regex);
84
87
  }
85
88
 
@@ -94,7 +97,7 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
94
97
  vector<bool> have_preserved_filter(filters.size(), false);
95
98
  vector<unique_ptr<Expression>> pruned_filters;
96
99
  unordered_set<idx_t> filters_applied_to_files;
97
- duckdb_re2::RE2 regex(REGEX_STRING);
100
+ duckdb_re2::RE2 regex(RegexString());
98
101
  auto table_index = get.table_index;
99
102
 
100
103
  if ((!filename_enabled && !hive_enabled) || filters.empty()) {
@@ -102,7 +102,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
102
102
 
103
103
  unordered_map<string, column_t> column_map;
104
104
  for (idx_t i = 0; i < get.column_ids.size(); i++) {
105
- column_map.insert({get.names[get.column_ids[i]], i});
105
+ if (!IsRowIdColumnId(get.column_ids[i])) {
106
+ column_map.insert({get.names[get.column_ids[i]], i});
107
+ }
106
108
  }
107
109
 
108
110
  auto start_files = files.size();
@@ -432,7 +434,7 @@ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, Cli
432
434
  }
433
435
  Value value(part.second);
434
436
  for (auto &candidate : candidates) {
435
- const bool success = value.TryCastAs(context, candidate);
437
+ const bool success = value.TryCastAs(context, candidate, true);
436
438
  if (success) {
437
439
  hive_types_schema[name] = candidate;
438
440
  break;
@@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
462
462
 
463
463
  void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
464
464
 
465
+ if (type.id() == LogicalTypeId::UNKNOWN) {
466
+ throw ParameterNotResolvedException();
467
+ }
468
+
465
469
  auto physical_type = type.InternalType();
466
470
  switch (physical_type) {
467
471
  case PhysicalType::BIT:
@@ -1131,9 +1131,12 @@ void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t cou
1131
1131
 
1132
1132
  void Vector::VerifyUnion(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
1133
1133
  #ifdef DEBUG
1134
+
1134
1135
  D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
1135
1136
  auto valid_check = UnionVector::CheckUnionValidity(vector_p, count, sel_p);
1136
- D_ASSERT(valid_check == UnionInvalidReason::VALID);
1137
+ if (valid_check != UnionInvalidReason::VALID) {
1138
+ throw InternalException("Union not valid, reason: %s", EnumUtil::ToString(valid_check));
1139
+ }
1137
1140
  #endif // DEBUG
1138
1141
  }
1139
1142
 
@@ -1250,7 +1253,8 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
1250
1253
  }
1251
1254
 
1252
1255
  if (vector->GetType().id() == LogicalTypeId::UNION) {
1253
- VerifyUnion(*vector, *sel, count);
1256
+ // Pass in raw vector
1257
+ VerifyUnion(vector_p, sel_p, count);
1254
1258
  }
1255
1259
  }
1256
1260
 
@@ -1911,7 +1915,13 @@ void UnionVector::SetToMember(Vector &union_vector, union_tag_t tag, Vector &mem
1911
1915
  // if the member vector is constant, we can set the union to constant as well
1912
1916
  union_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
1913
1917
  ConstantVector::GetData<union_tag_t>(tag_vector)[0] = tag;
1914
- ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
1918
+ if (keep_tags_for_null) {
1919
+ ConstantVector::SetNull(union_vector, false);
1920
+ ConstantVector::SetNull(tag_vector, false);
1921
+ } else {
1922
+ ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
1923
+ ConstantVector::SetNull(tag_vector, ConstantVector::IsNull(member_vector));
1924
+ }
1915
1925
 
1916
1926
  } else {
1917
1927
  // otherwise flatten and set to flatvector
@@ -1962,53 +1972,75 @@ union_tag_t UnionVector::GetTag(const Vector &vector, idx_t index) {
1962
1972
  return FlatVector::GetData<union_tag_t>(tag_vector)[index];
1963
1973
  }
1964
1974
 
1965
- UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector, idx_t count, const SelectionVector &sel) {
1966
- D_ASSERT(vector.GetType().id() == LogicalTypeId::UNION);
1967
- auto member_count = UnionType::GetMemberCount(vector.GetType());
1975
+ //! Raw selection vector passed in (not merged with any other selection vectors)
1976
+ UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector_p, idx_t count, const SelectionVector &sel_p) {
1977
+ D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
1978
+
1979
+ // Will contain the (possibly) merged selection vector
1980
+ const SelectionVector *sel = &sel_p;
1981
+ SelectionVector owned_sel;
1982
+ Vector *vector = &vector_p;
1983
+ if (vector->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
1984
+ // In the case of a dictionary vector, unwrap the Vector, and merge the selection vectors.
1985
+ auto &child = DictionaryVector::Child(*vector);
1986
+ D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR);
1987
+ auto &dict_sel = DictionaryVector::SelVector(*vector);
1988
+ // merge the selection vectors and verify the child
1989
+ auto new_buffer = dict_sel.Slice(*sel, count);
1990
+ owned_sel.Initialize(new_buffer);
1991
+ sel = &owned_sel;
1992
+ vector = &child;
1993
+ } else if (vector->GetVectorType() == VectorType::CONSTANT_VECTOR) {
1994
+ sel = ConstantVector::ZeroSelectionVector(count, owned_sel);
1995
+ }
1996
+
1997
+ auto member_count = UnionType::GetMemberCount(vector_p.GetType());
1968
1998
  if (member_count == 0) {
1969
1999
  return UnionInvalidReason::NO_MEMBERS;
1970
2000
  }
1971
2001
 
1972
- UnifiedVectorFormat union_vdata;
1973
- vector.ToUnifiedFormat(count, union_vdata);
2002
+ UnifiedVectorFormat vector_vdata;
2003
+ vector_p.ToUnifiedFormat(count, vector_vdata);
1974
2004
 
1975
- UnifiedVectorFormat tags_vdata;
1976
- auto &tag_vector = UnionVector::GetTags(vector);
1977
- tag_vector.ToUnifiedFormat(count, tags_vdata);
2005
+ auto &entries = StructVector::GetEntries(vector_p);
2006
+ duckdb::vector<UnifiedVectorFormat> child_vdata(entries.size());
2007
+ for (idx_t entry_idx = 0; entry_idx < entries.size(); entry_idx++) {
2008
+ auto &child = *entries[entry_idx];
2009
+ child.ToUnifiedFormat(count, child_vdata[entry_idx]);
2010
+ }
2011
+
2012
+ auto &tag_vdata = child_vdata[0];
1978
2013
 
1979
- // check that only one member is valid at a time
1980
2014
  for (idx_t row_idx = 0; row_idx < count; row_idx++) {
1981
- auto union_mapped_row_idx = sel.get_index(row_idx);
1982
- if (!union_vdata.validity.RowIsValid(union_mapped_row_idx)) {
1983
- continue;
1984
- }
2015
+ auto mapped_idx = sel->get_index(row_idx);
1985
2016
 
1986
- auto tag_mapped_row_idx = tags_vdata.sel->get_index(row_idx);
1987
- if (!tags_vdata.validity.RowIsValid(tag_mapped_row_idx)) {
2017
+ if (!vector_vdata.validity.RowIsValid(mapped_idx)) {
1988
2018
  continue;
1989
2019
  }
1990
2020
 
1991
- auto tag = (UnifiedVectorFormat::GetData<union_tag_t>(tags_vdata))[tag_mapped_row_idx];
2021
+ auto tag_idx = tag_vdata.sel->get_index(sel_p.get_index(row_idx));
2022
+ if (!tag_vdata.validity.RowIsValid(tag_idx)) {
2023
+ // we can't have NULL tags!
2024
+ return UnionInvalidReason::NULL_TAG;
2025
+ }
2026
+ auto tag = UnifiedVectorFormat::GetData<union_tag_t>(tag_vdata)[tag_idx];
1992
2027
  if (tag >= member_count) {
1993
2028
  return UnionInvalidReason::TAG_OUT_OF_RANGE;
1994
2029
  }
1995
2030
 
1996
2031
  bool found_valid = false;
1997
- for (idx_t member_idx = 0; member_idx < member_count; member_idx++) {
1998
-
1999
- UnifiedVectorFormat member_vdata;
2000
- auto &member = UnionVector::GetMember(vector, member_idx);
2001
- member.ToUnifiedFormat(count, member_vdata);
2002
-
2003
- auto mapped_row_idx = member_vdata.sel->get_index(row_idx);
2004
- if (member_vdata.validity.RowIsValid(mapped_row_idx)) {
2005
- if (found_valid) {
2006
- return UnionInvalidReason::VALIDITY_OVERLAP;
2007
- }
2008
- found_valid = true;
2009
- if (tag != static_cast<union_tag_t>(member_idx)) {
2010
- return UnionInvalidReason::TAG_MISMATCH;
2011
- }
2032
+ for (idx_t i = 0; i < member_count; i++) {
2033
+ auto &member_vdata = child_vdata[1 + i]; // skip the tag
2034
+ idx_t member_idx = member_vdata.sel->get_index(sel_p.get_index(row_idx));
2035
+ if (!member_vdata.validity.RowIsValid(member_idx)) {
2036
+ continue;
2037
+ }
2038
+ if (found_valid) {
2039
+ return UnionInvalidReason::VALIDITY_OVERLAP;
2040
+ }
2041
+ found_valid = true;
2042
+ if (tag != static_cast<union_tag_t>(i)) {
2043
+ return UnionInvalidReason::TAG_MISMATCH;
2012
2044
  }
2013
2045
  }
2014
2046
  }
@@ -373,7 +373,9 @@ string LogicalType::ToString() const {
373
373
  string ret = "UNION(";
374
374
  size_t count = UnionType::GetMemberCount(*this);
375
375
  for (size_t i = 0; i < count; i++) {
376
- ret += UnionType::GetMemberName(*this, i) + " " + UnionType::GetMemberType(*this, i).ToString();
376
+ auto member_name = UnionType::GetMemberName(*this, i);
377
+ auto member_type = UnionType::GetMemberType(*this, i).ToString();
378
+ ret += StringUtil::Format("%s %s", SQLIdentifier(member_name), member_type);
377
379
  if (i < count - 1) {
378
380
  ret += ", ";
379
381
  }