duckdb 1.2.1-dev6.0 → 1.2.1-dev8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  3. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  4. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  5. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  6. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  8. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  9. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  10. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  11. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  12. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  13. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  14. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  15. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  16. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  17. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  18. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  20. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  21. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  22. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  23. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  24. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  26. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  27. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  28. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  29. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  30. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  34. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  38. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  40. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  41. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  42. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  43. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  44. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  46. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  47. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  48. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  49. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  50. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  51. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  52. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  53. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  55. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  66. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  68. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  69. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  71. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  73. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  75. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  76. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  77. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  78. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  81. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  83. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  85. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  86. package/src/duckdb/src/include/duckdb.h +495 -480
  87. package/src/duckdb/src/main/attached_database.cpp +1 -1
  88. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  89. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  90. package/src/duckdb/src/main/config.cpp +7 -1
  91. package/src/duckdb/src/main/database.cpp +8 -8
  92. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  93. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  94. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  96. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  97. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  98. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  99. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  100. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  101. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  102. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  103. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  104. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  105. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  107. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  108. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  109. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  110. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  111. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  112. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  113. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  114. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  115. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  116. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  117. package/src/duckdb/ub_src_parallel.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "1.2.1-dev6.0",
5
+ "version": "1.2.1-dev8.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -44,14 +44,7 @@ struct StringAggFunction {
44
44
  if (!state.dataptr) {
45
45
  finalize_data.ReturnNull();
46
46
  } else {
47
- target = StringVector::AddString(finalize_data.result, state.dataptr, state.size);
48
- }
49
- }
50
-
51
- template <class STATE>
52
- static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
53
- if (state.dataptr) {
54
- delete[] state.dataptr;
47
+ target = string_t(state.dataptr, state.size);
55
48
  }
56
49
  }
57
50
 
@@ -59,12 +52,12 @@ struct StringAggFunction {
59
52
  return true;
60
53
  }
61
54
 
62
- static inline void PerformOperation(StringAggState &state, const char *str, const char *sep, idx_t str_size,
63
- idx_t sep_size) {
55
+ static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, const char *str,
56
+ const char *sep, idx_t str_size, idx_t sep_size) {
64
57
  if (!state.dataptr) {
65
58
  // first iteration: allocate space for the string and copy it into the state
66
59
  state.alloc_size = MaxValue<idx_t>(8, NextPowerOfTwo(str_size));
67
- state.dataptr = new char[state.alloc_size];
60
+ state.dataptr = char_ptr_cast(allocator.Allocate(state.alloc_size));
68
61
  state.size = str_size;
69
62
  memcpy(state.dataptr, str, str_size);
70
63
  } else {
@@ -72,13 +65,12 @@ struct StringAggFunction {
72
65
  idx_t required_size = state.size + str_size + sep_size;
73
66
  if (required_size > state.alloc_size) {
74
67
  // no space! allocate extra space
68
+ const auto old_size = state.alloc_size;
75
69
  while (state.alloc_size < required_size) {
76
70
  state.alloc_size *= 2;
77
71
  }
78
- auto new_data = new char[state.alloc_size];
79
- memcpy(new_data, state.dataptr, state.size);
80
- delete[] state.dataptr;
81
- state.dataptr = new_data;
72
+ state.dataptr =
73
+ char_ptr_cast(allocator.Reallocate(data_ptr_cast(state.dataptr), old_size, state.alloc_size));
82
74
  }
83
75
  // copy the separator
84
76
  memcpy(state.dataptr + state.size, sep, sep_size);
@@ -89,14 +81,15 @@ struct StringAggFunction {
89
81
  }
90
82
  }
91
83
 
92
- static inline void PerformOperation(StringAggState &state, string_t str, optional_ptr<FunctionData> data_p) {
84
+ static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, string_t str,
85
+ optional_ptr<FunctionData> data_p) {
93
86
  auto &data = data_p->Cast<StringAggBindData>();
94
- PerformOperation(state, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
87
+ PerformOperation(state, allocator, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
95
88
  }
96
89
 
97
90
  template <class INPUT_TYPE, class STATE, class OP>
98
91
  static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
99
- PerformOperation(state, input, unary_input.input.bind_data);
92
+ PerformOperation(state, unary_input.input.allocator, input, unary_input.input.bind_data);
100
93
  }
101
94
 
102
95
  template <class INPUT_TYPE, class STATE, class OP>
@@ -113,8 +106,8 @@ struct StringAggFunction {
113
106
  // source is not set: skip combining
114
107
  return;
115
108
  }
116
- PerformOperation(target, string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)),
117
- aggr_input_data.bind_data);
109
+ PerformOperation(target, aggr_input_data.allocator,
110
+ string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)), aggr_input_data.bind_data);
118
111
  }
119
112
  };
120
113
 
@@ -162,8 +155,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() {
162
155
  AggregateFunction::UnaryScatterUpdate<StringAggState, string_t, StringAggFunction>,
163
156
  AggregateFunction::StateCombine<StringAggState, StringAggFunction>,
164
157
  AggregateFunction::StateFinalize<StringAggState, string_t, StringAggFunction>,
165
- AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind,
166
- AggregateFunction::StateDestroy<StringAggState, StringAggFunction>);
158
+ AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind);
167
159
  string_agg_param.serialize = StringAggSerialize;
168
160
  string_agg_param.deserialize = StringAggDeserialize;
169
161
  string_agg.AddFunction(string_agg_param);
@@ -116,7 +116,6 @@ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_d
116
116
 
117
117
  // first iterate over all entries and set up the list entries, and get the newly required total length
118
118
  for (idx_t i = 0; i < count; i++) {
119
-
120
119
  auto &state = *states[states_data.sel->get_index(i)];
121
120
  const auto rid = i + offset;
122
121
  result_data[rid].offset = total_len;
@@ -223,17 +223,6 @@ void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::ColumnInfo &
223
223
  // ListLambdaBindData
224
224
  //===--------------------------------------------------------------------===//
225
225
 
226
- unique_ptr<FunctionData> ListLambdaBindData::Copy() const {
227
- auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
228
- return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
229
- }
230
-
231
- bool ListLambdaBindData::Equals(const FunctionData &other_p) const {
232
- auto &other = other_p.Cast<ListLambdaBindData>();
233
- return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
234
- has_index == other.has_index;
235
- }
236
-
237
226
  void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
238
227
  const ScalarFunction &) {
239
228
  auto &bind_data = bind_data_p->Cast<ListLambdaBindData>();
@@ -15,7 +15,17 @@
15
15
 
16
16
  namespace duckdb {
17
17
 
18
- // FIXME: use a local state for each thread to increase performance?
18
+ struct ListAggregatesLocalState : public FunctionLocalState {
19
+ explicit ListAggregatesLocalState(Allocator &allocator) : arena_allocator(allocator) {
20
+ }
21
+
22
+ ArenaAllocator arena_allocator;
23
+ };
24
+
25
+ unique_ptr<FunctionLocalState> ListAggregatesInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
26
+ FunctionData *bind_data) {
27
+ return make_uniq<ListAggregatesLocalState>(BufferAllocator::Get(state.GetContext()));
28
+ }
19
29
  // FIXME: benchmark the use of simple_update against using update (if applicable)
20
30
 
21
31
  static unique_ptr<FunctionData> ListAggregatesBindFailure(ScalarFunction &bound_function) {
@@ -207,7 +217,8 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
207
217
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
208
218
  auto &info = func_expr.bind_info->Cast<ListAggregatesBindData>();
209
219
  auto &aggr = info.aggr_expr->Cast<BoundAggregateExpression>();
210
- ArenaAllocator allocator(Allocator::DefaultAllocator());
220
+ auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<ListAggregatesLocalState>().arena_allocator;
221
+ allocator.Reset();
211
222
  AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
212
223
 
213
224
  D_ASSERT(aggr.function.update);
@@ -511,8 +522,9 @@ static unique_ptr<FunctionData> ListUniqueBind(ClientContext &context, ScalarFun
511
522
  }
512
523
 
513
524
  ScalarFunction ListAggregateFun::GetFunction() {
514
- auto result = ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
515
- ListAggregateFunction, ListAggregateBind);
525
+ auto result =
526
+ ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
527
+ ListAggregateFunction, ListAggregateBind, nullptr, nullptr, ListAggregatesInitLocalState);
516
528
  BaseScalarFunction::SetReturnsError(result);
517
529
  result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
518
530
  result.varargs = LogicalType::ANY;
@@ -523,12 +535,12 @@ ScalarFunction ListAggregateFun::GetFunction() {
523
535
 
524
536
  ScalarFunction ListDistinctFun::GetFunction() {
525
537
  return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
526
- ListDistinctFunction, ListDistinctBind);
538
+ ListDistinctFunction, ListDistinctBind, nullptr, nullptr, ListAggregatesInitLocalState);
527
539
  }
528
540
 
529
541
  ScalarFunction ListUniqueFun::GetFunction() {
530
542
  return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::UBIGINT, ListUniqueFunction,
531
- ListUniqueBind);
543
+ ListUniqueBind, nullptr, nullptr, ListAggregatesInitLocalState);
532
544
  }
533
545
 
534
546
  } // namespace duckdb
@@ -71,13 +71,20 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
71
71
  return make_uniq<BindData>(context);
72
72
  }
73
73
 
74
- void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
74
+ bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
75
75
  auto tz = icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
76
76
  if (*tz == icu::TimeZone::getUnknown()) {
77
77
  delete tz;
78
- throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
78
+ return false;
79
79
  }
80
80
  calendar->adoptTimeZone(tz);
81
+ return true;
82
+ }
83
+
84
+ void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
85
+ if (!TrySetTimeZone(calendar, tz_id)) {
86
+ throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
87
+ }
81
88
  }
82
89
 
83
90
  timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {
@@ -11,9 +11,7 @@
11
11
  #include "duckdb/execution/expression_executor.hpp"
12
12
  #include "duckdb/function/scalar/strftime_format.hpp"
13
13
  #include "duckdb/main/client_context.hpp"
14
- #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
15
14
  #include "duckdb/planner/expression/bound_function_expression.hpp"
16
- #include "duckdb/function/function_binder.hpp"
17
15
  #include "duckdb/function/cast/default_casts.hpp"
18
16
  #include "duckdb/main/extension_util.hpp"
19
17
 
@@ -60,14 +58,7 @@ struct ICUStrptime : public ICUDateFunc {
60
58
  }
61
59
 
62
60
  static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
63
- // Set TZ first, if any.
64
- // Note that empty TZ names are not allowed,
65
- // but unknown names will map to GMT.
66
- if (!parsed.tz.empty()) {
67
- SetTimeZone(calendar, parsed.tz);
68
- }
69
-
70
- // Now get the parts in the given time zone
61
+ // Get the parts in the current time zone
71
62
  uint64_t micros = parsed.GetMicros();
72
63
  calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
73
64
  calendar->set(UCAL_MONTH, parsed.data[1] - 1);
@@ -110,6 +101,11 @@ struct ICUStrptime : public ICUDateFunc {
110
101
  if (parsed.is_special) {
111
102
  return parsed.ToTimestamp();
112
103
  } else {
104
+ // Set TZ first, if any.
105
+ if (!parsed.tz.empty()) {
106
+ SetTimeZone(calendar, parsed.tz);
107
+ }
108
+
113
109
  return GetTime(calendar, ToMicros(calendar, parsed, format));
114
110
  }
115
111
  }
@@ -143,7 +139,7 @@ struct ICUStrptime : public ICUDateFunc {
143
139
  if (format.Parse(input, parsed)) {
144
140
  if (parsed.is_special) {
145
141
  return parsed.ToTimestamp();
146
- } else {
142
+ } else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) {
147
143
  timestamp_t result;
148
144
  if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
149
145
  return result;
@@ -49,7 +49,9 @@ struct ICUDateFunc {
49
49
  static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
50
50
  vector<duckdb::unique_ptr<Expression>> &arguments);
51
51
 
52
- //! Sets the time zone for the calendar.
52
+ //! Tries to set the time zone for the calendar and returns false if it is not valid.
53
+ static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
54
+ //! Sets the time zone for the calendar. Throws if it is not valid
53
55
  static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
54
56
  //! Gets the timestamp from the calendar, throwing if it is not in range.
55
57
  static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
@@ -90,22 +90,16 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
90
90
  optional_ptr<FileHandle> override_handle) {
91
91
  if (size != 0) {
92
92
  auto &handle = override_handle ? *override_handle.get() : *file_handle.get();
93
- if (can_seek) {
94
- handle.Read(pointer, size, position);
95
- } else if (sample_run) { // Cache the buffer
96
- handle.Read(pointer, size, position);
97
93
 
94
+ if (!cached_buffers.empty() || position < cached_size) {
95
+ ReadFromCache(pointer, size, position);
96
+ }
97
+
98
+ handle.Read(pointer, size, position);
99
+ if (file_handle->IsPipe()) { // Cache the buffer
98
100
  cached_buffers.emplace_back(allocator.Allocate(size));
99
101
  memcpy(cached_buffers.back().get(), pointer, size);
100
102
  cached_size += size;
101
- } else {
102
- if (!cached_buffers.empty() || position < cached_size) {
103
- ReadFromCache(pointer, size, position);
104
- }
105
-
106
- if (size != 0) {
107
- handle.Read(pointer, size, position);
108
- }
109
103
  }
110
104
  }
111
105
 
@@ -121,30 +115,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
121
115
 
122
116
  bool JSONFileHandle::Read(char *pointer, idx_t &read_size, idx_t requested_size, bool &file_done, bool sample_run) {
123
117
  D_ASSERT(requested_size != 0);
118
+ read_size = 0;
124
119
  if (last_read_requested) {
125
120
  return false;
126
121
  }
127
122
 
128
- if (can_seek) {
129
- read_size = ReadInternal(pointer, requested_size);
130
- read_position += read_size;
131
- } else if (sample_run) { // Cache the buffer
132
- read_size = ReadInternal(pointer, requested_size);
133
- if (read_size > 0) {
134
- cached_buffers.emplace_back(allocator.Allocate(read_size));
135
- memcpy(cached_buffers.back().get(), pointer, read_size);
136
- }
137
- cached_size += read_size;
138
- read_position += read_size;
139
- } else {
140
- read_size = 0;
141
- if (!cached_buffers.empty() || read_position < cached_size) {
142
- read_size += ReadFromCache(pointer, requested_size, read_position);
143
- }
144
- if (requested_size != 0) {
145
- read_size += ReadInternal(pointer, requested_size);
146
- }
123
+ if (!cached_buffers.empty() || read_position < cached_size) {
124
+ read_size += ReadFromCache(pointer, requested_size, read_position);
125
+ }
126
+
127
+ auto temp_read_size = ReadInternal(pointer, requested_size);
128
+ if (file_handle->IsPipe() && temp_read_size != 0) { // Cache the buffer
129
+ cached_buffers.emplace_back(allocator.Allocate(temp_read_size));
130
+ memcpy(cached_buffers.back().get(), pointer, temp_read_size);
147
131
  }
132
+ cached_size += temp_read_size;
133
+ read_position += temp_read_size;
134
+ read_size += temp_read_size;
148
135
 
149
136
  if (read_size == 0) {
150
137
  last_read_requested = true;
@@ -17,12 +17,17 @@
17
17
  namespace duckdb {
18
18
 
19
19
  static DefaultMacro json_macros[] = {
20
- {DEFAULT_SCHEMA, "json_group_array", {"x", nullptr}, {{nullptr, nullptr}}, "to_json(list(x))"},
20
+ {DEFAULT_SCHEMA,
21
+ "json_group_array",
22
+ {"x", nullptr},
23
+ {{nullptr, nullptr}},
24
+ "CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
21
25
  {DEFAULT_SCHEMA,
22
26
  "json_group_object",
23
- {"name", "value", nullptr},
27
+ {"n", "v", nullptr},
24
28
  {{nullptr, nullptr}},
25
- "to_json(map(list(name), list(value)))"},
29
+ "CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, "
30
+ "',') || '}' AS JSON)"},
26
31
  {DEFAULT_SCHEMA,
27
32
  "json_group_structure",
28
33
  {"x", nullptr},
@@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
319
319
 
320
320
  auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
321
321
 
322
- AllocateCompressed(compressed_bytes);
322
+ ResizeableBuffer compressed_buffer;
323
+ compressed_buffer.resize(GetAllocator(), compressed_bytes);
323
324
  reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
324
325
 
325
326
  DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
@@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) {
334
335
  }
335
336
  }
336
337
 
337
- void ColumnReader::AllocateCompressed(idx_t size) {
338
- compressed_buffer.resize(GetAllocator(), size);
339
- }
340
-
341
338
  void ColumnReader::PreparePage(PageHeader &page_hdr) {
342
339
  AllocateBlock(page_hdr.uncompressed_page_size + 1);
343
340
  if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
@@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) {
348
345
  return;
349
346
  }
350
347
 
351
- AllocateCompressed(page_hdr.compressed_page_size + 1);
348
+ ResizeableBuffer compressed_buffer;
349
+ compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
352
350
  reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
353
351
 
354
352
  DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,
@@ -309,6 +309,7 @@ struct PageInformation {
309
309
  idx_t offset = 0;
310
310
  idx_t row_count = 0;
311
311
  idx_t empty_count = 0;
312
+ idx_t null_count = 0;
312
313
  idx_t estimated_page_size = 0;
313
314
  };
314
315
 
@@ -388,7 +389,7 @@ protected:
388
389
  virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
389
390
 
390
391
  //! Initialize the writer for a specific page. Only used for scalar types.
391
- virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
392
+ virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx);
392
393
 
393
394
  //! Flushes the writer for a specific page. Only used for scalar types.
394
395
  virtual void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state);
@@ -427,7 +428,8 @@ void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group)
427
428
  row_group.columns.push_back(std::move(column_chunk));
428
429
  }
429
430
 
430
- unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state) {
431
+ unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state,
432
+ idx_t page_idx) {
431
433
  return nullptr;
432
434
  }
433
435
 
@@ -463,6 +465,8 @@ void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p
463
465
  state.page_info.push_back(new_info);
464
466
  page_info_ref = state.page_info.back();
465
467
  }
468
+ } else {
469
+ page_info.null_count++;
466
470
  }
467
471
  vector_index++;
468
472
  }
@@ -502,7 +506,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) {
502
506
  MaxValue<idx_t>(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
503
507
  write_info.write_count = page_info.empty_count;
504
508
  write_info.max_write_count = page_info.row_count;
505
- write_info.page_state = InitializePageState(state);
509
+ write_info.page_state = InitializePageState(state, page_idx);
506
510
 
507
511
  write_info.compressed_size = 0;
508
512
  write_info.compressed_data = nullptr;
@@ -796,7 +800,6 @@ public:
796
800
  };
797
801
 
798
802
  struct BaseParquetOperator {
799
-
800
803
  template <class SRC, class TGT>
801
804
  static void WriteToStream(const TGT &input, WriteStream &ser) {
802
805
  ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
@@ -815,6 +818,11 @@ struct BaseParquetOperator {
815
818
  template <class SRC, class TGT>
816
819
  static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
817
820
  }
821
+
822
+ template <class SRC, class TGT>
823
+ static idx_t GetRowSize(const Vector &, idx_t) {
824
+ return sizeof(TGT);
825
+ }
818
826
  };
819
827
 
820
828
  struct ParquetCastOperator : public BaseParquetOperator {
@@ -936,6 +944,11 @@ struct ParquetStringOperator : public BaseParquetOperator {
936
944
  static uint64_t XXHash64(const TGT &target_value) {
937
945
  return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
938
946
  }
947
+
948
+ template <class SRC, class TGT>
949
+ static idx_t GetRowSize(const Vector &vector, idx_t index) {
950
+ return FlatVector::GetData<string_t>(vector)[index].GetSize();
951
+ }
939
952
  };
940
953
 
941
954
  struct ParquetIntervalTargetType {
@@ -1066,6 +1079,7 @@ public:
1066
1079
  // analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
1067
1080
  idx_t total_value_count = 0;
1068
1081
  idx_t total_string_size = 0;
1082
+ uint32_t key_bit_width = 0;
1069
1083
 
1070
1084
  unordered_map<T, uint32_t> dictionary;
1071
1085
  duckdb_parquet::Encoding::type encoding;
@@ -1222,11 +1236,12 @@ public:
1222
1236
  return std::move(result);
1223
1237
  }
1224
1238
 
1225
- unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
1239
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p, idx_t page_idx) override {
1226
1240
  auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1227
-
1228
- auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(state.total_value_count, state.total_string_size,
1229
- state.encoding, state.dictionary);
1241
+ const auto &page_info = state_p.page_info[page_idx];
1242
+ auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(
1243
+ page_info.row_count - (page_info.empty_count + page_info.null_count), state.total_string_size,
1244
+ state.encoding, state.dictionary);
1230
1245
  return std::move(result);
1231
1246
  }
1232
1247
 
@@ -1335,6 +1350,8 @@ public:
1335
1350
  }
1336
1351
  }
1337
1352
  state.dictionary.clear();
1353
+ } else {
1354
+ state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size());
1338
1355
  }
1339
1356
  }
1340
1357
 
@@ -1488,9 +1505,13 @@ public:
1488
1505
  // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
1489
1506
  }
1490
1507
 
1491
- // TODO this now vastly over-estimates the page size
1492
1508
  idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
1493
- return sizeof(TGT);
1509
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1510
+ if (state.encoding == Encoding::RLE_DICTIONARY) {
1511
+ return (state.key_bit_width + 7) / 8;
1512
+ } else {
1513
+ return OP::template GetRowSize<SRC, TGT>(vector, index);
1514
+ }
1494
1515
  }
1495
1516
  };
1496
1517
 
@@ -1570,7 +1591,7 @@ public:
1570
1591
  }
1571
1592
  }
1572
1593
 
1573
- unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1594
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
1574
1595
  return make_uniq<BooleanWriterPageState>();
1575
1596
  }
1576
1597
 
@@ -1812,7 +1833,7 @@ public:
1812
1833
  }
1813
1834
  }
1814
1835
 
1815
- unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1836
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
1816
1837
  return make_uniq<EnumWriterPageState>(bit_width);
1817
1838
  }
1818
1839
 
@@ -160,7 +160,6 @@ protected:
160
160
 
161
161
  private:
162
162
  void AllocateBlock(idx_t size);
163
- void AllocateCompressed(idx_t size);
164
163
  void PrepareRead(parquet_filter_t &filter);
165
164
  void PreparePage(PageHeader &page_hdr);
166
165
  void PrepareDataPage(PageHeader &page_hdr);
@@ -178,7 +177,6 @@ private:
178
177
 
179
178
  shared_ptr<ResizeableBuffer> block;
180
179
 
181
- ResizeableBuffer compressed_buffer;
182
180
  ResizeableBuffer offset_buffer;
183
181
 
184
182
  unique_ptr<RleBpDecoder> dict_decoder;
@@ -30,7 +30,6 @@ public:
30
30
  }
31
31
 
32
32
  void FinishWrite(WriteStream &writer) {
33
- D_ASSERT(count == total_value_count);
34
33
  writer.WriteData(buffer.get(), total_value_count * bit_width);
35
34
  }
36
35
 
@@ -33,9 +33,8 @@ public:
33
33
  }
34
34
 
35
35
  void FinishWrite(WriteStream &writer) {
36
- D_ASSERT(stream->GetPosition() == total_string_size);
37
36
  dbp_encoder.FinishWrite(writer);
38
- writer.WriteData(buffer.get(), total_string_size);
37
+ writer.WriteData(buffer.get(), stream->GetPosition());
39
38
  }
40
39
 
41
40
  private:
@@ -769,6 +769,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, Cat
769
769
 
770
770
  if (if_not_found == OnEntryNotFound::RETURN_NULL) {
771
771
  return {nullptr, nullptr, ErrorData()};
772
+ }
773
+ // Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
774
+ // otherwise.
775
+ if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
776
+ auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
777
+ return {nullptr, nullptr, ErrorData(except)};
772
778
  } else {
773
779
  auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
774
780
  return {nullptr, nullptr, ErrorData(except)};
@@ -805,6 +811,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, vec
805
811
 
806
812
  if (if_not_found == OnEntryNotFound::RETURN_NULL) {
807
813
  return {nullptr, nullptr, ErrorData()};
814
+ }
815
+ // Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
816
+ // otherwise.
817
+ if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
818
+ auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
819
+ return {nullptr, nullptr, ErrorData(except)};
808
820
  } else {
809
821
  auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
810
822
  return {nullptr, nullptr, ErrorData(except)};
@@ -863,7 +863,7 @@ unique_ptr<CatalogEntry> DuckTableEntry::Copy(ClientContext &context) const {
863
863
  }
864
864
 
865
865
  auto binder = Binder::CreateBinder(context);
866
- auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema);
866
+ auto bound_create_info = binder->BindCreateTableCheckpoint(std::move(create_info), schema);
867
867
  return make_uniq<DuckTableEntry>(catalog, schema, *bound_create_info, storage);
868
868
  }
869
869
 
@@ -76,7 +76,7 @@ void CatalogEntryRetriever::Inherit(const CatalogEntryRetriever &parent) {
76
76
  this->search_path = parent.search_path;
77
77
  }
78
78
 
79
- CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() {
79
+ const CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() const {
80
80
  if (search_path) {
81
81
  return *search_path;
82
82
  }