duckdb 0.8.2-dev4871.0 → 0.8.2-dev5080.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -0
  3. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  4. package/src/duckdb/src/common/enum_util.cpp +5 -0
  5. package/src/duckdb/src/common/operator/cast_operators.cpp +18 -0
  6. package/src/duckdb/src/common/row_operations/row_matcher.cpp +5 -38
  7. package/src/duckdb/src/common/types/data_chunk.cpp +47 -10
  8. package/src/duckdb/src/common/types/vector.cpp +0 -1
  9. package/src/duckdb/src/common/types.cpp +10 -1
  10. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +2 -2
  11. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +18 -8
  12. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +10 -1
  13. package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +0 -2
  14. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +12 -6
  15. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +20 -27
  16. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +21 -9
  17. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  18. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +1 -1
  19. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -2
  20. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +14 -2
  21. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
  22. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +13 -8
  23. package/src/duckdb/src/function/cast/struct_cast.cpp +8 -0
  24. package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
  25. package/src/duckdb/src/function/cast/union_casts.cpp +5 -0
  26. package/src/duckdb/src/function/function_binder.cpp +11 -2
  27. package/src/duckdb/src/function/pragma/pragma_functions.cpp +5 -0
  28. package/src/duckdb/src/function/pragma/pragma_queries.cpp +3 -0
  29. package/src/duckdb/src/function/scalar/strftime_format.cpp +29 -8
  30. package/src/duckdb/src/function/table/arrow.cpp +4 -0
  31. package/src/duckdb/src/function/table/copy_csv.cpp +2 -1
  32. package/src/duckdb/src/function/table/read_csv.cpp +4 -1
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  34. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  35. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +27 -0
  36. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +38 -2
  37. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  38. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +4 -4
  39. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +4 -4
  40. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +5 -5
  41. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +7 -7
  42. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +12 -12
  43. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +12 -12
  44. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +33 -33
  46. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -3
  48. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -12
  49. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +2 -2
  50. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +2 -2
  51. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +2 -1
  52. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  53. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +1 -0
  54. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +28 -1
  55. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +0 -18
  56. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +1 -2
  59. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/planner/planner.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
  65. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -3
  66. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
  67. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -19
  68. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +28 -6
  69. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +3 -0
  70. package/src/duckdb/src/planner/bound_parameter_map.cpp +67 -0
  71. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +14 -6
  72. package/src/duckdb/src/planner/operator/logical_create_table.cpp +3 -3
  73. package/src/duckdb/src/planner/operator/logical_delete.cpp +3 -2
  74. package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -2
  75. package/src/duckdb/src/planner/operator/logical_update.cpp +3 -2
  76. package/src/duckdb/src/planner/planner.cpp +2 -2
  77. package/src/duckdb/src/storage/data_table.cpp +8 -8
  78. package/src/duckdb/src/storage/local_storage.cpp +2 -3
  79. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +64 -80
  80. package/src/duckdb/src/storage/storage_manager.cpp +6 -2
  81. package/src/duckdb/src/storage/table/row_group.cpp +6 -0
  82. package/src/duckdb/src/storage/table/row_group_collection.cpp +4 -3
  83. package/src/duckdb/src/storage/table/struct_column_data.cpp +5 -0
  84. package/src/duckdb/src/transaction/duck_transaction.cpp +1 -0
  85. package/src/duckdb/ub_src_planner.cpp +2 -0
@@ -343,8 +343,8 @@ void PhysicalHashAggregate::SinkDistinct(ExecutionContext &context, DataChunk &c
343
343
 
344
344
  SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
345
345
  OperatorSinkInput &input) const {
346
- auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();
347
- auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();
346
+ auto &local_state = input.local_state.Cast<HashAggregateLocalSinkState>();
347
+ auto &global_state = input.global_state.Cast<HashAggregateGlobalSinkState>();
348
348
 
349
349
  if (distinct_collection_info) {
350
350
  SinkDistinct(context, chunk, input);
@@ -354,8 +354,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
354
354
  return SinkResultType::NEED_MORE_INPUT;
355
355
  }
356
356
 
357
- DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk;
358
-
357
+ DataChunk &aggregate_input_chunk = local_state.aggregate_input_chunk;
359
358
  auto &aggregates = grouped_aggregate_data.aggregates;
360
359
  idx_t aggregate_input_idx = 0;
361
360
 
@@ -385,10 +384,11 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
385
384
 
386
385
  // For every grouping set there is one radix_table
387
386
  for (idx_t i = 0; i < groupings.size(); i++) {
388
- auto &grouping_gstate = gstate.grouping_states[i];
389
- auto &grouping_lstate = llstate.grouping_states[i];
387
+ auto &grouping_local_state = global_state.grouping_states[i];
388
+ auto &grouping_global_state = local_state.grouping_states[i];
390
389
  InterruptState interrupt_state;
391
- OperatorSinkInput sink_input {*grouping_gstate.table_state, *grouping_lstate.table_state, interrupt_state};
390
+ OperatorSinkInput sink_input {*grouping_local_state.table_state, *grouping_global_state.table_state,
391
+ interrupt_state};
392
392
 
393
393
  auto &grouping = groupings[i];
394
394
  auto &table = grouping.table_data;
@@ -437,7 +437,7 @@ void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientCont
437
437
  }
438
438
  sql_type_list.reserve(sql_type_names.size());
439
439
  for (auto &sql_type : sql_type_names) {
440
- auto def_type = TransformStringToLogicalType(sql_type);
440
+ auto def_type = TransformStringToLogicalType(sql_type, context);
441
441
  if (def_type.id() == LogicalTypeId::USER) {
442
442
  throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
443
443
  kv.first);
@@ -335,8 +335,6 @@ normal : {
335
335
  if (c == options.dialect_options.state_machine_options.delimiter) {
336
336
  // delimiter: end the value and add it to the chunk
337
337
  goto add_value;
338
- } else if (c == options.dialect_options.state_machine_options.quote && try_add_line) {
339
- return false;
340
338
  } else if (StringUtil::CharacterIsNewline(c)) {
341
339
  // newline: add row
342
340
  if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
@@ -141,6 +141,10 @@ struct SniffValue {
141
141
  (current_char == '\r' || current_char == '\n')) ||
142
142
  (machine.dialect_options.new_line == NewLineIdentifier::CARRY_ON && current_char == '\n')) {
143
143
  machine.rows_read++;
144
+ }
145
+
146
+ if ((machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
147
+ (machine.state != CSVState::RECORD_SEPARATOR && machine.previous_state == CSVState::CARRIAGE_RETURN)) {
144
148
  sniffed_values[machine.cur_rows].position = machine.line_start_pos;
145
149
  sniffed_values[machine.cur_rows].set = true;
146
150
  machine.line_start_pos = current_pos;
@@ -287,11 +291,15 @@ void CSVSniffer::DetectTypes() {
287
291
  candidate->csv_buffer_iterator.Process<SniffValue>(*candidate, tuples);
288
292
  // Potentially Skip empty rows (I find this dirty, but it is what the original code does)
289
293
  idx_t true_start = 0;
294
+ idx_t true_pos = 0;
290
295
  idx_t values_start = 0;
291
296
  while (true_start < tuples.size()) {
292
297
  if (tuples[true_start].values.empty() ||
293
298
  (tuples[true_start].values.size() == 1 && tuples[true_start].values[0].IsNull())) {
294
299
  true_start = tuples[true_start].line_number;
300
+ if (true_start < tuples.size()) {
301
+ true_pos = tuples[true_start].position;
302
+ }
295
303
  values_start++;
296
304
  } else {
297
305
  break;
@@ -301,7 +309,11 @@ void CSVSniffer::DetectTypes() {
301
309
  // Potentially Skip Notes (I also find this dirty, but it is what the original code does)
302
310
  while (true_start < tuples.size()) {
303
311
  if (tuples[true_start].values.size() < max_columns_found && !options.null_padding) {
312
+
304
313
  true_start = tuples[true_start].line_number;
314
+ if (true_start < tuples.size()) {
315
+ true_pos = tuples[true_start].position;
316
+ }
305
317
  values_start++;
306
318
  } else {
307
319
  break;
@@ -317,7 +329,7 @@ void CSVSniffer::DetectTypes() {
317
329
  row_idx = 1;
318
330
  }
319
331
  if (!tuples.empty()) {
320
- best_start_without_header = tuples[0].position;
332
+ best_start_without_header = tuples[0].position - true_pos;
321
333
  }
322
334
 
323
335
  // First line where we start our type detection
@@ -387,7 +399,7 @@ void CSVSniffer::DetectTypes() {
387
399
  best_sql_types_candidates_per_column_idx = info_sql_types_candidates;
388
400
  best_format_candidates = format_candidates;
389
401
  best_header_row = tuples[0].values;
390
- best_start_with_header = tuples[0].position;
402
+ best_start_with_header = tuples[0].position - true_pos;
391
403
  }
392
404
  }
393
405
  // Assert that it's all good at this point.
@@ -59,7 +59,7 @@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info,
59
59
  if (info.format == "csv") {
60
60
  // insert default csv options, if not specified
61
61
  if (info.options.find("header") == info.options.end()) {
62
- info.options["header"].push_back(Value::INTEGER(0));
62
+ info.options["header"].push_back(Value::INTEGER(1));
63
63
  }
64
64
  if (info.options.find("delimiter") == info.options.end() && info.options.find("sep") == info.options.end() &&
65
65
  info.options.find("delim") == info.options.end()) {
@@ -184,19 +184,21 @@ static bool PlanIndexJoin(ClientContext &context, LogicalComparisonJoin &op, uni
184
184
  if (!index) {
185
185
  return false;
186
186
  }
187
- // index joins are not supported if there are pushed down table filters
188
- D_ASSERT(right->type == PhysicalOperatorType::TABLE_SCAN);
189
- auto &tbl_scan = right->Cast<PhysicalTableScan>();
190
- // if (tbl_scan.table_filters && !tbl_scan.table_filters->filters.empty()) {
191
- // return false;
192
- // }
187
+
193
188
  // index joins are disabled if enable_optimizer is false
194
189
  if (!ClientConfig::GetConfig(context).enable_optimizer) {
195
190
  return false;
196
191
  }
192
+
193
+ // index joins are disabled on default
194
+ auto force_index_join = ClientConfig::GetConfig(context).force_index_join;
195
+ if (!ClientConfig::GetConfig(context).enable_index_join && !force_index_join) {
196
+ return false;
197
+ }
198
+
197
199
  // check if the cardinality difference justifies an index join
198
- if (!((ClientConfig::GetConfig(context).force_index_join ||
199
- left->estimated_cardinality < 0.01 * right->estimated_cardinality))) {
200
+ auto index_join_is_applicable = left->estimated_cardinality < 0.01 * right->estimated_cardinality;
201
+ if (!index_join_is_applicable && !force_index_join) {
200
202
  return false;
201
203
  }
202
204
 
@@ -205,6 +207,9 @@ static bool PlanIndexJoin(ClientContext &context, LogicalComparisonJoin &op, uni
205
207
  swap(op.conditions[0].left, op.conditions[0].right);
206
208
  swap(op.left_projection_map, op.right_projection_map);
207
209
  }
210
+ D_ASSERT(right->type == PhysicalOperatorType::TABLE_SCAN);
211
+ auto &tbl_scan = right->Cast<PhysicalTableScan>();
212
+
208
213
  plan = make_uniq<PhysicalIndexJoin>(op, std::move(left), std::move(right), std::move(op.conditions), op.join_type,
209
214
  op.left_projection_map, op.right_projection_map, tbl_scan.column_ids, *index,
210
215
  !swap_condition, op.estimated_cardinality);
@@ -9,10 +9,18 @@ unique_ptr<BoundCastData> StructBoundCastData::BindStructToStructCast(BindCastIn
9
9
  vector<BoundCastInfo> child_cast_info;
10
10
  auto &source_child_types = StructType::GetChildTypes(source);
11
11
  auto &result_child_types = StructType::GetChildTypes(target);
12
+
13
+ auto target_is_unnamed = StructType::IsUnnamed(target);
14
+ auto source_is_unnamed = StructType::IsUnnamed(source);
15
+
12
16
  if (source_child_types.size() != result_child_types.size()) {
13
17
  throw TypeMismatchException(source, target, "Cannot cast STRUCTs of different size");
14
18
  }
15
19
  for (idx_t i = 0; i < source_child_types.size(); i++) {
20
+ if (!target_is_unnamed && !source_is_unnamed &&
21
+ !StringUtil::CIEquals(source_child_types[i].first, result_child_types[i].first)) {
22
+ throw TypeMismatchException(source, target, "Cannot cast STRUCTs with different names");
23
+ }
16
24
  auto child_cast = input.GetCastFunction(source_child_types[i].second, result_child_types[i].second);
17
25
  child_cast_info.push_back(std::move(child_cast));
18
26
  }
@@ -134,6 +134,10 @@ BoundCastInfo DefaultCasts::TimestampMsCastSwitch(BindCastInput &input, const Lo
134
134
  // timestamp (ms) to timestamp (us)
135
135
  return BoundCastInfo(
136
136
  &VectorCastHelpers::TemplatedCastLoop<timestamp_t, timestamp_t, duckdb::CastTimestampMsToUs>);
137
+ case LogicalTypeId::TIMESTAMP_NS:
138
+ // timestamp (ms) to timestamp (ns)
139
+ return BoundCastInfo(
140
+ &VectorCastHelpers::TemplatedCastLoop<timestamp_t, timestamp_t, duckdb::CastTimestampMsToNs>);
137
141
  default:
138
142
  return TryVectorNullCast;
139
143
  }
@@ -146,10 +150,18 @@ BoundCastInfo DefaultCasts::TimestampSecCastSwitch(BindCastInput &input, const L
146
150
  case LogicalTypeId::VARCHAR:
147
151
  // timestamp (sec) to varchar
148
152
  return BoundCastInfo(&VectorCastHelpers::StringCast<timestamp_t, duckdb::CastFromTimestampSec>);
153
+ case LogicalTypeId::TIMESTAMP_MS:
154
+ // timestamp (s) to timestamp (ms)
155
+ return BoundCastInfo(
156
+ &VectorCastHelpers::TemplatedCastLoop<timestamp_t, timestamp_t, duckdb::CastTimestampSecToMs>);
149
157
  case LogicalTypeId::TIMESTAMP:
150
158
  // timestamp (s) to timestamp (us)
151
159
  return BoundCastInfo(
152
160
  &VectorCastHelpers::TemplatedCastLoop<timestamp_t, timestamp_t, duckdb::CastTimestampSecToUs>);
161
+ case LogicalTypeId::TIMESTAMP_NS:
162
+ // timestamp (s) to timestamp (ns)
163
+ return BoundCastInfo(
164
+ &VectorCastHelpers::TemplatedCastLoop<timestamp_t, timestamp_t, duckdb::CastTimestampSecToNs>);
153
165
  default:
154
166
  return TryVectorNullCast;
155
167
  }
@@ -304,6 +304,11 @@ static bool UnionToVarcharCast(Vector &source, Vector &result, idx_t count, Cast
304
304
  // now construct the actual varchar vector
305
305
  varchar_union.Flatten(count);
306
306
  auto &tag_vector = UnionVector::GetTags(source);
307
+ auto tag_vector_type = tag_vector.GetVectorType();
308
+ if (tag_vector_type != VectorType::CONSTANT_VECTOR && tag_vector_type != VectorType::FLAT_VECTOR) {
309
+ tag_vector.Flatten(count);
310
+ }
311
+
307
312
  auto tags = FlatVector::GetData<union_tag_t>(tag_vector);
308
313
 
309
314
  auto &validity = FlatVector::Validity(varchar_union);
@@ -268,8 +268,17 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE
268
268
 
269
269
  if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
270
270
  for (auto &child : children) {
271
- if (child->return_type == LogicalTypeId::SQLNULL ||
272
- (child->IsFoldable() && ExpressionExecutor::EvaluateScalar(context, *child).IsNull())) {
271
+ if (child->return_type == LogicalTypeId::SQLNULL) {
272
+ return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
273
+ }
274
+ if (!child->IsFoldable()) {
275
+ continue;
276
+ }
277
+ Value result;
278
+ if (!ExpressionExecutor::TryEvaluateScalar(context, *child, result)) {
279
+ continue;
280
+ }
281
+ if (result.IsNull()) {
273
282
  return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
274
283
  }
275
284
  }
@@ -80,6 +80,10 @@ static void PragmaEnableForceParallelism(ClientContext &context, const FunctionP
80
80
  ClientConfig::GetConfig(context).verify_parallelism = true;
81
81
  }
82
82
 
83
+ static void PragmaEnableIndexJoin(ClientContext &context, const FunctionParameters &parameters) {
84
+ ClientConfig::GetConfig(context).enable_index_join = true;
85
+ }
86
+
83
87
  static void PragmaEnableForceIndexJoin(ClientContext &context, const FunctionParameters &parameters) {
84
88
  ClientConfig::GetConfig(context).force_index_join = true;
85
89
  }
@@ -140,6 +144,7 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) {
140
144
  set.AddFunction(PragmaFunction::PragmaStatement("enable_optimizer", PragmaEnableOptimizer));
141
145
  set.AddFunction(PragmaFunction::PragmaStatement("disable_optimizer", PragmaDisableOptimizer));
142
146
 
147
+ set.AddFunction(PragmaFunction::PragmaStatement("enable_index_join", PragmaEnableIndexJoin));
143
148
  set.AddFunction(PragmaFunction::PragmaStatement("force_index_join", PragmaEnableForceIndexJoin));
144
149
  set.AddFunction(PragmaFunction::PragmaStatement("force_checkpoint", PragmaForceCheckpoint));
145
150
 
@@ -124,12 +124,15 @@ string PragmaShow(ClientContext &context, const FunctionParameters &parameters)
124
124
  ON cols.column_name = pragma_table_info.name
125
125
  AND cols.table_name='%table_name%'
126
126
  AND cols.schema_name='%table_schema%'
127
+ AND cols.database_name = '%table_database%'
127
128
  ORDER BY column_index;)";
128
129
  // clang-format on
129
130
 
130
131
  sql = StringUtil::Replace(sql, "%func_param_table%", parameters.values[0].ToString());
131
132
  sql = StringUtil::Replace(sql, "%table_name%", table.name);
132
133
  sql = StringUtil::Replace(sql, "%table_schema%", table.schema.empty() ? DEFAULT_SCHEMA : table.schema);
134
+ sql = StringUtil::Replace(sql, "%table_database%",
135
+ table.catalog.empty() ? DatabaseManager::GetDefaultDatabase(context) : table.catalog);
133
136
  return sql;
134
137
  }
135
138
 
@@ -26,6 +26,8 @@ idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
26
26
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
27
27
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
28
28
  return 2;
29
+ case StrTimeSpecifier::NANOSECOND_PADDED:
30
+ return 9;
29
31
  case StrTimeSpecifier::MICROSECOND_PADDED:
30
32
  return 6;
31
33
  case StrTimeSpecifier::MILLISECOND_PADDED:
@@ -183,9 +185,15 @@ char *StrfTimeFormat::WritePadded3(char *target, uint32_t value) {
183
185
  }
184
186
  }
185
187
 
186
- // write a value in the range of 0..999999 padded to 6 digits
188
+ // write a value in the range of 0..999999... padded to the given number of digits
187
189
  char *StrfTimeFormat::WritePadded(char *target, uint32_t value, size_t padding) {
188
- D_ASSERT(padding % 2 == 0);
190
+ D_ASSERT(padding > 1);
191
+ if (padding % 2) {
192
+ int decimals = value % 1000;
193
+ WritePadded3(target + padding - 3, decimals);
194
+ value /= 1000;
195
+ padding -= 3;
196
+ }
189
197
  for (size_t i = 0; i < padding / 2; i++) {
190
198
  int decimals = value % 100;
191
199
  WritePadded2(target + padding - 2 * (i + 1), decimals);
@@ -309,11 +317,14 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
309
317
  case StrTimeSpecifier::SECOND_PADDED:
310
318
  target = WritePadded2(target, data[5]);
311
319
  break;
320
+ case StrTimeSpecifier::NANOSECOND_PADDED:
321
+ target = WritePadded(target, data[6] * Interval::NANOS_PER_MICRO, 9);
322
+ break;
312
323
  case StrTimeSpecifier::MICROSECOND_PADDED:
313
324
  target = WritePadded(target, data[6], 6);
314
325
  break;
315
326
  case StrTimeSpecifier::MILLISECOND_PADDED:
316
- target = WritePadded3(target, data[6] / 1000);
327
+ target = WritePadded3(target, data[6] / Interval::MICROS_PER_MSEC);
317
328
  break;
318
329
  case StrTimeSpecifier::UTC_OFFSET: {
319
330
  *target++ = (data[7] < 0) ? '-' : '+';
@@ -516,6 +527,9 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
516
527
  case 'S':
517
528
  specifier = StrTimeSpecifier::SECOND_PADDED;
518
529
  break;
530
+ case 'n':
531
+ specifier = StrTimeSpecifier::NANOSECOND_PADDED;
532
+ break;
519
533
  case 'f':
520
534
  specifier = StrTimeSpecifier::MICROSECOND_PADDED;
521
535
  break;
@@ -660,6 +674,8 @@ int StrpTimeFormat::NumericSpecifierWidth(StrTimeSpecifier specifier) {
660
674
  return 4;
661
675
  case StrTimeSpecifier::MICROSECOND_PADDED:
662
676
  return 6;
677
+ case StrTimeSpecifier::NANOSECOND_PADDED:
678
+ return 9;
663
679
  default:
664
680
  return -1;
665
681
  }
@@ -855,15 +871,20 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
855
871
  // seconds
856
872
  result_data[5] = number;
857
873
  break;
874
+ case StrTimeSpecifier::NANOSECOND_PADDED:
875
+ D_ASSERT(number < Interval::NANOS_PER_SEC); // enforced by the length of the number
876
+ // microseconds (rounded)
877
+ result_data[6] = (number + Interval::NANOS_PER_MICRO / 2) / Interval::NANOS_PER_MICRO;
878
+ break;
858
879
  case StrTimeSpecifier::MICROSECOND_PADDED:
859
- D_ASSERT(number < 1000000ULL); // enforced by the length of the number
860
- // milliseconds
880
+ D_ASSERT(number < Interval::MICROS_PER_SEC); // enforced by the length of the number
881
+ // microseconds
861
882
  result_data[6] = number;
862
883
  break;
863
884
  case StrTimeSpecifier::MILLISECOND_PADDED:
864
- D_ASSERT(number < 1000ULL); // enforced by the length of the number
865
- // milliseconds
866
- result_data[6] = number * 1000;
885
+ D_ASSERT(number < Interval::MSECS_PER_SEC); // enforced by the length of the number
886
+ // microseconds
887
+ result_data[6] = number * Interval::MICROS_PER_MSEC;
867
888
  break;
868
889
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
869
890
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
@@ -237,6 +237,10 @@ void ArrowTableFunction::PopulateArrowTableType(ArrowTableType &arrow_table, Arr
237
237
 
238
238
  unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
239
239
  vector<LogicalType> &return_types, vector<string> &names) {
240
+ if (input.inputs[0].IsNull() || input.inputs[1].IsNull() || input.inputs[2].IsNull()) {
241
+ throw BinderException("arrow_scan: pointers cannot be null");
242
+ }
243
+
240
244
  auto stream_factory_ptr = input.inputs[0].GetPointer();
241
245
  auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer(); // NOLINT
242
246
  auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer(); // NOLINT
@@ -157,6 +157,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
157
157
  }
158
158
 
159
159
  bind_data->FinalizeRead(context);
160
+
160
161
  if (options.auto_detect) {
161
162
  // We must run the sniffer.
162
163
  auto file_handle = BaseCSVReader::OpenCSV(context, options);
@@ -332,7 +333,7 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
332
333
  global_data->WriteData(options.prefix.c_str(), options.prefix.size());
333
334
  }
334
335
 
335
- if (options.dialect_options.header) {
336
+ if (!(options.has_header && !options.dialect_options.header)) {
336
337
  MemoryStream stream;
337
338
  // write the header line to the file
338
339
  for (idx_t i = 0; i < csv_data.options.name_list.size(); i++) {
@@ -178,7 +178,10 @@ public:
178
178
  current_file_path = files_path_p[0];
179
179
  CSVFileHandle *file_handle_ptr;
180
180
 
181
- if (!buffer_manager) {
181
+ if (!buffer_manager || (options.skip_rows_set && options.dialect_options.skip_rows > 0)) {
182
+ // If our buffers are too small, and we skip too many rows there is a chance things will go over-buffer
183
+ // for now don't reuse the buffer manager
184
+ buffer_manager.reset();
182
185
  file_handle = ReadCSV::OpenCSV(current_file_path, options.compression, context);
183
186
  file_handle_ptr = file_handle.get();
184
187
  } else {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4871"
2
+ #define DUCKDB_VERSION "0.8.2-dev5080"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "5a29c99891"
5
+ #define DUCKDB_SOURCE_ID "e07b94a748"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -182,6 +182,11 @@ struct MultiFileReader {
182
182
  }
183
183
  }
184
184
  for (idx_t r = 0; r < data.union_readers.size(); r++) {
185
+ if (!data.union_readers[r]) {
186
+ data.union_readers.erase(data.union_readers.begin() + r);
187
+ r--;
188
+ continue;
189
+ }
185
190
  // check if the union reader should still be read or not
186
191
  auto entry = file_set.find(data.union_readers[r]->GetFileName());
187
192
  if (entry == file_set.end()) {
@@ -632,6 +632,13 @@ struct CastTimestampMsToUs {
632
632
  }
633
633
  };
634
634
 
635
+ struct CastTimestampMsToNs {
636
+ template <class SRC, class DST>
637
+ static inline DST Operation(SRC input) {
638
+ throw duckdb::NotImplementedException("Cast to TIMESTAMP_NS could not be performed!");
639
+ }
640
+ };
641
+
635
642
  struct CastTimestampNsToUs {
636
643
  template <class SRC, class DST>
637
644
  static inline DST Operation(SRC input) {
@@ -639,6 +646,13 @@ struct CastTimestampNsToUs {
639
646
  }
640
647
  };
641
648
 
649
+ struct CastTimestampSecToMs {
650
+ template <class SRC, class DST>
651
+ static inline DST Operation(SRC input) {
652
+ throw duckdb::NotImplementedException("Cast to TIMESTAMP_MS could not be performed!");
653
+ }
654
+ };
655
+
642
656
  struct CastTimestampSecToUs {
643
657
  template <class SRC, class DST>
644
658
  static inline DST Operation(SRC input) {
@@ -646,6 +660,13 @@ struct CastTimestampSecToUs {
646
660
  }
647
661
  };
648
662
 
663
+ struct CastTimestampSecToNs {
664
+ template <class SRC, class DST>
665
+ static inline DST Operation(SRC input) {
666
+ throw duckdb::NotImplementedException("Cast to TIMESTAMP_NS could not be performed!");
667
+ }
668
+ };
669
+
649
670
  template <>
650
671
  duckdb::timestamp_t CastTimestampUsToMs::Operation(duckdb::timestamp_t input);
651
672
  template <>
@@ -655,9 +676,15 @@ duckdb::timestamp_t CastTimestampUsToSec::Operation(duckdb::timestamp_t input);
655
676
  template <>
656
677
  duckdb::timestamp_t CastTimestampMsToUs::Operation(duckdb::timestamp_t input);
657
678
  template <>
679
+ duckdb::timestamp_t CastTimestampMsToNs::Operation(duckdb::timestamp_t input);
680
+ template <>
658
681
  duckdb::timestamp_t CastTimestampNsToUs::Operation(duckdb::timestamp_t input);
659
682
  template <>
683
+ duckdb::timestamp_t CastTimestampSecToMs::Operation(duckdb::timestamp_t input);
684
+ template <>
660
685
  duckdb::timestamp_t CastTimestampSecToUs::Operation(duckdb::timestamp_t input);
686
+ template <>
687
+ duckdb::timestamp_t CastTimestampSecToNs::Operation(duckdb::timestamp_t input);
661
688
 
662
689
  template <>
663
690
  duckdb::string_t CastFromTimestampNS::Operation(duckdb::timestamp_t input, Vector &result);
@@ -8,11 +8,11 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/string_type.hpp"
11
+ #include "duckdb/common/helper.hpp"
12
12
  #include "duckdb/common/types.hpp"
13
13
  #include "duckdb/common/types/hugeint.hpp"
14
14
  #include "duckdb/common/types/interval.hpp"
15
- #include "duckdb/common/helper.hpp"
15
+ #include "duckdb/common/types/string_type.hpp"
16
16
 
17
17
  #include <cstring>
18
18
 
@@ -142,6 +142,42 @@ struct DistinctLessThanEquals {
142
142
  }
143
143
  };
144
144
 
145
+ //===--------------------------------------------------------------------===//
146
+ // Comparison Operator Wrappers (so (Not)DistinctFrom have the same API)
147
+ //===--------------------------------------------------------------------===//
148
+ template <class OP>
149
+ struct ComparisonOperationWrapper {
150
+ static constexpr const bool COMPARE_NULL = false;
151
+
152
+ template <class T>
153
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
154
+ if (right_null || left_null) {
155
+ return false;
156
+ }
157
+ return OP::template Operation<T>(left, right);
158
+ }
159
+ };
160
+
161
+ template <>
162
+ struct ComparisonOperationWrapper<DistinctFrom> {
163
+ static constexpr const bool COMPARE_NULL = true;
164
+
165
+ template <class T>
166
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
167
+ return DistinctFrom::template Operation<T>(left, right, left_null, right_null);
168
+ }
169
+ };
170
+
171
+ template <>
172
+ struct ComparisonOperationWrapper<NotDistinctFrom> {
173
+ static constexpr const bool COMPARE_NULL = true;
174
+
175
+ template <class T>
176
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
177
+ return NotDistinctFrom::template Operation<T>(left, right, left_null, right_null);
178
+ }
179
+ };
180
+
145
181
  //===--------------------------------------------------------------------===//
146
182
  // Specialized Boolean Comparison Operators
147
183
  //===--------------------------------------------------------------------===//
@@ -408,6 +408,7 @@ struct StructType {
408
408
  DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type, idx_t index);
409
409
  DUCKDB_API static const string &GetChildName(const LogicalType &type, idx_t index);
410
410
  DUCKDB_API static idx_t GetChildCount(const LogicalType &type);
411
+ DUCKDB_API static bool IsUnnamed(const LogicalType &type);
411
412
  };
412
413
 
413
414
  struct MapType {
@@ -18,7 +18,7 @@ namespace duckdb {
18
18
  struct GetBitFun {
19
19
  static constexpr const char *Name = "get_bit";
20
20
  static constexpr const char *Parameters = "bitstring,index";
21
- static constexpr const char *Description = "Extracts the nth bit from bitstring; the first (leftmost) bit is indexed 0.";
21
+ static constexpr const char *Description = "Extracts the nth bit from bitstring; the first (leftmost) bit is indexed 0";
22
22
  static constexpr const char *Example = "get_bit('0110010'::BIT, 2)";
23
23
 
24
24
  static ScalarFunction GetFunction();
@@ -27,7 +27,7 @@ struct GetBitFun {
27
27
  struct SetBitFun {
28
28
  static constexpr const char *Name = "set_bit";
29
29
  static constexpr const char *Parameters = "bitstring,index,new_value";
30
- static constexpr const char *Description = "Sets the nth bit in bitstring to newvalue; the first (leftmost) bit is indexed 0. Returns a new bitstring.";
30
+ static constexpr const char *Description = "Sets the nth bit in bitstring to newvalue; the first (leftmost) bit is indexed 0. Returns a new bitstring";
31
31
  static constexpr const char *Example = "set_bit('0110010'::BIT, 2, 0)";
32
32
 
33
33
  static ScalarFunction GetFunction();
@@ -36,7 +36,7 @@ struct SetBitFun {
36
36
  struct BitPositionFun {
37
37
  static constexpr const char *Name = "bit_position";
38
38
  static constexpr const char *Parameters = "substring,bitstring";
39
- static constexpr const char *Description = "Returns first starting index of the specified substring within bits, or zero if it’s not present. The first (leftmost) bit is indexed 1";
39
+ static constexpr const char *Description = "Returns first starting index of the specified substring within bits, or zero if it is not present. The first (leftmost) bit is indexed 1";
40
40
  static constexpr const char *Example = "bit_position('010'::BIT, '1110101'::BIT)";
41
41
 
42
42
  static ScalarFunction GetFunction();
@@ -45,7 +45,7 @@ struct BitPositionFun {
45
45
  struct BitStringFun {
46
46
  static constexpr const char *Name = "bitstring";
47
47
  static constexpr const char *Parameters = "bitstring,length";
48
- static constexpr const char *Description = "Pads the bitstring until the specified length.";
48
+ static constexpr const char *Description = "Pads the bitstring until the specified length";
49
49
  static constexpr const char *Example = "bitstring('1010'::BIT, 7)";
50
50
 
51
51
  static ScalarFunction GetFunction();
@@ -18,7 +18,7 @@ namespace duckdb {
18
18
  struct DecodeFun {
19
19
  static constexpr const char *Name = "decode";
20
20
  static constexpr const char *Parameters = "blob";
21
- static constexpr const char *Description = "Convert blob to varchar. Fails if blob is not valid utf-8.";
21
+ static constexpr const char *Description = "Convert blob to varchar. Fails if blob is not valid utf-8";
22
22
  static constexpr const char *Example = "decode('\\xC3\\xBC'::BLOB)";
23
23
 
24
24
  static ScalarFunction GetFunction();
@@ -27,7 +27,7 @@ struct DecodeFun {
27
27
  struct EncodeFun {
28
28
  static constexpr const char *Name = "encode";
29
29
  static constexpr const char *Parameters = "string";
30
- static constexpr const char *Description = "Convert varchar to blob. Converts utf-8 characters into literal encoding.";
30
+ static constexpr const char *Description = "Convert varchar to blob. Converts utf-8 characters into literal encoding";
31
31
  static constexpr const char *Example = "encode('my_string_with_ü')";
32
32
 
33
33
  static ScalarFunction GetFunction();
@@ -36,7 +36,7 @@ struct EncodeFun {
36
36
  struct FromBase64Fun {
37
37
  static constexpr const char *Name = "from_base64";
38
38
  static constexpr const char *Parameters = "string";
39
- static constexpr const char *Description = "Convert a base64 encoded string to a character string.";
39
+ static constexpr const char *Description = "Convert a base64 encoded string to a character string";
40
40
  static constexpr const char *Example = "from_base64('QQ==')";
41
41
 
42
42
  static ScalarFunction GetFunction();
@@ -45,7 +45,7 @@ struct FromBase64Fun {
45
45
  struct ToBase64Fun {
46
46
  static constexpr const char *Name = "to_base64";
47
47
  static constexpr const char *Parameters = "blob";
48
- static constexpr const char *Description = "Convert a blob to a base64 encoded string.";
48
+ static constexpr const char *Description = "Convert a blob to a base64 encoded string";
49
49
  static constexpr const char *Example = "base64('A'::blob)";
50
50
 
51
51
  static ScalarFunction GetFunction();