duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +10 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -66,7 +66,7 @@ inline string_t TupleDataWithinListValueLoad(const data_ptr_t &location, data_pt
|
|
66
66
|
static void ResetCombinedListData(vector<TupleDataVectorFormat> &vector_data) {
|
67
67
|
for (auto &vd : vector_data) {
|
68
68
|
vd.combined_list_data = nullptr;
|
69
|
-
ResetCombinedListData(vd.
|
69
|
+
ResetCombinedListData(vd.children);
|
70
70
|
}
|
71
71
|
}
|
72
72
|
#endif
|
@@ -102,7 +102,7 @@ void TupleDataCollection::ComputeHeapSizes(Vector &heap_sizes_v, const Vector &s
|
|
102
102
|
|
103
103
|
auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
|
104
104
|
|
105
|
-
const auto &source_vector_data = source_format.
|
105
|
+
const auto &source_vector_data = source_format.unified;
|
106
106
|
const auto &source_sel = *source_vector_data.sel;
|
107
107
|
const auto &source_validity = source_vector_data.validity;
|
108
108
|
|
@@ -125,7 +125,7 @@ void TupleDataCollection::ComputeHeapSizes(Vector &heap_sizes_v, const Vector &s
|
|
125
125
|
auto &struct_sources = StructVector::GetEntries(source_v);
|
126
126
|
for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
|
127
127
|
const auto &struct_source = struct_sources[struct_col_idx];
|
128
|
-
auto &struct_format = source_format.
|
128
|
+
auto &struct_format = source_format.children[struct_col_idx];
|
129
129
|
TupleDataCollection::ComputeHeapSizes(heap_sizes_v, *struct_source, struct_format, append_sel,
|
130
130
|
append_count);
|
131
131
|
}
|
@@ -141,9 +141,9 @@ void TupleDataCollection::ComputeHeapSizes(Vector &heap_sizes_v, const Vector &s
|
|
141
141
|
}
|
142
142
|
|
143
143
|
// Recurse
|
144
|
-
D_ASSERT(source_format.
|
144
|
+
D_ASSERT(source_format.children.size() == 1);
|
145
145
|
auto &child_source_v = ListVector::GetEntry(source_v);
|
146
|
-
auto &child_format = source_format.
|
146
|
+
auto &child_format = source_format.children[0];
|
147
147
|
TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, child_source_v, child_format, append_sel,
|
148
148
|
append_count, source_vector_data);
|
149
149
|
break;
|
@@ -217,8 +217,8 @@ void TupleDataCollection::StringWithinListComputeHeapSizes(Vector &heap_sizes_v,
|
|
217
217
|
const SelectionVector &append_sel, const idx_t append_count,
|
218
218
|
const UnifiedVectorFormat &list_data) {
|
219
219
|
// Source
|
220
|
-
const auto &source_data = source_format.
|
221
|
-
const auto source_sel = *source_data.sel;
|
220
|
+
const auto &source_data = source_format.unified;
|
221
|
+
const auto &source_sel = *source_data.sel;
|
222
222
|
const auto data = UnifiedVectorFormat::GetData<string_t>(source_data);
|
223
223
|
const auto &source_validity = source_data.validity;
|
224
224
|
|
@@ -285,7 +285,7 @@ void TupleDataCollection::StructWithinListComputeHeapSizes(Vector &heap_sizes_v,
|
|
285
285
|
auto &struct_sources = StructVector::GetEntries(source_v);
|
286
286
|
for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
|
287
287
|
auto &struct_source = *struct_sources[struct_col_idx];
|
288
|
-
auto &struct_format = source_format.
|
288
|
+
auto &struct_format = source_format.children[struct_col_idx];
|
289
289
|
TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, struct_source, struct_format, append_sel,
|
290
290
|
append_count, list_data);
|
291
291
|
}
|
@@ -297,22 +297,22 @@ static void ApplySliceRecursive(const Vector &source_v, TupleDataVectorFormat &s
|
|
297
297
|
auto &combined_list_data = *source_format.combined_list_data;
|
298
298
|
|
299
299
|
combined_list_data.selection_data = source_format.original_sel->Slice(combined_sel, count);
|
300
|
-
source_format.
|
301
|
-
source_format.
|
300
|
+
source_format.unified.owned_sel.Initialize(combined_list_data.selection_data);
|
301
|
+
source_format.unified.sel = &source_format.unified.owned_sel;
|
302
302
|
|
303
303
|
if (source_v.GetType().InternalType() == PhysicalType::STRUCT) {
|
304
304
|
// We have to apply it to the child vectors too
|
305
305
|
auto &struct_sources = StructVector::GetEntries(source_v);
|
306
306
|
for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
|
307
307
|
auto &struct_source = *struct_sources[struct_col_idx];
|
308
|
-
auto &struct_format = source_format.
|
308
|
+
auto &struct_format = source_format.children[struct_col_idx];
|
309
309
|
#ifdef DEBUG
|
310
310
|
D_ASSERT(!struct_format.combined_list_data);
|
311
311
|
#endif
|
312
312
|
if (!struct_format.combined_list_data) {
|
313
313
|
struct_format.combined_list_data = make_uniq<CombinedListData>();
|
314
314
|
}
|
315
|
-
ApplySliceRecursive(struct_source, struct_format, *source_format.
|
315
|
+
ApplySliceRecursive(struct_source, struct_format, *source_format.unified.sel, count);
|
316
316
|
}
|
317
317
|
}
|
318
318
|
}
|
@@ -327,7 +327,7 @@ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, c
|
|
327
327
|
const auto &list_validity = list_data.validity;
|
328
328
|
|
329
329
|
// Child list ("source_v")
|
330
|
-
const auto &child_list_data = source_format.
|
330
|
+
const auto &child_list_data = source_format.unified;
|
331
331
|
const auto child_list_sel = *child_list_data.sel;
|
332
332
|
const auto child_list_entries = UnifiedVectorFormat::GetData<list_entry_t>(child_list_data);
|
333
333
|
const auto &child_list_validity = child_list_data.validity;
|
@@ -362,7 +362,7 @@ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, c
|
|
362
362
|
auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
|
363
363
|
|
364
364
|
// Construct combined list entries and a selection vector for the child list child
|
365
|
-
auto &child_format = source_format.
|
365
|
+
auto &child_format = source_format.children[0];
|
366
366
|
#ifdef DEBUG
|
367
367
|
// In debug mode this should be deleted by ResetCombinedListData
|
368
368
|
D_ASSERT(!child_format.combined_list_data);
|
@@ -425,7 +425,7 @@ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, c
|
|
425
425
|
combined_child_list_data.validity = list_data.validity;
|
426
426
|
|
427
427
|
// Combine the selection vectors
|
428
|
-
D_ASSERT(source_format.
|
428
|
+
D_ASSERT(source_format.children.size() == 1);
|
429
429
|
auto &child_source = ListVector::GetEntry(source_v);
|
430
430
|
ApplySliceRecursive(child_source, child_format, combined_sel, child_list_child_count);
|
431
431
|
|
@@ -464,18 +464,18 @@ void TupleDataCollection::Scatter(TupleDataChunkState &chunk_state, const Vector
|
|
464
464
|
const auto &scatter_function = scatter_functions[column_id];
|
465
465
|
scatter_function.function(source, chunk_state.vector_data[column_id], append_sel, append_count, layout,
|
466
466
|
chunk_state.row_locations, chunk_state.heap_locations, column_id,
|
467
|
-
chunk_state.vector_data[column_id].
|
467
|
+
chunk_state.vector_data[column_id].unified, scatter_function.child_functions);
|
468
468
|
}
|
469
469
|
|
470
470
|
template <class T>
|
471
471
|
static void TupleDataTemplatedScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
472
472
|
const SelectionVector &append_sel, const idx_t append_count,
|
473
|
-
const TupleDataLayout &layout, Vector &row_locations,
|
474
|
-
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
473
|
+
const TupleDataLayout &layout, const Vector &row_locations,
|
474
|
+
Vector &heap_locations, const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
475
475
|
const vector<TupleDataScatterFunction> &child_functions) {
|
476
476
|
// Source
|
477
|
-
const auto &source_data = source_format.
|
478
|
-
const auto source_sel = *source_data.sel;
|
477
|
+
const auto &source_data = source_format.unified;
|
478
|
+
const auto &source_sel = *source_data.sel;
|
479
479
|
const auto data = UnifiedVectorFormat::GetData<T>(source_data);
|
480
480
|
const auto &validity = source_data.validity;
|
481
481
|
|
@@ -509,12 +509,12 @@ static void TupleDataTemplatedScatter(const Vector &source, const TupleDataVecto
|
|
509
509
|
|
510
510
|
static void TupleDataStructScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
511
511
|
const SelectionVector &append_sel, const idx_t append_count,
|
512
|
-
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
512
|
+
const TupleDataLayout &layout, const Vector &row_locations, Vector &heap_locations,
|
513
513
|
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
514
514
|
const vector<TupleDataScatterFunction> &child_functions) {
|
515
515
|
// Source
|
516
|
-
const auto &source_data = source_format.
|
517
|
-
const auto source_sel = *source_data.sel;
|
516
|
+
const auto &source_data = source_format.unified;
|
517
|
+
const auto &source_sel = *source_data.sel;
|
518
518
|
const auto &validity = source_data.validity;
|
519
519
|
|
520
520
|
// Target
|
@@ -556,7 +556,7 @@ static void TupleDataStructScatter(const Vector &source, const TupleDataVectorFo
|
|
556
556
|
// Recurse through the struct children
|
557
557
|
for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
|
558
558
|
auto &struct_source = *struct_sources[struct_col_idx];
|
559
|
-
const auto &struct_source_format = source_format.
|
559
|
+
const auto &struct_source_format = source_format.children[struct_col_idx];
|
560
560
|
const auto &struct_scatter_function = child_functions[struct_col_idx];
|
561
561
|
struct_scatter_function.function(struct_source, struct_source_format, append_sel, append_count, struct_layout,
|
562
562
|
struct_row_locations, heap_locations, struct_col_idx, dummy_arg,
|
@@ -566,12 +566,12 @@ static void TupleDataStructScatter(const Vector &source, const TupleDataVectorFo
|
|
566
566
|
|
567
567
|
static void TupleDataListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
568
568
|
const SelectionVector &append_sel, const idx_t append_count,
|
569
|
-
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
569
|
+
const TupleDataLayout &layout, const Vector &row_locations, Vector &heap_locations,
|
570
570
|
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
571
571
|
const vector<TupleDataScatterFunction> &child_functions) {
|
572
572
|
// Source
|
573
|
-
const auto &source_data = source_format.
|
574
|
-
const auto source_sel = *source_data.sel;
|
573
|
+
const auto &source_data = source_format.unified;
|
574
|
+
const auto &source_sel = *source_data.sel;
|
575
575
|
const auto data = UnifiedVectorFormat::GetData<list_entry_t>(source_data);
|
576
576
|
const auto &validity = source_data.validity;
|
577
577
|
|
@@ -603,22 +603,22 @@ static void TupleDataListScatter(const Vector &source, const TupleDataVectorForm
|
|
603
603
|
// Recurse
|
604
604
|
D_ASSERT(child_functions.size() == 1);
|
605
605
|
auto &child_source = ListVector::GetEntry(source);
|
606
|
-
auto &child_format = source_format.
|
606
|
+
auto &child_format = source_format.children[0];
|
607
607
|
const auto &child_function = child_functions[0];
|
608
608
|
child_function.function(child_source, child_format, append_sel, append_count, layout, row_locations, heap_locations,
|
609
|
-
col_idx, source_format.
|
609
|
+
col_idx, source_format.unified, child_function.child_functions);
|
610
610
|
}
|
611
611
|
|
612
612
|
template <class T>
|
613
613
|
static void TupleDataTemplatedWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
614
614
|
const SelectionVector &append_sel, const idx_t append_count,
|
615
|
-
const TupleDataLayout &layout, Vector &row_locations,
|
615
|
+
const TupleDataLayout &layout, const Vector &row_locations,
|
616
616
|
Vector &heap_locations, const idx_t col_idx,
|
617
617
|
const UnifiedVectorFormat &list_data,
|
618
618
|
const vector<TupleDataScatterFunction> &child_functions) {
|
619
619
|
// Source
|
620
|
-
const auto &source_data = source_format.
|
621
|
-
const auto source_sel = *source_data.sel;
|
620
|
+
const auto &source_data = source_format.unified;
|
621
|
+
const auto &source_sel = *source_data.sel;
|
622
622
|
const auto data = UnifiedVectorFormat::GetData<T>(source_data);
|
623
623
|
const auto &source_validity = source_data.validity;
|
624
624
|
|
@@ -667,13 +667,13 @@ static void TupleDataTemplatedWithinListScatter(const Vector &source, const Tupl
|
|
667
667
|
|
668
668
|
static void TupleDataStructWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
669
669
|
const SelectionVector &append_sel, const idx_t append_count,
|
670
|
-
const TupleDataLayout &layout, Vector &row_locations,
|
670
|
+
const TupleDataLayout &layout, const Vector &row_locations,
|
671
671
|
Vector &heap_locations, const idx_t col_idx,
|
672
672
|
const UnifiedVectorFormat &list_data,
|
673
673
|
const vector<TupleDataScatterFunction> &child_functions) {
|
674
674
|
// Source
|
675
|
-
const auto &source_data = source_format.
|
676
|
-
const auto source_sel = *source_data.sel;
|
675
|
+
const auto &source_data = source_format.unified;
|
676
|
+
const auto &source_sel = *source_data.sel;
|
677
677
|
const auto &source_validity = source_data.validity;
|
678
678
|
|
679
679
|
// List data
|
@@ -715,7 +715,7 @@ static void TupleDataStructWithinListScatter(const Vector &source, const TupleDa
|
|
715
715
|
auto &struct_sources = StructVector::GetEntries(source);
|
716
716
|
for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
|
717
717
|
auto &struct_source = *struct_sources[struct_col_idx];
|
718
|
-
auto &struct_format = source_format.
|
718
|
+
auto &struct_format = source_format.children[struct_col_idx];
|
719
719
|
const auto &struct_scatter_function = child_functions[struct_col_idx];
|
720
720
|
struct_scatter_function.function(struct_source, struct_format, append_sel, append_count, layout, row_locations,
|
721
721
|
heap_locations, struct_col_idx, list_data,
|
@@ -725,8 +725,9 @@ static void TupleDataStructWithinListScatter(const Vector &source, const TupleDa
|
|
725
725
|
|
726
726
|
static void TupleDataListWithinListScatter(const Vector &child_list, const TupleDataVectorFormat &child_list_format,
|
727
727
|
const SelectionVector &append_sel, const idx_t append_count,
|
728
|
-
const TupleDataLayout &layout, Vector &row_locations,
|
729
|
-
|
728
|
+
const TupleDataLayout &layout, const Vector &row_locations,
|
729
|
+
Vector &heap_locations, const idx_t col_idx,
|
730
|
+
const UnifiedVectorFormat &list_data,
|
730
731
|
const vector<TupleDataScatterFunction> &child_functions) {
|
731
732
|
// List data (of the list Vector that "child_list" is in)
|
732
733
|
const auto list_sel = *list_data.sel;
|
@@ -734,7 +735,7 @@ static void TupleDataListWithinListScatter(const Vector &child_list, const Tuple
|
|
734
735
|
const auto &list_validity = list_data.validity;
|
735
736
|
|
736
737
|
// Child list
|
737
|
-
const auto &child_list_data = child_list_format.
|
738
|
+
const auto &child_list_data = child_list_format.unified;
|
738
739
|
const auto child_list_sel = *child_list_data.sel;
|
739
740
|
const auto child_list_entries = UnifiedVectorFormat::GetData<list_entry_t>(child_list_data);
|
740
741
|
const auto &child_list_validity = child_list_data.validity;
|
@@ -777,7 +778,7 @@ static void TupleDataListWithinListScatter(const Vector &child_list, const Tuple
|
|
777
778
|
// Recurse
|
778
779
|
D_ASSERT(child_functions.size() == 1);
|
779
780
|
auto &child_vec = ListVector::GetEntry(child_list);
|
780
|
-
auto &child_format = child_list_format.
|
781
|
+
auto &child_format = child_list_format.children[0];
|
781
782
|
auto &combined_child_list_data = child_format.combined_list_data->combined_data;
|
782
783
|
const auto &child_function = child_functions[0];
|
783
784
|
child_function.function(child_vec, child_format, append_sel, append_count, layout, row_locations, heap_locations,
|
@@ -899,7 +900,7 @@ static void TupleDataTemplatedGather(const TupleDataLayout &layout, Vector &row_
|
|
899
900
|
const auto &source_row = source_locations[scan_sel.get_index(i)];
|
900
901
|
const auto target_idx = target_sel.get_index(i);
|
901
902
|
ValidityBytes row_mask(source_row);
|
902
|
-
if (row_mask.RowIsValid(row_mask.
|
903
|
+
if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
903
904
|
target_data[target_idx] = Load<T>(source_row + offset_in_row);
|
904
905
|
} else {
|
905
906
|
target_validity.SetInvalid(target_idx);
|
@@ -932,7 +933,7 @@ static void TupleDataStructGather(const TupleDataLayout &layout, Vector &row_loc
|
|
932
933
|
|
933
934
|
// Set the validity
|
934
935
|
ValidityBytes row_mask(source_row);
|
935
|
-
if (!row_mask.RowIsValid(row_mask.
|
936
|
+
if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
936
937
|
const auto target_idx = target_sel.get_index(i);
|
937
938
|
target_validity.SetInvalid(target_idx);
|
938
939
|
}
|
@@ -985,7 +986,7 @@ static void TupleDataListGather(const TupleDataLayout &layout, Vector &row_locat
|
|
985
986
|
|
986
987
|
const auto &source_row = source_locations[source_idx];
|
987
988
|
ValidityBytes row_mask(source_row);
|
988
|
-
if (row_mask.RowIsValid(row_mask.
|
989
|
+
if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
989
990
|
auto &source_heap_location = source_heap_locations[source_idx];
|
990
991
|
source_heap_location = Load<data_ptr_t>(source_row + offset_in_row);
|
991
992
|
|
@@ -103,6 +103,7 @@ static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVe
|
|
103
103
|
Vector child_hashes(LogicalType::HASH, child_count);
|
104
104
|
if (child_count > 0) {
|
105
105
|
VectorOperations::Hash(child, child_hashes, child_count);
|
106
|
+
child_hashes.Flatten(child_count);
|
106
107
|
}
|
107
108
|
auto chdata = FlatVector::GetData<hash_t>(child_hashes);
|
108
109
|
|
@@ -274,7 +274,7 @@ static StaticFunctionDefinition internal_functions[] = {
|
|
274
274
|
DUCKDB_AGGREGATE_FUNCTION(RegrSXXFun),
|
275
275
|
DUCKDB_AGGREGATE_FUNCTION(RegrSXYFun),
|
276
276
|
DUCKDB_AGGREGATE_FUNCTION(RegrSYYFun),
|
277
|
-
|
277
|
+
DUCKDB_SCALAR_FUNCTION_SET(RepeatFun),
|
278
278
|
DUCKDB_SCALAR_FUNCTION(ReplaceFun),
|
279
279
|
DUCKDB_AGGREGATE_FUNCTION_SET(ReservoirQuantileFun),
|
280
280
|
DUCKDB_SCALAR_FUNCTION(ReverseFun),
|
@@ -35,6 +35,7 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType
|
|
35
35
|
case DatePartSpecifier::DOY:
|
36
36
|
case DatePartSpecifier::YEARWEEK:
|
37
37
|
case DatePartSpecifier::ERA:
|
38
|
+
case DatePartSpecifier::EPOCH:
|
38
39
|
case DatePartSpecifier::JULIAN_DAY:
|
39
40
|
return part;
|
40
41
|
default:
|
@@ -465,7 +466,7 @@ struct DatePart {
|
|
465
466
|
|
466
467
|
template <class T>
|
467
468
|
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
|
468
|
-
return PropagateDatePartStatistics<T, EpochOperator>(input.child_stats);
|
469
|
+
return PropagateDatePartStatistics<T, EpochOperator, double>(input.child_stats, LogicalType::DOUBLE);
|
469
470
|
}
|
470
471
|
};
|
471
472
|
|
@@ -574,6 +575,8 @@ struct DatePart {
|
|
574
575
|
case DatePartSpecifier::TIMEZONE_MINUTE:
|
575
576
|
mask |= ZONE;
|
576
577
|
break;
|
578
|
+
case DatePartSpecifier::INVALID:
|
579
|
+
throw InternalException("Invalid DatePartSpecifier for STRUCT mask!");
|
577
580
|
}
|
578
581
|
}
|
579
582
|
return mask;
|
@@ -581,12 +584,11 @@ struct DatePart {
|
|
581
584
|
|
582
585
|
template <typename P>
|
583
586
|
static inline P HasPartValue(vector<P> part_values, DatePartSpecifier part) {
|
584
|
-
static const auto BEGIN_DOUBLE = size_t(DatePartSpecifier::JULIAN_DAY);
|
585
587
|
auto idx = size_t(part);
|
586
588
|
if (IsBigintDatepart(part)) {
|
587
|
-
return part_values[idx];
|
589
|
+
return part_values[idx - size_t(DatePartSpecifier::BEGIN_BIGINT)];
|
588
590
|
} else {
|
589
|
-
return part_values[idx - BEGIN_DOUBLE];
|
591
|
+
return part_values[idx - size_t(DatePartSpecifier::BEGIN_DOUBLE)];
|
590
592
|
}
|
591
593
|
}
|
592
594
|
|
@@ -670,9 +672,9 @@ struct DatePart {
|
|
670
672
|
}
|
671
673
|
|
672
674
|
if (mask & EPOCH) {
|
673
|
-
|
674
|
-
if (
|
675
|
-
|
675
|
+
auto double_data = HasPartValue(double_values, DatePartSpecifier::EPOCH);
|
676
|
+
if (double_data) {
|
677
|
+
double_data[idx] = Date::Epoch(input);
|
676
678
|
}
|
677
679
|
}
|
678
680
|
if (mask & DOY) {
|
@@ -1016,12 +1018,12 @@ int64_t DatePart::HoursOperator::Operation(dtime_t input) {
|
|
1016
1018
|
}
|
1017
1019
|
|
1018
1020
|
template <>
|
1019
|
-
|
1020
|
-
return Timestamp::
|
1021
|
+
double DatePart::EpochOperator::Operation(timestamp_t input) {
|
1022
|
+
return Timestamp::GetEpochMicroSeconds(input) / double(Interval::MICROS_PER_SEC);
|
1021
1023
|
}
|
1022
1024
|
|
1023
1025
|
template <>
|
1024
|
-
|
1026
|
+
double DatePart::EpochOperator::Operation(interval_t input) {
|
1025
1027
|
int64_t interval_years = input.months / Interval::MONTHS_PER_YEAR;
|
1026
1028
|
int64_t interval_days;
|
1027
1029
|
interval_days = Interval::DAYS_PER_YEAR * interval_years;
|
@@ -1031,20 +1033,29 @@ int64_t DatePart::EpochOperator::Operation(interval_t input) {
|
|
1031
1033
|
interval_epoch = interval_days * Interval::SECS_PER_DAY;
|
1032
1034
|
// we add 0.25 days per year to sort of account for leap days
|
1033
1035
|
interval_epoch += interval_years * (Interval::SECS_PER_DAY / 4);
|
1034
|
-
interval_epoch
|
1035
|
-
|
1036
|
+
return interval_epoch + input.micros / double(Interval::MICROS_PER_SEC);
|
1037
|
+
}
|
1038
|
+
|
1039
|
+
// TODO: We can't propagate interval statistics because we can't easily compare interval_t for order.
|
1040
|
+
template <>
|
1041
|
+
unique_ptr<BaseStatistics> DatePart::EpochOperator::PropagateStatistics<interval_t>(ClientContext &context,
|
1042
|
+
FunctionStatisticsInput &input) {
|
1043
|
+
return nullptr;
|
1036
1044
|
}
|
1037
1045
|
|
1038
1046
|
template <>
|
1039
|
-
|
1040
|
-
return input.micros / Interval::MICROS_PER_SEC;
|
1047
|
+
double DatePart::EpochOperator::Operation(dtime_t input) {
|
1048
|
+
return input.micros / double(Interval::MICROS_PER_SEC);
|
1041
1049
|
}
|
1042
1050
|
|
1043
1051
|
template <>
|
1044
1052
|
unique_ptr<BaseStatistics> DatePart::EpochOperator::PropagateStatistics<dtime_t>(ClientContext &context,
|
1045
1053
|
FunctionStatisticsInput &input) {
|
1046
|
-
|
1047
|
-
|
1054
|
+
auto result = NumericStats::CreateEmpty(LogicalType::DOUBLE);
|
1055
|
+
result.CopyValidity(input.child_stats[0]);
|
1056
|
+
NumericStats::SetMin(result, Value::DOUBLE(0));
|
1057
|
+
NumericStats::SetMax(result, Value::DOUBLE(Interval::SECS_PER_DAY));
|
1058
|
+
return result.ToUnique();
|
1048
1059
|
}
|
1049
1060
|
|
1050
1061
|
template <>
|
@@ -1121,9 +1132,9 @@ void DatePart::StructOperator::Operation(bigint_vec &bigint_values, double_vec &
|
|
1121
1132
|
}
|
1122
1133
|
|
1123
1134
|
if (mask & EPOCH) {
|
1124
|
-
part_data = HasPartValue(
|
1135
|
+
auto part_data = HasPartValue(double_values, DatePartSpecifier::EPOCH);
|
1125
1136
|
if (part_data) {
|
1126
|
-
part_data[idx] = EpochOperator::Operation<dtime_t,
|
1137
|
+
part_data[idx] = EpochOperator::Operation<dtime_t, double>(input);
|
1127
1138
|
;
|
1128
1139
|
}
|
1129
1140
|
}
|
@@ -1157,9 +1168,9 @@ void DatePart::StructOperator::Operation(bigint_vec &bigint_values, double_vec &
|
|
1157
1168
|
Operation(bigint_values, double_values, t, idx, mask & ~EPOCH);
|
1158
1169
|
|
1159
1170
|
if (mask & EPOCH) {
|
1160
|
-
auto part_data = HasPartValue(
|
1171
|
+
auto part_data = HasPartValue(double_values, DatePartSpecifier::EPOCH);
|
1161
1172
|
if (part_data) {
|
1162
|
-
part_data[idx] = EpochOperator::Operation<timestamp_t,
|
1173
|
+
part_data[idx] = EpochOperator::Operation<timestamp_t, double>(input);
|
1163
1174
|
}
|
1164
1175
|
}
|
1165
1176
|
|
@@ -1232,9 +1243,9 @@ void DatePart::StructOperator::Operation(bigint_vec &bigint_values, double_vec &
|
|
1232
1243
|
}
|
1233
1244
|
|
1234
1245
|
if (mask & EPOCH) {
|
1235
|
-
part_data = HasPartValue(
|
1246
|
+
auto part_data = HasPartValue(double_values, DatePartSpecifier::EPOCH);
|
1236
1247
|
if (part_data) {
|
1237
|
-
part_data[idx] = EpochOperator::Operation<interval_t,
|
1248
|
+
part_data[idx] = EpochOperator::Operation<interval_t, double>(input);
|
1238
1249
|
}
|
1239
1250
|
}
|
1240
1251
|
}
|
@@ -1268,8 +1279,6 @@ static int64_t ExtractElement(DatePartSpecifier type, T element) {
|
|
1268
1279
|
return DatePart::ISOYearOperator::template Operation<T, int64_t>(element);
|
1269
1280
|
case DatePartSpecifier::YEARWEEK:
|
1270
1281
|
return DatePart::YearWeekOperator::template Operation<T, int64_t>(element);
|
1271
|
-
case DatePartSpecifier::EPOCH:
|
1272
|
-
return DatePart::EpochOperator::template Operation<T, int64_t>(element);
|
1273
1282
|
case DatePartSpecifier::MICROSECONDS:
|
1274
1283
|
return DatePart::MicrosecondsOperator::template Operation<T, int64_t>(element);
|
1275
1284
|
case DatePartSpecifier::MILLISECONDS:
|
@@ -1317,9 +1326,6 @@ static unique_ptr<FunctionData> DatePartBind(ClientContext &context, ScalarFunct
|
|
1317
1326
|
}
|
1318
1327
|
|
1319
1328
|
Value part_value = ExpressionExecutor::EvaluateScalar(context, *arguments[0]);
|
1320
|
-
if (part_value.IsNull()) {
|
1321
|
-
return nullptr;
|
1322
|
-
}
|
1323
1329
|
const auto part_name = part_value.ToString();
|
1324
1330
|
switch (GetDatePartSpecifier(part_name)) {
|
1325
1331
|
case DatePartSpecifier::JULIAN_DAY:
|
@@ -1340,6 +1346,32 @@ static unique_ptr<FunctionData> DatePartBind(ClientContext &context, ScalarFunct
|
|
1340
1346
|
throw BinderException("%s can only take DATE or TIMESTAMP arguments", bound_function.name);
|
1341
1347
|
}
|
1342
1348
|
break;
|
1349
|
+
case DatePartSpecifier::EPOCH:
|
1350
|
+
arguments.erase(arguments.begin());
|
1351
|
+
bound_function.arguments.erase(bound_function.arguments.begin());
|
1352
|
+
bound_function.name = "epoch";
|
1353
|
+
bound_function.return_type = LogicalType::DOUBLE;
|
1354
|
+
switch (arguments[0]->return_type.id()) {
|
1355
|
+
case LogicalType::TIMESTAMP:
|
1356
|
+
bound_function.function = DatePart::UnaryFunction<timestamp_t, double, DatePart::EpochOperator>;
|
1357
|
+
bound_function.statistics = DatePart::EpochOperator::template PropagateStatistics<timestamp_t>;
|
1358
|
+
break;
|
1359
|
+
case LogicalType::DATE:
|
1360
|
+
bound_function.function = DatePart::UnaryFunction<date_t, double, DatePart::EpochOperator>;
|
1361
|
+
bound_function.statistics = DatePart::EpochOperator::template PropagateStatistics<date_t>;
|
1362
|
+
break;
|
1363
|
+
case LogicalType::INTERVAL:
|
1364
|
+
bound_function.function = DatePart::UnaryFunction<interval_t, double, DatePart::EpochOperator>;
|
1365
|
+
bound_function.statistics = DatePart::EpochOperator::template PropagateStatistics<interval_t>;
|
1366
|
+
break;
|
1367
|
+
case LogicalType::TIME:
|
1368
|
+
bound_function.function = DatePart::UnaryFunction<dtime_t, double, DatePart::EpochOperator>;
|
1369
|
+
bound_function.statistics = DatePart::EpochOperator::template PropagateStatistics<dtime_t>;
|
1370
|
+
break;
|
1371
|
+
default:
|
1372
|
+
throw BinderException("%s can only take temporal arguments", bound_function.name);
|
1373
|
+
}
|
1374
|
+
break;
|
1343
1375
|
default:
|
1344
1376
|
break;
|
1345
1377
|
}
|
@@ -1367,26 +1399,26 @@ static ScalarFunctionSet GetDatePartFunction() {
|
|
1367
1399
|
OP::template PropagateStatistics<timestamp_t>);
|
1368
1400
|
}
|
1369
1401
|
|
1370
|
-
ScalarFunctionSet GetGenericTimePartFunction(
|
1371
|
-
scalar_function_t
|
1372
|
-
|
1373
|
-
function_statistics_t time_stats) {
|
1402
|
+
ScalarFunctionSet GetGenericTimePartFunction(const LogicalType &result_type, scalar_function_t date_func,
|
1403
|
+
scalar_function_t ts_func, scalar_function_t interval_func,
|
1404
|
+
scalar_function_t time_func, function_statistics_t date_stats,
|
1405
|
+
function_statistics_t ts_stats, function_statistics_t time_stats) {
|
1374
1406
|
ScalarFunctionSet operator_set;
|
1375
1407
|
operator_set.AddFunction(
|
1376
|
-
ScalarFunction({LogicalType::DATE},
|
1408
|
+
ScalarFunction({LogicalType::DATE}, result_type, std::move(date_func), nullptr, nullptr, date_stats));
|
1377
1409
|
operator_set.AddFunction(
|
1378
|
-
ScalarFunction({LogicalType::TIMESTAMP},
|
1379
|
-
operator_set.AddFunction(ScalarFunction({LogicalType::INTERVAL},
|
1410
|
+
ScalarFunction({LogicalType::TIMESTAMP}, result_type, std::move(ts_func), nullptr, nullptr, ts_stats));
|
1411
|
+
operator_set.AddFunction(ScalarFunction({LogicalType::INTERVAL}, result_type, std::move(interval_func)));
|
1380
1412
|
operator_set.AddFunction(
|
1381
|
-
ScalarFunction({LogicalType::TIME},
|
1413
|
+
ScalarFunction({LogicalType::TIME}, result_type, std::move(time_func), nullptr, nullptr, time_stats));
|
1382
1414
|
return operator_set;
|
1383
1415
|
}
|
1384
1416
|
|
1385
|
-
template <class OP>
|
1386
|
-
static ScalarFunctionSet GetTimePartFunction() {
|
1417
|
+
template <class OP, class TR = int64_t>
|
1418
|
+
static ScalarFunctionSet GetTimePartFunction(const LogicalType &result_type = LogicalType::BIGINT) {
|
1387
1419
|
return GetGenericTimePartFunction(
|
1388
|
-
DatePart::UnaryFunction<date_t,
|
1389
|
-
ScalarFunction::UnaryFunction<interval_t,
|
1420
|
+
result_type, DatePart::UnaryFunction<date_t, TR, OP>, DatePart::UnaryFunction<timestamp_t, TR, OP>,
|
1421
|
+
ScalarFunction::UnaryFunction<interval_t, TR, OP>, ScalarFunction::UnaryFunction<dtime_t, TR, OP>,
|
1390
1422
|
OP::template PropagateStatistics<date_t>, OP::template PropagateStatistics<timestamp_t>,
|
1391
1423
|
OP::template PropagateStatistics<dtime_t>);
|
1392
1424
|
}
|
@@ -1425,10 +1457,6 @@ struct DayNameOperator {
|
|
1425
1457
|
struct StructDatePart {
|
1426
1458
|
using part_codes_t = vector<DatePartSpecifier>;
|
1427
1459
|
|
1428
|
-
static const auto BEGIN_BIGINT = size_t(DatePartSpecifier::YEAR);
|
1429
|
-
static const auto BEGIN_DOUBLE = size_t(DatePartSpecifier::JULIAN_DAY);
|
1430
|
-
static const auto BEGIN_INVALID = size_t(DatePartSpecifier::JULIAN_DAY) + 1;
|
1431
|
-
|
1432
1460
|
struct BindData : public VariableReturnBindData {
|
1433
1461
|
part_codes_t part_codes;
|
1434
1462
|
|
@@ -1492,8 +1520,12 @@ struct StructDatePart {
|
|
1492
1520
|
|
1493
1521
|
const auto count = args.size();
|
1494
1522
|
Vector &input = args.data[0];
|
1495
|
-
|
1496
|
-
|
1523
|
+
|
1524
|
+
// Type counts
|
1525
|
+
const auto BIGINT_COUNT = size_t(DatePartSpecifier::BEGIN_DOUBLE) - size_t(DatePartSpecifier::BEGIN_BIGINT);
|
1526
|
+
const auto DOUBLE_COUNT = size_t(DatePartSpecifier::BEGIN_INVALID) - size_t(DatePartSpecifier::BEGIN_DOUBLE);
|
1527
|
+
DatePart::StructOperator::bigint_vec bigint_values(BIGINT_COUNT, nullptr);
|
1528
|
+
DatePart::StructOperator::double_vec double_values(DOUBLE_COUNT, nullptr);
|
1497
1529
|
const auto part_mask = DatePart::StructOperator::GetMask(info.part_codes);
|
1498
1530
|
|
1499
1531
|
auto &child_entries = StructVector::GetEntries(result);
|
@@ -1521,9 +1553,11 @@ struct StructDatePart {
|
|
1521
1553
|
const auto part_index = size_t(info.part_codes[col]);
|
1522
1554
|
if (owners[part_index] == col) {
|
1523
1555
|
if (IsBigintDatepart(info.part_codes[col])) {
|
1524
|
-
bigint_values[part_index - BEGIN_BIGINT] =
|
1556
|
+
bigint_values[part_index - size_t(DatePartSpecifier::BEGIN_BIGINT)] =
|
1557
|
+
ConstantVector::GetData<int64_t>(*child_entry);
|
1525
1558
|
} else {
|
1526
|
-
double_values[part_index - BEGIN_DOUBLE] =
|
1559
|
+
double_values[part_index - size_t(DatePartSpecifier::BEGIN_DOUBLE)] =
|
1560
|
+
ConstantVector::GetData<double>(*child_entry);
|
1527
1561
|
}
|
1528
1562
|
}
|
1529
1563
|
}
|
@@ -1563,9 +1597,11 @@ struct StructDatePart {
|
|
1563
1597
|
const auto part_index = size_t(info.part_codes[col]);
|
1564
1598
|
if (owners[part_index] == col) {
|
1565
1599
|
if (IsBigintDatepart(info.part_codes[col])) {
|
1566
|
-
bigint_values[part_index - BEGIN_BIGINT] =
|
1600
|
+
bigint_values[part_index - size_t(DatePartSpecifier::BEGIN_BIGINT)] =
|
1601
|
+
FlatVector::GetData<int64_t>(*child_entry);
|
1567
1602
|
} else {
|
1568
|
-
double_values[part_index - BEGIN_DOUBLE] =
|
1603
|
+
double_values[part_index - size_t(DatePartSpecifier::BEGIN_DOUBLE)] =
|
1604
|
+
FlatVector::GetData<double>(*child_entry);
|
1569
1605
|
}
|
1570
1606
|
}
|
1571
1607
|
}
|
@@ -1694,7 +1730,7 @@ ScalarFunctionSet TimezoneMinuteFun::GetFunctions() {
|
|
1694
1730
|
}
|
1695
1731
|
|
1696
1732
|
ScalarFunctionSet EpochFun::GetFunctions() {
|
1697
|
-
return GetTimePartFunction<DatePart::EpochOperator>();
|
1733
|
+
return GetTimePartFunction<DatePart::EpochOperator, double>(LogicalType::DOUBLE);
|
1698
1734
|
}
|
1699
1735
|
|
1700
1736
|
ScalarFunctionSet EpochNsFun::GetFunctions() {
|
@@ -1,10 +1,9 @@
|
|
1
|
-
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
2
|
-
|
3
1
|
#include "duckdb/common/exception.hpp"
|
4
2
|
#include "duckdb/common/vector_operations/binary_executor.hpp"
|
3
|
+
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
5
4
|
|
6
|
-
#include <string.h>
|
7
5
|
#include <ctype.h>
|
6
|
+
#include <string.h>
|
8
7
|
|
9
8
|
namespace duckdb {
|
10
9
|
|
@@ -33,8 +32,12 @@ static void RepeatFunction(DataChunk &args, ExpressionState &state, Vector &resu
|
|
33
32
|
});
|
34
33
|
}
|
35
34
|
|
36
|
-
|
37
|
-
|
35
|
+
ScalarFunctionSet RepeatFun::GetFunctions() {
|
36
|
+
ScalarFunctionSet repeat;
|
37
|
+
for (const auto &type : {LogicalType::VARCHAR, LogicalType::BLOB}) {
|
38
|
+
repeat.AddFunction(ScalarFunction({type, LogicalType::BIGINT}, type, RepeatFunction));
|
39
|
+
}
|
40
|
+
return repeat;
|
38
41
|
}
|
39
42
|
|
40
43
|
} // namespace duckdb
|