duckdb 0.3.4-dev9.0 → 0.3.5-dev2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +55 -63
- package/src/duckdb.hpp +26 -6
- package/src/parquet-amalgamation.cpp +29837 -29837
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -60698,6 +60698,7 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa
|
|
|
60698
60698
|
const idx_t count = ie_lstate.SelectOuterRows(ie_lstate.right_matches);
|
|
60699
60699
|
if (!count) {
|
|
60700
60700
|
ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate);
|
|
60701
|
+
continue;
|
|
60701
60702
|
}
|
|
60702
60703
|
|
|
60703
60704
|
SliceSortedPayload(result, ie_sink.tables[1]->global_sort_state, ie_lstate.right_base, ie_lstate.true_sel,
|
|
@@ -68361,12 +68362,12 @@ void PartitionableHashTable::Partition() {
|
|
|
68361
68362
|
D_ASSERT(radix_partitioned_hts.size() == 0);
|
|
68362
68363
|
D_ASSERT(partition_info.n_partitions > 1);
|
|
68363
68364
|
|
|
68364
|
-
vector<GroupedAggregateHashTable *> partition_hts;
|
|
68365
|
+
vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
|
|
68365
68366
|
for (auto &unpartitioned_ht : unpartitioned_hts) {
|
|
68366
68367
|
for (idx_t r = 0; r < partition_info.n_partitions; r++) {
|
|
68367
68368
|
radix_partitioned_hts[r].push_back(make_unique<GroupedAggregateHashTable>(
|
|
68368
68369
|
buffer_manager, group_types, payload_types, bindings, HtEntryType::HT_WIDTH_32));
|
|
68369
|
-
partition_hts.push_back(radix_partitioned_hts[r].back().get());
|
|
68370
|
+
partition_hts[r] = radix_partitioned_hts[r].back().get();
|
|
68370
68371
|
}
|
|
68371
68372
|
unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT);
|
|
68372
68373
|
unpartitioned_ht.reset();
|
|
@@ -68845,6 +68846,8 @@ template <>
|
|
|
68845
68846
|
bool TrySubtractOperator::Operation(int32_t left, int32_t right, int32_t &result);
|
|
68846
68847
|
template <>
|
|
68847
68848
|
bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result);
|
|
68849
|
+
template <>
|
|
68850
|
+
bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result);
|
|
68848
68851
|
|
|
68849
68852
|
struct SubtractOperatorOverflowCheck {
|
|
68850
68853
|
template <class TA, class TB, class TR>
|
|
@@ -80015,6 +80018,9 @@ void HistogramFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
80015
80018
|
fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_S));
|
|
80016
80019
|
fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_MS));
|
|
80017
80020
|
fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_NS));
|
|
80021
|
+
fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME));
|
|
80022
|
+
fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME_TZ));
|
|
80023
|
+
fun.AddFunction(GetHistogramFunction<int32_t>(LogicalType::DATE));
|
|
80018
80024
|
set.AddFunction(fun);
|
|
80019
80025
|
}
|
|
80020
80026
|
|
|
@@ -87821,6 +87827,10 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
|
|
|
87821
87827
|
VectorData value_data;
|
|
87822
87828
|
value_vector.Orrify(count, value_data);
|
|
87823
87829
|
|
|
87830
|
+
// not required for a comparison of nested types
|
|
87831
|
+
auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
|
|
87832
|
+
auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
|
|
87833
|
+
|
|
87824
87834
|
for (idx_t i = 0; i < count; i++) {
|
|
87825
87835
|
auto list_index = list_data.sel->get_index(i);
|
|
87826
87836
|
auto value_index = value_data.sel->get_index(i);
|
|
@@ -87831,23 +87841,18 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
|
|
|
87831
87841
|
}
|
|
87832
87842
|
|
|
87833
87843
|
const auto &list_entry = list_entries[list_index];
|
|
87834
|
-
auto source_idx = child_data.sel->get_index(list_entry.offset);
|
|
87835
|
-
|
|
87836
|
-
// not required for a comparison of nested types
|
|
87837
|
-
auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
|
|
87838
|
-
auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
|
|
87839
87844
|
|
|
87840
|
-
result_entries[
|
|
87845
|
+
result_entries[i] = OP::Initialize();
|
|
87841
87846
|
for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
|
|
87842
|
-
auto child_value_idx = source_idx + child_idx;
|
|
87843
87847
|
|
|
87848
|
+
auto child_value_idx = child_data.sel->get_index(list_entry.offset + child_idx);
|
|
87844
87849
|
if (!child_data.validity.RowIsValid(child_value_idx)) {
|
|
87845
87850
|
continue;
|
|
87846
87851
|
}
|
|
87847
87852
|
|
|
87848
87853
|
if (!is_nested) {
|
|
87849
87854
|
if (ValueEqualsOrNot<CHILD_TYPE>(child_value[child_value_idx], values[value_index])) {
|
|
87850
|
-
result_entries[
|
|
87855
|
+
result_entries[i] = OP::UpdateResultEntries(child_idx);
|
|
87851
87856
|
break; // Found value in list, no need to look further
|
|
87852
87857
|
}
|
|
87853
87858
|
} else {
|
|
@@ -87855,7 +87860,7 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
|
|
|
87855
87860
|
// to more efficiently compare nested types
|
|
87856
87861
|
if (ValueEqualsOrNot<Value>(child_vector.GetValue(child_value_idx),
|
|
87857
87862
|
value_vector.GetValue(value_index))) {
|
|
87858
|
-
result_entries[
|
|
87863
|
+
result_entries[i] = OP::UpdateResultEntries(child_idx);
|
|
87859
87864
|
break; // Found value in list, no need to look further
|
|
87860
87865
|
}
|
|
87861
87866
|
}
|
|
@@ -88274,10 +88279,7 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
|
|
|
88274
88279
|
continue;
|
|
88275
88280
|
}
|
|
88276
88281
|
|
|
88277
|
-
|
|
88278
|
-
idx_t child_idx = 0;
|
|
88279
|
-
|
|
88280
|
-
while (child_idx < list_entry.length) {
|
|
88282
|
+
for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
|
|
88281
88283
|
|
|
88282
88284
|
// states vector is full, update
|
|
88283
88285
|
if (states_idx == STANDARD_VECTOR_SIZE) {
|
|
@@ -88290,10 +88292,10 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
|
|
|
88290
88292
|
states_idx = 0;
|
|
88291
88293
|
}
|
|
88292
88294
|
|
|
88293
|
-
|
|
88295
|
+
auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx);
|
|
88296
|
+
sel_vector.set_index(states_idx, source_idx);
|
|
88294
88297
|
states_update[states_idx] = state_ptr;
|
|
88295
88298
|
states_idx++;
|
|
88296
|
-
child_idx++;
|
|
88297
88299
|
}
|
|
88298
88300
|
}
|
|
88299
88301
|
|
|
@@ -88330,7 +88332,7 @@ static unique_ptr<FunctionData> ListAggregateBind(ClientContext &context, Scalar
|
|
|
88330
88332
|
|
|
88331
88333
|
// get the function name
|
|
88332
88334
|
Value function_value = ExpressionExecutor::EvaluateScalar(*arguments[1]);
|
|
88333
|
-
auto function_name =
|
|
88335
|
+
auto function_name = function_value.ToString();
|
|
88334
88336
|
|
|
88335
88337
|
vector<LogicalType> types;
|
|
88336
88338
|
types.push_back(list_child_type);
|
|
@@ -92442,6 +92444,12 @@ bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result
|
|
|
92442
92444
|
return true;
|
|
92443
92445
|
}
|
|
92444
92446
|
|
|
92447
|
+
template <>
|
|
92448
|
+
bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result) {
|
|
92449
|
+
result = left;
|
|
92450
|
+
return Hugeint::SubtractInPlace(result, right);
|
|
92451
|
+
}
|
|
92452
|
+
|
|
92445
92453
|
//===--------------------------------------------------------------------===//
|
|
92446
92454
|
// subtract decimal with overflow check
|
|
92447
92455
|
//===--------------------------------------------------------------------===//
|
|
@@ -129644,28 +129652,25 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundAggreg
|
|
|
129644
129652
|
|
|
129645
129653
|
namespace duckdb {
|
|
129646
129654
|
|
|
129647
|
-
|
|
129648
|
-
|
|
129649
|
-
|
|
129650
|
-
return expr;
|
|
129651
|
-
}
|
|
129652
|
-
|
|
129653
|
-
auto min_val = num_stats.min.GetValue<hugeint_t>();
|
|
129654
|
-
auto max_val = num_stats.max.GetValue<hugeint_t>();
|
|
129655
|
-
if (max_val < min_val) {
|
|
129656
|
-
return expr;
|
|
129657
|
-
}
|
|
129655
|
+
template <class T>
|
|
129656
|
+
bool GetCastType(T signed_range, LogicalType &cast_type) {
|
|
129657
|
+
auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
|
|
129658
129658
|
|
|
129659
|
-
//
|
|
129660
|
-
if (
|
|
129661
|
-
|
|
129659
|
+
// Check if this range fits in a smaller type
|
|
129660
|
+
if (range < NumericLimits<uint8_t>::Maximum()) {
|
|
129661
|
+
cast_type = LogicalType::UTINYINT;
|
|
129662
|
+
} else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
|
|
129663
|
+
cast_type = LogicalType::USMALLINT;
|
|
129664
|
+
} else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
|
|
129665
|
+
cast_type = LogicalType::UINTEGER;
|
|
129666
|
+
} else {
|
|
129667
|
+
return false;
|
|
129662
129668
|
}
|
|
129669
|
+
return true;
|
|
129670
|
+
}
|
|
129663
129671
|
|
|
129664
|
-
|
|
129665
|
-
|
|
129666
|
-
|
|
129667
|
-
// Check if this range fits in a smaller type
|
|
129668
|
-
LogicalType cast_type;
|
|
129672
|
+
template <>
|
|
129673
|
+
bool GetCastType(hugeint_t range, LogicalType &cast_type) {
|
|
129669
129674
|
if (range < NumericLimits<uint8_t>().Maximum()) {
|
|
129670
129675
|
cast_type = LogicalType::UTINYINT;
|
|
129671
129676
|
} else if (range < NumericLimits<uint16_t>().Maximum()) {
|
|
@@ -129673,22 +129678,11 @@ unique_ptr<Expression> CastHugeintToSmallestType(unique_ptr<Expression> expr, Nu
|
|
|
129673
129678
|
} else if (range < NumericLimits<uint32_t>().Maximum()) {
|
|
129674
129679
|
cast_type = LogicalType::UINTEGER;
|
|
129675
129680
|
} else if (range < NumericLimits<uint64_t>().Maximum()) {
|
|
129676
|
-
cast_type =
|
|
129681
|
+
cast_type = LogicalType::UBIGINT;
|
|
129677
129682
|
} else {
|
|
129678
|
-
return expr;
|
|
129683
|
+
return false;
|
|
129679
129684
|
}
|
|
129680
|
-
|
|
129681
|
-
// Create expression to map to a smaller range
|
|
129682
|
-
auto input_type = expr->return_type;
|
|
129683
|
-
auto minimum_expr = make_unique<BoundConstantExpression>(Value::CreateValue(min_val));
|
|
129684
|
-
vector<unique_ptr<Expression>> arguments;
|
|
129685
|
-
arguments.push_back(move(expr));
|
|
129686
|
-
arguments.push_back(move(minimum_expr));
|
|
129687
|
-
auto minus_expr = make_unique<BoundFunctionExpression>(input_type, SubtractFun::GetFunction(input_type, input_type),
|
|
129688
|
-
move(arguments), nullptr, true);
|
|
129689
|
-
|
|
129690
|
-
// Cast to smaller type
|
|
129691
|
-
return make_unique<BoundCastExpression>(move(minus_expr), cast_type);
|
|
129685
|
+
return true;
|
|
129692
129686
|
}
|
|
129693
129687
|
|
|
129694
129688
|
template <class T>
|
|
@@ -129706,21 +129700,14 @@ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
|
|
|
129706
129700
|
|
|
129707
129701
|
// Compute range, cast to unsigned to prevent comparing signed with unsigned
|
|
129708
129702
|
T signed_range;
|
|
129709
|
-
if (!TrySubtractOperator::Operation(
|
|
129703
|
+
if (!TrySubtractOperator::Operation(signed_max_val, signed_min_val, signed_range)) {
|
|
129710
129704
|
// overflow in subtraction: cannot do any simplification
|
|
129711
129705
|
return expr;
|
|
129712
129706
|
}
|
|
129713
|
-
auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
|
|
129714
129707
|
|
|
129715
129708
|
// Check if this range fits in a smaller type
|
|
129716
129709
|
LogicalType cast_type;
|
|
129717
|
-
if (range < NumericLimits<uint8_t>::Maximum()) {
|
|
129718
|
-
cast_type = LogicalType::UTINYINT;
|
|
129719
|
-
} else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
|
|
129720
|
-
cast_type = LogicalType::USMALLINT;
|
|
129721
|
-
} else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
|
|
129722
|
-
cast_type = LogicalType::UINTEGER;
|
|
129723
|
-
} else {
|
|
129710
|
+
if (!GetCastType(signed_range, cast_type)) {
|
|
129724
129711
|
return expr;
|
|
129725
129712
|
}
|
|
129726
129713
|
|
|
@@ -129756,7 +129743,7 @@ unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, NumericSt
|
|
|
129756
129743
|
case PhysicalType::INT64:
|
|
129757
129744
|
return TemplatedCastToSmallestType<int64_t>(move(expr), num_stats);
|
|
129758
129745
|
case PhysicalType::INT128:
|
|
129759
|
-
return CastHugeintToSmallestType(move(expr), num_stats);
|
|
129746
|
+
return TemplatedCastToSmallestType<hugeint_t>(move(expr), num_stats);
|
|
129760
129747
|
default:
|
|
129761
129748
|
throw NotImplementedException("Unknown integer type!");
|
|
129762
129749
|
}
|
|
@@ -130724,6 +130711,8 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
|
|
|
130724
130711
|
if (join.conditions.size() > 1) {
|
|
130725
130712
|
// there are multiple conditions: erase this condition
|
|
130726
130713
|
join.conditions.erase(join.conditions.begin() + i);
|
|
130714
|
+
// remove the corresponding statistics
|
|
130715
|
+
join.join_stats.clear();
|
|
130727
130716
|
i--;
|
|
130728
130717
|
continue;
|
|
130729
130718
|
} else {
|
|
@@ -155786,8 +155775,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) {
|
|
|
155786
155775
|
auto copy = make_unique<LogicalCopyToFile>(copy_function->function, move(function_data));
|
|
155787
155776
|
copy->file_path = stmt.info->file_path;
|
|
155788
155777
|
copy->use_tmp_file = use_tmp_file;
|
|
155789
|
-
|
|
155790
|
-
copy->is_file_and_exists = fs.FileExists(copy->file_path);
|
|
155778
|
+
copy->is_file_and_exists = config.file_system->FileExists(copy->file_path);
|
|
155791
155779
|
|
|
155792
155780
|
copy->AddChild(move(select_node.plan));
|
|
155793
155781
|
|
|
@@ -170302,6 +170290,10 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
|
|
|
170302
170290
|
D_ASSERT(removed_column < column_definitions.size());
|
|
170303
170291
|
column_definitions.erase(column_definitions.begin() + removed_column);
|
|
170304
170292
|
|
|
170293
|
+
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
170294
|
+
column_definitions[i].oid = i;
|
|
170295
|
+
}
|
|
170296
|
+
|
|
170305
170297
|
// alter the row_groups and remove the column from each of them
|
|
170306
170298
|
this->row_groups = make_shared<SegmentTree>();
|
|
170307
170299
|
auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment();
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.4-dev9"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "0c68c88e5"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev2"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -2473,6 +2473,13 @@ public:
|
|
|
2473
2473
|
entry_idx = row_idx / BITS_PER_VALUE;
|
|
2474
2474
|
idx_in_entry = row_idx % BITS_PER_VALUE;
|
|
2475
2475
|
}
|
|
2476
|
+
//! Get an entry that has first-n bits set as valid and rest set as invalid
|
|
2477
|
+
static inline V EntryWithValidBits(idx_t n) {
|
|
2478
|
+
if (n == 0) {
|
|
2479
|
+
return V(0);
|
|
2480
|
+
}
|
|
2481
|
+
return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n);
|
|
2482
|
+
}
|
|
2476
2483
|
|
|
2477
2484
|
//! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a
|
|
2478
2485
|
//! not-null check
|
|
@@ -2548,20 +2555,33 @@ public:
|
|
|
2548
2555
|
}
|
|
2549
2556
|
}
|
|
2550
2557
|
|
|
2551
|
-
//! Marks "count"
|
|
2558
|
+
//! Marks exactly "count" bits in the validity mask as invalid (null)
|
|
2552
2559
|
inline void SetAllInvalid(idx_t count) {
|
|
2553
2560
|
EnsureWritable();
|
|
2554
|
-
|
|
2561
|
+
if (count == 0) {
|
|
2562
|
+
return;
|
|
2563
|
+
}
|
|
2564
|
+
auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
|
|
2565
|
+
for (idx_t i = 0; i < last_entry_index; i++) {
|
|
2555
2566
|
validity_mask[i] = 0;
|
|
2556
2567
|
}
|
|
2568
|
+
auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
|
|
2569
|
+
validity_mask[last_entry_index] = (last_entry_bits == 0) ? 0 : (ValidityBuffer::MAX_ENTRY << (last_entry_bits));
|
|
2557
2570
|
}
|
|
2558
2571
|
|
|
2559
|
-
//! Marks "count"
|
|
2572
|
+
//! Marks exactly "count" bits in the validity mask as valid (not null)
|
|
2560
2573
|
inline void SetAllValid(idx_t count) {
|
|
2561
2574
|
EnsureWritable();
|
|
2562
|
-
|
|
2575
|
+
if (count == 0) {
|
|
2576
|
+
return;
|
|
2577
|
+
}
|
|
2578
|
+
auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
|
|
2579
|
+
for (idx_t i = 0; i < last_entry_index; i++) {
|
|
2563
2580
|
validity_mask[i] = ValidityBuffer::MAX_ENTRY;
|
|
2564
2581
|
}
|
|
2582
|
+
auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
|
|
2583
|
+
validity_mask[last_entry_index] |=
|
|
2584
|
+
(last_entry_bits == 0) ? ValidityBuffer::MAX_ENTRY : ~(ValidityBuffer::MAX_ENTRY << (last_entry_bits));
|
|
2565
2585
|
}
|
|
2566
2586
|
|
|
2567
2587
|
inline bool IsMaskSet() const {
|