duckdb 0.3.4-dev9.0 → 0.3.5-dev2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.4-dev9.0",
4
+ "version": "0.3.5-dev2.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -60698,6 +60698,7 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa
60698
60698
  const idx_t count = ie_lstate.SelectOuterRows(ie_lstate.right_matches);
60699
60699
  if (!count) {
60700
60700
  ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate);
60701
+ continue;
60701
60702
  }
60702
60703
 
60703
60704
  SliceSortedPayload(result, ie_sink.tables[1]->global_sort_state, ie_lstate.right_base, ie_lstate.true_sel,
@@ -68361,12 +68362,12 @@ void PartitionableHashTable::Partition() {
68361
68362
  D_ASSERT(radix_partitioned_hts.size() == 0);
68362
68363
  D_ASSERT(partition_info.n_partitions > 1);
68363
68364
 
68364
- vector<GroupedAggregateHashTable *> partition_hts;
68365
+ vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
68365
68366
  for (auto &unpartitioned_ht : unpartitioned_hts) {
68366
68367
  for (idx_t r = 0; r < partition_info.n_partitions; r++) {
68367
68368
  radix_partitioned_hts[r].push_back(make_unique<GroupedAggregateHashTable>(
68368
68369
  buffer_manager, group_types, payload_types, bindings, HtEntryType::HT_WIDTH_32));
68369
- partition_hts.push_back(radix_partitioned_hts[r].back().get());
68370
+ partition_hts[r] = radix_partitioned_hts[r].back().get();
68370
68371
  }
68371
68372
  unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT);
68372
68373
  unpartitioned_ht.reset();
@@ -68845,6 +68846,8 @@ template <>
68845
68846
  bool TrySubtractOperator::Operation(int32_t left, int32_t right, int32_t &result);
68846
68847
  template <>
68847
68848
  bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result);
68849
+ template <>
68850
+ bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result);
68848
68851
 
68849
68852
  struct SubtractOperatorOverflowCheck {
68850
68853
  template <class TA, class TB, class TR>
@@ -80015,6 +80018,9 @@ void HistogramFun::RegisterFunction(BuiltinFunctions &set) {
80015
80018
  fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_S));
80016
80019
  fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_MS));
80017
80020
  fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIMESTAMP_NS));
80021
+ fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME));
80022
+ fun.AddFunction(GetHistogramFunction<int64_t>(LogicalType::TIME_TZ));
80023
+ fun.AddFunction(GetHistogramFunction<int32_t>(LogicalType::DATE));
80018
80024
  set.AddFunction(fun);
80019
80025
  }
80020
80026
 
@@ -87821,6 +87827,10 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
87821
87827
  VectorData value_data;
87822
87828
  value_vector.Orrify(count, value_data);
87823
87829
 
87830
+ // not required for a comparison of nested types
87831
+ auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
87832
+ auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
87833
+
87824
87834
  for (idx_t i = 0; i < count; i++) {
87825
87835
  auto list_index = list_data.sel->get_index(i);
87826
87836
  auto value_index = value_data.sel->get_index(i);
@@ -87831,23 +87841,18 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
87831
87841
  }
87832
87842
 
87833
87843
  const auto &list_entry = list_entries[list_index];
87834
- auto source_idx = child_data.sel->get_index(list_entry.offset);
87835
-
87836
- // not required for a comparison of nested types
87837
- auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
87838
- auto values = FlatVector::GetData<CHILD_TYPE>(value_vector);
87839
87844
 
87840
- result_entries[list_index] = OP::Initialize();
87845
+ result_entries[i] = OP::Initialize();
87841
87846
  for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
87842
- auto child_value_idx = source_idx + child_idx;
87843
87847
 
87848
+ auto child_value_idx = child_data.sel->get_index(list_entry.offset + child_idx);
87844
87849
  if (!child_data.validity.RowIsValid(child_value_idx)) {
87845
87850
  continue;
87846
87851
  }
87847
87852
 
87848
87853
  if (!is_nested) {
87849
87854
  if (ValueEqualsOrNot<CHILD_TYPE>(child_value[child_value_idx], values[value_index])) {
87850
- result_entries[list_index] = OP::UpdateResultEntries(child_idx);
87855
+ result_entries[i] = OP::UpdateResultEntries(child_idx);
87851
87856
  break; // Found value in list, no need to look further
87852
87857
  }
87853
87858
  } else {
@@ -87855,7 +87860,7 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
87855
87860
  // to more efficiently compare nested types
87856
87861
  if (ValueEqualsOrNot<Value>(child_vector.GetValue(child_value_idx),
87857
87862
  value_vector.GetValue(value_index))) {
87858
- result_entries[list_index] = OP::UpdateResultEntries(child_idx);
87863
+ result_entries[i] = OP::UpdateResultEntries(child_idx);
87859
87864
  break; // Found value in list, no need to look further
87860
87865
  }
87861
87866
  }
@@ -88274,10 +88279,7 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
88274
88279
  continue;
88275
88280
  }
88276
88281
 
88277
- auto source_idx = child_data.sel->get_index(list_entry.offset);
88278
- idx_t child_idx = 0;
88279
-
88280
- while (child_idx < list_entry.length) {
88282
+ for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
88281
88283
 
88282
88284
  // states vector is full, update
88283
88285
  if (states_idx == STANDARD_VECTOR_SIZE) {
@@ -88290,10 +88292,10 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto
88290
88292
  states_idx = 0;
88291
88293
  }
88292
88294
 
88293
- sel_vector.set_index(states_idx, source_idx + child_idx);
88295
+ auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx);
88296
+ sel_vector.set_index(states_idx, source_idx);
88294
88297
  states_update[states_idx] = state_ptr;
88295
88298
  states_idx++;
88296
- child_idx++;
88297
88299
  }
88298
88300
  }
88299
88301
 
@@ -88330,7 +88332,7 @@ static unique_ptr<FunctionData> ListAggregateBind(ClientContext &context, Scalar
88330
88332
 
88331
88333
  // get the function name
88332
88334
  Value function_value = ExpressionExecutor::EvaluateScalar(*arguments[1]);
88333
- auto function_name = StringValue::Get(function_value);
88335
+ auto function_name = function_value.ToString();
88334
88336
 
88335
88337
  vector<LogicalType> types;
88336
88338
  types.push_back(list_child_type);
@@ -92442,6 +92444,12 @@ bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result
92442
92444
  return true;
92443
92445
  }
92444
92446
 
92447
+ template <>
92448
+ bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result) {
92449
+ result = left;
92450
+ return Hugeint::SubtractInPlace(result, right);
92451
+ }
92452
+
92445
92453
  //===--------------------------------------------------------------------===//
92446
92454
  // subtract decimal with overflow check
92447
92455
  //===--------------------------------------------------------------------===//
@@ -129644,28 +129652,25 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundAggreg
129644
129652
 
129645
129653
  namespace duckdb {
129646
129654
 
129647
- unique_ptr<Expression> CastHugeintToSmallestType(unique_ptr<Expression> expr, NumericStatistics &num_stats) {
129648
- // Compute range
129649
- if (num_stats.min.IsNull() || num_stats.max.IsNull()) {
129650
- return expr;
129651
- }
129652
-
129653
- auto min_val = num_stats.min.GetValue<hugeint_t>();
129654
- auto max_val = num_stats.max.GetValue<hugeint_t>();
129655
- if (max_val < min_val) {
129656
- return expr;
129657
- }
129655
+ template <class T>
129656
+ bool GetCastType(T signed_range, LogicalType &cast_type) {
129657
+ auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
129658
129658
 
129659
- // Prevent overflow
129660
- if (min_val < NumericLimits<int64_t>().Minimum() && max_val > NumericLimits<int64_t>().Maximum()) {
129661
- return expr;
129659
+ // Check if this range fits in a smaller type
129660
+ if (range < NumericLimits<uint8_t>::Maximum()) {
129661
+ cast_type = LogicalType::UTINYINT;
129662
+ } else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
129663
+ cast_type = LogicalType::USMALLINT;
129664
+ } else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
129665
+ cast_type = LogicalType::UINTEGER;
129666
+ } else {
129667
+ return false;
129662
129668
  }
129669
+ return true;
129670
+ }
129663
129671
 
129664
- // Compute range
129665
- auto range = max_val - min_val;
129666
-
129667
- // Check if this range fits in a smaller type
129668
- LogicalType cast_type;
129672
+ template <>
129673
+ bool GetCastType(hugeint_t range, LogicalType &cast_type) {
129669
129674
  if (range < NumericLimits<uint8_t>().Maximum()) {
129670
129675
  cast_type = LogicalType::UTINYINT;
129671
129676
  } else if (range < NumericLimits<uint16_t>().Maximum()) {
@@ -129673,22 +129678,11 @@ unique_ptr<Expression> CastHugeintToSmallestType(unique_ptr<Expression> expr, Nu
129673
129678
  } else if (range < NumericLimits<uint32_t>().Maximum()) {
129674
129679
  cast_type = LogicalType::UINTEGER;
129675
129680
  } else if (range < NumericLimits<uint64_t>().Maximum()) {
129676
- cast_type = LogicalTypeId::UBIGINT;
129681
+ cast_type = LogicalType::UBIGINT;
129677
129682
  } else {
129678
- return expr;
129683
+ return false;
129679
129684
  }
129680
-
129681
- // Create expression to map to a smaller range
129682
- auto input_type = expr->return_type;
129683
- auto minimum_expr = make_unique<BoundConstantExpression>(Value::CreateValue(min_val));
129684
- vector<unique_ptr<Expression>> arguments;
129685
- arguments.push_back(move(expr));
129686
- arguments.push_back(move(minimum_expr));
129687
- auto minus_expr = make_unique<BoundFunctionExpression>(input_type, SubtractFun::GetFunction(input_type, input_type),
129688
- move(arguments), nullptr, true);
129689
-
129690
- // Cast to smaller type
129691
- return make_unique<BoundCastExpression>(move(minus_expr), cast_type);
129685
+ return true;
129692
129686
  }
129693
129687
 
129694
129688
  template <class T>
@@ -129706,21 +129700,14 @@ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
129706
129700
 
129707
129701
  // Compute range, cast to unsigned to prevent comparing signed with unsigned
129708
129702
  T signed_range;
129709
- if (!TrySubtractOperator::Operation(signed_min_val, signed_max_val, signed_range)) {
129703
+ if (!TrySubtractOperator::Operation(signed_max_val, signed_min_val, signed_range)) {
129710
129704
  // overflow in subtraction: cannot do any simplification
129711
129705
  return expr;
129712
129706
  }
129713
- auto range = static_cast<typename std::make_unsigned<decltype(signed_range)>::type>(signed_range);
129714
129707
 
129715
129708
  // Check if this range fits in a smaller type
129716
129709
  LogicalType cast_type;
129717
- if (range < NumericLimits<uint8_t>::Maximum()) {
129718
- cast_type = LogicalType::UTINYINT;
129719
- } else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits<uint16_t>::Maximum()) {
129720
- cast_type = LogicalType::USMALLINT;
129721
- } else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits<uint32_t>::Maximum()) {
129722
- cast_type = LogicalType::UINTEGER;
129723
- } else {
129710
+ if (!GetCastType(signed_range, cast_type)) {
129724
129711
  return expr;
129725
129712
  }
129726
129713
 
@@ -129756,7 +129743,7 @@ unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, NumericSt
129756
129743
  case PhysicalType::INT64:
129757
129744
  return TemplatedCastToSmallestType<int64_t>(move(expr), num_stats);
129758
129745
  case PhysicalType::INT128:
129759
- return CastHugeintToSmallestType(move(expr), num_stats);
129746
+ return TemplatedCastToSmallestType<hugeint_t>(move(expr), num_stats);
129760
129747
  default:
129761
129748
  throw NotImplementedException("Unknown integer type!");
129762
129749
  }
@@ -130724,6 +130711,8 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
130724
130711
  if (join.conditions.size() > 1) {
130725
130712
  // there are multiple conditions: erase this condition
130726
130713
  join.conditions.erase(join.conditions.begin() + i);
130714
+ // remove the corresponding statistics
130715
+ join.join_stats.clear();
130727
130716
  i--;
130728
130717
  continue;
130729
130718
  } else {
@@ -155786,8 +155775,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) {
155786
155775
  auto copy = make_unique<LogicalCopyToFile>(copy_function->function, move(function_data));
155787
155776
  copy->file_path = stmt.info->file_path;
155788
155777
  copy->use_tmp_file = use_tmp_file;
155789
- LocalFileSystem fs;
155790
- copy->is_file_and_exists = fs.FileExists(copy->file_path);
155778
+ copy->is_file_and_exists = config.file_system->FileExists(copy->file_path);
155791
155779
 
155792
155780
  copy->AddChild(move(select_node.plan));
155793
155781
 
@@ -170302,6 +170290,10 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
170302
170290
  D_ASSERT(removed_column < column_definitions.size());
170303
170291
  column_definitions.erase(column_definitions.begin() + removed_column);
170304
170292
 
170293
+ for (idx_t i = 0; i < column_definitions.size(); i++) {
170294
+ column_definitions[i].oid = i;
170295
+ }
170296
+
170305
170297
  // alter the row_groups and remove the column from each of them
170306
170298
  this->row_groups = make_shared<SegmentTree>();
170307
170299
  auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment();
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "5079e8e7f"
15
- #define DUCKDB_VERSION "v0.3.4-dev9"
14
+ #define DUCKDB_SOURCE_ID "0c68c88e5"
15
+ #define DUCKDB_VERSION "v0.3.5-dev2"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -2473,6 +2473,13 @@ public:
2473
2473
  entry_idx = row_idx / BITS_PER_VALUE;
2474
2474
  idx_in_entry = row_idx % BITS_PER_VALUE;
2475
2475
  }
2476
+ //! Get an entry that has first-n bits set as valid and rest set as invalid
2477
+ static inline V EntryWithValidBits(idx_t n) {
2478
+ if (n == 0) {
2479
+ return V(0);
2480
+ }
2481
+ return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n);
2482
+ }
2476
2483
 
2477
2484
  //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a
2478
2485
  //! not-null check
@@ -2548,20 +2555,33 @@ public:
2548
2555
  }
2549
2556
  }
2550
2557
 
2551
- //! Marks "count" entries in the validity mask as invalid (null)
2558
+ //! Marks exactly "count" bits in the validity mask as invalid (null)
2552
2559
  inline void SetAllInvalid(idx_t count) {
2553
2560
  EnsureWritable();
2554
- for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
2561
+ if (count == 0) {
2562
+ return;
2563
+ }
2564
+ auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
2565
+ for (idx_t i = 0; i < last_entry_index; i++) {
2555
2566
  validity_mask[i] = 0;
2556
2567
  }
2568
+ auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
2569
+ validity_mask[last_entry_index] = (last_entry_bits == 0) ? 0 : (ValidityBuffer::MAX_ENTRY << (last_entry_bits));
2557
2570
  }
2558
2571
 
2559
- //! Marks "count" entries in the validity mask as valid (not null)
2572
+ //! Marks exactly "count" bits in the validity mask as valid (not null)
2560
2573
  inline void SetAllValid(idx_t count) {
2561
2574
  EnsureWritable();
2562
- for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
2575
+ if (count == 0) {
2576
+ return;
2577
+ }
2578
+ auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
2579
+ for (idx_t i = 0; i < last_entry_index; i++) {
2563
2580
  validity_mask[i] = ValidityBuffer::MAX_ENTRY;
2564
2581
  }
2582
+ auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
2583
+ validity_mask[last_entry_index] |=
2584
+ (last_entry_bits == 0) ? ValidityBuffer::MAX_ENTRY : ~(ValidityBuffer::MAX_ENTRY << (last_entry_bits));
2565
2585
  }
2566
2586
 
2567
2587
  inline bool IsMaskSet() const {