duckdb 0.7.2-dev1138.0 → 0.7.2-dev1146.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev1138.0",
5
+ "version": "0.7.2-dev1146.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -122,7 +122,8 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
122
122
  };
123
123
 
124
124
  struct ParquetWriteLocalState : public LocalFunctionData {
125
- explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types) : buffer(context, types) {
125
+ explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
126
+ : buffer(Allocator::Get(context), types) {
126
127
  }
127
128
 
128
129
  ColumnDataCollection buffer;
@@ -169,11 +169,8 @@ idx_t ColumnDataCollectionSegment::ReadVectorInternal(ChunkManagementState &stat
169
169
  if (type_size > 0) {
170
170
  memcpy(target_data + current_offset * type_size, base_ptr, current_vdata.count * type_size);
171
171
  }
172
- // FIXME: use bitwise operations here
173
172
  ValidityMask current_validity(validity_data);
174
- for (idx_t k = 0; k < current_vdata.count; k++) {
175
- target_validity.Set(current_offset + k, current_validity.RowIsValid(k));
176
- }
173
+ target_validity.SliceInPlace(current_validity, current_offset, 0, current_vdata.count);
177
174
  current_offset += current_vdata.count;
178
175
  next_index = current_vdata.next_data;
179
176
  }
@@ -68,24 +68,41 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
68
68
  }
69
69
  }
70
70
 
71
- void ValidityMask::Slice(const ValidityMask &other, idx_t offset, idx_t end) {
71
+ void ValidityMask::Slice(const ValidityMask &other, idx_t source_offset, idx_t count) {
72
72
  if (other.AllValid()) {
73
73
  validity_mask = nullptr;
74
74
  validity_data.reset();
75
75
  return;
76
76
  }
77
- if (offset == 0) {
77
+ if (source_offset == 0) {
78
78
  Initialize(other);
79
79
  return;
80
80
  }
81
- ValidityMask new_mask(end - offset);
81
+ ValidityMask new_mask(count);
82
+ new_mask.SliceInPlace(other, 0, source_offset, count);
83
+ Initialize(new_mask);
84
+ }
82
85
 
83
- // FIXME THIS NEEDS FIXING!
86
+ bool ValidityMask::IsAligned(idx_t count) {
87
+ return count % BITS_PER_VALUE == 0;
88
+ }
89
+
90
+ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count) {
91
+ if (IsAligned(source_offset) && IsAligned(target_offset)) {
92
+ auto target_validity = GetData();
93
+ auto source_validity = other.GetData();
94
+ auto source_offset_entries = EntryCount(source_offset);
95
+ auto target_offset_entries = EntryCount(target_offset);
96
+ memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
97
+ sizeof(validity_t) * EntryCount(count));
98
+ return;
99
+ }
100
+
101
+ // FIXME: use bitwise operations here
84
102
  #if 1
85
- for (idx_t i = offset; i < end; i++) {
86
- new_mask.Set(i - offset, other.RowIsValid(i));
103
+ for (idx_t i = 0; i < count; i++) {
104
+ Set(target_offset + i, other.RowIsValid(source_offset + i));
87
105
  }
88
- Initialize(new_mask);
89
106
  #else
90
107
  // first shift the "whole" units
91
108
  idx_t entire_units = offset / BITS_PER_VALUE;
@@ -136,17 +136,13 @@ void Vector::Slice(Vector &other, idx_t offset, idx_t end) {
136
136
  for (idx_t i = 0; i < entries.size(); i++) {
137
137
  entries[i]->Slice(*other_entries[i], offset, end);
138
138
  }
139
- if (offset > 0) {
140
- new_vector.validity.Slice(other.validity, offset, end);
141
- } else {
142
- new_vector.validity = other.validity;
143
- }
139
+ new_vector.validity.Slice(other.validity, offset, end - offset);
144
140
  Reference(new_vector);
145
141
  } else {
146
142
  Reference(other);
147
143
  if (offset > 0) {
148
144
  data = data + GetTypeIdSize(internal_type) * offset;
149
- validity.Slice(other.validity, offset, end);
145
+ validity.Slice(other.validity, offset, end - offset);
150
146
  }
151
147
  }
152
148
  }
@@ -53,10 +53,13 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
53
53
  // Sink
54
54
  //===--------------------------------------------------------------------===//
55
55
  class RadixHTGlobalState : public GlobalSinkState {
56
+ constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
57
+
56
58
  public:
57
59
  explicit RadixHTGlobalState(ClientContext &context)
58
- : is_empty(true), multi_scan(true), total_groups(0),
59
- partition_info((idx_t)TaskScheduler::GetScheduler(context).NumberOfThreads()) {
60
+ : is_empty(true), multi_scan(true), partitioned(false),
61
+ partition_info(
62
+ MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
60
63
  }
61
64
 
62
65
  vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
@@ -68,8 +71,8 @@ public:
68
71
  bool multi_scan;
69
72
  //! The lock for updating the global aggregate state
70
73
  mutex lock;
71
- //! a counter to determine if we should switch over to partitioning
72
- atomic<idx_t> total_groups;
74
+ //! Whether or not any thread has crossed the partitioning threshold
75
+ atomic<bool> partitioned;
73
76
 
74
77
  bool is_finalized = false;
75
78
  bool is_partitioned = false;
@@ -79,7 +82,7 @@ public:
79
82
 
80
83
  class RadixHTLocalState : public LocalSinkState {
81
84
  public:
82
- explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : is_empty(true) {
85
+ explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
83
86
  // if there are no groups we create a fake group so everything has the same group
84
87
  group_chunk.InitializeEmpty(ht.group_types);
85
88
  if (ht.grouping_set.empty()) {
@@ -90,6 +93,8 @@ public:
90
93
  DataChunk group_chunk;
91
94
  //! The aggregate HT
92
95
  unique_ptr<PartitionableHashTable> ht;
96
+ //! The total number of groups found by this thread
97
+ idx_t total_groups;
93
98
 
94
99
  //! Whether or not any tuples were added to the HT
95
100
  bool is_empty;
@@ -146,7 +151,7 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
146
151
  }
147
152
  D_ASSERT(gstate.finalized_hts.size() == 1);
148
153
  D_ASSERT(gstate.finalized_hts[0]);
149
- gstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
154
+ llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
150
155
  return;
151
156
  }
152
157
 
@@ -160,9 +165,11 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
160
165
  group_types, op.payload_types, op.bindings);
161
166
  }
162
167
 
163
- gstate.total_groups +=
164
- llstate.ht->AddChunk(group_chunk, payload_input,
165
- gstate.total_groups > radix_limit && gstate.partition_info.n_partitions > 1, filter);
168
+ llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
169
+ gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
170
+ if (llstate.total_groups >= radix_limit) {
171
+ gstate.partitioned = true;
172
+ }
166
173
  }
167
174
 
168
175
  void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
@@ -183,7 +190,7 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
183
190
  return; // no data
184
191
  }
185
192
 
186
- if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.total_groups > radix_limit) {
193
+ if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
187
194
  llstate.ht->Partition();
188
195
  }
189
196
 
@@ -207,6 +207,15 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to)
207
207
  // if aliases are different, an implicit cast is not possible
208
208
  return -1;
209
209
  }
210
+ if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
211
+ // Lists can be cast if their child types can be cast
212
+ auto child_cost = ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
213
+ if (child_cost >= 100) {
214
+ // subtract one from the cost because we prefer LIST[X] -> LIST[VARCHAR] over LIST[X] -> VARCHAR
215
+ child_cost--;
216
+ }
217
+ return child_cost;
218
+ }
210
219
  if (from.id() == to.id()) {
211
220
  // arguments match: do nothing
212
221
  return 0;
@@ -219,10 +228,6 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to)
219
228
  // everything can be cast to VARCHAR, but this cast has a high cost
220
229
  return TargetTypeCost(to);
221
230
  }
222
- if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
223
- // Lists can be cast if their child types can be cast
224
- return ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
225
- }
226
231
 
227
232
  if (from.id() == LogicalTypeId::UNION && to.id() == LogicalTypeId::UNION) {
228
233
  // Unions can be cast if the source tags are a subset of the target tags
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev1138"
2
+ #define DUCKDB_VERSION "0.7.2-dev1146"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "41104b611e"
5
+ #define DUCKDB_SOURCE_ID "b8cf6a98e2"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -323,9 +323,12 @@ public:
323
323
  public:
324
324
  DUCKDB_API void Resize(idx_t old_size, idx_t new_size);
325
325
 
326
- DUCKDB_API void Slice(const ValidityMask &other, idx_t offset, idx_t end);
326
+ DUCKDB_API void SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count);
327
+ DUCKDB_API void Slice(const ValidityMask &other, idx_t source_offset, idx_t count);
327
328
  DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
328
329
  DUCKDB_API string ToString(idx_t count) const;
330
+
331
+ DUCKDB_API static bool IsAligned(idx_t count);
329
332
  };
330
333
 
331
334
  } // namespace duckdb