duckdb 0.5.2-dev1468.0 → 0.5.2-dev1479.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +173 -20
- package/src/duckdb.hpp +5 -2
- package/src/parquet-amalgamation.cpp +33229 -33229
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -27954,6 +27954,16 @@ struct IntegerCastOperation {
|
|
|
27954
27954
|
return true;
|
|
27955
27955
|
}
|
|
27956
27956
|
|
|
27957
|
+
//! Appends one hexadecimal digit to the value accumulated in state.result.
//! Returns false and leaves state.result untouched when state.result is greater than
//! (NumericLimits<result_t>::Maximum() - digit) / 16, which guards state.result * 16 + digit against
//! exceeding the maximum of T::Result. Otherwise stores state.result * 16 + digit and returns true.
//! The NEGATIVE template parameter is not read in this body.
//! NOTE(review): digit is presumably in [0, 15]; the visible caller (IntegerHexCastLoop) only passes
//! values derived from characters accepted by CharacterIsHex - confirm for other callers.
template <class T, bool NEGATIVE>
|
|
27958
|
+
static bool HandleHexDigit(T &state, uint8_t digit) {
|
|
27959
|
+
using result_t = typename T::Result;
|
|
27960
|
+
if (state.result > (NumericLimits<result_t>::Maximum() - digit) / 16) {
|
|
27961
|
+
return false;
|
|
27962
|
+
}
|
|
27963
|
+
state.result = state.result * 16 + digit;
|
|
27964
|
+
return true;
|
|
27965
|
+
}
|
|
27966
|
+
|
|
27957
27967
|
template <class T, bool NEGATIVE>
|
|
27958
27968
|
static bool HandleExponent(T &state, int32_t exponent) {
|
|
27959
27969
|
using result_t = typename T::Result;
|
|
@@ -28078,6 +28088,36 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
|
|
|
28078
28088
|
return pos > start_pos;
|
|
28079
28089
|
}
|
|
28080
28090
|
|
|
28091
|
+
//! Parses the characters buf[1 .. len) as hexadecimal digits (case-insensitive) and feeds them one at a
//! time to OP::HandleHexDigit, then calls OP::Finalize.
//! Index 0 is skipped: the visible caller (TryIntegerCast) advances buf by one past the leading '0' only,
//! so buf[0] still holds the 'x'/'X' marker and the digits start at index 1.
//! Returns false when instantiated with ALLOW_EXPONENT or NEGATIVE set, when a non-hex character is
//! encountered, when OP::HandleHexDigit or OP::Finalize returns false, or when no digit was consumed
//! (pos never moved past start_pos). Returns true otherwise.
//! The strict parameter is not read in this body.
template <class T, bool NEGATIVE, bool ALLOW_EXPONENT, class OP = IntegerCastOperation>
|
|
28092
|
+
static bool IntegerHexCastLoop(const char *buf, idx_t len, T &result, bool strict) {
|
|
28093
|
+
if (ALLOW_EXPONENT || NEGATIVE) {
|
|
28094
|
+
return false;
|
|
28095
|
+
}
|
|
28096
|
+
idx_t start_pos = 1;
|
|
28097
|
+
idx_t pos = start_pos;
|
|
28098
|
+
char current_char;
|
|
28099
|
+
while (pos < len) {
|
|
28100
|
+
current_char = StringUtil::CharacterToLower(buf[pos]);
|
|
28101
|
+
if (!StringUtil::CharacterIsHex(current_char)) {
|
|
28102
|
+
return false;
|
|
28103
|
+
}
|
|
28104
|
+
uint8_t digit;
|
|
28105
|
+
if (current_char >= 'a') {
|
|
28106
|
+
digit = current_char - 'a' + 10;
|
|
28107
|
+
} else {
|
|
28108
|
+
digit = current_char - '0';
|
|
28109
|
+
}
|
|
28110
|
+
pos++;
|
|
28111
|
+
if (!OP::template HandleHexDigit<T, NEGATIVE>(result, digit)) {
|
|
28112
|
+
return false;
|
|
28113
|
+
}
|
|
28114
|
+
}
|
|
28115
|
+
if (!OP::template Finalize<T, NEGATIVE>(result)) {
|
|
28116
|
+
return false;
|
|
28117
|
+
}
|
|
28118
|
+
return pos > start_pos;
|
|
28119
|
+
}
|
|
28120
|
+
|
|
28081
28121
|
template <class T, bool IS_SIGNED = true, bool ALLOW_EXPONENT = true, class OP = IntegerCastOperation,
|
|
28082
28122
|
bool ZERO_INITIALIZE = true>
|
|
28083
28123
|
static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) {
|
|
@@ -28091,11 +28131,21 @@ static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) {
|
|
|
28091
28131
|
}
|
|
28092
28132
|
int negative = *buf == '-';
|
|
28093
28133
|
|
|
28134
|
+
// If it starts with 0x or 0X, we parse it as a hex value
|
|
28135
|
+
int hex = len > 1 && *buf == '0' && (buf[1] == 'x' || buf[1] == 'X');
|
|
28136
|
+
|
|
28094
28137
|
if (ZERO_INITIALIZE) {
|
|
28095
28138
|
memset(&result, 0, sizeof(T));
|
|
28096
28139
|
}
|
|
28097
28140
|
if (!negative) {
|
|
28098
|
-
|
|
28141
|
+
if (hex) {
|
|
28142
|
+
// Skip the 0x
|
|
28143
|
+
buf++;
|
|
28144
|
+
len--;
|
|
28145
|
+
return IntegerHexCastLoop<T, false, false, OP>(buf, len, result, strict);
|
|
28146
|
+
} else {
|
|
28147
|
+
return IntegerCastLoop<T, false, ALLOW_EXPONENT, OP>(buf, len, result, strict);
|
|
28148
|
+
}
|
|
28099
28149
|
} else {
|
|
28100
28150
|
if (!IS_SIGNED) {
|
|
28101
28151
|
// Need to check if its not -0
|
|
@@ -28601,6 +28651,11 @@ struct HugeIntegerCastOperation {
|
|
|
28601
28651
|
return true;
|
|
28602
28652
|
}
|
|
28603
28653
|
|
|
28654
|
+
//! Hex-digit hook for this cast operation: always returns false and does not touch result or digit.
//! IntegerHexCastLoop returns false as soon as HandleHexDigit fails, so a hex cast using this
//! operation fails on its first digit.
template <class T, bool NEGATIVE>
|
|
28655
|
+
static bool HandleHexDigit(T &result, uint8_t digit) {
|
|
28656
|
+
return false;
|
|
28657
|
+
}
|
|
28658
|
+
|
|
28604
28659
|
template <class T, bool NEGATIVE>
|
|
28605
28660
|
static bool HandleExponent(T &result, int32_t exponent) {
|
|
28606
28661
|
if (!result.Flush()) {
|
|
@@ -28696,6 +28751,11 @@ struct DecimalCastOperation {
|
|
|
28696
28751
|
return true;
|
|
28697
28752
|
}
|
|
28698
28753
|
|
|
28754
|
+
//! Hex-digit hook for this cast operation: always returns false and does not touch state or digit.
//! IntegerHexCastLoop returns false as soon as HandleHexDigit fails, so a hex cast using this
//! operation fails on its first digit.
template <class T, bool NEGATIVE>
|
|
28755
|
+
static bool HandleHexDigit(T &state, uint8_t digit) {
|
|
28756
|
+
return false;
|
|
28757
|
+
}
|
|
28758
|
+
|
|
28699
28759
|
template <class T, bool NEGATIVE>
|
|
28700
28760
|
static void RoundUpResult(T &state) {
|
|
28701
28761
|
if (NEGATIVE) {
|
|
@@ -64951,10 +65011,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
|
|
|
64951
65011
|
|
|
64952
65012
|
//! REGULAR FINALIZE EVENT
|
|
64953
65013
|
|
|
64954
|
-
class
|
|
65014
|
+
class HashAggregateMergeEvent : public BasePipelineEvent {
|
|
64955
65015
|
public:
|
|
64956
|
-
|
|
64957
|
-
Pipeline *pipeline_p)
|
|
65016
|
+
HashAggregateMergeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p, Pipeline *pipeline_p)
|
|
64958
65017
|
: BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
|
|
64959
65018
|
}
|
|
64960
65019
|
|
|
@@ -64976,14 +65035,61 @@ public:
|
|
|
64976
65035
|
}
|
|
64977
65036
|
};
|
|
64978
65037
|
|
|
65038
|
+
//! REGULAR FINALIZE FROM DISTINCT FINALIZE
|
|
65039
|
+
|
|
65040
|
+
//! Executor task that performs the regular finalize step of a hash aggregate.
//! ExecuteTask calls op.FinalizeInternal(pipeline, *event, context, gstate, false), asserts that
//! gstate.finished was not already set, sets it to true, signals completion through
//! event->FinishTask() and returns TASK_FINISHED. The mode argument is not read.
//! The task stores references to the pipeline, global state, client context and operator, and a
//! shared_ptr to the event that scheduled it.
class HashAggregateFinalizeTask : public ExecutorTask {
|
|
65041
|
+
public:
|
|
65042
|
+
HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
|
|
65043
|
+
ClientContext &context, const PhysicalHashAggregate &op)
|
|
65044
|
+
: ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
|
|
65045
|
+
op(op) {
|
|
65046
|
+
}
|
|
65047
|
+
|
|
65048
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
65049
|
+
op.FinalizeInternal(pipeline, *event, context, gstate, false);
|
|
65050
|
+
D_ASSERT(!gstate.finished);
|
|
65051
|
+
gstate.finished = true;
|
|
65052
|
+
event->FinishTask();
|
|
65053
|
+
return TaskExecutionResult::TASK_FINISHED;
|
|
65054
|
+
}
|
|
65055
|
+
|
|
65056
|
+
private:
|
|
65057
|
+
Pipeline &pipeline;
|
|
65058
|
+
shared_ptr<Event> event;
|
|
65059
|
+
HashAggregateGlobalState &gstate;
|
|
65060
|
+
ClientContext &context;
|
|
65061
|
+
const PhysicalHashAggregate &op;
|
|
65062
|
+
};
|
|
65063
|
+
|
|
65064
|
+
//! Pipeline event whose Schedule() creates exactly one HashAggregateFinalizeTask and hands it to
//! SetTasks. The task receives the pipeline, this event (via shared_from_this()), gstate, context and op.
//! The event holds references to the operator, the global state and the client context.
class HashAggregateFinalizeEvent : public BasePipelineEvent {
|
|
65065
|
+
public:
|
|
65066
|
+
HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
|
|
65067
|
+
Pipeline *pipeline_p, ClientContext &context)
|
|
65068
|
+
: BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p), context(context) {
|
|
65069
|
+
}
|
|
65070
|
+
|
|
65071
|
+
const PhysicalHashAggregate &op;
|
|
65072
|
+
HashAggregateGlobalState &gstate;
|
|
65073
|
+
ClientContext &context;
|
|
65074
|
+
|
|
65075
|
+
public:
|
|
65076
|
+
void Schedule() override {
|
|
65077
|
+
vector<unique_ptr<Task>> tasks;
|
|
65078
|
+
tasks.push_back(make_unique<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
|
|
65079
|
+
D_ASSERT(!tasks.empty());
|
|
65080
|
+
SetTasks(move(tasks));
|
|
65081
|
+
}
|
|
65082
|
+
};
|
|
65083
|
+
|
|
64979
65084
|
//! DISTINCT FINALIZE TASK
|
|
64980
65085
|
|
|
64981
65086
|
class HashDistinctAggregateFinalizeTask : public ExecutorTask {
|
|
64982
65087
|
public:
|
|
64983
65088
|
HashDistinctAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
|
|
64984
|
-
ClientContext &context, const PhysicalHashAggregate &op
|
|
65089
|
+
ClientContext &context, const PhysicalHashAggregate &op,
|
|
65090
|
+
vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
|
|
64985
65091
|
: ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
|
|
64986
|
-
op(op) {
|
|
65092
|
+
op(op), global_sources(global_sources_p) {
|
|
64987
65093
|
}
|
|
64988
65094
|
|
|
64989
65095
|
void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
|
|
@@ -64992,6 +65098,7 @@ public:
|
|
|
64992
65098
|
auto &aggregates = info.aggregates;
|
|
64993
65099
|
auto &data = *grouping_data.distinct_data;
|
|
64994
65100
|
auto &state = *grouping_state.distinct_state;
|
|
65101
|
+
auto &table_state = *grouping_state.table_state;
|
|
64995
65102
|
|
|
64996
65103
|
ThreadContext temp_thread_context(context);
|
|
64997
65104
|
ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
|
|
@@ -65029,9 +65136,12 @@ public:
|
|
|
65029
65136
|
D_ASSERT(data.info.table_map.count(i));
|
|
65030
65137
|
auto table_idx = data.info.table_map.at(i);
|
|
65031
65138
|
auto &radix_table_p = data.radix_tables[table_idx];
|
|
65032
|
-
auto &output_chunk = *state.distinct_output_chunks[table_idx];
|
|
65033
65139
|
|
|
65034
|
-
|
|
65140
|
+
// Create a duplicate of the output_chunk, because of multi-threading we cant alter the original
|
|
65141
|
+
DataChunk output_chunk;
|
|
65142
|
+
output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
|
|
65143
|
+
|
|
65144
|
+
auto &global_source = global_sources[grouping_idx][i];
|
|
65035
65145
|
auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
|
|
65036
65146
|
|
|
65037
65147
|
// Fetch all the data from the aggregate ht, and Sink it into the main ht
|
|
@@ -65063,11 +65173,11 @@ public:
|
|
|
65063
65173
|
aggregate_input_chunk.SetCardinality(output_chunk);
|
|
65064
65174
|
|
|
65065
65175
|
// Sink it into the main ht
|
|
65066
|
-
grouping_data.table_data.Sink(temp_exec_context,
|
|
65067
|
-
|
|
65176
|
+
grouping_data.table_data.Sink(temp_exec_context, table_state, *temp_local_state, group_chunk,
|
|
65177
|
+
aggregate_input_chunk, {i});
|
|
65068
65178
|
}
|
|
65069
65179
|
}
|
|
65070
|
-
grouping_data.table_data.Combine(temp_exec_context,
|
|
65180
|
+
grouping_data.table_data.Combine(temp_exec_context, table_state, *temp_local_state);
|
|
65071
65181
|
}
|
|
65072
65182
|
|
|
65073
65183
|
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
@@ -65078,9 +65188,6 @@ public:
|
|
|
65078
65188
|
auto &grouping_state = gstate.grouping_states[i];
|
|
65079
65189
|
AggregateDistinctGrouping(info, grouping, grouping_state, i);
|
|
65080
65190
|
}
|
|
65081
|
-
op.FinalizeInternal(pipeline, *event, context, gstate, false);
|
|
65082
|
-
D_ASSERT(!gstate.finished);
|
|
65083
|
-
gstate.finished = true;
|
|
65084
65191
|
event->FinishTask();
|
|
65085
65192
|
return TaskExecutionResult::TASK_FINISHED;
|
|
65086
65193
|
}
|
|
@@ -65091,6 +65198,7 @@ private:
|
|
|
65091
65198
|
HashAggregateGlobalState &gstate;
|
|
65092
65199
|
ClientContext &context;
|
|
65093
65200
|
const PhysicalHashAggregate &op;
|
|
65201
|
+
vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
|
|
65094
65202
|
};
|
|
65095
65203
|
|
|
65096
65204
|
//! DISTINCT FINALIZE EVENT
|
|
@@ -65105,15 +65213,60 @@ public:
|
|
|
65105
65213
|
const PhysicalHashAggregate &op;
|
|
65106
65214
|
HashAggregateGlobalState &gstate;
|
|
65107
65215
|
ClientContext &context;
|
|
65216
|
+
//! The GlobalSourceStates for all the radix tables of the distinct aggregates
|
|
65217
|
+
vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
|
|
65108
65218
|
|
|
65109
65219
|
public:
|
|
65110
65220
|
void Schedule() override {
|
|
65221
|
+
global_sources = CreateGlobalSources();
|
|
65222
|
+
|
|
65111
65223
|
vector<unique_ptr<Task>> tasks;
|
|
65112
|
-
|
|
65113
|
-
|
|
65224
|
+
auto &scheduler = TaskScheduler::GetScheduler(context);
|
|
65225
|
+
auto number_of_threads = scheduler.NumberOfThreads();
|
|
65226
|
+
tasks.reserve(number_of_threads);
|
|
65227
|
+
for (int32_t i = 0; i < number_of_threads; i++) {
|
|
65228
|
+
tasks.push_back(make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate,
|
|
65229
|
+
context, op, global_sources));
|
|
65230
|
+
}
|
|
65114
65231
|
D_ASSERT(!tasks.empty());
|
|
65115
65232
|
SetTasks(move(tasks));
|
|
65116
65233
|
}
|
|
65234
|
+
|
|
65235
|
+
//! Override of FinishEvent: creates a HashAggregateFinalizeEvent for the same op, gstate, pipeline and
//! context, and inserts it through InsertEvent.
void FinishEvent() override {
|
|
65236
|
+
//! Now that everything is added to the main ht, we can actually finalize
|
|
65237
|
+
auto new_event = make_shared<HashAggregateFinalizeEvent>(op, gstate, pipeline.get(), context);
|
|
65238
|
+
this->InsertEvent(move(new_event));
|
|
65239
|
+
}
|
|
65240
|
+
|
|
65241
|
+
private:
|
|
65242
|
+
//! Builds the global source states for the distinct aggregates, indexed as [grouping][aggregate].
//! For every grouping in op.groupings an inner vector is created with one entry per aggregate in
//! op.grouped_aggregate_data.aggregates: nullptr when the aggregate is not distinct, otherwise the
//! result of GetGlobalSourceState(context) on the radix table that data.info.table_map assigns to
//! that aggregate index.
//! The nullptr placeholders keep the inner index equal to the aggregate index, which is how
//! AggregateDistinctGrouping looks entries up (global_sources[grouping_idx][i]).
vector<vector<unique_ptr<GlobalSourceState>>> CreateGlobalSources() {
|
|
65243
|
+
vector<vector<unique_ptr<GlobalSourceState>>> grouping_sources;
|
|
65244
|
+
grouping_sources.reserve(op.groupings.size());
|
|
65245
|
+
for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
|
|
65246
|
+
auto &grouping = op.groupings[grouping_idx];
|
|
65247
|
+
auto &data = *grouping.distinct_data;
|
|
65248
|
+
|
|
65249
|
+
vector<unique_ptr<GlobalSourceState>> aggregate_sources;
|
|
65250
|
+
aggregate_sources.reserve(op.grouped_aggregate_data.aggregates.size());
|
|
65251
|
+
|
|
65252
|
+
for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
|
|
65253
|
+
auto &aggregate = op.grouped_aggregate_data.aggregates[i];
|
|
65254
|
+
auto &aggr = (BoundAggregateExpression &)*aggregate;
|
|
65255
|
+
|
|
65256
|
+
if (!aggr.IsDistinct()) {
|
|
65257
|
+
aggregate_sources.push_back(nullptr);
|
|
65258
|
+
continue;
|
|
65259
|
+
}
|
|
65260
|
+
|
|
65261
|
+
D_ASSERT(data.info.table_map.count(i));
|
|
65262
|
+
auto table_idx = data.info.table_map.at(i);
|
|
65263
|
+
auto &radix_table_p = data.radix_tables[table_idx];
|
|
65264
|
+
aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
|
|
65265
|
+
}
|
|
65266
|
+
grouping_sources.push_back(move(aggregate_sources));
|
|
65267
|
+
}
|
|
65268
|
+
return grouping_sources;
|
|
65269
|
+
}
|
|
65117
65270
|
};
|
|
65118
65271
|
|
|
65119
65272
|
//! DISTINCT COMBINE EVENT
|
|
@@ -65145,12 +65298,12 @@ public:
|
|
|
65145
65298
|
}
|
|
65146
65299
|
}
|
|
65147
65300
|
|
|
65301
|
+
D_ASSERT(!tasks.empty());
|
|
65302
|
+
SetTasks(move(tasks));
|
|
65303
|
+
|
|
65148
65304
|
//! Now that all tables are combined, it's time to do the distinct aggregations
|
|
65149
65305
|
auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
|
|
65150
65306
|
this->InsertEvent(move(new_event));
|
|
65151
|
-
|
|
65152
|
-
D_ASSERT(!tasks.empty());
|
|
65153
|
-
SetTasks(move(tasks));
|
|
65154
65307
|
}
|
|
65155
65308
|
};
|
|
65156
65309
|
|
|
@@ -65214,7 +65367,7 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
|
|
|
65214
65367
|
}
|
|
65215
65368
|
}
|
|
65216
65369
|
if (any_partitioned) {
|
|
65217
|
-
auto new_event = make_shared<
|
|
65370
|
+
auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
|
|
65218
65371
|
event.InsertEvent(move(new_event));
|
|
65219
65372
|
}
|
|
65220
65373
|
return SinkFinalizeType::READY;
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "d95b0d50d3"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1479"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -6279,6 +6279,9 @@ public:
|
|
|
6279
6279
|
DUCKDB_API static bool CharacterIsDigit(char c) {
|
|
6280
6280
|
return c >= '0' && c <= '9';
|
|
6281
6281
|
}
|
|
6282
|
+
//! Returns true if c is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
DUCKDB_API static bool CharacterIsHex(char c) {
|
|
6283
|
+
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
|
|
6284
|
+
}
|
|
6282
6285
|
DUCKDB_API static char CharacterToLower(char c) {
|
|
6283
6286
|
if (c >= 'A' && c <= 'Z') {
|
|
6284
6287
|
return c - ('A' - 'a');
|