duckdb 0.5.2-dev1468.0 → 0.5.2-dev1479.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1468.0",
5
+ "version": "0.5.2-dev1479.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -27954,6 +27954,16 @@ struct IntegerCastOperation {
27954
27954
  return true;
27955
27955
  }
27956
27956
 
27957
+ template <class T, bool NEGATIVE>
27958
+ static bool HandleHexDigit(T &state, uint8_t digit) {
27959
+ using result_t = typename T::Result;
27960
+ if (state.result > (NumericLimits<result_t>::Maximum() - digit) / 16) {
27961
+ return false;
27962
+ }
27963
+ state.result = state.result * 16 + digit;
27964
+ return true;
27965
+ }
27966
+
27957
27967
  template <class T, bool NEGATIVE>
27958
27968
  static bool HandleExponent(T &state, int32_t exponent) {
27959
27969
  using result_t = typename T::Result;
@@ -28078,6 +28088,36 @@ static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict)
28078
28088
  return pos > start_pos;
28079
28089
  }
28080
28090
 
28091
+ template <class T, bool NEGATIVE, bool ALLOW_EXPONENT, class OP = IntegerCastOperation>
28092
+ static bool IntegerHexCastLoop(const char *buf, idx_t len, T &result, bool strict) {
28093
+ if (ALLOW_EXPONENT || NEGATIVE) {
28094
+ return false;
28095
+ }
28096
+ idx_t start_pos = 1;
28097
+ idx_t pos = start_pos;
28098
+ char current_char;
28099
+ while (pos < len) {
28100
+ current_char = StringUtil::CharacterToLower(buf[pos]);
28101
+ if (!StringUtil::CharacterIsHex(current_char)) {
28102
+ return false;
28103
+ }
28104
+ uint8_t digit;
28105
+ if (current_char >= 'a') {
28106
+ digit = current_char - 'a' + 10;
28107
+ } else {
28108
+ digit = current_char - '0';
28109
+ }
28110
+ pos++;
28111
+ if (!OP::template HandleHexDigit<T, NEGATIVE>(result, digit)) {
28112
+ return false;
28113
+ }
28114
+ }
28115
+ if (!OP::template Finalize<T, NEGATIVE>(result)) {
28116
+ return false;
28117
+ }
28118
+ return pos > start_pos;
28119
+ }
28120
+
28081
28121
  template <class T, bool IS_SIGNED = true, bool ALLOW_EXPONENT = true, class OP = IntegerCastOperation,
28082
28122
  bool ZERO_INITIALIZE = true>
28083
28123
  static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) {
@@ -28091,11 +28131,21 @@ static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) {
28091
28131
  }
28092
28132
  int negative = *buf == '-';
28093
28133
 
28134
+ // If it starts with 0x or 0X, we parse it as a hex value
28135
+ int hex = len > 1 && *buf == '0' && (buf[1] == 'x' || buf[1] == 'X');
28136
+
28094
28137
  if (ZERO_INITIALIZE) {
28095
28138
  memset(&result, 0, sizeof(T));
28096
28139
  }
28097
28140
  if (!negative) {
28098
- return IntegerCastLoop<T, false, ALLOW_EXPONENT, OP>(buf, len, result, strict);
28141
+ if (hex) {
28142
+ // Skip the 0x
28143
+ buf++;
28144
+ len--;
28145
+ return IntegerHexCastLoop<T, false, false, OP>(buf, len, result, strict);
28146
+ } else {
28147
+ return IntegerCastLoop<T, false, ALLOW_EXPONENT, OP>(buf, len, result, strict);
28148
+ }
28099
28149
  } else {
28100
28150
  if (!IS_SIGNED) {
28101
28151
  // Need to check if it's not -0
@@ -28601,6 +28651,11 @@ struct HugeIntegerCastOperation {
28601
28651
  return true;
28602
28652
  }
28603
28653
 
28654
+ template <class T, bool NEGATIVE>
28655
+ static bool HandleHexDigit(T &result, uint8_t digit) {
28656
+ return false;
28657
+ }
28658
+
28604
28659
  template <class T, bool NEGATIVE>
28605
28660
  static bool HandleExponent(T &result, int32_t exponent) {
28606
28661
  if (!result.Flush()) {
@@ -28696,6 +28751,11 @@ struct DecimalCastOperation {
28696
28751
  return true;
28697
28752
  }
28698
28753
 
28754
+ template <class T, bool NEGATIVE>
28755
+ static bool HandleHexDigit(T &state, uint8_t digit) {
28756
+ return false;
28757
+ }
28758
+
28699
28759
  template <class T, bool NEGATIVE>
28700
28760
  static void RoundUpResult(T &state) {
28701
28761
  if (NEGATIVE) {
@@ -64951,10 +65011,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
64951
65011
 
64952
65012
  //! REGULAR FINALIZE EVENT
64953
65013
 
64954
- class HashAggregateFinalizeEvent : public BasePipelineEvent {
65014
+ class HashAggregateMergeEvent : public BasePipelineEvent {
64955
65015
  public:
64956
- HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
64957
- Pipeline *pipeline_p)
65016
+ HashAggregateMergeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p, Pipeline *pipeline_p)
64958
65017
  : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
64959
65018
  }
64960
65019
 
@@ -64976,14 +65035,61 @@ public:
64976
65035
  }
64977
65036
  };
64978
65037
 
65038
+ //! REGULAR FINALIZE FROM DISTINCT FINALIZE
65039
+
65040
+ class HashAggregateFinalizeTask : public ExecutorTask {
65041
+ public:
65042
+ HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
65043
+ ClientContext &context, const PhysicalHashAggregate &op)
65044
+ : ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
65045
+ op(op) {
65046
+ }
65047
+
65048
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
65049
+ op.FinalizeInternal(pipeline, *event, context, gstate, false);
65050
+ D_ASSERT(!gstate.finished);
65051
+ gstate.finished = true;
65052
+ event->FinishTask();
65053
+ return TaskExecutionResult::TASK_FINISHED;
65054
+ }
65055
+
65056
+ private:
65057
+ Pipeline &pipeline;
65058
+ shared_ptr<Event> event;
65059
+ HashAggregateGlobalState &gstate;
65060
+ ClientContext &context;
65061
+ const PhysicalHashAggregate &op;
65062
+ };
65063
+
65064
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
65065
+ public:
65066
+ HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
65067
+ Pipeline *pipeline_p, ClientContext &context)
65068
+ : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p), context(context) {
65069
+ }
65070
+
65071
+ const PhysicalHashAggregate &op;
65072
+ HashAggregateGlobalState &gstate;
65073
+ ClientContext &context;
65074
+
65075
+ public:
65076
+ void Schedule() override {
65077
+ vector<unique_ptr<Task>> tasks;
65078
+ tasks.push_back(make_unique<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
65079
+ D_ASSERT(!tasks.empty());
65080
+ SetTasks(move(tasks));
65081
+ }
65082
+ };
65083
+
64979
65084
  //! DISTINCT FINALIZE TASK
64980
65085
 
64981
65086
  class HashDistinctAggregateFinalizeTask : public ExecutorTask {
64982
65087
  public:
64983
65088
  HashDistinctAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
64984
- ClientContext &context, const PhysicalHashAggregate &op)
65089
+ ClientContext &context, const PhysicalHashAggregate &op,
65090
+ vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
64985
65091
  : ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
64986
- op(op) {
65092
+ op(op), global_sources(global_sources_p) {
64987
65093
  }
64988
65094
 
64989
65095
  void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
@@ -64992,6 +65098,7 @@ public:
64992
65098
  auto &aggregates = info.aggregates;
64993
65099
  auto &data = *grouping_data.distinct_data;
64994
65100
  auto &state = *grouping_state.distinct_state;
65101
+ auto &table_state = *grouping_state.table_state;
64995
65102
 
64996
65103
  ThreadContext temp_thread_context(context);
64997
65104
  ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
@@ -65029,9 +65136,12 @@ public:
65029
65136
  D_ASSERT(data.info.table_map.count(i));
65030
65137
  auto table_idx = data.info.table_map.at(i);
65031
65138
  auto &radix_table_p = data.radix_tables[table_idx];
65032
- auto &output_chunk = *state.distinct_output_chunks[table_idx];
65033
65139
 
65034
- auto global_source = radix_table_p->GetGlobalSourceState(context);
65140
+ // Create a duplicate of the output_chunk; because of multi-threading we can't alter the original
65141
+ DataChunk output_chunk;
65142
+ output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
65143
+
65144
+ auto &global_source = global_sources[grouping_idx][i];
65035
65145
  auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
65036
65146
 
65037
65147
  // Fetch all the data from the aggregate ht, and Sink it into the main ht
@@ -65063,11 +65173,11 @@ public:
65063
65173
  aggregate_input_chunk.SetCardinality(output_chunk);
65064
65174
 
65065
65175
  // Sink it into the main ht
65066
- grouping_data.table_data.Sink(temp_exec_context, *grouping_state.table_state, *temp_local_state,
65067
- group_chunk, aggregate_input_chunk, {i});
65176
+ grouping_data.table_data.Sink(temp_exec_context, table_state, *temp_local_state, group_chunk,
65177
+ aggregate_input_chunk, {i});
65068
65178
  }
65069
65179
  }
65070
- grouping_data.table_data.Combine(temp_exec_context, *grouping_state.table_state, *temp_local_state);
65180
+ grouping_data.table_data.Combine(temp_exec_context, table_state, *temp_local_state);
65071
65181
  }
65072
65182
 
65073
65183
  TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
@@ -65078,9 +65188,6 @@ public:
65078
65188
  auto &grouping_state = gstate.grouping_states[i];
65079
65189
  AggregateDistinctGrouping(info, grouping, grouping_state, i);
65080
65190
  }
65081
- op.FinalizeInternal(pipeline, *event, context, gstate, false);
65082
- D_ASSERT(!gstate.finished);
65083
- gstate.finished = true;
65084
65191
  event->FinishTask();
65085
65192
  return TaskExecutionResult::TASK_FINISHED;
65086
65193
  }
@@ -65091,6 +65198,7 @@ private:
65091
65198
  HashAggregateGlobalState &gstate;
65092
65199
  ClientContext &context;
65093
65200
  const PhysicalHashAggregate &op;
65201
+ vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
65094
65202
  };
65095
65203
 
65096
65204
  //! DISTINCT FINALIZE EVENT
@@ -65105,15 +65213,60 @@ public:
65105
65213
  const PhysicalHashAggregate &op;
65106
65214
  HashAggregateGlobalState &gstate;
65107
65215
  ClientContext &context;
65216
+ //! The GlobalSourceStates for all the radix tables of the distinct aggregates
65217
+ vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
65108
65218
 
65109
65219
  public:
65110
65220
  void Schedule() override {
65221
+ global_sources = CreateGlobalSources();
65222
+
65111
65223
  vector<unique_ptr<Task>> tasks;
65112
- tasks.push_back(
65113
- make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
65224
+ auto &scheduler = TaskScheduler::GetScheduler(context);
65225
+ auto number_of_threads = scheduler.NumberOfThreads();
65226
+ tasks.reserve(number_of_threads);
65227
+ for (int32_t i = 0; i < number_of_threads; i++) {
65228
+ tasks.push_back(make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate,
65229
+ context, op, global_sources));
65230
+ }
65114
65231
  D_ASSERT(!tasks.empty());
65115
65232
  SetTasks(move(tasks));
65116
65233
  }
65234
+
65235
+ void FinishEvent() override {
65236
+ //! Now that everything is added to the main ht, we can actually finalize
65237
+ auto new_event = make_shared<HashAggregateFinalizeEvent>(op, gstate, pipeline.get(), context);
65238
+ this->InsertEvent(move(new_event));
65239
+ }
65240
+
65241
+ private:
65242
+ vector<vector<unique_ptr<GlobalSourceState>>> CreateGlobalSources() {
65243
+ vector<vector<unique_ptr<GlobalSourceState>>> grouping_sources;
65244
+ grouping_sources.reserve(op.groupings.size());
65245
+ for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
65246
+ auto &grouping = op.groupings[grouping_idx];
65247
+ auto &data = *grouping.distinct_data;
65248
+
65249
+ vector<unique_ptr<GlobalSourceState>> aggregate_sources;
65250
+ aggregate_sources.reserve(op.grouped_aggregate_data.aggregates.size());
65251
+
65252
+ for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
65253
+ auto &aggregate = op.grouped_aggregate_data.aggregates[i];
65254
+ auto &aggr = (BoundAggregateExpression &)*aggregate;
65255
+
65256
+ if (!aggr.IsDistinct()) {
65257
+ aggregate_sources.push_back(nullptr);
65258
+ continue;
65259
+ }
65260
+
65261
+ D_ASSERT(data.info.table_map.count(i));
65262
+ auto table_idx = data.info.table_map.at(i);
65263
+ auto &radix_table_p = data.radix_tables[table_idx];
65264
+ aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
65265
+ }
65266
+ grouping_sources.push_back(move(aggregate_sources));
65267
+ }
65268
+ return grouping_sources;
65269
+ }
65117
65270
  };
65118
65271
 
65119
65272
  //! DISTINCT COMBINE EVENT
@@ -65145,12 +65298,12 @@ public:
65145
65298
  }
65146
65299
  }
65147
65300
 
65301
+ D_ASSERT(!tasks.empty());
65302
+ SetTasks(move(tasks));
65303
+
65148
65304
  //! Now that all tables are combined, it's time to do the distinct aggregations
65149
65305
  auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
65150
65306
  this->InsertEvent(move(new_event));
65151
-
65152
- D_ASSERT(!tasks.empty());
65153
- SetTasks(move(tasks));
65154
65307
  }
65155
65308
  };
65156
65309
 
@@ -65214,7 +65367,7 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
65214
65367
  }
65215
65368
  }
65216
65369
  if (any_partitioned) {
65217
- auto new_event = make_shared<HashAggregateFinalizeEvent>(*this, gstate, &pipeline);
65370
+ auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
65218
65371
  event.InsertEvent(move(new_event));
65219
65372
  }
65220
65373
  return SinkFinalizeType::READY;
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "4f495e8eea"
15
- #define DUCKDB_VERSION "v0.5.2-dev1468"
14
+ #define DUCKDB_SOURCE_ID "d95b0d50d3"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1479"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -6279,6 +6279,9 @@ public:
6279
6279
  DUCKDB_API static bool CharacterIsDigit(char c) {
6280
6280
  return c >= '0' && c <= '9';
6281
6281
  }
6282
+ DUCKDB_API static bool CharacterIsHex(char c) {
6283
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
6284
+ }
6282
6285
  DUCKDB_API static char CharacterToLower(char c) {
6283
6286
  if (c >= 'A' && c <= 'Z') {
6284
6287
  return c - ('A' - 'a');