duckdb 0.3.5-dev673.0 → 0.3.5-dev699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev673.0",
4
+ "version": "0.3.5-dev699.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -18728,6 +18728,7 @@ duckdb::string_t StringCastTZ::Operation(timestamp_t input, Vector &result);
18728
18728
 
18729
18729
 
18730
18730
 
18731
+ #include <cmath>
18731
18732
 
18732
18733
  namespace duckdb {
18733
18734
 
@@ -18786,10 +18787,21 @@ bool TryCastWithOverflowCheckFloat(SRC value, T &result, SRC min, SRC max) {
18786
18787
  if (!(value >= min && value < max)) {
18787
18788
  return false;
18788
18789
  }
18789
- result = T(value);
18790
+ // PG FLOAT => INT casts use statistical (round-half-to-even) rounding; std::nearbyint does this under the default rounding mode.
18791
+ result = std::nearbyint(value);
18790
18792
  return true;
18791
18793
  }
18792
18794
 
18795
+ template <>
18796
+ bool TryCastWithOverflowCheck(float value, int8_t &result) {
18797
+ return TryCastWithOverflowCheckFloat<float, int8_t>(value, result, -128.0f, 128.0f);
18798
+ }
18799
+
18800
+ template <>
18801
+ bool TryCastWithOverflowCheck(float value, int16_t &result) {
18802
+ return TryCastWithOverflowCheckFloat<float, int16_t>(value, result, -32768.0f, 32768.0f);
18803
+ }
18804
+
18793
18805
  template <>
18794
18806
  bool TryCastWithOverflowCheck(float value, int32_t &result) {
18795
18807
  return TryCastWithOverflowCheckFloat<float, int32_t>(value, result, -2147483648.0f, 2147483648.0f);
@@ -18801,6 +18813,21 @@ bool TryCastWithOverflowCheck(float value, int64_t &result) {
18801
18813
  9223372036854775808.0f);
18802
18814
  }
18803
18815
 
18816
+ template <>
18817
+ bool TryCastWithOverflowCheck(double value, int8_t &result) {
18818
+ return TryCastWithOverflowCheckFloat<double, int8_t>(value, result, -128.0, 128.0);
18819
+ }
18820
+
18821
+ template <>
18822
+ bool TryCastWithOverflowCheck(double value, int16_t &result) {
18823
+ return TryCastWithOverflowCheckFloat<double, int16_t>(value, result, -32768.0, 32768.0);
18824
+ }
18825
+
18826
+ template <>
18827
+ bool TryCastWithOverflowCheck(double value, int32_t &result) {
18828
+ return TryCastWithOverflowCheckFloat<double, int32_t>(value, result, -2147483648.0, 2147483648.0);
18829
+ }
18830
+
18804
18831
  template <>
18805
18832
  bool TryCastWithOverflowCheck(double value, int64_t &result) {
18806
18833
  return TryCastWithOverflowCheckFloat<double, int64_t>(value, result, -9223372036854775808.0, 9223372036854775808.0);
@@ -19026,12 +19053,12 @@ bool TryCastWithOverflowCheck(uint64_t value, hugeint_t &result) {
19026
19053
 
19027
19054
  template <>
19028
19055
  bool TryCastWithOverflowCheck(float value, hugeint_t &result) {
19029
- return Hugeint::TryConvert(value, result);
19056
+ return Hugeint::TryConvert(std::nearbyintf(value), result);
19030
19057
  }
19031
19058
 
19032
19059
  template <>
19033
19060
  bool TryCastWithOverflowCheck(double value, hugeint_t &result) {
19034
- return Hugeint::TryConvert(value, result);
19061
+ return Hugeint::TryConvert(std::nearbyint(value), result);
19035
19062
  }
19036
19063
 
19037
19064
  template <>
@@ -23282,7 +23309,7 @@ struct IntegerCastOperation {
23282
23309
  if (dbl_res < NumericLimits<result_t>::Minimum() || dbl_res > NumericLimits<result_t>::Maximum()) {
23283
23310
  return false;
23284
23311
  }
23285
- state.result = (result_t)dbl_res;
23312
+ state.result = (result_t)std::nearbyint(dbl_res);
23286
23313
  return true;
23287
23314
  }
23288
23315
 
@@ -41269,11 +41296,11 @@ inline uint64_t TemplatedHash(const string_t &elem) {
41269
41296
  data_ptr_t data = (data_ptr_t)elem.GetDataUnsafe();
41270
41297
  const auto &len = elem.GetSize();
41271
41298
  uint64_t h = 0;
41272
- for (idx_t i = 0; i < len / 8; i += 8) {
41299
+ for (idx_t i = 0; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) {
41273
41300
  h ^= TemplatedHash<uint64_t>(Load<uint64_t>(data));
41274
- data += 8;
41301
+ data += sizeof(uint64_t);
41275
41302
  }
41276
- switch (len & 7) {
41303
+ switch (len & (sizeof(uint64_t) - 1)) {
41277
41304
  case 4:
41278
41305
  h ^= TemplatedHash<uint32_t>(Load<uint32_t>(data));
41279
41306
  break;
@@ -104365,6 +104392,31 @@ LogicalType GetArrowLogicalType(ArrowSchema &schema,
104365
104392
  }
104366
104393
  }
104367
104394
 
104395
+ // Renames columns whose names collide case-insensitively (exact repeats, or names differing only in case) by appending a numeric "_x" suffix
104396
+ void RenameArrowColumns(vector<string> &names) {
104397
+ unordered_map<string, idx_t> name_map;
104398
+ for (auto &column_name : names) {
104399
+ // Lower-case the name so that duplicates are detected case-insensitively
104400
+ auto low_column_name = StringUtil::Lower(column_name);
104401
+ if (name_map.find(low_column_name) == name_map.end()) {
104402
+ // Name does not exist yet
104403
+ name_map[low_column_name]++;
104404
+ } else {
104405
+ // Name already exists, we add _x where x is the repetition number
104406
+ string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
104407
+ auto new_column_name_low = StringUtil::Lower(new_column_name);
104408
+ while (name_map.find(new_column_name_low) != name_map.end()) {
104409
+ // The suffixed name is itself already taken by an earlier column: bump the counter and try the next suffix
104410
+ name_map[low_column_name]++;
104411
+ new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
104412
+ new_column_name_low = StringUtil::Lower(new_column_name);
104413
+ }
104414
+ column_name = new_column_name;
104415
+ name_map[new_column_name_low]++;
104416
+ }
104417
+ }
104418
+ }
104419
+
104368
104420
  unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
104369
104421
  vector<LogicalType> &return_types, vector<string> &names) {
104370
104422
  typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(
@@ -104408,6 +104460,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
104408
104460
  }
104409
104461
  names.push_back(name);
104410
104462
  }
104463
+ RenameArrowColumns(names);
104411
104464
  return move(res);
104412
104465
  }
104413
104466
 
@@ -181002,6 +181055,8 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
181002
181055
 
181003
181056
 
181004
181057
 
181058
+ #include <math.h>
181059
+
181005
181060
  namespace duckdb {
181006
181061
 
181007
181062
  DistinctStatistics::DistinctStatistics()
@@ -181062,7 +181117,7 @@ void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_
181062
181117
  return;
181063
181118
  }
181064
181119
  total_count += count;
181065
- count = MaxValue<idx_t>(idx_t(SAMPLE_RATE * double(count)), 1);
181120
+ count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
181066
181121
  sample_count += count;
181067
181122
 
181068
181123
  uint64_t indices[STANDARD_VECTOR_SIZE];
@@ -181080,12 +181135,17 @@ idx_t DistinctStatistics::GetCount() const {
181080
181135
  if (sample_count == 0 || total_count == 0) {
181081
181136
  return 0;
181082
181137
  }
181083
- // Estimate HLL count because we use sampling
181084
- double hll_count = log->Count();
181085
- double unique_proportion = hll_count / double(sample_count);
181086
- double actual_sample_rate = double(sample_count) / double(total_count);
181087
- double multiplier = double(1) + unique_proportion * (double(1) / actual_sample_rate - double(1));
181088
- return idx_t(multiplier * hll_count);
181138
+
181139
+ double u = MinValue<idx_t>(log->Count(), sample_count);
181140
+ double s = sample_count;
181141
+ double n = total_count;
181142
+
181143
+ // Assume this proportion of the sampled values occurred only once
181144
+ double u1 = pow(u / s, 2) * u;
181145
+
181146
+ // Estimate total uniques using Good-Turing estimation
181147
+ idx_t estimate = u + u1 / s * (n - s);
181148
+ return MinValue<idx_t>(estimate, total_count);
181089
181149
  }
181090
181150
 
181091
181151
  } // namespace duckdb
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "064847033"
15
- #define DUCKDB_VERSION "v0.3.5-dev673"
14
+ #define DUCKDB_SOURCE_ID "360aedc4f"
15
+ #define DUCKDB_VERSION "v0.3.5-dev699"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //