duckdb 0.3.5-dev673.0 → 0.3.5-dev699.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +74 -14
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +34534 -34534
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -18728,6 +18728,7 @@ duckdb::string_t StringCastTZ::Operation(timestamp_t input, Vector &result);
|
|
|
18728
18728
|
|
|
18729
18729
|
|
|
18730
18730
|
|
|
18731
|
+
#include <cmath>
|
|
18731
18732
|
|
|
18732
18733
|
namespace duckdb {
|
|
18733
18734
|
|
|
@@ -18786,10 +18787,21 @@ bool TryCastWithOverflowCheckFloat(SRC value, T &result, SRC min, SRC max) {
|
|
|
18786
18787
|
if (!(value >= min && value < max)) {
|
|
18787
18788
|
return false;
|
|
18788
18789
|
}
|
|
18789
|
-
|
|
18790
|
+
// PG FLOAT => INT casts use statistical rounding.
|
|
18791
|
+
result = std::nearbyint(value);
|
|
18790
18792
|
return true;
|
|
18791
18793
|
}
|
|
18792
18794
|
|
|
18795
|
+
template <>
|
|
18796
|
+
bool TryCastWithOverflowCheck(float value, int8_t &result) {
|
|
18797
|
+
return TryCastWithOverflowCheckFloat<float, int8_t>(value, result, -128.0f, 128.0f);
|
|
18798
|
+
}
|
|
18799
|
+
|
|
18800
|
+
template <>
|
|
18801
|
+
bool TryCastWithOverflowCheck(float value, int16_t &result) {
|
|
18802
|
+
return TryCastWithOverflowCheckFloat<float, int16_t>(value, result, -32768.0f, 32768.0f);
|
|
18803
|
+
}
|
|
18804
|
+
|
|
18793
18805
|
template <>
|
|
18794
18806
|
bool TryCastWithOverflowCheck(float value, int32_t &result) {
|
|
18795
18807
|
return TryCastWithOverflowCheckFloat<float, int32_t>(value, result, -2147483648.0f, 2147483648.0f);
|
|
@@ -18801,6 +18813,21 @@ bool TryCastWithOverflowCheck(float value, int64_t &result) {
|
|
|
18801
18813
|
9223372036854775808.0f);
|
|
18802
18814
|
}
|
|
18803
18815
|
|
|
18816
|
+
template <>
|
|
18817
|
+
bool TryCastWithOverflowCheck(double value, int8_t &result) {
|
|
18818
|
+
return TryCastWithOverflowCheckFloat<double, int8_t>(value, result, -128.0, 128.0);
|
|
18819
|
+
}
|
|
18820
|
+
|
|
18821
|
+
template <>
|
|
18822
|
+
bool TryCastWithOverflowCheck(double value, int16_t &result) {
|
|
18823
|
+
return TryCastWithOverflowCheckFloat<double, int16_t>(value, result, -32768.0, 32768.0);
|
|
18824
|
+
}
|
|
18825
|
+
|
|
18826
|
+
template <>
|
|
18827
|
+
bool TryCastWithOverflowCheck(double value, int32_t &result) {
|
|
18828
|
+
return TryCastWithOverflowCheckFloat<double, int32_t>(value, result, -2147483648.0, 2147483648.0);
|
|
18829
|
+
}
|
|
18830
|
+
|
|
18804
18831
|
template <>
|
|
18805
18832
|
bool TryCastWithOverflowCheck(double value, int64_t &result) {
|
|
18806
18833
|
return TryCastWithOverflowCheckFloat<double, int64_t>(value, result, -9223372036854775808.0, 9223372036854775808.0);
|
|
@@ -19026,12 +19053,12 @@ bool TryCastWithOverflowCheck(uint64_t value, hugeint_t &result) {
|
|
|
19026
19053
|
|
|
19027
19054
|
template <>
|
|
19028
19055
|
bool TryCastWithOverflowCheck(float value, hugeint_t &result) {
|
|
19029
|
-
return Hugeint::TryConvert(value, result);
|
|
19056
|
+
return Hugeint::TryConvert(std::nearbyintf(value), result);
|
|
19030
19057
|
}
|
|
19031
19058
|
|
|
19032
19059
|
template <>
|
|
19033
19060
|
bool TryCastWithOverflowCheck(double value, hugeint_t &result) {
|
|
19034
|
-
return Hugeint::TryConvert(value, result);
|
|
19061
|
+
return Hugeint::TryConvert(std::nearbyint(value), result);
|
|
19035
19062
|
}
|
|
19036
19063
|
|
|
19037
19064
|
template <>
|
|
@@ -23282,7 +23309,7 @@ struct IntegerCastOperation {
|
|
|
23282
23309
|
if (dbl_res < NumericLimits<result_t>::Minimum() || dbl_res > NumericLimits<result_t>::Maximum()) {
|
|
23283
23310
|
return false;
|
|
23284
23311
|
}
|
|
23285
|
-
state.result = (result_t)dbl_res;
|
|
23312
|
+
state.result = (result_t)std::nearbyint(dbl_res);
|
|
23286
23313
|
return true;
|
|
23287
23314
|
}
|
|
23288
23315
|
|
|
@@ -41269,11 +41296,11 @@ inline uint64_t TemplatedHash(const string_t &elem) {
|
|
|
41269
41296
|
data_ptr_t data = (data_ptr_t)elem.GetDataUnsafe();
|
|
41270
41297
|
const auto &len = elem.GetSize();
|
|
41271
41298
|
uint64_t h = 0;
|
|
41272
|
-
for (idx_t i = 0; i
|
|
41299
|
+
for (idx_t i = 0; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) {
|
|
41273
41300
|
h ^= TemplatedHash<uint64_t>(Load<uint64_t>(data));
|
|
41274
|
-
data +=
|
|
41301
|
+
data += sizeof(uint64_t);
|
|
41275
41302
|
}
|
|
41276
|
-
switch (len &
|
|
41303
|
+
switch (len & (sizeof(uint64_t) - 1)) {
|
|
41277
41304
|
case 4:
|
|
41278
41305
|
h ^= TemplatedHash<uint32_t>(Load<uint32_t>(data));
|
|
41279
41306
|
break;
|
|
@@ -104365,6 +104392,31 @@ LogicalType GetArrowLogicalType(ArrowSchema &schema,
|
|
|
104365
104392
|
}
|
|
104366
104393
|
}
|
|
104367
104394
|
|
|
104395
|
+
// Renames repeated columns and case sensitive columns
|
|
104396
|
+
void RenameArrowColumns(vector<string> &names) {
|
|
104397
|
+
unordered_map<string, idx_t> name_map;
|
|
104398
|
+
for (auto &column_name : names) {
|
|
104399
|
+
// put it all lower_case
|
|
104400
|
+
auto low_column_name = StringUtil::Lower(column_name);
|
|
104401
|
+
if (name_map.find(low_column_name) == name_map.end()) {
|
|
104402
|
+
// Name does not exist yet
|
|
104403
|
+
name_map[low_column_name]++;
|
|
104404
|
+
} else {
|
|
104405
|
+
// Name already exists, we add _x where x is the repetition number
|
|
104406
|
+
string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
|
|
104407
|
+
auto new_column_name_low = StringUtil::Lower(new_column_name);
|
|
104408
|
+
while (name_map.find(new_column_name_low) != name_map.end()) {
|
|
104409
|
+
// This name is already here due to a previous definition
|
|
104410
|
+
name_map[low_column_name]++;
|
|
104411
|
+
new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]);
|
|
104412
|
+
new_column_name_low = StringUtil::Lower(new_column_name);
|
|
104413
|
+
}
|
|
104414
|
+
column_name = new_column_name;
|
|
104415
|
+
name_map[new_column_name_low]++;
|
|
104416
|
+
}
|
|
104417
|
+
}
|
|
104418
|
+
}
|
|
104419
|
+
|
|
104368
104420
|
unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
|
|
104369
104421
|
vector<LogicalType> &return_types, vector<string> &names) {
|
|
104370
104422
|
typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(
|
|
@@ -104408,6 +104460,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
|
|
|
104408
104460
|
}
|
|
104409
104461
|
names.push_back(name);
|
|
104410
104462
|
}
|
|
104463
|
+
RenameArrowColumns(names);
|
|
104411
104464
|
return move(res);
|
|
104412
104465
|
}
|
|
104413
104466
|
|
|
@@ -181002,6 +181055,8 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
|
181002
181055
|
|
|
181003
181056
|
|
|
181004
181057
|
|
|
181058
|
+
#include <math.h>
|
|
181059
|
+
|
|
181005
181060
|
namespace duckdb {
|
|
181006
181061
|
|
|
181007
181062
|
DistinctStatistics::DistinctStatistics()
|
|
@@ -181062,7 +181117,7 @@ void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_
|
|
|
181062
181117
|
return;
|
|
181063
181118
|
}
|
|
181064
181119
|
total_count += count;
|
|
181065
|
-
count =
|
|
181120
|
+
count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
|
|
181066
181121
|
sample_count += count;
|
|
181067
181122
|
|
|
181068
181123
|
uint64_t indices[STANDARD_VECTOR_SIZE];
|
|
@@ -181080,12 +181135,17 @@ idx_t DistinctStatistics::GetCount() const {
|
|
|
181080
181135
|
if (sample_count == 0 || total_count == 0) {
|
|
181081
181136
|
return 0;
|
|
181082
181137
|
}
|
|
181083
|
-
|
|
181084
|
-
double
|
|
181085
|
-
double
|
|
181086
|
-
double
|
|
181087
|
-
|
|
181088
|
-
|
|
181138
|
+
|
|
181139
|
+
double u = MinValue<idx_t>(log->Count(), sample_count);
|
|
181140
|
+
double s = sample_count;
|
|
181141
|
+
double n = total_count;
|
|
181142
|
+
|
|
181143
|
+
// Assume this proportion of the the sampled values occurred only once
|
|
181144
|
+
double u1 = pow(u / s, 2) * u;
|
|
181145
|
+
|
|
181146
|
+
// Estimate total uniques using Good Turing Estimation
|
|
181147
|
+
idx_t estimate = u + u1 / s * (n - s);
|
|
181148
|
+
return MinValue<idx_t>(estimate, total_count);
|
|
181089
181149
|
}
|
|
181090
181150
|
|
|
181091
181151
|
} // namespace duckdb
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "360aedc4f"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev699"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|