duckdb 0.3.5-dev692.0 → 0.3.5-dev699.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +17 -10
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +36555 -36555
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -41296,11 +41296,11 @@ inline uint64_t TemplatedHash(const string_t &elem) {
|
|
|
41296
41296
|
data_ptr_t data = (data_ptr_t)elem.GetDataUnsafe();
|
|
41297
41297
|
const auto &len = elem.GetSize();
|
|
41298
41298
|
uint64_t h = 0;
|
|
41299
|
-
for (idx_t i = 0; i
|
|
41299
|
+
for (idx_t i = 0; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) {
|
|
41300
41300
|
h ^= TemplatedHash<uint64_t>(Load<uint64_t>(data));
|
|
41301
|
-
data +=
|
|
41301
|
+
data += sizeof(uint64_t);
|
|
41302
41302
|
}
|
|
41303
|
-
switch (len &
|
|
41303
|
+
switch (len & (sizeof(uint64_t) - 1)) {
|
|
41304
41304
|
case 4:
|
|
41305
41305
|
h ^= TemplatedHash<uint32_t>(Load<uint32_t>(data));
|
|
41306
41306
|
break;
|
|
@@ -181055,6 +181055,8 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
|
181055
181055
|
|
|
181056
181056
|
|
|
181057
181057
|
|
|
181058
|
+
#include <math.h>
|
|
181059
|
+
|
|
181058
181060
|
namespace duckdb {
|
|
181059
181061
|
|
|
181060
181062
|
DistinctStatistics::DistinctStatistics()
|
|
@@ -181115,7 +181117,7 @@ void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_
|
|
|
181115
181117
|
return;
|
|
181116
181118
|
}
|
|
181117
181119
|
total_count += count;
|
|
181118
|
-
count =
|
|
181120
|
+
count = MinValue<idx_t>(idx_t(SAMPLE_RATE * MaxValue<idx_t>(STANDARD_VECTOR_SIZE, count)), count);
|
|
181119
181121
|
sample_count += count;
|
|
181120
181122
|
|
|
181121
181123
|
uint64_t indices[STANDARD_VECTOR_SIZE];
|
|
@@ -181133,12 +181135,17 @@ idx_t DistinctStatistics::GetCount() const {
|
|
|
181133
181135
|
if (sample_count == 0 || total_count == 0) {
|
|
181134
181136
|
return 0;
|
|
181135
181137
|
}
|
|
181136
|
-
|
|
181137
|
-
double
|
|
181138
|
-
double
|
|
181139
|
-
double
|
|
181140
|
-
|
|
181141
|
-
|
|
181138
|
+
|
|
181139
|
+
double u = MinValue<idx_t>(log->Count(), sample_count);
|
|
181140
|
+
double s = sample_count;
|
|
181141
|
+
double n = total_count;
|
|
181142
|
+
|
|
181143
|
+
// Assume this proportion of the sampled values occurred only once
|
|
181144
|
+
double u1 = pow(u / s, 2) * u;
|
|
181145
|
+
|
|
181146
|
+
// Estimate total uniques using Good-Turing estimation
|
|
181147
|
+
idx_t estimate = u + u1 / s * (n - s);
|
|
181148
|
+
return MinValue<idx_t>(estimate, total_count);
|
|
181142
181149
|
}
|
|
181143
181150
|
|
|
181144
181151
|
} // namespace duckdb
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "360aedc4f"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev699"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|