duckdb 1.3.2-dev0.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +2 -2
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +10 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +2 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -2
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +9 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +5 -2
- package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -2
- package/src/duckdb/src/catalog/catalog.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +4 -2
- package/src/duckdb/src/common/error_data.cpp +7 -0
- package/src/duckdb/src/common/operator/string_cast.cpp +3 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +5 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +10 -2
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -2
- package/src/duckdb/src/function/function_list.cpp +1 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +6 -6
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +1 -1
- package/src/duckdb/src/function/table/table_scan.cpp +43 -84
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +8 -0
- package/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -1
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/database_manager.cpp +7 -2
- package/src/duckdb/src/main/database_path_and_type.cpp +1 -1
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +0 -34
- package/src/duckdb/src/main/settings/custom_settings.cpp +49 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +4 -4
- package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +9 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +68 -3
- package/src/duckdb/src/parser/statement/set_statement.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +2 -3
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -3
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +4 -4
- package/src/duckdb/src/planner/filter/expression_filter.cpp +4 -3
- package/src/duckdb/src/planner/table_filter.cpp +2 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +5 -3
- package/src/duckdb/src/storage/compression/fsst.cpp +20 -10
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +15 -9
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +10 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +2 -1
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -1
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +6 -3
@@ -204,7 +204,7 @@ jobs:
|
|
204
204
|
|
205
205
|
win-nodejs:
|
206
206
|
name: node.js Windows
|
207
|
-
runs-on: windows-
|
207
|
+
runs-on: windows-latest
|
208
208
|
needs: set-up-npm
|
209
209
|
continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
|
210
210
|
env:
|
@@ -273,7 +273,7 @@ jobs:
|
|
273
273
|
- win-nodejs
|
274
274
|
strategy:
|
275
275
|
matrix:
|
276
|
-
os: [windows-latest, ubuntu-latest,
|
276
|
+
os: [windows-latest, ubuntu-latest, macos-latest]
|
277
277
|
version: [20]
|
278
278
|
runs-on: ${{ matrix.os }}
|
279
279
|
steps:
|
package/package.json
CHANGED
@@ -185,16 +185,20 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
|
|
185
185
|
}
|
186
186
|
}
|
187
187
|
|
188
|
+
static void AddCast(CastFunctionSet &casts, const LogicalType &source, const LogicalType &target) {
|
189
|
+
const auto implicit_cost = CastRules::ImplicitCast(source, target);
|
190
|
+
casts.RegisterCastFunction(source, target, BindCastFromNaive, implicit_cost);
|
191
|
+
}
|
192
|
+
|
188
193
|
static void AddCasts(DatabaseInstance &db) {
|
189
194
|
auto &config = DBConfig::GetConfig(db);
|
190
195
|
auto &casts = config.GetCastFunctions();
|
191
196
|
|
192
|
-
|
193
|
-
casts
|
194
|
-
casts
|
195
|
-
casts
|
196
|
-
casts
|
197
|
-
casts.RegisterCastFunction(LogicalType::DATE, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
|
197
|
+
AddCast(casts, LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ);
|
198
|
+
AddCast(casts, LogicalType::TIMESTAMP_MS, LogicalType::TIMESTAMP_TZ);
|
199
|
+
AddCast(casts, LogicalType::TIMESTAMP_NS, LogicalType::TIMESTAMP_TZ);
|
200
|
+
AddCast(casts, LogicalType::TIMESTAMP_S, LogicalType::TIMESTAMP_TZ);
|
201
|
+
AddCast(casts, LogicalType::DATE, LogicalType::TIMESTAMP_TZ);
|
198
202
|
}
|
199
203
|
};
|
200
204
|
|
@@ -763,6 +763,8 @@ unique_ptr<ColumnReader> CreateDecimalReader(ParquetReader &reader, const Parque
|
|
763
763
|
return make_uniq<TemplatedColumnReader<int32_t, TemplatedParquetValueConversion<T>>>(reader, schema);
|
764
764
|
case PhysicalType::INT64:
|
765
765
|
return make_uniq<TemplatedColumnReader<int64_t, TemplatedParquetValueConversion<T>>>(reader, schema);
|
766
|
+
case PhysicalType::INT128:
|
767
|
+
return make_uniq<TemplatedColumnReader<hugeint_t, TemplatedParquetValueConversion<T>>>(reader, schema);
|
766
768
|
default:
|
767
769
|
throw NotImplementedException("Unimplemented internal type for CreateDecimalReader");
|
768
770
|
}
|
@@ -82,8 +82,9 @@ public:
|
|
82
82
|
vector<string> names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids,
|
83
83
|
const vector<pair<string, string>> &kv_metadata,
|
84
84
|
shared_ptr<ParquetEncryptionConfig> encryption_config, idx_t dictionary_size_limit,
|
85
|
-
idx_t string_dictionary_page_size_limit,
|
86
|
-
int64_t compression_level, bool debug_use_openssl,
|
85
|
+
idx_t string_dictionary_page_size_limit, bool enable_bloom_filters,
|
86
|
+
double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
|
87
|
+
ParquetVersion parquet_version);
|
87
88
|
~ParquetWriter();
|
88
89
|
|
89
90
|
public:
|
@@ -122,6 +123,9 @@ public:
|
|
122
123
|
idx_t StringDictionaryPageSizeLimit() const {
|
123
124
|
return string_dictionary_page_size_limit;
|
124
125
|
}
|
126
|
+
double EnableBloomFilters() const {
|
127
|
+
return enable_bloom_filters;
|
128
|
+
}
|
125
129
|
double BloomFilterFalsePositiveRatio() const {
|
126
130
|
return bloom_filter_false_positive_ratio;
|
127
131
|
}
|
@@ -164,6 +168,7 @@ private:
|
|
164
168
|
shared_ptr<ParquetEncryptionConfig> encryption_config;
|
165
169
|
idx_t dictionary_size_limit;
|
166
170
|
idx_t string_dictionary_page_size_limit;
|
171
|
+
bool enable_bloom_filters;
|
167
172
|
double bloom_filter_false_positive_ratio;
|
168
173
|
int64_t compression_level;
|
169
174
|
bool debug_use_openssl;
|
@@ -284,15 +284,19 @@ public:
|
|
284
284
|
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
|
285
285
|
D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY);
|
286
286
|
|
287
|
-
|
288
|
-
|
287
|
+
if (writer.EnableBloomFilters()) {
|
288
|
+
state.bloom_filter =
|
289
|
+
make_uniq<ParquetBloomFilter>(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio());
|
290
|
+
}
|
289
291
|
|
290
292
|
state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) {
|
291
293
|
// update the statistics
|
292
294
|
OP::template HandleStats<SRC, TGT>(stats, tgt_value);
|
293
|
-
|
294
|
-
|
295
|
-
|
295
|
+
if (state.bloom_filter) {
|
296
|
+
// update the bloom filter
|
297
|
+
auto hash = OP::template XXHash64<SRC, TGT>(tgt_value);
|
298
|
+
state.bloom_filter->FilterInsert(hash);
|
299
|
+
}
|
296
300
|
});
|
297
301
|
|
298
302
|
// flush the dictionary page and add it to the to-be-written pages
|
@@ -227,6 +227,7 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
227
227
|
//! This is huge but we grow it starting from 1 MB
|
228
228
|
idx_t string_dictionary_page_size_limit = PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE;
|
229
229
|
|
230
|
+
bool enable_bloom_filters = true;
|
230
231
|
//! What false positive rate are we willing to accept for bloom filters
|
231
232
|
double bloom_filter_false_positive_ratio = 0.01;
|
232
233
|
|
@@ -373,6 +374,8 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBi
|
|
373
374
|
PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE);
|
374
375
|
}
|
375
376
|
bind_data->string_dictionary_page_size_limit = val;
|
377
|
+
} else if (loption == "write_bloom_filter") {
|
378
|
+
bind_data->enable_bloom_filters = BooleanValue::Get(option.second[0].DefaultCastAs(LogicalType::BOOLEAN));
|
376
379
|
} else if (loption == "bloom_filter_false_positive_ratio") {
|
377
380
|
auto val = option.second[0].GetValue<double>();
|
378
381
|
if (val <= 0) {
|
@@ -436,8 +439,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
|
|
436
439
|
context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
|
437
440
|
parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
|
438
441
|
parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit,
|
439
|
-
parquet_bind.
|
440
|
-
parquet_bind.parquet_version);
|
442
|
+
parquet_bind.enable_bloom_filters, parquet_bind.bloom_filter_false_positive_ratio,
|
443
|
+
parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version);
|
441
444
|
return std::move(global_state);
|
442
445
|
}
|
443
446
|
|
@@ -345,12 +345,14 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
|
|
345
345
|
vector<string> names_p, CompressionCodec::type codec, ChildFieldIDs field_ids_p,
|
346
346
|
const vector<pair<string, string>> &kv_metadata,
|
347
347
|
shared_ptr<ParquetEncryptionConfig> encryption_config_p, idx_t dictionary_size_limit_p,
|
348
|
-
idx_t string_dictionary_page_size_limit_p,
|
349
|
-
|
348
|
+
idx_t string_dictionary_page_size_limit_p, bool enable_bloom_filters_p,
|
349
|
+
double bloom_filter_false_positive_ratio_p, int64_t compression_level_p,
|
350
|
+
bool debug_use_openssl_p, ParquetVersion parquet_version)
|
350
351
|
: context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
|
351
352
|
column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)),
|
352
353
|
encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p),
|
353
354
|
string_dictionary_page_size_limit(string_dictionary_page_size_limit_p),
|
355
|
+
enable_bloom_filters(enable_bloom_filters_p),
|
354
356
|
bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p),
|
355
357
|
debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version), total_written(0), num_row_groups(0) {
|
356
358
|
|
@@ -662,7 +662,7 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret
|
|
662
662
|
break;
|
663
663
|
}
|
664
664
|
auto &catalog = database.get().GetCatalog();
|
665
|
-
auto current_schemas = catalog.
|
665
|
+
auto current_schemas = catalog.GetSchemas(context);
|
666
666
|
for (auto &current_schema : current_schemas) {
|
667
667
|
if (unseen_schemas.size() >= max_schema_count) {
|
668
668
|
break;
|
@@ -358,7 +358,9 @@ struct ArrowBool8 {
|
|
358
358
|
auto source_ptr = reinterpret_cast<bool *>(format.data);
|
359
359
|
auto result_ptr = reinterpret_cast<int8_t *>(FlatVector::GetData(result));
|
360
360
|
for (idx_t i = 0; i < count; i++) {
|
361
|
-
|
361
|
+
if (format.validity.RowIsValid(i)) {
|
362
|
+
result_ptr[i] = static_cast<int8_t>(source_ptr[i]);
|
363
|
+
}
|
362
364
|
}
|
363
365
|
}
|
364
366
|
};
|
@@ -380,7 +382,7 @@ void ArrowTypeExtensionSet::Initialize(const DBConfig &config) {
|
|
380
382
|
|
381
383
|
// Types that are 1:n
|
382
384
|
config.RegisterArrowExtension({"arrow.json", &ArrowJson::PopulateSchema, &ArrowJson::GetType,
|
383
|
-
make_shared_ptr<ArrowTypeExtensionData>(LogicalType::
|
385
|
+
make_shared_ptr<ArrowTypeExtensionData>(LogicalType::JSON())});
|
384
386
|
|
385
387
|
config.RegisterArrowExtension({"DuckDB", "bit", &ArrowBit::PopulateSchema, &ArrowBit::GetType,
|
386
388
|
make_shared_ptr<ArrowTypeExtensionData>(LogicalType::BIT), nullptr, nullptr});
|
@@ -65,6 +65,13 @@ string ErrorData::ConstructFinalMessage() const {
|
|
65
65
|
error += "\nThis error signals an assertion failure within DuckDB. This usually occurs due to "
|
66
66
|
"unexpected conditions or errors in the program's logic.\nFor more information, see "
|
67
67
|
"https://duckdb.org/docs/stable/dev/internal_errors";
|
68
|
+
|
69
|
+
// Ensure that we print the stack trace for internal exceptions.
|
70
|
+
auto entry = extra_info.find("stack_trace_pointers");
|
71
|
+
if (entry != extra_info.end()) {
|
72
|
+
auto stack_trace = StackTrace::ResolveStacktraceSymbols(entry->second);
|
73
|
+
error += "\n\nStack Trace:\n" + stack_trace;
|
74
|
+
}
|
68
75
|
}
|
69
76
|
return error;
|
70
77
|
}
|
@@ -161,6 +161,9 @@ duckdb::string_t StringFromTimestamp(timestamp_t input, Vector &vector) {
|
|
161
161
|
idx_t nano_length = 0;
|
162
162
|
if (picos) {
|
163
163
|
// If there are ps, we need all the µs
|
164
|
+
if (!time[3]) {
|
165
|
+
TimeToStringCast::FormatMicros(time[3], micro_buffer);
|
166
|
+
}
|
164
167
|
time_length = 15;
|
165
168
|
nano_length = 6;
|
166
169
|
nano_length -= NumericCast<idx_t>(TimeToStringCast::FormatMicros(picos, nano_buffer));
|
@@ -41,6 +41,11 @@ idx_t CSVIterator::BytesPerThread(const CSVReaderOptions &reader_options) {
|
|
41
41
|
// If we are setting up the buffer size directly, we must make sure each thread will read the full buffer.
|
42
42
|
return max_row_size;
|
43
43
|
}
|
44
|
+
if (bytes_per_thread == 0) {
|
45
|
+
// Bytes per thread can never be zero, but it might happen if max_row_size = 0
|
46
|
+
// Not sure why a human being would do that...
|
47
|
+
return 1;
|
48
|
+
}
|
44
49
|
return bytes_per_thread;
|
45
50
|
}
|
46
51
|
|
@@ -1373,7 +1373,7 @@ void StringValueScanner::ProcessOverBufferValue() {
|
|
1373
1373
|
result.escaped = true;
|
1374
1374
|
}
|
1375
1375
|
if (states.IsComment()) {
|
1376
|
-
result.
|
1376
|
+
result.SetComment(result, j);
|
1377
1377
|
}
|
1378
1378
|
if (states.IsInvalid()) {
|
1379
1379
|
result.InvalidState(result);
|
@@ -1435,7 +1435,7 @@ void StringValueScanner::ProcessOverBufferValue() {
|
|
1435
1435
|
result.SetQuoted(result, j);
|
1436
1436
|
}
|
1437
1437
|
if (states.IsComment()) {
|
1438
|
-
result.
|
1438
|
+
result.SetComment(result, j);
|
1439
1439
|
}
|
1440
1440
|
if (states.IsEscaped() && result.state_machine.dialect_options.state_machine_options.escape != '\0') {
|
1441
1441
|
result.escaped = true;
|
@@ -166,6 +166,10 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, DataChunk &chunk,
|
|
166
166
|
auto &gstate = input.global_state.Cast<IEJoinGlobalState>();
|
167
167
|
auto &lstate = input.local_state.Cast<IEJoinLocalState>();
|
168
168
|
|
169
|
+
if (gstate.child == 0 && gstate.tables[1]->global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
|
170
|
+
return SinkResultType::FINISHED;
|
171
|
+
}
|
172
|
+
|
169
173
|
gstate.Sink(chunk, lstate);
|
170
174
|
|
171
175
|
if (filter_pushdown && !gstate.skip_filter_pushdown) {
|
@@ -207,15 +211,19 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
207
211
|
// for FULL/LEFT/RIGHT OUTER JOIN, initialize found_match to false for every tuple
|
208
212
|
table.IntializeMatches();
|
209
213
|
}
|
214
|
+
|
215
|
+
SinkFinalizeType res;
|
210
216
|
if (gstate.child == 1 && global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
|
211
217
|
// Empty input!
|
212
|
-
|
218
|
+
res = SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
219
|
+
} else {
|
220
|
+
res = SinkFinalizeType::READY;
|
213
221
|
}
|
214
222
|
|
215
223
|
// Move to the next input child
|
216
224
|
gstate.Finalize(pipeline, event);
|
217
225
|
|
218
|
-
return
|
226
|
+
return res;
|
219
227
|
}
|
220
228
|
|
221
229
|
//===--------------------------------------------------------------------===//
|
@@ -95,8 +95,13 @@ PhysicalPlanGenerator::PlanAsOfLoopJoin(LogicalComparisonJoin &op, PhysicalOpera
|
|
95
95
|
asof_idx = i;
|
96
96
|
arg_min_max = "arg_min";
|
97
97
|
break;
|
98
|
-
|
98
|
+
case ExpressionType::COMPARE_EQUAL:
|
99
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
100
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
99
101
|
break;
|
102
|
+
default:
|
103
|
+
// Unsupported NLJ comparison
|
104
|
+
return nullptr;
|
100
105
|
}
|
101
106
|
}
|
102
107
|
|
@@ -271,7 +276,7 @@ PhysicalOperator &PhysicalPlanGenerator::PlanAsOfJoin(LogicalComparisonJoin &op)
|
|
271
276
|
|
272
277
|
auto &config = ClientConfig::GetConfig(context);
|
273
278
|
if (!config.force_asof_iejoin) {
|
274
|
-
if (op.children[0]->has_estimated_cardinality && lhs_cardinality
|
279
|
+
if (op.children[0]->has_estimated_cardinality && lhs_cardinality < config.asof_loop_join_threshold) {
|
275
280
|
auto result = PlanAsOfLoopJoin(op, left, right);
|
276
281
|
if (result) {
|
277
282
|
return *result;
|
@@ -53,8 +53,8 @@ static const StaticFunctionDefinition function[] = {
|
|
53
53
|
DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUintegerFun),
|
54
54
|
DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUsmallintFun),
|
55
55
|
DUCKDB_SCALAR_FUNCTION_SET(InternalCompressIntegralUtinyintFun),
|
56
|
-
DUCKDB_SCALAR_FUNCTION(InternalCompressStringHugeintFun),
|
57
56
|
DUCKDB_SCALAR_FUNCTION(InternalCompressStringUbigintFun),
|
57
|
+
DUCKDB_SCALAR_FUNCTION(InternalCompressStringUhugeintFun),
|
58
58
|
DUCKDB_SCALAR_FUNCTION(InternalCompressStringUintegerFun),
|
59
59
|
DUCKDB_SCALAR_FUNCTION(InternalCompressStringUsmallintFun),
|
60
60
|
DUCKDB_SCALAR_FUNCTION(InternalCompressStringUtinyintFun),
|
@@ -93,8 +93,8 @@ static scalar_function_t GetStringCompressFunctionSwitch(const LogicalType &resu
|
|
93
93
|
return GetStringCompressFunction<uint32_t>(result_type);
|
94
94
|
case LogicalTypeId::UBIGINT:
|
95
95
|
return GetStringCompressFunction<uint64_t>(result_type);
|
96
|
-
case LogicalTypeId::
|
97
|
-
return GetStringCompressFunction<
|
96
|
+
case LogicalTypeId::UHUGEINT:
|
97
|
+
return GetStringCompressFunction<uhugeint_t>(result_type);
|
98
98
|
default:
|
99
99
|
throw InternalException("Unexpected type in GetStringCompressFunctionSwitch");
|
100
100
|
}
|
@@ -189,8 +189,8 @@ static scalar_function_t GetStringDecompressFunctionSwitch(const LogicalType &in
|
|
189
189
|
return GetStringDecompressFunction<uint32_t>(input_type);
|
190
190
|
case LogicalTypeId::UBIGINT:
|
191
191
|
return GetStringDecompressFunction<uint64_t>(input_type);
|
192
|
-
case LogicalTypeId::
|
193
|
-
return GetStringDecompressFunction<
|
192
|
+
case LogicalTypeId::UHUGEINT:
|
193
|
+
return GetStringDecompressFunction<uhugeint_t>(input_type);
|
194
194
|
default:
|
195
195
|
throw InternalException("Unexpected type in GetStringDecompressFunctionSwitch");
|
196
196
|
}
|
@@ -262,8 +262,8 @@ ScalarFunction InternalCompressStringUbigintFun::GetFunction() {
|
|
262
262
|
return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::UBIGINT));
|
263
263
|
}
|
264
264
|
|
265
|
-
ScalarFunction
|
266
|
-
return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::
|
265
|
+
ScalarFunction InternalCompressStringUhugeintFun::GetFunction() {
|
266
|
+
return CMStringCompressFun::GetFunction(LogicalType(LogicalTypeId::UHUGEINT));
|
267
267
|
}
|
268
268
|
|
269
269
|
ScalarFunctionSet InternalDecompressStringFun::GetFunctions() {
|
@@ -8,7 +8,7 @@ const vector<LogicalType> CMUtils::IntegralTypes() {
|
|
8
8
|
|
9
9
|
const vector<LogicalType> CMUtils::StringTypes() {
|
10
10
|
return {LogicalType::UTINYINT, LogicalType::USMALLINT, LogicalType::UINTEGER, LogicalType::UBIGINT,
|
11
|
-
LogicalType::
|
11
|
+
LogicalType::UHUGEINT};
|
12
12
|
}
|
13
13
|
|
14
14
|
// LCOV_EXCL_START
|
@@ -26,6 +26,8 @@
|
|
26
26
|
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
27
27
|
#include "duckdb/common/types/value_map.hpp"
|
28
28
|
|
29
|
+
#include <list>
|
30
|
+
|
29
31
|
namespace duckdb {
|
30
32
|
|
31
33
|
struct TableScanLocalState : public LocalTableFunctionState {
|
@@ -384,75 +386,40 @@ unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &cont
|
|
384
386
|
return std::move(g_state);
|
385
387
|
}
|
386
388
|
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
auto
|
392
|
-
|
393
|
-
|
389
|
+
bool ExtractComparisonsAndInFilters(TableFilter &filter, vector<reference<ConstantFilter>> &comparisons,
|
390
|
+
vector<reference<InFilter>> &in_filters) {
|
391
|
+
switch (filter.filter_type) {
|
392
|
+
case TableFilterType::CONSTANT_COMPARISON: {
|
393
|
+
auto &comparison = filter.Cast<ConstantFilter>();
|
394
|
+
comparisons.push_back(comparison);
|
395
|
+
return true;
|
394
396
|
}
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
value_set_t unique_values;
|
400
|
-
for (const auto &value : filter.values) {
|
401
|
-
if (unique_values.find(value) == unique_values.end()) {
|
402
|
-
unique_values.insert(value);
|
397
|
+
case TableFilterType::OPTIONAL_FILTER: {
|
398
|
+
auto &optional_filter = filter.Cast<OptionalFilter>();
|
399
|
+
if (!optional_filter.child_filter) {
|
400
|
+
return true; // No child filters, always OK
|
403
401
|
}
|
402
|
+
return ExtractComparisonsAndInFilters(*optional_filter.child_filter, comparisons, in_filters);
|
404
403
|
}
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpression &bound_ref,
|
409
|
-
vector<unique_ptr<Expression>> &expressions) {
|
410
|
-
if (filter.child_filters.empty()) {
|
411
|
-
return;
|
404
|
+
case TableFilterType::IN_FILTER: {
|
405
|
+
in_filters.push_back(filter.Cast<InFilter>());
|
406
|
+
return true;
|
412
407
|
}
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
for (idx_t i = 0; i < filter.child_filters.size(); i++) {
|
419
|
-
auto &child_filter = *filter.child_filters[i];
|
420
|
-
switch (child_filter.filter_type) {
|
421
|
-
case TableFilterType::CONSTANT_COMPARISON: {
|
422
|
-
auto &comparison = child_filter.Cast<ConstantFilter>();
|
423
|
-
comparisons.push_back(comparison);
|
424
|
-
break;
|
425
|
-
}
|
426
|
-
case TableFilterType::CONJUNCTION_AND: {
|
427
|
-
auto &conjunction = child_filter.Cast<ConjunctionAndFilter>();
|
428
|
-
ExtractConjunctionAnd(conjunction, bound_ref, expressions);
|
429
|
-
break;
|
430
|
-
}
|
431
|
-
case TableFilterType::OPTIONAL_FILTER: {
|
432
|
-
auto &optional_filter = child_filter.Cast<OptionalFilter>();
|
433
|
-
if (!optional_filter.child_filter) {
|
434
|
-
return;
|
435
|
-
}
|
436
|
-
if (optional_filter.child_filter->filter_type != TableFilterType::IN_FILTER) {
|
437
|
-
// No support for other optional filter types yet.
|
438
|
-
return;
|
408
|
+
case TableFilterType::CONJUNCTION_AND: {
|
409
|
+
auto &conjunction_and = filter.Cast<ConjunctionAndFilter>();
|
410
|
+
for (idx_t i = 0; i < conjunction_and.child_filters.size(); i++) {
|
411
|
+
if (!ExtractComparisonsAndInFilters(*conjunction_and.child_filters[i], comparisons, in_filters)) {
|
412
|
+
return false;
|
439
413
|
}
|
440
|
-
auto &in_filter = optional_filter.child_filter->Cast<InFilter>();
|
441
|
-
in_filters.push_back(in_filter);
|
442
|
-
break;
|
443
|
-
}
|
444
|
-
default:
|
445
|
-
// Not yet supported: filter types than CONSTANT_COMPARISON/IN_FILTER/CONJUNCTION_AND in CONJUNCTION_AND.
|
446
|
-
expressions.clear();
|
447
|
-
return;
|
448
414
|
}
|
415
|
+
return true;
|
449
416
|
}
|
450
|
-
|
451
|
-
|
452
|
-
if (in_filters.empty()) {
|
453
|
-
return;
|
417
|
+
default:
|
418
|
+
return false;
|
454
419
|
}
|
420
|
+
}
|
455
421
|
|
422
|
+
value_set_t GetUniqueValues(vector<reference<ConstantFilter>> &comparisons, vector<reference<InFilter>> &in_filters) {
|
456
423
|
// Get the combined unique values of the IN filters.
|
457
424
|
value_set_t unique_values;
|
458
425
|
for (idx_t filter_idx = 0; filter_idx < in_filters.size(); filter_idx++) {
|
@@ -481,31 +448,16 @@ void ExtractConjunctionAnd(ConjunctionAndFilter &filter, BoundColumnRefExpressio
|
|
481
448
|
}
|
482
449
|
}
|
483
450
|
|
484
|
-
|
451
|
+
return unique_values;
|
485
452
|
}
|
486
453
|
|
487
|
-
void
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
auto
|
492
|
-
|
493
|
-
|
494
|
-
}
|
495
|
-
return ExtractFilter(*optional_filter.child_filter, bound_ref, expressions);
|
496
|
-
}
|
497
|
-
case TableFilterType::IN_FILTER: {
|
498
|
-
auto &in_filter = filter.Cast<InFilter>();
|
499
|
-
ExtractIn(in_filter, bound_ref, expressions);
|
500
|
-
return;
|
501
|
-
}
|
502
|
-
case TableFilterType::CONJUNCTION_AND: {
|
503
|
-
auto &conjunction_and = filter.Cast<ConjunctionAndFilter>();
|
504
|
-
ExtractConjunctionAnd(conjunction_and, bound_ref, expressions);
|
505
|
-
return;
|
506
|
-
}
|
507
|
-
default:
|
508
|
-
return;
|
454
|
+
void ExtractExpressionsFromValues(const value_set_t &unique_values, BoundColumnRefExpression &bound_ref,
|
455
|
+
vector<unique_ptr<Expression>> &expressions) {
|
456
|
+
for (const auto &value : unique_values) {
|
457
|
+
auto bound_constant = make_uniq<BoundConstantExpression>(value);
|
458
|
+
auto filter_expr = make_uniq<BoundComparisonExpression>(ExpressionType::COMPARE_EQUAL, bound_ref.Copy(),
|
459
|
+
std::move(bound_constant));
|
460
|
+
expressions.push_back(std::move(filter_expr));
|
509
461
|
}
|
510
462
|
}
|
511
463
|
|
@@ -514,14 +466,21 @@ vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &
|
|
514
466
|
ColumnBinding binding(0, storage_idx);
|
515
467
|
auto bound_ref = make_uniq<BoundColumnRefExpression>(col.Name(), col.Type(), binding);
|
516
468
|
|
469
|
+
// Extract all comparisons and IN filters from nested filters
|
517
470
|
vector<unique_ptr<Expression>> expressions;
|
518
|
-
|
471
|
+
vector<reference<ConstantFilter>> comparisons;
|
472
|
+
vector<reference<InFilter>> in_filters;
|
473
|
+
if (ExtractComparisonsAndInFilters(*filter, comparisons, in_filters)) {
|
474
|
+
// Deduplicate/deal with conflicting filters, then convert to expressions
|
475
|
+
ExtractExpressionsFromValues(GetUniqueValues(comparisons, in_filters), *bound_ref, expressions);
|
476
|
+
}
|
519
477
|
|
520
478
|
// Attempt matching the top-level filter to the index expression.
|
521
479
|
if (expressions.empty()) {
|
522
480
|
auto filter_expr = filter->ToExpression(*bound_ref);
|
523
481
|
expressions.push_back(std::move(filter_expr));
|
524
482
|
}
|
483
|
+
|
525
484
|
return expressions;
|
526
485
|
}
|
527
486
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#ifndef DUCKDB_PATCH_VERSION
|
2
|
-
#define DUCKDB_PATCH_VERSION "
|
2
|
+
#define DUCKDB_PATCH_VERSION "2"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_MINOR_VERSION
|
5
5
|
#define DUCKDB_MINOR_VERSION 3
|
@@ -8,10 +8,10 @@
|
|
8
8
|
#define DUCKDB_MAJOR_VERSION 1
|
9
9
|
#endif
|
10
10
|
#ifndef DUCKDB_VERSION
|
11
|
-
#define DUCKDB_VERSION "v1.3.
|
11
|
+
#define DUCKDB_VERSION "v1.3.2"
|
12
12
|
#endif
|
13
13
|
#ifndef DUCKDB_SOURCE_ID
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
14
|
+
#define DUCKDB_SOURCE_ID "0b83e5d2f6"
|
15
15
|
#endif
|
16
16
|
#include "duckdb/function/table/system_functions.hpp"
|
17
17
|
#include "duckdb/main/database.hpp"
|
@@ -35,6 +35,7 @@ public:
|
|
35
35
|
virtual SettingLookupResult TryGetCurrentSetting(const string &key, Value &result) = 0;
|
36
36
|
virtual optional_ptr<ClientContext> TryGetClientContext() = 0;
|
37
37
|
virtual optional_ptr<DatabaseInstance> TryGetDatabase() = 0;
|
38
|
+
virtual shared_ptr<HTTPUtil> &GetHTTPUtil() = 0;
|
38
39
|
|
39
40
|
DUCKDB_API virtual Logger &GetLogger() const = 0;
|
40
41
|
DUCKDB_API static unique_ptr<CatalogTransaction> TryGetCatalogTransaction(optional_ptr<FileOpener> opener);
|
@@ -9,9 +9,11 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
-
#include "duckdb/common/types.hpp"
|
13
12
|
#include "duckdb/common/exception.hpp"
|
13
|
+
#include "duckdb/common/types.hpp"
|
14
|
+
#include "duckdb/common/types/date.hpp"
|
14
15
|
#include "duckdb/common/types/string_type.hpp"
|
16
|
+
#include "duckdb/common/types/timestamp.hpp"
|
15
17
|
|
16
18
|
namespace duckdb {
|
17
19
|
|
package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp
CHANGED
@@ -95,8 +95,8 @@ struct InternalCompressStringUbigintFun {
|
|
95
95
|
static ScalarFunction GetFunction();
|
96
96
|
};
|
97
97
|
|
98
|
-
struct
|
99
|
-
static constexpr const char *Name = "
|
98
|
+
struct InternalCompressStringUhugeintFun {
|
99
|
+
static constexpr const char *Name = "__internal_compress_string_uhugeint";
|
100
100
|
static constexpr const char *Parameters = "";
|
101
101
|
static constexpr const char *Description = "";
|
102
102
|
static constexpr const char *Example = "";
|