duckdb 0.6.2-dev1206.0 → 0.6.2-dev1218.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -1
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +4 -0
- package/src/duckdb/extension/parquet/include/column_reader.hpp +2 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -0
- package/src/duckdb/extension/parquet/parquet_reader.cpp +27 -7
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
package/package.json
CHANGED
|
@@ -184,7 +184,6 @@ void ColumnReader::PrepareRead(parquet_filter_t &filter) {
|
|
|
184
184
|
dict_decoder.reset();
|
|
185
185
|
defined_decoder.reset();
|
|
186
186
|
block.reset();
|
|
187
|
-
|
|
188
187
|
PageHeader page_hdr;
|
|
189
188
|
page_hdr.read(protocol);
|
|
190
189
|
|
|
@@ -204,6 +203,10 @@ void ColumnReader::PrepareRead(parquet_filter_t &filter) {
|
|
|
204
203
|
default:
|
|
205
204
|
break; // ignore INDEX page type and any other custom extensions
|
|
206
205
|
}
|
|
206
|
+
ResetPage();
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
void ColumnReader::ResetPage() {
|
|
207
210
|
}
|
|
208
211
|
|
|
209
212
|
void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
|
|
@@ -109,6 +109,8 @@ public:
|
|
|
109
109
|
vector<string> names;
|
|
110
110
|
shared_ptr<ParquetFileMetadataCache> metadata;
|
|
111
111
|
ParquetOptions parquet_options;
|
|
112
|
+
//! maps hive partition names to string columns
|
|
113
|
+
unique_ptr<std::map<string, string>> hive_map;
|
|
112
114
|
|
|
113
115
|
//! when reading multiple parquet files (with union by name option)
|
|
114
116
|
//! TableFunction might return more cols than any single parquet file. Even all parquet files have same
|
|
@@ -344,6 +344,17 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(const FileMetaData
|
|
|
344
344
|
std::move(element_reader));
|
|
345
345
|
}
|
|
346
346
|
|
|
347
|
+
// if this is a hive partition col, we should not read it at all but instead do a constant reader.
|
|
348
|
+
if (parquet_options.hive_partitioning && hive_map && depth == 1) {
|
|
349
|
+
auto lookup = hive_map->find(s_ele.name);
|
|
350
|
+
if (lookup != hive_map->end()) {
|
|
351
|
+
Value val = Value(lookup->second);
|
|
352
|
+
return make_unique<GeneratedConstantColumnReader>(*this, LogicalType::VARCHAR, SchemaElement(),
|
|
353
|
+
next_file_idx++, max_define, max_repeat, val);
|
|
354
|
+
;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
347
358
|
// TODO check return value of derive type or should we only do this on read()
|
|
348
359
|
return ColumnReader::CreateReader(*this, DeriveLogicalType(s_ele), s_ele, next_file_idx++, max_define,
|
|
349
360
|
max_repeat);
|
|
@@ -381,9 +392,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReader(const duckdb_parquet::forma
|
|
|
381
392
|
}
|
|
382
393
|
|
|
383
394
|
if (parquet_options.hive_partitioning) {
|
|
384
|
-
auto
|
|
385
|
-
|
|
386
|
-
for (auto &partition : res) {
|
|
395
|
+
for (auto &partition : *hive_map) {
|
|
387
396
|
Value val = Value(partition.second);
|
|
388
397
|
root_struct_reader.child_readers.push_back(make_unique<GeneratedConstantColumnReader>(
|
|
389
398
|
*this, LogicalType::VARCHAR, SchemaElement(), next_file_idx, 0, 0, val));
|
|
@@ -439,10 +448,16 @@ void ParquetReader::InitializeSchema(const vector<string> &expected_names, const
|
|
|
439
448
|
|
|
440
449
|
// Add generated constant column for filename
|
|
441
450
|
if (parquet_options.hive_partitioning) {
|
|
442
|
-
auto
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
451
|
+
for (auto &part : *hive_map) {
|
|
452
|
+
// We need to lookup the hive col in the cols of the file to avoid duplicating columns that are both
|
|
453
|
+
// in the file and the hive path
|
|
454
|
+
auto lookup =
|
|
455
|
+
std::find_if(child_types.begin(), child_types.end(),
|
|
456
|
+
[&part](const std::pair<std::string, LogicalType> &x) { return x.first == part.first; });
|
|
457
|
+
if (lookup == child_types.end()) {
|
|
458
|
+
return_types.emplace_back(LogicalType::VARCHAR);
|
|
459
|
+
names.emplace_back(part.first);
|
|
460
|
+
}
|
|
446
461
|
}
|
|
447
462
|
}
|
|
448
463
|
|
|
@@ -541,6 +556,11 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, const
|
|
|
541
556
|
ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
|
|
542
557
|
}
|
|
543
558
|
}
|
|
559
|
+
|
|
560
|
+
if (parquet_options.hive_partitioning) {
|
|
561
|
+
hive_map = make_unique<std::map<string, string>>(HivePartitioning::Parse(file_name));
|
|
562
|
+
}
|
|
563
|
+
|
|
544
564
|
InitializeSchema(expected_names, expected_types_p, column_ids, initial_filename_p);
|
|
545
565
|
}
|
|
546
566
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev1206"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev1218"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "e2dfc274b0"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|