duckdb 0.9.1-dev95.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +250 -0
- package/Makefile +3 -9
- package/README.md +2 -2
- package/binding.gyp +8 -8
- package/package.json +4 -4
- package/scripts/install_node.sh +21 -0
- package/scripts/node_build.sh +40 -0
- package/scripts/node_build_win.sh +21 -0
- package/scripts/node_version.sh +33 -0
- package/src/duckdb/extension/icu/icu-makedate.cpp +1 -1
- package/src/duckdb/extension/icu/icu-strptime.cpp +0 -2
- package/src/duckdb/extension/icu/icu_extension.cpp +0 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +27 -14
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +26 -14
- package/src/duckdb/extension/json/json_functions.cpp +1 -10
- package/src/duckdb/extension/parquet/column_reader.cpp +26 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +10 -1
- package/src/duckdb/extension/parquet/include/column_reader.hpp +2 -0
- package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +49 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +3 -4
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +3 -4
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +2 -2
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +15 -10
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +2 -2
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +26 -7
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +3 -3
- package/src/duckdb/src/common/exception.cpp +60 -84
- package/src/duckdb/src/common/preserved_error.cpp +20 -0
- package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor_state.cpp +8 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +2 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +5 -5
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +2 -2
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +2 -4
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +4 -6
- package/src/duckdb/src/function/function_binder.cpp +1 -1
- package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +10 -10
- package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -2
- package/src/duckdb/src/main/capi/arrow-c.cpp +7 -4
- package/src/duckdb/src/main/config.cpp +14 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +14 -12
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_distinct.cpp +19 -0
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +4 -2
- package/src/duckdb/src/parser/transform/statement/transform_create_sequence.cpp +10 -5
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +5 -7
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +17 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +5 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +15 -1
- package/src/duckdb/src/planner/bound_parameter_map.cpp +16 -5
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -5
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +13 -9
- package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/statement.cpp +4 -4
- package/test/arrow.test.ts +3 -1
- package/test/parquet.test.ts +1 -1
- package/test/userdata1.parquet +0 -0
- package/{configure → vendor} +1 -1
- package/{configure.py → vendor.py} +12 -1
- package/duckdb_extension_config.cmake +0 -10
@@ -243,6 +243,7 @@ void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChun
|
|
243
243
|
void ColumnReader::PrepareRead(parquet_filter_t &filter) {
|
244
244
|
dict_decoder.reset();
|
245
245
|
defined_decoder.reset();
|
246
|
+
bss_decoder.reset();
|
246
247
|
block.reset();
|
247
248
|
PageHeader page_hdr;
|
248
249
|
page_hdr.read(protocol);
|
@@ -443,6 +444,13 @@ void ColumnReader::PrepareDataPage(PageHeader &page_hdr) {
|
|
443
444
|
PrepareDeltaByteArray(*block);
|
444
445
|
break;
|
445
446
|
}
|
447
|
+
case Encoding::BYTE_STREAM_SPLIT: {
|
448
|
+
// Subtract 1 from length as the block is allocated with 1 extra byte,
|
449
|
+
// but the byte stream split encoder needs to know the correct data size.
|
450
|
+
bss_decoder = make_uniq<BssDecoder>(block->ptr, block->len - 1);
|
451
|
+
block->inc(block->len);
|
452
|
+
break;
|
453
|
+
}
|
446
454
|
case Encoding::PLAIN:
|
447
455
|
// nothing to do here, will be read directly below
|
448
456
|
break;
|
@@ -488,7 +496,7 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
|
|
488
496
|
|
489
497
|
idx_t null_count = 0;
|
490
498
|
|
491
|
-
if ((dict_decoder || dbp_decoder || rle_decoder) && HasDefines()) {
|
499
|
+
if ((dict_decoder || dbp_decoder || rle_decoder || bss_decoder) && HasDefines()) {
|
492
500
|
// we need the null count because the dictionary offsets have no entries for nulls
|
493
501
|
for (idx_t i = 0; i < read_now; i++) {
|
494
502
|
if (define_out[i + result_offset] != max_define) {
|
@@ -534,6 +542,23 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
|
|
534
542
|
} else if (byte_array_data) {
|
535
543
|
// DELTA_BYTE_ARRAY or DELTA_LENGTH_BYTE_ARRAY
|
536
544
|
DeltaByteArray(define_out, read_now, filter, result_offset, result);
|
545
|
+
} else if (bss_decoder) {
|
546
|
+
auto read_buf = make_shared<ResizeableBuffer>();
|
547
|
+
|
548
|
+
switch (schema.type) {
|
549
|
+
case duckdb_parquet::format::Type::FLOAT:
|
550
|
+
read_buf->resize(reader.allocator, sizeof(float) * (read_now - null_count));
|
551
|
+
bss_decoder->GetBatch<float>(read_buf->ptr, read_now - null_count);
|
552
|
+
break;
|
553
|
+
case duckdb_parquet::format::Type::DOUBLE:
|
554
|
+
read_buf->resize(reader.allocator, sizeof(double) * (read_now - null_count));
|
555
|
+
bss_decoder->GetBatch<double>(read_buf->ptr, read_now - null_count);
|
556
|
+
break;
|
557
|
+
default:
|
558
|
+
throw std::runtime_error("BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data");
|
559
|
+
}
|
560
|
+
|
561
|
+
Plain(read_buf, define_out, read_now, filter, result_offset, result);
|
537
562
|
} else {
|
538
563
|
PlainReference(block, result);
|
539
564
|
Plain(block, define_out, read_now, filter, result_offset, result);
|
@@ -796,6 +796,13 @@ struct ParquetTimestampSOperator : public BaseParquetOperator {
|
|
796
796
|
}
|
797
797
|
};
|
798
798
|
|
799
|
+
struct ParquetTimeTZOperator : public BaseParquetOperator {
|
800
|
+
template <class SRC, class TGT>
|
801
|
+
static TGT Operation(SRC input) {
|
802
|
+
return input.time().micros;
|
803
|
+
}
|
804
|
+
};
|
805
|
+
|
799
806
|
struct ParquetHugeintOperator {
|
800
807
|
template <class SRC, class TGT>
|
801
808
|
static TGT Operation(SRC input) {
|
@@ -1975,12 +1982,14 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1975
1982
|
max_define, can_have_nulls);
|
1976
1983
|
case LogicalTypeId::BIGINT:
|
1977
1984
|
case LogicalTypeId::TIME:
|
1978
|
-
case LogicalTypeId::TIME_TZ:
|
1979
1985
|
case LogicalTypeId::TIMESTAMP:
|
1980
1986
|
case LogicalTypeId::TIMESTAMP_TZ:
|
1981
1987
|
case LogicalTypeId::TIMESTAMP_MS:
|
1982
1988
|
return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema_idx, std::move(schema_path), max_repeat,
|
1983
1989
|
max_define, can_have_nulls);
|
1990
|
+
case LogicalTypeId::TIME_TZ:
|
1991
|
+
return make_uniq<StandardColumnWriter<dtime_tz_t, int64_t, ParquetTimeTZOperator>>(
|
1992
|
+
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
|
1984
1993
|
case LogicalTypeId::HUGEINT:
|
1985
1994
|
return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
|
1986
1995
|
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb.hpp"
|
12
|
+
#include "parquet_bss_decoder.hpp"
|
12
13
|
#include "parquet_dbp_decoder.hpp"
|
13
14
|
#include "parquet_rle_bp_decoder.hpp"
|
14
15
|
#include "parquet_statistics.hpp"
|
@@ -161,6 +162,7 @@ private:
|
|
161
162
|
unique_ptr<RleBpDecoder> repeated_decoder;
|
162
163
|
unique_ptr<DbpDecoder> dbp_decoder;
|
163
164
|
unique_ptr<RleBpDecoder> rle_decoder;
|
165
|
+
unique_ptr<BssDecoder> bss_decoder;
|
164
166
|
|
165
167
|
// dummies for Skip()
|
166
168
|
parquet_filter_t none_filter;
|
@@ -0,0 +1,49 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// parquet_bss_decoder.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
#include "parquet_types.h"
|
11
|
+
#include "resizable_buffer.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
/// Decoder for the Byte Stream Split encoding
|
16
|
+
class BssDecoder {
|
17
|
+
public:
|
18
|
+
/// Create a decoder object. buffer/buffer_len is the encoded data.
|
19
|
+
BssDecoder(data_ptr_t buffer, uint32_t buffer_len) : buffer_(buffer, buffer_len), value_offset_(0) {
|
20
|
+
}
|
21
|
+
|
22
|
+
public:
|
23
|
+
template <typename T>
|
24
|
+
void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) {
|
25
|
+
if (buffer_.len % sizeof(T) != 0) {
|
26
|
+
std::stringstream error;
|
27
|
+
error << "Data buffer size for the BYTE_STREAM_SPLIT encoding (" << buffer_.len
|
28
|
+
<< ") should be a multiple of the type size (" << sizeof(T) << ")";
|
29
|
+
throw std::runtime_error(error.str());
|
30
|
+
}
|
31
|
+
uint32_t num_buffer_values = buffer_.len / sizeof(T);
|
32
|
+
|
33
|
+
buffer_.available((value_offset_ + batch_size) * sizeof(T));
|
34
|
+
|
35
|
+
for (uint32_t byte_offset = 0; byte_offset < sizeof(T); ++byte_offset) {
|
36
|
+
data_ptr_t input_bytes = buffer_.ptr + byte_offset * num_buffer_values + value_offset_;
|
37
|
+
for (uint32_t i = 0; i < batch_size; ++i) {
|
38
|
+
values_target_ptr[byte_offset + i * sizeof(T)] = *(input_bytes + i);
|
39
|
+
}
|
40
|
+
}
|
41
|
+
value_offset_ += batch_size;
|
42
|
+
}
|
43
|
+
|
44
|
+
private:
|
45
|
+
ByteBuffer buffer_;
|
46
|
+
uint32_t value_offset_;
|
47
|
+
};
|
48
|
+
|
49
|
+
} // namespace duckdb
|
@@ -20,6 +20,8 @@
|
|
20
20
|
#include "duckdb/common/enums/file_compression_type.hpp"
|
21
21
|
#include "duckdb/common/file_system.hpp"
|
22
22
|
#include "duckdb/common/multi_file_reader.hpp"
|
23
|
+
#include "duckdb/common/serializer/deserializer.hpp"
|
24
|
+
#include "duckdb/common/serializer/serializer.hpp"
|
23
25
|
#include "duckdb/common/types/chunk_collection.hpp"
|
24
26
|
#include "duckdb/function/copy_function.hpp"
|
25
27
|
#include "duckdb/function/table_function.hpp"
|
@@ -34,8 +36,6 @@
|
|
34
36
|
#include "duckdb/planner/operator/logical_get.hpp"
|
35
37
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
36
38
|
#include "duckdb/storage/table/row_group.hpp"
|
37
|
-
#include "duckdb/common/serializer/serializer.hpp"
|
38
|
-
#include "duckdb/common/serializer/deserializer.hpp"
|
39
39
|
#endif
|
40
40
|
|
41
41
|
namespace duckdb {
|
@@ -983,8 +983,7 @@ idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_da
|
|
983
983
|
//===--------------------------------------------------------------------===//
|
984
984
|
unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string &table_name,
|
985
985
|
ReplacementScanData *data) {
|
986
|
-
|
987
|
-
if (!StringUtil::EndsWith(lower_name, ".parquet") && !StringUtil::Contains(lower_name, ".parquet?")) {
|
986
|
+
if (!ReplacementScan::CanReplace(table_name, {"parquet"})) {
|
988
987
|
return nullptr;
|
989
988
|
}
|
990
989
|
auto table_function = make_uniq<TableFunctionRef>();
|
@@ -66,10 +66,9 @@ dtime_t ParquetIntToTimeNs(const int64_t &raw_time) {
|
|
66
66
|
return Time::FromTimeNs(raw_time);
|
67
67
|
}
|
68
68
|
|
69
|
-
dtime_tz_t ParquetIntToTimeTZ(const int64_t &
|
70
|
-
|
71
|
-
|
72
|
-
return result;
|
69
|
+
dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_micros) {
|
70
|
+
dtime_t t(raw_micros);
|
71
|
+
return dtime_tz_t(t, 0);
|
73
72
|
}
|
74
73
|
|
75
74
|
} // namespace duckdb
|
@@ -69,10 +69,10 @@ void ArrowListData::Finalize(ArrowAppendData &append_data, const LogicalType &ty
|
|
69
69
|
result->buffers[1] = append_data.main_buffer.data();
|
70
70
|
|
71
71
|
auto &child_type = ListType::GetChildType(type);
|
72
|
-
append_data
|
72
|
+
ArrowAppender::AddChildren(append_data, 1);
|
73
73
|
result->children = append_data.child_pointers.data();
|
74
74
|
result->n_children = 1;
|
75
|
-
append_data.
|
75
|
+
append_data.child_arrays[0] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[0]));
|
76
76
|
}
|
77
77
|
|
78
78
|
} // namespace duckdb
|
@@ -52,33 +52,38 @@ void ArrowMapData::Append(ArrowAppendData &append_data, Vector &input, idx_t fro
|
|
52
52
|
|
53
53
|
void ArrowMapData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
54
54
|
// set up the main map buffer
|
55
|
+
D_ASSERT(result);
|
55
56
|
result->n_buffers = 2;
|
56
57
|
result->buffers[1] = append_data.main_buffer.data();
|
57
58
|
|
58
59
|
// the main map buffer has a single child: a struct
|
59
|
-
append_data
|
60
|
+
ArrowAppender::AddChildren(append_data, 1);
|
60
61
|
result->children = append_data.child_pointers.data();
|
61
62
|
result->n_children = 1;
|
62
|
-
append_data.child_pointers[0] = ArrowAppender::FinalizeChild(type, *append_data.child_data[0]);
|
63
63
|
|
64
|
-
// now that struct has two children: the key and the value type
|
65
64
|
auto &struct_data = *append_data.child_data[0];
|
66
|
-
auto
|
67
|
-
|
65
|
+
auto struct_result = ArrowAppender::FinalizeChild(type, std::move(append_data.child_data[0]));
|
66
|
+
|
67
|
+
// Initialize the struct array data
|
68
|
+
const auto struct_child_count = 2;
|
69
|
+
ArrowAppender::AddChildren(struct_data, struct_child_count);
|
70
|
+
struct_result->children = struct_data.child_pointers.data();
|
68
71
|
struct_result->n_buffers = 1;
|
69
|
-
struct_result->n_children =
|
72
|
+
struct_result->n_children = struct_child_count;
|
70
73
|
struct_result->length = struct_data.child_data[0]->row_count;
|
71
|
-
|
74
|
+
|
75
|
+
append_data.child_arrays[0] = *struct_result;
|
72
76
|
|
73
77
|
D_ASSERT(struct_data.child_data[0]->row_count == struct_data.child_data[1]->row_count);
|
74
78
|
|
75
79
|
auto &key_type = MapType::KeyType(type);
|
76
80
|
auto &value_type = MapType::ValueType(type);
|
77
|
-
|
78
|
-
struct_data.
|
81
|
+
auto key_data = ArrowAppender::FinalizeChild(key_type, std::move(struct_data.child_data[0]));
|
82
|
+
struct_data.child_arrays[0] = *key_data;
|
83
|
+
struct_data.child_arrays[1] = *ArrowAppender::FinalizeChild(value_type, std::move(struct_data.child_data[1]));
|
79
84
|
|
80
85
|
// keys cannot have null values
|
81
|
-
if (
|
86
|
+
if (key_data->null_count > 0) {
|
82
87
|
throw std::runtime_error("Arrow doesn't accept NULL keys on Maps");
|
83
88
|
}
|
84
89
|
}
|
@@ -33,12 +33,12 @@ void ArrowStructData::Finalize(ArrowAppendData &append_data, const LogicalType &
|
|
33
33
|
result->n_buffers = 1;
|
34
34
|
|
35
35
|
auto &child_types = StructType::GetChildTypes(type);
|
36
|
-
append_data
|
36
|
+
ArrowAppender::AddChildren(append_data, child_types.size());
|
37
37
|
result->children = append_data.child_pointers.data();
|
38
38
|
result->n_children = child_types.size();
|
39
39
|
for (idx_t i = 0; i < child_types.size(); i++) {
|
40
40
|
auto &child_type = child_types[i].second;
|
41
|
-
append_data.
|
41
|
+
append_data.child_arrays[i] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[i]));
|
42
42
|
}
|
43
43
|
}
|
44
44
|
|
@@ -58,12 +58,12 @@ void ArrowUnionData::Finalize(ArrowAppendData &append_data, const LogicalType &t
|
|
58
58
|
result->buffers[1] = append_data.main_buffer.data();
|
59
59
|
|
60
60
|
auto &child_types = UnionType::CopyMemberTypes(type);
|
61
|
-
append_data
|
61
|
+
ArrowAppender::AddChildren(append_data, child_types.size());
|
62
62
|
result->children = append_data.child_pointers.data();
|
63
63
|
result->n_children = child_types.size();
|
64
64
|
for (idx_t i = 0; i < child_types.size(); i++) {
|
65
65
|
auto &child_type = child_types[i].second;
|
66
|
-
append_data.
|
66
|
+
append_data.child_arrays[i] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[i]));
|
67
67
|
}
|
68
68
|
}
|
69
69
|
|
@@ -39,18 +39,31 @@ void ArrowAppender::ReleaseArray(ArrowArray *array) {
|
|
39
39
|
if (!array || !array->release) {
|
40
40
|
return;
|
41
41
|
}
|
42
|
-
array->release = nullptr;
|
43
42
|
auto holder = static_cast<ArrowAppendData *>(array->private_data);
|
43
|
+
for (int64_t i = 0; i < array->n_children; i++) {
|
44
|
+
auto child = array->children[i];
|
45
|
+
if (!child->release) {
|
46
|
+
// Child was moved out of the array
|
47
|
+
continue;
|
48
|
+
}
|
49
|
+
child->release(child);
|
50
|
+
D_ASSERT(!child->release);
|
51
|
+
}
|
52
|
+
if (array->dictionary && array->dictionary->release) {
|
53
|
+
array->dictionary->release(array->dictionary);
|
54
|
+
}
|
55
|
+
array->release = nullptr;
|
44
56
|
delete holder;
|
45
57
|
}
|
46
58
|
|
47
59
|
//===--------------------------------------------------------------------===//
|
48
60
|
// Finalize Arrow Child
|
49
61
|
//===--------------------------------------------------------------------===//
|
50
|
-
ArrowArray *ArrowAppender::FinalizeChild(const LogicalType &type, ArrowAppendData
|
62
|
+
ArrowArray *ArrowAppender::FinalizeChild(const LogicalType &type, unique_ptr<ArrowAppendData> append_data_p) {
|
51
63
|
auto result = make_uniq<ArrowArray>();
|
52
64
|
|
53
|
-
|
65
|
+
auto &append_data = *append_data_p;
|
66
|
+
result->private_data = append_data_p.release();
|
54
67
|
result->release = ArrowAppender::ReleaseArray;
|
55
68
|
result->n_children = 0;
|
56
69
|
result->null_count = 0;
|
@@ -75,7 +88,7 @@ ArrowArray ArrowAppender::Finalize() {
|
|
75
88
|
auto root_holder = make_uniq<ArrowAppendData>(options);
|
76
89
|
|
77
90
|
ArrowArray result;
|
78
|
-
root_holder
|
91
|
+
AddChildren(*root_holder, types.size());
|
79
92
|
result.children = root_holder->child_pointers.data();
|
80
93
|
result.n_children = types.size();
|
81
94
|
|
@@ -88,10 +101,8 @@ ArrowArray ArrowAppender::Finalize() {
|
|
88
101
|
result.dictionary = nullptr;
|
89
102
|
root_holder->child_data = std::move(root_data);
|
90
103
|
|
91
|
-
// FIXME: this violates a property of the arrow format, if root owns all the child memory then consumers can't move
|
92
|
-
// child arrays https://arrow.apache.org/docs/format/CDataInterface.html#moving-child-arrays
|
93
104
|
for (idx_t i = 0; i < root_holder->child_data.size(); i++) {
|
94
|
-
root_holder->
|
105
|
+
root_holder->child_arrays[i] = *ArrowAppender::FinalizeChild(types[i], std::move(root_holder->child_data[i]));
|
95
106
|
}
|
96
107
|
|
97
108
|
// Release ownership to caller
|
@@ -238,4 +249,12 @@ unique_ptr<ArrowAppendData> ArrowAppender::InitializeChild(const LogicalType &ty
|
|
238
249
|
return result;
|
239
250
|
}
|
240
251
|
|
252
|
+
void ArrowAppender::AddChildren(ArrowAppendData &data, idx_t count) {
|
253
|
+
data.child_pointers.resize(count);
|
254
|
+
data.child_arrays.resize(count);
|
255
|
+
for (idx_t i = 0; i < count; i++) {
|
256
|
+
data.child_pointers[i] = &data.child_arrays[i];
|
257
|
+
}
|
258
|
+
}
|
259
|
+
|
241
260
|
} // namespace duckdb
|
@@ -16,21 +16,21 @@ namespace duckdb {
|
|
16
16
|
ArrowSchemaWrapper::~ArrowSchemaWrapper() {
|
17
17
|
if (arrow_schema.release) {
|
18
18
|
arrow_schema.release(&arrow_schema);
|
19
|
-
arrow_schema.release
|
19
|
+
D_ASSERT(!arrow_schema.release);
|
20
20
|
}
|
21
21
|
}
|
22
22
|
|
23
23
|
ArrowArrayWrapper::~ArrowArrayWrapper() {
|
24
24
|
if (arrow_array.release) {
|
25
25
|
arrow_array.release(&arrow_array);
|
26
|
-
arrow_array.release
|
26
|
+
D_ASSERT(!arrow_array.release);
|
27
27
|
}
|
28
28
|
}
|
29
29
|
|
30
30
|
ArrowArrayStreamWrapper::~ArrowArrayStreamWrapper() {
|
31
31
|
if (arrow_array_stream.release) {
|
32
32
|
arrow_array_stream.release(&arrow_array_stream);
|
33
|
-
arrow_array_stream.release
|
33
|
+
D_ASSERT(!arrow_array_stream.release);
|
34
34
|
}
|
35
35
|
}
|
36
36
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#include "duckdb/common/exception.hpp"
|
2
|
-
|
3
2
|
#include "duckdb/common/string_util.hpp"
|
4
3
|
#include "duckdb/common/to_string.hpp"
|
5
4
|
#include "duckdb/common/types.hpp"
|
@@ -82,91 +81,68 @@ string Exception::ConstructMessageRecursive(const string &msg, std::vector<Excep
|
|
82
81
|
return ExceptionFormatValue::Format(msg, values);
|
83
82
|
}
|
84
83
|
|
84
|
+
struct ExceptionEntry {
|
85
|
+
ExceptionType type;
|
86
|
+
char text[48];
|
87
|
+
};
|
88
|
+
|
89
|
+
static constexpr ExceptionEntry EXCEPTION_MAP[] = {{ExceptionType::INVALID, "Invalid"},
|
90
|
+
{ExceptionType::OUT_OF_RANGE, "Out of Range"},
|
91
|
+
{ExceptionType::CONVERSION, "Conversion"},
|
92
|
+
{ExceptionType::UNKNOWN_TYPE, "Unknown Type"},
|
93
|
+
{ExceptionType::DECIMAL, "Decimal"},
|
94
|
+
{ExceptionType::MISMATCH_TYPE, "Mismatch Type"},
|
95
|
+
{ExceptionType::DIVIDE_BY_ZERO, "Divide by Zero"},
|
96
|
+
{ExceptionType::OBJECT_SIZE, "Object Size"},
|
97
|
+
{ExceptionType::INVALID_TYPE, "Invalid type"},
|
98
|
+
{ExceptionType::SERIALIZATION, "Serialization"},
|
99
|
+
{ExceptionType::TRANSACTION, "TransactionContext"},
|
100
|
+
{ExceptionType::NOT_IMPLEMENTED, "Not implemented"},
|
101
|
+
{ExceptionType::EXPRESSION, "Expression"},
|
102
|
+
{ExceptionType::CATALOG, "Catalog"},
|
103
|
+
{ExceptionType::PARSER, "Parser"},
|
104
|
+
{ExceptionType::BINDER, "Binder"},
|
105
|
+
{ExceptionType::PLANNER, "Planner"},
|
106
|
+
{ExceptionType::SCHEDULER, "Scheduler"},
|
107
|
+
{ExceptionType::EXECUTOR, "Executor"},
|
108
|
+
{ExceptionType::CONSTRAINT, "Constraint"},
|
109
|
+
{ExceptionType::INDEX, "Index"},
|
110
|
+
{ExceptionType::STAT, "Stat"},
|
111
|
+
{ExceptionType::CONNECTION, "Connection"},
|
112
|
+
{ExceptionType::SYNTAX, "Syntax"},
|
113
|
+
{ExceptionType::SETTINGS, "Settings"},
|
114
|
+
{ExceptionType::OPTIMIZER, "Optimizer"},
|
115
|
+
{ExceptionType::NULL_POINTER, "NullPointer"},
|
116
|
+
{ExceptionType::IO, "IO"},
|
117
|
+
{ExceptionType::INTERRUPT, "INTERRUPT"},
|
118
|
+
{ExceptionType::FATAL, "FATAL"},
|
119
|
+
{ExceptionType::INTERNAL, "INTERNAL"},
|
120
|
+
{ExceptionType::INVALID_INPUT, "Invalid Input"},
|
121
|
+
{ExceptionType::OUT_OF_MEMORY, "Out of Memory"},
|
122
|
+
{ExceptionType::PERMISSION, "Permission"},
|
123
|
+
{ExceptionType::PARAMETER_NOT_RESOLVED, "Parameter Not Resolved"},
|
124
|
+
{ExceptionType::PARAMETER_NOT_ALLOWED, "Parameter Not Allowed"},
|
125
|
+
{ExceptionType::DEPENDENCY, "Dependency"},
|
126
|
+
{ExceptionType::MISSING_EXTENSION, "Missing Extension"},
|
127
|
+
{ExceptionType::HTTP, "HTTP"},
|
128
|
+
{ExceptionType::AUTOLOAD, "Extension Autoloading"}};
|
129
|
+
|
85
130
|
string Exception::ExceptionTypeToString(ExceptionType type) {
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
case ExceptionType::DIVIDE_BY_ZERO:
|
100
|
-
return "Divide by Zero";
|
101
|
-
case ExceptionType::OBJECT_SIZE:
|
102
|
-
return "Object Size";
|
103
|
-
case ExceptionType::INVALID_TYPE:
|
104
|
-
return "Invalid type";
|
105
|
-
case ExceptionType::SERIALIZATION:
|
106
|
-
return "Serialization";
|
107
|
-
case ExceptionType::TRANSACTION:
|
108
|
-
return "TransactionContext";
|
109
|
-
case ExceptionType::NOT_IMPLEMENTED:
|
110
|
-
return "Not implemented";
|
111
|
-
case ExceptionType::EXPRESSION:
|
112
|
-
return "Expression";
|
113
|
-
case ExceptionType::CATALOG:
|
114
|
-
return "Catalog";
|
115
|
-
case ExceptionType::PARSER:
|
116
|
-
return "Parser";
|
117
|
-
case ExceptionType::BINDER:
|
118
|
-
return "Binder";
|
119
|
-
case ExceptionType::PLANNER:
|
120
|
-
return "Planner";
|
121
|
-
case ExceptionType::SCHEDULER:
|
122
|
-
return "Scheduler";
|
123
|
-
case ExceptionType::EXECUTOR:
|
124
|
-
return "Executor";
|
125
|
-
case ExceptionType::CONSTRAINT:
|
126
|
-
return "Constraint";
|
127
|
-
case ExceptionType::INDEX:
|
128
|
-
return "Index";
|
129
|
-
case ExceptionType::STAT:
|
130
|
-
return "Stat";
|
131
|
-
case ExceptionType::CONNECTION:
|
132
|
-
return "Connection";
|
133
|
-
case ExceptionType::SYNTAX:
|
134
|
-
return "Syntax";
|
135
|
-
case ExceptionType::SETTINGS:
|
136
|
-
return "Settings";
|
137
|
-
case ExceptionType::OPTIMIZER:
|
138
|
-
return "Optimizer";
|
139
|
-
case ExceptionType::NULL_POINTER:
|
140
|
-
return "NullPointer";
|
141
|
-
case ExceptionType::IO:
|
142
|
-
return "IO";
|
143
|
-
case ExceptionType::INTERRUPT:
|
144
|
-
return "INTERRUPT";
|
145
|
-
case ExceptionType::FATAL:
|
146
|
-
return "FATAL";
|
147
|
-
case ExceptionType::INTERNAL:
|
148
|
-
return "INTERNAL";
|
149
|
-
case ExceptionType::INVALID_INPUT:
|
150
|
-
return "Invalid Input";
|
151
|
-
case ExceptionType::OUT_OF_MEMORY:
|
152
|
-
return "Out of Memory";
|
153
|
-
case ExceptionType::PERMISSION:
|
154
|
-
return "Permission";
|
155
|
-
case ExceptionType::PARAMETER_NOT_RESOLVED:
|
156
|
-
return "Parameter Not Resolved";
|
157
|
-
case ExceptionType::PARAMETER_NOT_ALLOWED:
|
158
|
-
return "Parameter Not Allowed";
|
159
|
-
case ExceptionType::DEPENDENCY:
|
160
|
-
return "Dependency";
|
161
|
-
case ExceptionType::MISSING_EXTENSION:
|
162
|
-
return "Missing Extension";
|
163
|
-
case ExceptionType::HTTP:
|
164
|
-
return "HTTP";
|
165
|
-
case ExceptionType::AUTOLOAD:
|
166
|
-
return "Extension Autoloading";
|
167
|
-
default:
|
168
|
-
return "Unknown";
|
131
|
+
for (auto &e : EXCEPTION_MAP) {
|
132
|
+
if (e.type == type) {
|
133
|
+
return e.text;
|
134
|
+
}
|
135
|
+
}
|
136
|
+
return "Unknown";
|
137
|
+
}
|
138
|
+
|
139
|
+
ExceptionType Exception::StringToExceptionType(const string &type) {
|
140
|
+
for (auto &e : EXCEPTION_MAP) {
|
141
|
+
if (e.text == type) {
|
142
|
+
return e.type;
|
143
|
+
}
|
169
144
|
}
|
145
|
+
return ExceptionType::INVALID;
|
170
146
|
}
|
171
147
|
|
172
148
|
const HTTPException &Exception::AsHTTPException() const {
|
@@ -18,6 +18,26 @@ PreservedError::PreservedError(const Exception &exception)
|
|
18
18
|
PreservedError::PreservedError(const string &message)
|
19
19
|
: initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(message)),
|
20
20
|
exception_instance(nullptr) {
|
21
|
+
// Given a message in the form: xxxxx Error: yyyyy
|
22
|
+
// Try to match xxxxxxx with known error so to potentially reconstruct the original error type
|
23
|
+
auto position_semicolon = raw_message.find(':');
|
24
|
+
if (position_semicolon == std::string::npos) {
|
25
|
+
// Semicolon not found, bail out
|
26
|
+
return;
|
27
|
+
}
|
28
|
+
if (position_semicolon + 2 >= raw_message.size()) {
|
29
|
+
// Not enough characters afterward, bail out
|
30
|
+
return;
|
31
|
+
}
|
32
|
+
string err = raw_message.substr(0, position_semicolon);
|
33
|
+
string msg = raw_message.substr(position_semicolon + 2);
|
34
|
+
if (err.size() > 6 && err.substr(err.size() - 6) == " Error" && !msg.empty()) {
|
35
|
+
ExceptionType new_type = Exception::StringToExceptionType(err.substr(0, err.size() - 6));
|
36
|
+
if (new_type != type) {
|
37
|
+
type = new_type;
|
38
|
+
raw_message = msg;
|
39
|
+
}
|
40
|
+
}
|
21
41
|
}
|
22
42
|
|
23
43
|
const string &PreservedError::Message() {
|
@@ -6,7 +6,7 @@ namespace duckdb {
|
|
6
6
|
unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundReferenceExpression &expr,
|
7
7
|
ExpressionExecutorState &root) {
|
8
8
|
auto result = make_uniq<ExpressionState>(expr, root);
|
9
|
-
result->Finalize();
|
9
|
+
result->Finalize(true);
|
10
10
|
return result;
|
11
11
|
}
|
12
12
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "duckdb/execution/expression_executor_state.hpp"
|
2
|
+
|
2
3
|
#include "duckdb/execution/expression_executor.hpp"
|
3
4
|
#include "duckdb/planner/expression.hpp"
|
4
5
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
@@ -10,8 +11,13 @@ void ExpressionState::AddChild(Expression *expr) {
|
|
10
11
|
child_states.push_back(ExpressionExecutor::InitializeState(*expr, root));
|
11
12
|
}
|
12
13
|
|
13
|
-
void ExpressionState::Finalize() {
|
14
|
-
if (
|
14
|
+
void ExpressionState::Finalize(bool empty) {
|
15
|
+
if (types.empty()) {
|
16
|
+
return;
|
17
|
+
}
|
18
|
+
if (empty) {
|
19
|
+
intermediate_chunk.InitializeEmpty(types);
|
20
|
+
} else {
|
15
21
|
intermediate_chunk.Initialize(GetAllocator(), types);
|
16
22
|
}
|
17
23
|
}
|
@@ -192,6 +192,7 @@ void BufferedCSVReader::ParseCSV(ParserMode mode) {
|
|
192
192
|
}
|
193
193
|
|
194
194
|
bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_chunk, string &error_message) {
|
195
|
+
cached_buffers.clear();
|
195
196
|
mode = parser_mode;
|
196
197
|
// used for parsing algorithm
|
197
198
|
bool finished_chunk = false;
|
@@ -427,7 +428,6 @@ final_state:
|
|
427
428
|
Flush(insert_chunk);
|
428
429
|
}
|
429
430
|
|
430
|
-
end_of_file_reached = true;
|
431
431
|
return true;
|
432
432
|
}
|
433
433
|
|
@@ -70,6 +70,8 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
|
|
70
70
|
// 8) Empty Line State
|
71
71
|
transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\r')] = CSVState::EMPTY_LINE;
|
72
72
|
transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>('\n')] = CSVState::EMPTY_LINE;
|
73
|
+
transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.delimiter)] = CSVState::DELIMITER;
|
74
|
+
transition_array[CSVState::EMPTY_LINE][static_cast<uint8_t>(state_machine_options.quote)] = CSVState::QUOTED;
|
73
75
|
}
|
74
76
|
|
75
77
|
CSVStateMachineCache::CSVStateMachineCache() {
|