duckdb 0.8.2-dev2044.0 → 0.8.2-dev2090.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/configure.py +7 -2
- package/package.json +1 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -4
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +89 -727
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -3
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -4
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +3 -76
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
- package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
@@ -179,7 +179,8 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
|
|
179
179
|
auto &current_chunk = result->current_chunk;
|
180
180
|
if (current_chunk.Valid()) {
|
181
181
|
// We start by scanning the non-finished current chunk
|
182
|
-
|
182
|
+
// Limit the amount we're fetching to the chunk_size
|
183
|
+
idx_t cur_consumption = MinValue<idx_t>(current_chunk.RemainingSize(), chunk_size);
|
183
184
|
count += cur_consumption;
|
184
185
|
appender.Append(*current_chunk.data_chunk, current_chunk.position, current_chunk.position + cur_consumption,
|
185
186
|
current_chunk.data_chunk->size());
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-dev2044"
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev2090"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "8d8f9f1b79"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/types.hpp"
|
4
|
+
#include "duckdb/common/types/vector.hpp"
|
5
|
+
#include "duckdb/common/arrow/arrow.hpp"
|
6
|
+
#include "duckdb/common/arrow/arrow_buffer.hpp"
|
7
|
+
#include "duckdb/common/arrow/arrow_options.hpp"
|
8
|
+
#include "duckdb/common/array.hpp"
|
9
|
+
|
10
|
+
namespace duckdb {
|
11
|
+
|
12
|
+
//===--------------------------------------------------------------------===//
|
13
|
+
// Arrow append data
|
14
|
+
//===--------------------------------------------------------------------===//
|
15
|
+
typedef void (*initialize_t)(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
16
|
+
// append_data: The arrow array we're appending into
|
17
|
+
// input: The data we're appending
|
18
|
+
// from: The offset into the input we're scanning
|
19
|
+
// to: The last index of the input we're scanning
|
20
|
+
// input_size: The total size of the 'input' Vector.
|
21
|
+
typedef void (*append_vector_t)(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
22
|
+
typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
23
|
+
|
24
|
+
// This struct is used to save state for appending a column
|
25
|
+
// afterwards the ownership is passed to the arrow array, as 'private_data'
|
26
|
+
// FIXME: we should separate the append state variables from the variables required by the ArrowArray into
|
27
|
+
// ArrowAppendState
|
28
|
+
struct ArrowAppendData {
|
29
|
+
explicit ArrowAppendData(ArrowOptions &options_p) : options(options_p) {
|
30
|
+
}
|
31
|
+
// the buffers of the arrow vector
|
32
|
+
ArrowBuffer validity;
|
33
|
+
ArrowBuffer main_buffer;
|
34
|
+
ArrowBuffer aux_buffer;
|
35
|
+
|
36
|
+
idx_t row_count = 0;
|
37
|
+
idx_t null_count = 0;
|
38
|
+
|
39
|
+
// function pointers for construction
|
40
|
+
initialize_t initialize = nullptr;
|
41
|
+
append_vector_t append_vector = nullptr;
|
42
|
+
finalize_t finalize = nullptr;
|
43
|
+
|
44
|
+
// child data (if any)
|
45
|
+
vector<unique_ptr<ArrowAppendData>> child_data;
|
46
|
+
|
47
|
+
// the arrow array C API data, only set after Finalize
|
48
|
+
unique_ptr<ArrowArray> array;
|
49
|
+
duckdb::array<const void *, 3> buffers = {{nullptr, nullptr, nullptr}};
|
50
|
+
vector<ArrowArray *> child_pointers;
|
51
|
+
|
52
|
+
ArrowOptions options;
|
53
|
+
};
|
54
|
+
|
55
|
+
//===--------------------------------------------------------------------===//
|
56
|
+
// Append Helper Functions
|
57
|
+
//===--------------------------------------------------------------------===//
|
58
|
+
static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_bit) {
|
59
|
+
current_byte = row_idx / 8;
|
60
|
+
current_bit = row_idx % 8;
|
61
|
+
}
|
62
|
+
|
63
|
+
static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
|
64
|
+
data[current_byte] &= ~((uint64_t)1 << current_bit);
|
65
|
+
}
|
66
|
+
|
67
|
+
static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
|
68
|
+
current_bit++;
|
69
|
+
if (current_bit == 8) {
|
70
|
+
current_byte++;
|
71
|
+
current_bit = 0;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
static void ResizeValidity(ArrowBuffer &buffer, idx_t row_count) {
|
76
|
+
auto byte_count = (row_count + 7) / 8;
|
77
|
+
buffer.resize(byte_count, 0xFF);
|
78
|
+
}
|
79
|
+
|
80
|
+
static void SetNull(ArrowAppendData &append_data, uint8_t *validity_data, idx_t current_byte, uint8_t current_bit) {
|
81
|
+
UnsetBit(validity_data, current_byte, current_bit);
|
82
|
+
append_data.null_count++;
|
83
|
+
}
|
84
|
+
|
85
|
+
static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to) {
|
86
|
+
// resize the buffer, filling the validity buffer with all valid values
|
87
|
+
idx_t size = to - from;
|
88
|
+
ResizeValidity(append_data.validity, append_data.row_count + size);
|
89
|
+
if (format.validity.AllValid()) {
|
90
|
+
// if all values are valid we don't need to do anything else
|
91
|
+
return;
|
92
|
+
}
|
93
|
+
|
94
|
+
// otherwise we iterate through the validity mask
|
95
|
+
auto validity_data = (uint8_t *)append_data.validity.data();
|
96
|
+
uint8_t current_bit;
|
97
|
+
idx_t current_byte;
|
98
|
+
GetBitPosition(append_data.row_count, current_byte, current_bit);
|
99
|
+
for (idx_t i = from; i < to; i++) {
|
100
|
+
auto source_idx = format.sel->get_index(i);
|
101
|
+
// append the validity mask
|
102
|
+
if (!format.validity.RowIsValid(source_idx)) {
|
103
|
+
SetNull(append_data, validity_data, current_byte, current_bit);
|
104
|
+
}
|
105
|
+
NextBit(current_byte, current_bit);
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
} // namespace duckdb
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
#include "duckdb/common/types/vector.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
struct ArrowBoolData {
|
9
|
+
public:
|
10
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
11
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
12
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
13
|
+
};
|
14
|
+
|
15
|
+
} // namespace duckdb
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
#include "duckdb/common/arrow/appender/scalar_data.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
// Enums
|
10
|
+
//===--------------------------------------------------------------------===//
|
11
|
+
template <class TGT>
|
12
|
+
struct ArrowEnumData : public ArrowScalarBaseData<TGT> {
|
13
|
+
static idx_t GetLength(string_t input) {
|
14
|
+
return input.GetSize();
|
15
|
+
}
|
16
|
+
static void WriteData(data_ptr_t target, string_t input) {
|
17
|
+
memcpy(target, input.GetData(), input.GetSize());
|
18
|
+
}
|
19
|
+
static void EnumAppendVector(ArrowAppendData &append_data, const Vector &input, idx_t size) {
|
20
|
+
D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
|
21
|
+
|
22
|
+
// resize the validity mask and set up the validity buffer for iteration
|
23
|
+
ResizeValidity(append_data.validity, append_data.row_count + size);
|
24
|
+
|
25
|
+
// resize the offset buffer - the offset buffer holds the offsets into the child array
|
26
|
+
append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
|
27
|
+
auto data = FlatVector::GetData<string_t>(input);
|
28
|
+
auto offset_data = append_data.main_buffer.GetData<uint32_t>();
|
29
|
+
if (append_data.row_count == 0) {
|
30
|
+
// first entry
|
31
|
+
offset_data[0] = 0;
|
32
|
+
}
|
33
|
+
// now append the string data to the auxiliary buffer
|
34
|
+
// the auxiliary buffer's length depends on the string lengths, so we resize as required
|
35
|
+
auto last_offset = offset_data[append_data.row_count];
|
36
|
+
for (idx_t i = 0; i < size; i++) {
|
37
|
+
auto offset_idx = append_data.row_count + i + 1;
|
38
|
+
|
39
|
+
auto string_length = GetLength(data[i]);
|
40
|
+
|
41
|
+
// append the offset data
|
42
|
+
auto current_offset = last_offset + string_length;
|
43
|
+
offset_data[offset_idx] = current_offset;
|
44
|
+
|
45
|
+
// resize the string buffer if required, and write the string data
|
46
|
+
append_data.aux_buffer.resize(current_offset);
|
47
|
+
WriteData(append_data.aux_buffer.data() + last_offset, data[i]);
|
48
|
+
|
49
|
+
last_offset = current_offset;
|
50
|
+
}
|
51
|
+
append_data.row_count += size;
|
52
|
+
}
|
53
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
54
|
+
result.main_buffer.reserve(capacity * sizeof(TGT));
|
55
|
+
// construct the enum child data
|
56
|
+
auto enum_data = ArrowAppender::InitializeChild(LogicalType::VARCHAR, EnumType::GetSize(type), result.options);
|
57
|
+
EnumAppendVector(*enum_data, EnumType::GetValuesInsertOrder(type), EnumType::GetSize(type));
|
58
|
+
result.child_data.push_back(std::move(enum_data));
|
59
|
+
}
|
60
|
+
|
61
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
62
|
+
result->n_buffers = 2;
|
63
|
+
result->buffers[1] = append_data.main_buffer.data();
|
64
|
+
// finalize the enum child data, and assign it to the dictionary
|
65
|
+
result->dictionary = ArrowAppender::FinalizeChild(LogicalType::VARCHAR, *append_data.child_data[0]);
|
66
|
+
}
|
67
|
+
};
|
68
|
+
|
69
|
+
} // namespace duckdb
|
@@ -0,0 +1,8 @@
|
|
1
|
+
#include "duckdb/common/arrow/appender/bool_data.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/enum_data.hpp"
|
3
|
+
#include "duckdb/common/arrow/appender/list_data.hpp"
|
4
|
+
#include "duckdb/common/arrow/appender/map_data.hpp"
|
5
|
+
#include "duckdb/common/arrow/appender/scalar_data.hpp"
|
6
|
+
#include "duckdb/common/arrow/appender/struct_data.hpp"
|
7
|
+
#include "duckdb/common/arrow/appender/union_data.hpp"
|
8
|
+
#include "duckdb/common/arrow/appender/varchar_data.hpp"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
struct ArrowListData {
|
8
|
+
public:
|
9
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
10
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
11
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
12
|
+
|
13
|
+
public:
|
14
|
+
static void AppendOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
|
15
|
+
vector<sel_t> &child_sel);
|
16
|
+
};
|
17
|
+
|
18
|
+
} // namespace duckdb
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
4
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
// Maps
|
10
|
+
//===--------------------------------------------------------------------===//
|
11
|
+
struct ArrowMapData {
|
12
|
+
public:
|
13
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
14
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
15
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
16
|
+
};
|
17
|
+
|
18
|
+
} // namespace duckdb
|
@@ -0,0 +1,88 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
#include "duckdb/function/table/arrow.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
// Scalar Types
|
10
|
+
//===--------------------------------------------------------------------===//
|
11
|
+
struct ArrowScalarConverter {
|
12
|
+
template <class TGT, class SRC>
|
13
|
+
static TGT Operation(SRC input) {
|
14
|
+
return input;
|
15
|
+
}
|
16
|
+
|
17
|
+
static bool SkipNulls() {
|
18
|
+
return false;
|
19
|
+
}
|
20
|
+
|
21
|
+
template <class TGT>
|
22
|
+
static void SetNull(TGT &value) {
|
23
|
+
}
|
24
|
+
};
|
25
|
+
|
26
|
+
struct ArrowIntervalConverter {
|
27
|
+
template <class TGT, class SRC>
|
28
|
+
static TGT Operation(SRC input) {
|
29
|
+
ArrowInterval result;
|
30
|
+
result.months = input.months;
|
31
|
+
result.days = input.days;
|
32
|
+
result.nanoseconds = input.micros * Interval::NANOS_PER_MICRO;
|
33
|
+
return result;
|
34
|
+
}
|
35
|
+
|
36
|
+
static bool SkipNulls() {
|
37
|
+
return true;
|
38
|
+
}
|
39
|
+
|
40
|
+
template <class TGT>
|
41
|
+
static void SetNull(TGT &value) {
|
42
|
+
}
|
43
|
+
};
|
44
|
+
|
45
|
+
template <class TGT, class SRC = TGT, class OP = ArrowScalarConverter>
|
46
|
+
struct ArrowScalarBaseData {
|
47
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
48
|
+
D_ASSERT(to >= from);
|
49
|
+
idx_t size = to - from;
|
50
|
+
D_ASSERT(size <= input_size);
|
51
|
+
UnifiedVectorFormat format;
|
52
|
+
input.ToUnifiedFormat(input_size, format);
|
53
|
+
|
54
|
+
// append the validity mask
|
55
|
+
AppendValidity(append_data, format, from, to);
|
56
|
+
|
57
|
+
// append the main data
|
58
|
+
append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(TGT) * size);
|
59
|
+
auto data = UnifiedVectorFormat::GetData<SRC>(format);
|
60
|
+
auto result_data = append_data.main_buffer.GetData<TGT>();
|
61
|
+
|
62
|
+
for (idx_t i = from; i < to; i++) {
|
63
|
+
auto source_idx = format.sel->get_index(i);
|
64
|
+
auto result_idx = append_data.row_count + i - from;
|
65
|
+
|
66
|
+
if (OP::SkipNulls() && !format.validity.RowIsValid(source_idx)) {
|
67
|
+
OP::template SetNull<TGT>(result_data[result_idx]);
|
68
|
+
continue;
|
69
|
+
}
|
70
|
+
result_data[result_idx] = OP::template Operation<TGT, SRC>(data[source_idx]);
|
71
|
+
}
|
72
|
+
append_data.row_count += size;
|
73
|
+
}
|
74
|
+
};
|
75
|
+
|
76
|
+
template <class TGT, class SRC = TGT, class OP = ArrowScalarConverter>
|
77
|
+
struct ArrowScalarData : public ArrowScalarBaseData<TGT, SRC, OP> {
|
78
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
79
|
+
result.main_buffer.reserve(capacity * sizeof(TGT));
|
80
|
+
}
|
81
|
+
|
82
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
83
|
+
result->n_buffers = 2;
|
84
|
+
result->buffers[1] = append_data.main_buffer.data();
|
85
|
+
}
|
86
|
+
};
|
87
|
+
|
88
|
+
} // namespace duckdb
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
#include "duckdb/common/arrow/appender/scalar_data.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
// Structs
|
10
|
+
//===--------------------------------------------------------------------===//
|
11
|
+
struct ArrowStructData {
|
12
|
+
public:
|
13
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
14
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
15
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
16
|
+
};
|
17
|
+
|
18
|
+
} // namespace duckdb
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
//===--------------------------------------------------------------------===//
|
8
|
+
// Unions
|
9
|
+
//===--------------------------------------------------------------------===//
|
10
|
+
/**
|
11
|
+
* Based on https://arrow.apache.org/docs/format/Columnar.html#union-layout &
|
12
|
+
* https://arrow.apache.org/docs/format/CDataInterface.html
|
13
|
+
*/
|
14
|
+
struct ArrowUnionData {
|
15
|
+
public:
|
16
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
|
17
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
|
18
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
|
19
|
+
};
|
20
|
+
|
21
|
+
} // namespace duckdb
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/arrow/appender/append_data.hpp"
|
4
|
+
#include "duckdb/common/arrow/appender/scalar_data.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//===--------------------------------------------------------------------===//
|
9
|
+
// Varchar
|
10
|
+
//===--------------------------------------------------------------------===//
|
11
|
+
struct ArrowVarcharConverter {
|
12
|
+
template <class SRC>
|
13
|
+
static idx_t GetLength(SRC input) {
|
14
|
+
return input.GetSize();
|
15
|
+
}
|
16
|
+
|
17
|
+
template <class SRC>
|
18
|
+
static void WriteData(data_ptr_t target, SRC input) {
|
19
|
+
memcpy(target, input.GetData(), input.GetSize());
|
20
|
+
}
|
21
|
+
};
|
22
|
+
|
23
|
+
struct ArrowUUIDConverter {
|
24
|
+
template <class SRC>
|
25
|
+
static idx_t GetLength(SRC input) {
|
26
|
+
return UUID::STRING_SIZE;
|
27
|
+
}
|
28
|
+
|
29
|
+
template <class SRC>
|
30
|
+
static void WriteData(data_ptr_t target, SRC input) {
|
31
|
+
UUID::ToString(input, char_ptr_cast(target));
|
32
|
+
}
|
33
|
+
};
|
34
|
+
|
35
|
+
template <class SRC = string_t, class OP = ArrowVarcharConverter, class BUFTYPE = uint64_t>
|
36
|
+
struct ArrowVarcharData {
|
37
|
+
static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
38
|
+
result.main_buffer.reserve((capacity + 1) * sizeof(BUFTYPE));
|
39
|
+
|
40
|
+
result.aux_buffer.reserve(capacity);
|
41
|
+
}
|
42
|
+
|
43
|
+
static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
44
|
+
idx_t size = to - from;
|
45
|
+
UnifiedVectorFormat format;
|
46
|
+
input.ToUnifiedFormat(input_size, format);
|
47
|
+
|
48
|
+
// resize the validity mask and set up the validity buffer for iteration
|
49
|
+
ResizeValidity(append_data.validity, append_data.row_count + size);
|
50
|
+
auto validity_data = (uint8_t *)append_data.validity.data();
|
51
|
+
|
52
|
+
// resize the offset buffer - the offset buffer holds the offsets into the child array
|
53
|
+
append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(BUFTYPE) * (size + 1));
|
54
|
+
auto data = UnifiedVectorFormat::GetData<SRC>(format);
|
55
|
+
auto offset_data = append_data.main_buffer.GetData<BUFTYPE>();
|
56
|
+
if (append_data.row_count == 0) {
|
57
|
+
// first entry
|
58
|
+
offset_data[0] = 0;
|
59
|
+
}
|
60
|
+
// now append the string data to the auxiliary buffer
|
61
|
+
// the auxiliary buffer's length depends on the string lengths, so we resize as required
|
62
|
+
auto last_offset = offset_data[append_data.row_count];
|
63
|
+
idx_t max_offset = append_data.row_count + to - from;
|
64
|
+
if (max_offset > NumericLimits<uint32_t>::Maximum() &&
|
65
|
+
append_data.options.offset_size == ArrowOffsetSize::REGULAR) {
|
66
|
+
throw InvalidInputException("Arrow Appender: The maximum total string size for regular string buffers is "
|
67
|
+
"%u but the offset of %lu exceeds this.",
|
68
|
+
NumericLimits<uint32_t>::Maximum(), max_offset);
|
69
|
+
}
|
70
|
+
for (idx_t i = from; i < to; i++) {
|
71
|
+
auto source_idx = format.sel->get_index(i);
|
72
|
+
auto offset_idx = append_data.row_count + i + 1 - from;
|
73
|
+
|
74
|
+
if (!format.validity.RowIsValid(source_idx)) {
|
75
|
+
uint8_t current_bit;
|
76
|
+
idx_t current_byte;
|
77
|
+
GetBitPosition(append_data.row_count + i - from, current_byte, current_bit);
|
78
|
+
SetNull(append_data, validity_data, current_byte, current_bit);
|
79
|
+
offset_data[offset_idx] = last_offset;
|
80
|
+
continue;
|
81
|
+
}
|
82
|
+
|
83
|
+
auto string_length = OP::GetLength(data[source_idx]);
|
84
|
+
|
85
|
+
// append the offset data
|
86
|
+
auto current_offset = last_offset + string_length;
|
87
|
+
offset_data[offset_idx] = current_offset;
|
88
|
+
|
89
|
+
// resize the string buffer if required, and write the string data
|
90
|
+
append_data.aux_buffer.resize(current_offset);
|
91
|
+
OP::WriteData(append_data.aux_buffer.data() + last_offset, data[source_idx]);
|
92
|
+
|
93
|
+
last_offset = current_offset;
|
94
|
+
}
|
95
|
+
append_data.row_count += size;
|
96
|
+
}
|
97
|
+
|
98
|
+
static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
99
|
+
result->n_buffers = 3;
|
100
|
+
result->buffers[1] = append_data.main_buffer.data();
|
101
|
+
result->buffers[2] = append_data.aux_buffer.data();
|
102
|
+
}
|
103
|
+
};
|
104
|
+
|
105
|
+
} // namespace duckdb
|
@@ -27,6 +27,11 @@ public:
|
|
27
27
|
//! Returns the underlying arrow array
|
28
28
|
DUCKDB_API ArrowArray Finalize();
|
29
29
|
|
30
|
+
public:
|
31
|
+
static void ReleaseArray(ArrowArray *array);
|
32
|
+
static ArrowArray *FinalizeChild(const LogicalType &type, ArrowAppendData &append_data);
|
33
|
+
static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity, ArrowOptions &options);
|
34
|
+
|
30
35
|
private:
|
31
36
|
//! The types of the chunks that will be appended in
|
32
37
|
vector<LogicalType> types;
|
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
#pragma once
|
12
12
|
|
13
|
-
#if (defined(DUCKDB_USE_STANDARD_ASSERT) || !defined(DEBUG)) && !defined(DUCKDB_FORCE_ASSERT)
|
13
|
+
#if (defined(DUCKDB_USE_STANDARD_ASSERT) || !defined(DEBUG)) && !defined(DUCKDB_FORCE_ASSERT) && !defined(__MVS__)
|
14
14
|
|
15
15
|
#include <assert.h>
|
16
16
|
#define D_ASSERT assert
|
@@ -101,9 +101,9 @@ struct AtanFun {
|
|
101
101
|
|
102
102
|
struct Atan2Fun {
|
103
103
|
static constexpr const char *Name = "atan2";
|
104
|
-
static constexpr const char *Parameters = "x
|
105
|
-
static constexpr const char *Description = "computes the arctangent (
|
106
|
-
static constexpr const char *Example = "atan2(0
|
104
|
+
static constexpr const char *Parameters = "y,x";
|
105
|
+
static constexpr const char *Description = "computes the arctangent (y, x)";
|
106
|
+
static constexpr const char *Example = "atan2(1.0, 0.0)";
|
107
107
|
|
108
108
|
static ScalarFunction GetFunction();
|
109
109
|
};
|
@@ -107,7 +107,7 @@ void Executor::SchedulePipeline(const shared_ptr<MetaPipeline> &meta_pipeline, S
|
|
107
107
|
group_stack.pipeline_finish_event, base_stack.pipeline_complete_event);
|
108
108
|
|
109
109
|
// dependencies: base_finish -> pipeline_event -> group_finish
|
110
|
-
pipeline_stack.pipeline_event.AddDependency(base_stack.
|
110
|
+
pipeline_stack.pipeline_event.AddDependency(base_stack.pipeline_finish_event);
|
111
111
|
group_stack.pipeline_finish_event.AddDependency(pipeline_stack.pipeline_event);
|
112
112
|
|
113
113
|
// add pipeline stack to event map
|
@@ -61,10 +61,7 @@ bool BoundWindowExpression::KeysAreCompatible(const BoundWindowExpression &other
|
|
61
61
|
return false;
|
62
62
|
}
|
63
63
|
for (idx_t i = 0; i < orders.size(); i++) {
|
64
|
-
if (orders[i].
|
65
|
-
return false;
|
66
|
-
}
|
67
|
-
if (!Expression::Equals(*orders[i].expression, *other.orders[i].expression)) {
|
64
|
+
if (!orders[i].Equals(other.orders[i])) {
|
68
65
|
return false;
|
69
66
|
}
|
70
67
|
}
|
@@ -29,6 +29,7 @@ extern "C" {
|
|
29
29
|
#include <chrono>
|
30
30
|
#elif defined(__MVS__)
|
31
31
|
#include <zos-semaphore.h>
|
32
|
+
#include <chrono>
|
32
33
|
#endif
|
33
34
|
|
34
35
|
namespace duckdb_moodycamel
|
@@ -162,9 +163,9 @@ public:
|
|
162
163
|
}
|
163
164
|
}
|
164
165
|
};
|
165
|
-
#elif defined(__unix__)
|
166
|
+
#elif defined(__unix__) || defined(__MVS__)
|
166
167
|
//---------------------------------------------------------
|
167
|
-
// Semaphore (POSIX, Linux)
|
168
|
+
// Semaphore (POSIX, Linux, zOS aka MVS)
|
168
169
|
//---------------------------------------------------------
|
169
170
|
class Semaphore
|
170
171
|
{
|
@@ -256,80 +257,6 @@ public:
|
|
256
257
|
}
|
257
258
|
}
|
258
259
|
};
|
259
|
-
#elif defined(__MVS__)
|
260
|
-
//---------------------------------------------------------
|
261
|
-
// Semaphore (MVS aka z/OS)
|
262
|
-
//---------------------------------------------------------
|
263
|
-
class Semaphore
|
264
|
-
{
|
265
|
-
private:
|
266
|
-
sem_t m_sema;
|
267
|
-
|
268
|
-
Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
|
269
|
-
Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
|
270
|
-
|
271
|
-
public:
|
272
|
-
Semaphore(int initialCount = 0)
|
273
|
-
{
|
274
|
-
assert(initialCount >= 0);
|
275
|
-
int rc = sem_init(&m_sema, 0, initialCount);
|
276
|
-
assert(rc == 0);
|
277
|
-
(void)rc;
|
278
|
-
}
|
279
|
-
|
280
|
-
~Semaphore()
|
281
|
-
{
|
282
|
-
sem_destroy(&m_sema);
|
283
|
-
}
|
284
|
-
|
285
|
-
bool wait()
|
286
|
-
{
|
287
|
-
// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
|
288
|
-
int rc;
|
289
|
-
do {
|
290
|
-
rc = sem_wait(&m_sema);
|
291
|
-
} while (rc == -1 && errno == EINTR);
|
292
|
-
return rc == 0;
|
293
|
-
}
|
294
|
-
|
295
|
-
bool try_wait()
|
296
|
-
{
|
297
|
-
int rc;
|
298
|
-
do {
|
299
|
-
rc = sem_trywait(&m_sema);
|
300
|
-
} while (rc == -1 && errno == EINTR);
|
301
|
-
return rc == 0;
|
302
|
-
}
|
303
|
-
|
304
|
-
bool timed_wait(std::uint64_t usecs)
|
305
|
-
{
|
306
|
-
struct timespec ts;
|
307
|
-
const int usecs_in_1_sec = 1000000;
|
308
|
-
const int nsecs_in_1_sec = 1000000000;
|
309
|
-
|
310
|
-
ts.tv_sec = usecs / usecs_in_1_sec;
|
311
|
-
ts.tv_nsec = (usecs % usecs_in_1_sec) * 1000;
|
312
|
-
|
313
|
-
int rc;
|
314
|
-
do {
|
315
|
-
rc = sem_timedwait(&m_sema, &ts);
|
316
|
-
} while (rc == -1 && errno == EINTR);
|
317
|
-
return rc == 0;
|
318
|
-
}
|
319
|
-
|
320
|
-
void signal()
|
321
|
-
{
|
322
|
-
while (sem_post(&m_sema) == -1);
|
323
|
-
}
|
324
|
-
|
325
|
-
void signal(int count)
|
326
|
-
{
|
327
|
-
while (count-- > 0)
|
328
|
-
{
|
329
|
-
while (sem_post(&m_sema) == -1);
|
330
|
-
}
|
331
|
-
}
|
332
|
-
};
|
333
260
|
#else
|
334
261
|
#error Unsupported platform! (No semaphore wrapper available)
|
335
262
|
#endif
|