duckdb 0.4.1-dev182.0 → 0.4.1-dev1896.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/duckdb.js +316 -20
- package/package.json +3 -1
- package/src/connection.cpp +25 -18
- package/src/data_chunk.cpp +2 -2
- package/src/database.cpp +66 -16
- package/src/duckdb.cpp +70002 -31503
- package/src/duckdb.hpp +7965 -4400
- package/src/duckdb_node.cpp +1 -0
- package/src/duckdb_node.hpp +37 -1
- package/src/parquet-amalgamation.cpp +36019 -35374
- package/src/parquet-amalgamation.hpp +112 -114
- package/src/statement.cpp +128 -20
- package/test/data_type_support.test.js +83 -13
- package/test/extension.test.js +1 -1
- package/test/jsdoc.test.js +60 -0
- package/test/query_result.test.js +23 -0
- package/test/syntax_error.test.js +16 -0
|
@@ -34,6 +34,10 @@ public:
|
|
|
34
34
|
|
|
35
35
|
#include "duckdb.hpp"
|
|
36
36
|
#ifndef DUCKDB_AMALGAMATION
|
|
37
|
+
#include "duckdb/planner/table_filter.hpp"
|
|
38
|
+
#include "duckdb/planner/filter/constant_filter.hpp"
|
|
39
|
+
#include "duckdb/planner/filter/null_filter.hpp"
|
|
40
|
+
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
|
37
41
|
#include "duckdb/common/common.hpp"
|
|
38
42
|
#include "duckdb/common/exception.hpp"
|
|
39
43
|
#include "duckdb/common/string_util.hpp"
|
|
@@ -91,8 +95,8 @@ public:
|
|
|
91
95
|
* under the License.
|
|
92
96
|
*/
|
|
93
97
|
|
|
94
|
-
#ifndef
|
|
95
|
-
#define
|
|
98
|
+
#ifndef _DUCKDB_THRIFT_THRIFT_H_
|
|
99
|
+
#define _DUCKDB_THRIFT_THRIFT_H_ 1
|
|
96
100
|
|
|
97
101
|
|
|
98
102
|
|
|
@@ -121,8 +125,8 @@ public:
|
|
|
121
125
|
|
|
122
126
|
// clang-format off
|
|
123
127
|
|
|
124
|
-
#ifndef
|
|
125
|
-
# define
|
|
128
|
+
#ifndef _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
|
|
129
|
+
# define _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
|
|
126
130
|
|
|
127
131
|
#ifdef _WIN32
|
|
128
132
|
#ifdef _WINSOCKAPI_
|
|
@@ -236,7 +240,7 @@ public:
|
|
|
236
240
|
# define THRIFT_SHUT_RDWR SHUT_RDWR
|
|
237
241
|
#endif
|
|
238
242
|
|
|
239
|
-
#endif //
|
|
243
|
+
#endif // _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
|
|
240
244
|
|
|
241
245
|
|
|
242
246
|
// LICENSE_CHANGE_END
|
|
@@ -329,8 +333,8 @@ public:
|
|
|
329
333
|
* under the License.
|
|
330
334
|
*/
|
|
331
335
|
|
|
332
|
-
#ifndef
|
|
333
|
-
#define
|
|
336
|
+
#ifndef _DUCKDB_THRIFT_TLOGGING_H_
|
|
337
|
+
#define _DUCKDB_THRIFT_TLOGGING_H_ 1
|
|
334
338
|
|
|
335
339
|
|
|
336
340
|
|
|
@@ -454,7 +458,7 @@ public:
|
|
|
454
458
|
#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot)
|
|
455
459
|
#endif
|
|
456
460
|
|
|
457
|
-
#endif // #ifndef
|
|
461
|
+
#endif // #ifndef _DUCKDB_THRIFT_TLOGGING_H_
|
|
458
462
|
|
|
459
463
|
|
|
460
464
|
// LICENSE_CHANGE_END
|
|
@@ -546,7 +550,7 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
|
|
|
546
550
|
}
|
|
547
551
|
} // duckdb_apache::thrift
|
|
548
552
|
|
|
549
|
-
#endif // #ifndef
|
|
553
|
+
#endif // #ifndef _DUCKDB_THRIFT_THRIFT_H_
|
|
550
554
|
|
|
551
555
|
|
|
552
556
|
// LICENSE_CHANGE_END
|
|
@@ -576,8 +580,8 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
|
|
|
576
580
|
* under the License.
|
|
577
581
|
*/
|
|
578
582
|
|
|
579
|
-
#ifndef
|
|
580
|
-
#define
|
|
583
|
+
#ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
|
|
584
|
+
#define _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ 1
|
|
581
585
|
|
|
582
586
|
|
|
583
587
|
|
|
@@ -671,7 +675,7 @@ protected:
|
|
|
671
675
|
}
|
|
672
676
|
} // duckdb_apache::thrift
|
|
673
677
|
|
|
674
|
-
#endif // #ifndef
|
|
678
|
+
#endif // #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
|
|
675
679
|
|
|
676
680
|
|
|
677
681
|
// LICENSE_CHANGE_END
|
|
@@ -701,8 +705,8 @@ protected:
|
|
|
701
705
|
* under the License.
|
|
702
706
|
*/
|
|
703
707
|
|
|
704
|
-
#ifndef
|
|
705
|
-
#define
|
|
708
|
+
#ifndef _DUCKDB_THRIFT_TBASE_H_
|
|
709
|
+
#define _DUCKDB_THRIFT_TBASE_H_ 1
|
|
706
710
|
|
|
707
711
|
|
|
708
712
|
|
|
@@ -730,8 +734,8 @@ protected:
|
|
|
730
734
|
* under the License.
|
|
731
735
|
*/
|
|
732
736
|
|
|
733
|
-
#ifndef
|
|
734
|
-
#define
|
|
737
|
+
#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_
|
|
738
|
+
#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
|
|
735
739
|
|
|
736
740
|
#ifdef _WIN32
|
|
737
741
|
// Need to come before any Windows.h includes
|
|
@@ -763,8 +767,8 @@ protected:
|
|
|
763
767
|
* under the License.
|
|
764
768
|
*/
|
|
765
769
|
|
|
766
|
-
#ifndef
|
|
767
|
-
#define
|
|
770
|
+
#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
|
|
771
|
+
#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ 1
|
|
768
772
|
|
|
769
773
|
|
|
770
774
|
|
|
@@ -792,8 +796,8 @@ protected:
|
|
|
792
796
|
* under the License.
|
|
793
797
|
*/
|
|
794
798
|
|
|
795
|
-
#ifndef
|
|
796
|
-
#define
|
|
799
|
+
#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
|
|
800
|
+
#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1
|
|
797
801
|
|
|
798
802
|
// FUCK OFF #include <boost/numeric/conversion/cast.hpp>
|
|
799
803
|
#include <string>
|
|
@@ -878,7 +882,7 @@ protected:
|
|
|
878
882
|
}
|
|
879
883
|
} // duckdb_apache::thrift::transport
|
|
880
884
|
|
|
881
|
-
#endif // #ifndef
|
|
885
|
+
#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
|
|
882
886
|
|
|
883
887
|
|
|
884
888
|
// LICENSE_CHANGE_END
|
|
@@ -1129,7 +1133,7 @@ public:
|
|
|
1129
1133
|
}
|
|
1130
1134
|
} // duckdb_apache::thrift::transport
|
|
1131
1135
|
|
|
1132
|
-
#endif // #ifndef
|
|
1136
|
+
#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
|
|
1133
1137
|
|
|
1134
1138
|
|
|
1135
1139
|
// LICENSE_CHANGE_END
|
|
@@ -1159,8 +1163,8 @@ public:
|
|
|
1159
1163
|
* under the License.
|
|
1160
1164
|
*/
|
|
1161
1165
|
|
|
1162
|
-
#ifndef
|
|
1163
|
-
#define
|
|
1166
|
+
#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
|
|
1167
|
+
#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1
|
|
1164
1168
|
|
|
1165
1169
|
#include <string>
|
|
1166
1170
|
|
|
@@ -1244,7 +1248,7 @@ protected:
|
|
|
1244
1248
|
}
|
|
1245
1249
|
} // duckdb_apache::thrift::protocol
|
|
1246
1250
|
|
|
1247
|
-
#endif // #ifndef
|
|
1251
|
+
#endif // #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
|
|
1248
1252
|
|
|
1249
1253
|
|
|
1250
1254
|
// LICENSE_CHANGE_END
|
|
@@ -1267,37 +1271,39 @@ protected:
|
|
|
1267
1271
|
// but that doesn't work.
|
|
1268
1272
|
// For a pretty in-depth explanation of the problem, see
|
|
1269
1273
|
// http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html
|
|
1274
|
+
namespace duckdb_apache { namespace thrift {
|
|
1270
1275
|
template <typename To, typename From>
|
|
1271
1276
|
static inline To bitwise_cast(From from) {
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1277
|
+
static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)");
|
|
1278
|
+
|
|
1279
|
+
// BAD!!! These are all broken with -O2.
|
|
1280
|
+
// return *reinterpret_cast<To*>(&from); // BAD!!!
|
|
1281
|
+
// return *static_cast<To*>(static_cast<void*>(&from)); // BAD!!!
|
|
1282
|
+
// return *(To*)(void*)&from; // BAD!!!
|
|
1283
|
+
|
|
1284
|
+
// Super clean and paritally blessed by section 3.9 of the standard.
|
|
1285
|
+
// unsigned char c[sizeof(from)];
|
|
1286
|
+
// memcpy(c, &from, sizeof(from));
|
|
1287
|
+
// To to;
|
|
1288
|
+
// memcpy(&to, c, sizeof(c));
|
|
1289
|
+
// return to;
|
|
1290
|
+
|
|
1291
|
+
// Slightly more questionable.
|
|
1292
|
+
// Same code emitted by GCC.
|
|
1293
|
+
// To to;
|
|
1294
|
+
// memcpy(&to, &from, sizeof(from));
|
|
1295
|
+
// return to;
|
|
1296
|
+
|
|
1297
|
+
// Technically undefined, but almost universally supported,
|
|
1298
|
+
// and the most efficient implementation.
|
|
1299
|
+
union {
|
|
1300
|
+
From f;
|
|
1301
|
+
To t;
|
|
1302
|
+
} u;
|
|
1303
|
+
u.f = from;
|
|
1304
|
+
return u.t;
|
|
1300
1305
|
}
|
|
1306
|
+
}} // namespace duckdb_apache::thrift
|
|
1301
1307
|
|
|
1302
1308
|
|
|
1303
1309
|
#ifdef HAVE_SYS_PARAM_H
|
|
@@ -1983,7 +1989,7 @@ uint32_t skip(Protocol_& prot, TType type) {
|
|
|
1983
1989
|
|
|
1984
1990
|
}}} // duckdb_apache::thrift::protocol
|
|
1985
1991
|
|
|
1986
|
-
#endif // #define
|
|
1992
|
+
#endif // #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
|
|
1987
1993
|
|
|
1988
1994
|
|
|
1989
1995
|
// LICENSE_CHANGE_END
|
|
@@ -2001,7 +2007,7 @@ public:
|
|
|
2001
2007
|
}
|
|
2002
2008
|
} // duckdb_apache::thrift
|
|
2003
2009
|
|
|
2004
|
-
#endif // #ifndef
|
|
2010
|
+
#endif // #ifndef _DUCKDB_THRIFT_TBASE_H_
|
|
2005
2011
|
|
|
2006
2012
|
|
|
2007
2013
|
// LICENSE_CHANGE_END
|
|
@@ -4651,8 +4657,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
|
|
|
4651
4657
|
* under the License.
|
|
4652
4658
|
*/
|
|
4653
4659
|
|
|
4654
|
-
#ifndef
|
|
4655
|
-
#define
|
|
4660
|
+
#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_
|
|
4661
|
+
#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1
|
|
4656
4662
|
|
|
4657
4663
|
|
|
4658
4664
|
|
|
@@ -4679,8 +4685,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
|
|
|
4679
4685
|
* under the License.
|
|
4680
4686
|
*/
|
|
4681
4687
|
|
|
4682
|
-
#ifndef
|
|
4683
|
-
#define
|
|
4688
|
+
#ifndef _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_
|
|
4689
|
+
#define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
|
|
4684
4690
|
|
|
4685
4691
|
|
|
4686
4692
|
|
|
@@ -5172,7 +5178,7 @@ protected:
|
|
|
5172
5178
|
}
|
|
5173
5179
|
} // duckdb_apache::thrift::protocol
|
|
5174
5180
|
|
|
5175
|
-
#endif // #define
|
|
5181
|
+
#endif // #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
|
|
5176
5182
|
|
|
5177
5183
|
|
|
5178
5184
|
// LICENSE_CHANGE_END
|
|
@@ -5441,8 +5447,8 @@ typedef TCompactProtocolFactoryT<TTransport> TCompactProtocolFactory;
|
|
|
5441
5447
|
* specific language governing permissions and limitations
|
|
5442
5448
|
* under the License.
|
|
5443
5449
|
*/
|
|
5444
|
-
#ifndef
|
|
5445
|
-
#define
|
|
5450
|
+
#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
|
|
5451
|
+
#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1
|
|
5446
5452
|
|
|
5447
5453
|
#include <limits>
|
|
5448
5454
|
|
|
@@ -6248,7 +6254,7 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
|
|
|
6248
6254
|
|
|
6249
6255
|
}}} // duckdb_apache::thrift::protocol
|
|
6250
6256
|
|
|
6251
|
-
#endif //
|
|
6257
|
+
#endif // _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
|
|
6252
6258
|
|
|
6253
6259
|
|
|
6254
6260
|
// LICENSE_CHANGE_END
|
|
@@ -6284,8 +6290,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
|
|
|
6284
6290
|
* under the License.
|
|
6285
6291
|
*/
|
|
6286
6292
|
|
|
6287
|
-
#ifndef
|
|
6288
|
-
#define
|
|
6293
|
+
#ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
|
|
6294
|
+
#define _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1
|
|
6289
6295
|
|
|
6290
6296
|
#include <cstdlib>
|
|
6291
6297
|
#include <cstddef>
|
|
@@ -6319,8 +6325,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
|
|
|
6319
6325
|
* under the License.
|
|
6320
6326
|
*/
|
|
6321
6327
|
|
|
6322
|
-
#ifndef
|
|
6323
|
-
#define
|
|
6328
|
+
#ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
|
|
6329
|
+
#define _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1
|
|
6324
6330
|
|
|
6325
6331
|
|
|
6326
6332
|
|
|
@@ -6439,7 +6445,7 @@ protected:
|
|
|
6439
6445
|
}
|
|
6440
6446
|
} // duckdb_apache::thrift::transport
|
|
6441
6447
|
|
|
6442
|
-
#endif // #ifndef
|
|
6448
|
+
#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
|
|
6443
6449
|
|
|
6444
6450
|
|
|
6445
6451
|
// LICENSE_CHANGE_END
|
|
@@ -6902,7 +6908,7 @@ protected:
|
|
|
6902
6908
|
}
|
|
6903
6909
|
} // duckdb_apache::thrift::transport
|
|
6904
6910
|
|
|
6905
|
-
#endif // #ifndef
|
|
6911
|
+
#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
|
|
6906
6912
|
|
|
6907
6913
|
|
|
6908
6914
|
// LICENSE_CHANGE_END
|
|
@@ -6924,7 +6930,7 @@ struct ReadHead {
|
|
|
6924
6930
|
uint64_t size;
|
|
6925
6931
|
|
|
6926
6932
|
// Current info
|
|
6927
|
-
|
|
6933
|
+
AllocatedData data;
|
|
6928
6934
|
bool data_isset = false;
|
|
6929
6935
|
|
|
6930
6936
|
idx_t GetEnd() const {
|
|
@@ -7019,7 +7025,7 @@ struct ReadAheadBuffer {
|
|
|
7019
7025
|
throw std::runtime_error("Prefetch registered requested for bytes outside file");
|
|
7020
7026
|
}
|
|
7021
7027
|
|
|
7022
|
-
handle.Read(read_head.data
|
|
7028
|
+
handle.Read(read_head.data.get(), read_head.size, read_head.location);
|
|
7023
7029
|
read_head.data_isset = true;
|
|
7024
7030
|
}
|
|
7025
7031
|
}
|
|
@@ -7041,16 +7047,16 @@ public:
|
|
|
7041
7047
|
|
|
7042
7048
|
if (!prefetch_buffer->data_isset) {
|
|
7043
7049
|
prefetch_buffer->Allocate(allocator);
|
|
7044
|
-
handle.Read(prefetch_buffer->data
|
|
7050
|
+
handle.Read(prefetch_buffer->data.get(), prefetch_buffer->size, prefetch_buffer->location);
|
|
7045
7051
|
prefetch_buffer->data_isset = true;
|
|
7046
7052
|
}
|
|
7047
|
-
memcpy(buf, prefetch_buffer->data
|
|
7053
|
+
memcpy(buf, prefetch_buffer->data.get() + location - prefetch_buffer->location, len);
|
|
7048
7054
|
} else {
|
|
7049
7055
|
if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) {
|
|
7050
7056
|
Prefetch(location, MinValue<uint64_t>(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location));
|
|
7051
7057
|
auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location);
|
|
7052
7058
|
D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size);
|
|
7053
|
-
memcpy(buf, prefetch_buffer_fallback->data
|
|
7059
|
+
memcpy(buf, prefetch_buffer_fallback->data.get() + location - prefetch_buffer_fallback->location, len);
|
|
7054
7060
|
} else {
|
|
7055
7061
|
handle.Read(buf, len, location);
|
|
7056
7062
|
}
|
|
@@ -7191,12 +7197,12 @@ public:
|
|
|
7191
7197
|
if (new_size > alloc_len) {
|
|
7192
7198
|
alloc_len = new_size;
|
|
7193
7199
|
allocated_data = allocator.Allocate(alloc_len);
|
|
7194
|
-
ptr = (char *)allocated_data
|
|
7200
|
+
ptr = (char *)allocated_data.get();
|
|
7195
7201
|
}
|
|
7196
7202
|
}
|
|
7197
7203
|
|
|
7198
7204
|
private:
|
|
7199
|
-
|
|
7205
|
+
AllocatedData allocated_data;
|
|
7200
7206
|
idx_t alloc_len = 0;
|
|
7201
7207
|
};
|
|
7202
7208
|
|
|
@@ -7699,7 +7705,6 @@ class FileMetaData;
|
|
|
7699
7705
|
namespace duckdb {
|
|
7700
7706
|
class Allocator;
|
|
7701
7707
|
class ClientContext;
|
|
7702
|
-
class ChunkCollection;
|
|
7703
7708
|
class BaseStatistics;
|
|
7704
7709
|
class TableFilterSet;
|
|
7705
7710
|
|
|
@@ -7734,6 +7739,12 @@ struct ParquetOptions {
|
|
|
7734
7739
|
explicit ParquetOptions(ClientContext &context);
|
|
7735
7740
|
|
|
7736
7741
|
bool binary_as_string = false;
|
|
7742
|
+
bool filename = false;
|
|
7743
|
+
bool hive_partitioning = false;
|
|
7744
|
+
|
|
7745
|
+
public:
|
|
7746
|
+
void Serialize(FieldWriter &writer) const;
|
|
7747
|
+
void Deserialize(FieldReader &reader);
|
|
7737
7748
|
};
|
|
7738
7749
|
|
|
7739
7750
|
class ParquetReader {
|
|
@@ -7830,7 +7841,7 @@ private:
|
|
|
7830
7841
|
#include "duckdb/common/exception.hpp"
|
|
7831
7842
|
#include "duckdb/common/mutex.hpp"
|
|
7832
7843
|
#include "duckdb/common/serializer/buffered_file_writer.hpp"
|
|
7833
|
-
#include "duckdb/common/types/
|
|
7844
|
+
#include "duckdb/common/types/column_data_collection.hpp"
|
|
7834
7845
|
#endif
|
|
7835
7846
|
|
|
7836
7847
|
|
|
@@ -7851,7 +7862,7 @@ namespace duckdb {
|
|
|
7851
7862
|
class BufferedSerializer;
|
|
7852
7863
|
class ParquetWriter;
|
|
7853
7864
|
class ColumnWriterPageState;
|
|
7854
|
-
class
|
|
7865
|
+
class BasicColumnWriterState;
|
|
7855
7866
|
|
|
7856
7867
|
class ColumnWriterState {
|
|
7857
7868
|
public:
|
|
@@ -7873,9 +7884,6 @@ public:
|
|
|
7873
7884
|
};
|
|
7874
7885
|
|
|
7875
7886
|
class ColumnWriter {
|
|
7876
|
-
//! We limit the uncompressed page size to 100MB
|
|
7877
|
-
// The max size in Parquet is 2GB, but we choose a more conservative limit
|
|
7878
|
-
static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
|
|
7879
7887
|
|
|
7880
7888
|
public:
|
|
7881
7889
|
ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path, idx_t max_repeat,
|
|
@@ -7899,46 +7907,35 @@ public:
|
|
|
7899
7907
|
idx_t max_repeat = 0, idx_t max_define = 1,
|
|
7900
7908
|
bool can_have_nulls = true);
|
|
7901
7909
|
|
|
7902
|
-
virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group);
|
|
7903
|
-
virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count);
|
|
7910
|
+
virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
|
|
7904
7911
|
|
|
7905
|
-
|
|
7906
|
-
virtual
|
|
7907
|
-
|
|
7908
|
-
|
|
7909
|
-
protected:
|
|
7910
|
-
void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
|
|
7911
|
-
uint16_t define_value, uint16_t null_value);
|
|
7912
|
-
void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
|
|
7913
|
-
|
|
7914
|
-
void WriteLevels(Serializer &temp_writer, const vector<uint16_t> &levels, idx_t max_value, idx_t start_offset,
|
|
7915
|
-
idx_t count);
|
|
7912
|
+
//! indicates whether the write need to analyse the data before preparing it
|
|
7913
|
+
virtual bool HasAnalyze() {
|
|
7914
|
+
return false;
|
|
7915
|
+
}
|
|
7916
7916
|
|
|
7917
|
-
virtual
|
|
7917
|
+
virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
|
|
7918
|
+
throw NotImplementedException("Writer does not need analysis");
|
|
7919
|
+
}
|
|
7918
7920
|
|
|
7919
|
-
|
|
7920
|
-
void
|
|
7921
|
-
|
|
7921
|
+
//! Called after all data has been passed to Analyze
|
|
7922
|
+
virtual void FinalizeAnalyze(ColumnWriterState &state) {
|
|
7923
|
+
throw NotImplementedException("Writer does not need analysis");
|
|
7924
|
+
}
|
|
7922
7925
|
|
|
7923
|
-
virtual void
|
|
7926
|
+
virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0;
|
|
7924
7927
|
|
|
7925
|
-
|
|
7926
|
-
virtual
|
|
7927
|
-
|
|
7928
|
-
virtual idx_t GetRowSize(Vector &vector, idx_t index);
|
|
7929
|
-
//! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
|
|
7930
|
-
virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
|
|
7931
|
-
Vector &vector, idx_t chunk_start, idx_t chunk_end);
|
|
7928
|
+
virtual void BeginWrite(ColumnWriterState &state) = 0;
|
|
7929
|
+
virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
|
|
7930
|
+
virtual void FinalizeWrite(ColumnWriterState &state) = 0;
|
|
7932
7931
|
|
|
7933
|
-
|
|
7934
|
-
|
|
7935
|
-
|
|
7936
|
-
|
|
7932
|
+
protected:
|
|
7933
|
+
void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
|
|
7934
|
+
uint16_t define_value, uint16_t null_value);
|
|
7935
|
+
void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
|
|
7937
7936
|
|
|
7938
7937
|
void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
|
|
7939
7938
|
unique_ptr<data_t[]> &compressed_buf);
|
|
7940
|
-
|
|
7941
|
-
void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
|
|
7942
7939
|
};
|
|
7943
7940
|
|
|
7944
7941
|
} // namespace duckdb
|
|
@@ -7951,6 +7948,7 @@ class FileOpener;
|
|
|
7951
7948
|
|
|
7952
7949
|
class ParquetWriter {
|
|
7953
7950
|
friend class ColumnWriter;
|
|
7951
|
+
friend class BasicColumnWriter;
|
|
7954
7952
|
friend class ListColumnWriter;
|
|
7955
7953
|
friend class StructColumnWriter;
|
|
7956
7954
|
|
|
@@ -7959,7 +7957,7 @@ public:
|
|
|
7959
7957
|
vector<string> names, duckdb_parquet::format::CompressionCodec::type codec);
|
|
7960
7958
|
|
|
7961
7959
|
public:
|
|
7962
|
-
void Flush(
|
|
7960
|
+
void Flush(ColumnDataCollection &buffer);
|
|
7963
7961
|
void Finalize();
|
|
7964
7962
|
|
|
7965
7963
|
static duckdb_parquet::format::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
|