duckdb 0.8.2-dev2068.0 → 0.8.2-dev2133.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +4 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
- package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
- package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
- package/src/duckdb/extension/json/json_enums.cpp +105 -0
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +2 -0
- package/src/duckdb/extension/json/json_scan.cpp +44 -0
- package/src/duckdb/extension/json/serialize_json.cpp +92 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +3 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -0
- package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +89 -727
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +2 -1
- package/src/duckdb/src/common/local_file_system.cpp +17 -14
- package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +47 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +28 -0
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +22 -3
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +0 -11
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +1 -9
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +27 -0
- package/src/duckdb/src/function/scalar_function.cpp +2 -1
- package/src/duckdb/src/function/table/read_csv.cpp +18 -0
- package/src/duckdb/src/function/table/table_scan.cpp +35 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_function.cpp +4 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +32 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +45 -15
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +11 -2
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +81 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -0
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +13 -0
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +23 -0
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +47 -0
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +69 -0
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +9 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +6 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +190 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
- package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
- package/src/duckdb/ub_src_common_serializer.cpp +2 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
package/binding.gyp
CHANGED
@@ -14,6 +14,7 @@
|
|
14
14
|
"src/duckdb/ub_src_catalog_default.cpp",
|
15
15
|
"src/duckdb/ub_src_common_adbc.cpp",
|
16
16
|
"src/duckdb/ub_src_common.cpp",
|
17
|
+
"src/duckdb/ub_src_common_arrow_appender.cpp",
|
17
18
|
"src/duckdb/ub_src_common_arrow.cpp",
|
18
19
|
"src/duckdb/ub_src_common_crypto.cpp",
|
19
20
|
"src/duckdb/ub_src_common_enums.cpp",
|
@@ -206,6 +207,7 @@
|
|
206
207
|
"src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp",
|
207
208
|
"src/duckdb/extension/parquet/parquet_extension.cpp",
|
208
209
|
"src/duckdb/extension/parquet/column_writer.cpp",
|
210
|
+
"src/duckdb/extension/parquet/serialize_parquet.cpp",
|
209
211
|
"src/duckdb/extension/parquet/parquet_reader.cpp",
|
210
212
|
"src/duckdb/extension/parquet/parquet_timestamp.cpp",
|
211
213
|
"src/duckdb/extension/parquet/parquet_writer.cpp",
|
@@ -257,12 +259,14 @@
|
|
257
259
|
"src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
|
258
260
|
"src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
|
259
261
|
"src/duckdb/extension/json/buffered_json_reader.cpp",
|
262
|
+
"src/duckdb/extension/json/json_enums.cpp",
|
260
263
|
"src/duckdb/extension/json/json_extension.cpp",
|
261
264
|
"src/duckdb/extension/json/json_common.cpp",
|
262
265
|
"src/duckdb/extension/json/json_functions.cpp",
|
263
266
|
"src/duckdb/extension/json/json_scan.cpp",
|
264
267
|
"src/duckdb/extension/json/json_serializer.cpp",
|
265
268
|
"src/duckdb/extension/json/json_deserializer.cpp",
|
269
|
+
"src/duckdb/extension/json/serialize_json.cpp",
|
266
270
|
"src/duckdb/ub_extension_json_json_functions.cpp",
|
267
271
|
"src/duckdb/extension/json/yyjson/yyjson.cpp"
|
268
272
|
],
|
package/package.json
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
#include "duckdb/common/field_writer.hpp"
|
4
4
|
#include "duckdb/common/file_opener.hpp"
|
5
5
|
#include "duckdb/common/printer.hpp"
|
6
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
7
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
6
8
|
|
7
9
|
namespace duckdb {
|
8
10
|
|
@@ -14,28 +14,11 @@
|
|
14
14
|
#include "duckdb/common/multi_file_reader.hpp"
|
15
15
|
#include "duckdb/common/mutex.hpp"
|
16
16
|
#include "json_common.hpp"
|
17
|
+
#include "json_enums.hpp"
|
18
|
+
#include "duckdb/common/enum_util.hpp"
|
17
19
|
|
18
20
|
namespace duckdb {
|
19
21
|
|
20
|
-
enum class JSONFormat : uint8_t {
|
21
|
-
//! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
|
22
|
-
AUTO_DETECT = 0,
|
23
|
-
//! One unit after another, newlines can be anywhere
|
24
|
-
UNSTRUCTURED = 1,
|
25
|
-
//! Units are separated by newlines, newlines do not occur within Units (NDJSON)
|
26
|
-
NEWLINE_DELIMITED = 2,
|
27
|
-
//! File is one big array of units
|
28
|
-
ARRAY = 3,
|
29
|
-
};
|
30
|
-
|
31
|
-
enum class JSONRecordType : uint8_t {
|
32
|
-
AUTO_DETECT = 0,
|
33
|
-
//! Sequential objects that are unpacked
|
34
|
-
RECORDS = 1,
|
35
|
-
//! Any other JSON type, e.g., ARRAY
|
36
|
-
VALUES = 2,
|
37
|
-
};
|
38
|
-
|
39
22
|
struct BufferedJSONReaderOptions {
|
40
23
|
public:
|
41
24
|
//! The format of the JSON
|
@@ -50,6 +33,9 @@ public:
|
|
50
33
|
public:
|
51
34
|
void Serialize(FieldWriter &writer) const;
|
52
35
|
void Deserialize(FieldReader &reader);
|
36
|
+
|
37
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
38
|
+
static BufferedJSONReaderOptions FormatDeserialize(FormatDeserializer &deserializer);
|
53
39
|
};
|
54
40
|
|
55
41
|
struct JSONBufferHandle {
|
@@ -0,0 +1,60 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// This file is automatically generated by scripts/generate_enums.py
|
3
|
+
// Do not edit this file manually, your changes will be overwritten
|
4
|
+
//===----------------------------------------------------------------------===//
|
5
|
+
|
6
|
+
#pragma once
|
7
|
+
|
8
|
+
#include "duckdb/common/constants.hpp"
|
9
|
+
#include "duckdb/common/enum_util.hpp"
|
10
|
+
|
11
|
+
namespace duckdb {
|
12
|
+
|
13
|
+
enum class JSONScanType : uint8_t {
|
14
|
+
INVALID = 0,
|
15
|
+
//! Read JSON straight to columnar data
|
16
|
+
READ_JSON = 1,
|
17
|
+
//! Read JSON values as strings
|
18
|
+
READ_JSON_OBJECTS = 2,
|
19
|
+
//! Sample run for schema detection
|
20
|
+
SAMPLE = 3,
|
21
|
+
};
|
22
|
+
|
23
|
+
enum class JSONRecordType : uint8_t {
|
24
|
+
AUTO_DETECT = 0,
|
25
|
+
//! Sequential objects that are unpacked
|
26
|
+
RECORDS = 1,
|
27
|
+
//! Any other JSON type, e.g., ARRAY
|
28
|
+
VALUES = 2,
|
29
|
+
};
|
30
|
+
|
31
|
+
enum class JSONFormat : uint8_t {
|
32
|
+
//! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
|
33
|
+
AUTO_DETECT = 0,
|
34
|
+
//! One unit after another, newlines can be anywhere
|
35
|
+
UNSTRUCTURED = 1,
|
36
|
+
//! Units are separated by newlines, newlines do not occur within Units (NDJSON)
|
37
|
+
NEWLINE_DELIMITED = 2,
|
38
|
+
//! File is one big array of units
|
39
|
+
ARRAY = 3,
|
40
|
+
};
|
41
|
+
|
42
|
+
template<>
|
43
|
+
const char* EnumUtil::ToChars<JSONScanType>(JSONScanType value);
|
44
|
+
|
45
|
+
template<>
|
46
|
+
JSONScanType EnumUtil::FromString<JSONScanType>(const char *value);
|
47
|
+
|
48
|
+
template<>
|
49
|
+
const char* EnumUtil::ToChars<JSONRecordType>(JSONRecordType value);
|
50
|
+
|
51
|
+
template<>
|
52
|
+
JSONRecordType EnumUtil::FromString<JSONRecordType>(const char *value);
|
53
|
+
|
54
|
+
template<>
|
55
|
+
const char* EnumUtil::ToChars<JSONFormat>(JSONFormat value);
|
56
|
+
|
57
|
+
template<>
|
58
|
+
JSONFormat EnumUtil::FromString<JSONFormat>(const char *value);
|
59
|
+
|
60
|
+
} // namespace duckdb
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "buffered_json_reader.hpp"
|
12
|
+
#include "json_enums.hpp"
|
12
13
|
#include "duckdb/common/multi_file_reader.hpp"
|
13
14
|
#include "duckdb/common/mutex.hpp"
|
14
15
|
#include "duckdb/common/pair.hpp"
|
@@ -19,16 +20,6 @@
|
|
19
20
|
|
20
21
|
namespace duckdb {
|
21
22
|
|
22
|
-
enum class JSONScanType : uint8_t {
|
23
|
-
INVALID = 0,
|
24
|
-
//! Read JSON straight to columnar data
|
25
|
-
READ_JSON = 1,
|
26
|
-
//! Read JSON values as strings
|
27
|
-
READ_JSON_OBJECTS = 2,
|
28
|
-
//! Sample run for schema detection
|
29
|
-
SAMPLE = 3,
|
30
|
-
};
|
31
|
-
|
32
23
|
struct JSONString {
|
33
24
|
public:
|
34
25
|
JSONString() {
|
@@ -104,6 +95,9 @@ public:
|
|
104
95
|
void Serialize(FieldWriter &writer) const;
|
105
96
|
void Deserialize(ClientContext &context, FieldReader &reader);
|
106
97
|
|
98
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
99
|
+
static unique_ptr<JSONScanData> FormatDeserialize(FormatDeserializer &deserializer);
|
100
|
+
|
107
101
|
public:
|
108
102
|
//! Scan type
|
109
103
|
JSONScanType type;
|
@@ -144,6 +138,12 @@ public:
|
|
144
138
|
|
145
139
|
//! The inferred avg tuple size
|
146
140
|
idx_t avg_tuple_size = 420;
|
141
|
+
|
142
|
+
private:
|
143
|
+
JSONScanData(ClientContext &context, vector<string> files, string date_format, string timestamp_format);
|
144
|
+
|
145
|
+
string GetDateFormat() const;
|
146
|
+
string GetTimestampFormat() const;
|
147
147
|
};
|
148
148
|
|
149
149
|
struct JSONScanInfo : public TableFunctionInfo {
|
@@ -295,6 +295,10 @@ public:
|
|
295
295
|
static unique_ptr<FunctionData> Deserialize(PlanDeserializationState &state, FieldReader &reader,
|
296
296
|
TableFunction &function);
|
297
297
|
|
298
|
+
static void FormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data,
|
299
|
+
const TableFunction &function);
|
300
|
+
static unique_ptr<FunctionData> FormatDeserialize(FormatDeserializer &deserializer, TableFunction &function);
|
301
|
+
|
298
302
|
static void TableFunctionDefaults(TableFunction &table_function);
|
299
303
|
};
|
300
304
|
|
@@ -44,6 +44,9 @@ public:
|
|
44
44
|
public:
|
45
45
|
void Serialize(FieldWriter &writer) const;
|
46
46
|
void Deserialize(FieldReader &reader);
|
47
|
+
|
48
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
49
|
+
static JSONTransformOptions FormatDeserialize(FormatDeserializer &deserializer);
|
47
50
|
};
|
48
51
|
|
49
52
|
struct TryParseDate {
|
@@ -0,0 +1,105 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// This file is automatically generated by scripts/generate_enums.py
|
3
|
+
// Do not edit this file manually, your changes will be overwritten
|
4
|
+
//===----------------------------------------------------------------------===//
|
5
|
+
|
6
|
+
#include "json_enums.hpp"
|
7
|
+
#include "duckdb/common/string_util.hpp"
|
8
|
+
|
9
|
+
namespace duckdb {
|
10
|
+
|
11
|
+
template<>
|
12
|
+
const char* EnumUtil::ToChars<JSONScanType>(JSONScanType value) {
|
13
|
+
switch(value) {
|
14
|
+
case JSONScanType::INVALID:
|
15
|
+
return "INVALID";
|
16
|
+
case JSONScanType::READ_JSON:
|
17
|
+
return "READ_JSON";
|
18
|
+
case JSONScanType::READ_JSON_OBJECTS:
|
19
|
+
return "READ_JSON_OBJECTS";
|
20
|
+
case JSONScanType::SAMPLE:
|
21
|
+
return "SAMPLE";
|
22
|
+
default:
|
23
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONScanType: '%d' not implemented", value));
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
template<>
|
28
|
+
JSONScanType EnumUtil::FromString<JSONScanType>(const char *value) {
|
29
|
+
if (StringUtil::Equals(value, "INVALID")) {
|
30
|
+
return JSONScanType::INVALID;
|
31
|
+
}
|
32
|
+
if (StringUtil::Equals(value, "READ_JSON")) {
|
33
|
+
return JSONScanType::READ_JSON;
|
34
|
+
}
|
35
|
+
if (StringUtil::Equals(value, "READ_JSON_OBJECTS")) {
|
36
|
+
return JSONScanType::READ_JSON_OBJECTS;
|
37
|
+
}
|
38
|
+
if (StringUtil::Equals(value, "SAMPLE")) {
|
39
|
+
return JSONScanType::SAMPLE;
|
40
|
+
}
|
41
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONScanType: '%s' not implemented", value));
|
42
|
+
}
|
43
|
+
|
44
|
+
template<>
|
45
|
+
const char* EnumUtil::ToChars<JSONRecordType>(JSONRecordType value) {
|
46
|
+
switch(value) {
|
47
|
+
case JSONRecordType::AUTO_DETECT:
|
48
|
+
return "AUTO_DETECT";
|
49
|
+
case JSONRecordType::RECORDS:
|
50
|
+
return "RECORDS";
|
51
|
+
case JSONRecordType::VALUES:
|
52
|
+
return "VALUES";
|
53
|
+
default:
|
54
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONRecordType: '%d' not implemented", value));
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
template<>
|
59
|
+
JSONRecordType EnumUtil::FromString<JSONRecordType>(const char *value) {
|
60
|
+
if (StringUtil::Equals(value, "AUTO_DETECT")) {
|
61
|
+
return JSONRecordType::AUTO_DETECT;
|
62
|
+
}
|
63
|
+
if (StringUtil::Equals(value, "RECORDS")) {
|
64
|
+
return JSONRecordType::RECORDS;
|
65
|
+
}
|
66
|
+
if (StringUtil::Equals(value, "VALUES")) {
|
67
|
+
return JSONRecordType::VALUES;
|
68
|
+
}
|
69
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONRecordType: '%s' not implemented", value));
|
70
|
+
}
|
71
|
+
|
72
|
+
template<>
|
73
|
+
const char* EnumUtil::ToChars<JSONFormat>(JSONFormat value) {
|
74
|
+
switch(value) {
|
75
|
+
case JSONFormat::AUTO_DETECT:
|
76
|
+
return "AUTO_DETECT";
|
77
|
+
case JSONFormat::UNSTRUCTURED:
|
78
|
+
return "UNSTRUCTURED";
|
79
|
+
case JSONFormat::NEWLINE_DELIMITED:
|
80
|
+
return "NEWLINE_DELIMITED";
|
81
|
+
case JSONFormat::ARRAY:
|
82
|
+
return "ARRAY";
|
83
|
+
default:
|
84
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONFormat: '%d' not implemented", value));
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
template<>
|
89
|
+
JSONFormat EnumUtil::FromString<JSONFormat>(const char *value) {
|
90
|
+
if (StringUtil::Equals(value, "AUTO_DETECT")) {
|
91
|
+
return JSONFormat::AUTO_DETECT;
|
92
|
+
}
|
93
|
+
if (StringUtil::Equals(value, "UNSTRUCTURED")) {
|
94
|
+
return JSONFormat::UNSTRUCTURED;
|
95
|
+
}
|
96
|
+
if (StringUtil::Equals(value, "NEWLINE_DELIMITED")) {
|
97
|
+
return JSONFormat::NEWLINE_DELIMITED;
|
98
|
+
}
|
99
|
+
if (StringUtil::Equals(value, "ARRAY")) {
|
100
|
+
return JSONFormat::ARRAY;
|
101
|
+
}
|
102
|
+
throw NotImplementedException(StringUtil::Format("Enum value of type JSONFormat: '%s' not implemented", value));
|
103
|
+
}
|
104
|
+
|
105
|
+
} // namespace duckdb
|
@@ -8,6 +8,8 @@
|
|
8
8
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
9
9
|
#include "json_functions.hpp"
|
10
10
|
#include "json_scan.hpp"
|
11
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
12
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
11
13
|
|
12
14
|
namespace duckdb {
|
13
15
|
|
@@ -5,12 +5,22 @@
|
|
5
5
|
#include "duckdb/main/extension_helper.hpp"
|
6
6
|
#include "duckdb/parallel/task_scheduler.hpp"
|
7
7
|
#include "duckdb/storage/buffer_manager.hpp"
|
8
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
9
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
8
10
|
|
9
11
|
namespace duckdb {
|
10
12
|
|
11
13
|
JSONScanData::JSONScanData() {
|
12
14
|
}
|
13
15
|
|
16
|
+
JSONScanData::JSONScanData(ClientContext &context, vector<string> files_p, string date_format_p,
|
17
|
+
string timestamp_format_p)
|
18
|
+
: files(std::move(files_p)), date_format(std::move(date_format_p)),
|
19
|
+
timestamp_format(std::move(timestamp_format_p)) {
|
20
|
+
InitializeReaders(context);
|
21
|
+
InitializeFormats();
|
22
|
+
}
|
23
|
+
|
14
24
|
void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
|
15
25
|
auto &info = input.info->Cast<JSONScanInfo>();
|
16
26
|
type = info.type;
|
@@ -164,6 +174,26 @@ void JSONScanData::Deserialize(ClientContext &context, FieldReader &reader) {
|
|
164
174
|
transform_options.date_format_map = &date_format_map;
|
165
175
|
}
|
166
176
|
|
177
|
+
string JSONScanData::GetDateFormat() const {
|
178
|
+
if (!date_format.empty()) {
|
179
|
+
return date_format;
|
180
|
+
} else if (date_format_map.HasFormats(LogicalTypeId::DATE)) {
|
181
|
+
return date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier;
|
182
|
+
} else {
|
183
|
+
return string();
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
string JSONScanData::GetTimestampFormat() const {
|
188
|
+
if (!timestamp_format.empty()) {
|
189
|
+
return timestamp_format;
|
190
|
+
} else if (date_format_map.HasFormats(LogicalTypeId::TIMESTAMP)) {
|
191
|
+
return date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier;
|
192
|
+
} else {
|
193
|
+
return string();
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
167
197
|
JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, const JSONScanData &bind_data_p)
|
168
198
|
: bind_data(bind_data_p), transform_options(bind_data.transform_options),
|
169
199
|
allocator(BufferManager::GetBufferManager(context).GetBufferAllocator()),
|
@@ -966,6 +996,18 @@ unique_ptr<FunctionData> JSONScan::Deserialize(PlanDeserializationState &state,
|
|
966
996
|
return std::move(result);
|
967
997
|
}
|
968
998
|
|
999
|
+
void JSONScan::FormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|
1000
|
+
const TableFunction &function) {
|
1001
|
+
auto &bind_data = bind_data_p->Cast<JSONScanData>();
|
1002
|
+
serializer.WriteProperty("scan_data", bind_data);
|
1003
|
+
}
|
1004
|
+
|
1005
|
+
unique_ptr<FunctionData> JSONScan::FormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
|
1006
|
+
unique_ptr<JSONScanData> result;
|
1007
|
+
deserializer.ReadProperty("scan_data", result);
|
1008
|
+
return std::move(result);
|
1009
|
+
}
|
1010
|
+
|
969
1011
|
void JSONScan::TableFunctionDefaults(TableFunction &table_function) {
|
970
1012
|
MultiFileReader::AddParameters(table_function);
|
971
1013
|
|
@@ -980,6 +1022,8 @@ void JSONScan::TableFunctionDefaults(TableFunction &table_function) {
|
|
980
1022
|
|
981
1023
|
table_function.serialize = Serialize;
|
982
1024
|
table_function.deserialize = Deserialize;
|
1025
|
+
table_function.format_serialize = FormatSerialize;
|
1026
|
+
table_function.format_deserialize = FormatDeserialize;
|
983
1027
|
|
984
1028
|
table_function.projection_pushdown = true;
|
985
1029
|
table_function.filter_pushdown = false;
|
@@ -0,0 +1,92 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// This file is automatically generated by scripts/generate_serialization.py
|
3
|
+
// Do not edit this file manually, your changes will be overwritten
|
4
|
+
//===----------------------------------------------------------------------===//
|
5
|
+
|
6
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
7
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
8
|
+
#include "buffered_json_reader.hpp"
|
9
|
+
#include "json_transform.hpp"
|
10
|
+
#include "json_scan.hpp"
|
11
|
+
|
12
|
+
namespace duckdb {
|
13
|
+
|
14
|
+
void BufferedJSONReaderOptions::FormatSerialize(FormatSerializer &serializer) const {
|
15
|
+
serializer.WriteProperty("format", format);
|
16
|
+
serializer.WriteProperty("record_type", record_type);
|
17
|
+
serializer.WriteProperty("compression", compression);
|
18
|
+
serializer.WriteProperty("file_options", file_options);
|
19
|
+
}
|
20
|
+
|
21
|
+
BufferedJSONReaderOptions BufferedJSONReaderOptions::FormatDeserialize(FormatDeserializer &deserializer) {
|
22
|
+
BufferedJSONReaderOptions result;
|
23
|
+
deserializer.ReadProperty("format", result.format);
|
24
|
+
deserializer.ReadProperty("record_type", result.record_type);
|
25
|
+
deserializer.ReadProperty("compression", result.compression);
|
26
|
+
deserializer.ReadProperty("file_options", result.file_options);
|
27
|
+
return result;
|
28
|
+
}
|
29
|
+
|
30
|
+
void JSONScanData::FormatSerialize(FormatSerializer &serializer) const {
|
31
|
+
serializer.WriteProperty("json_type", type);
|
32
|
+
serializer.WriteProperty("options", options);
|
33
|
+
serializer.WriteProperty("reader_bind", reader_bind);
|
34
|
+
serializer.WriteProperty("files", files);
|
35
|
+
serializer.WriteProperty("ignore_errors", ignore_errors);
|
36
|
+
serializer.WriteProperty("maximum_object_size", maximum_object_size);
|
37
|
+
serializer.WriteProperty("auto_detect", auto_detect);
|
38
|
+
serializer.WriteProperty("sample_size", sample_size);
|
39
|
+
serializer.WriteProperty("max_depth", max_depth);
|
40
|
+
serializer.WriteProperty("transform_options", transform_options);
|
41
|
+
serializer.WriteProperty("names", names);
|
42
|
+
serializer.WriteProperty("date_format", GetDateFormat());
|
43
|
+
serializer.WriteProperty("timestamp_format", GetTimestampFormat());
|
44
|
+
}
|
45
|
+
|
46
|
+
unique_ptr<JSONScanData> JSONScanData::FormatDeserialize(FormatDeserializer &deserializer) {
|
47
|
+
auto type = deserializer.ReadProperty<JSONScanType>("json_type");
|
48
|
+
auto options = deserializer.ReadProperty<BufferedJSONReaderOptions>("options");
|
49
|
+
auto reader_bind = deserializer.ReadProperty<MultiFileReaderBindData>("reader_bind");
|
50
|
+
auto files = deserializer.ReadProperty<vector<string>>("files");
|
51
|
+
auto ignore_errors = deserializer.ReadProperty<bool>("ignore_errors");
|
52
|
+
auto maximum_object_size = deserializer.ReadProperty<idx_t>("maximum_object_size");
|
53
|
+
auto auto_detect = deserializer.ReadProperty<bool>("auto_detect");
|
54
|
+
auto sample_size = deserializer.ReadProperty<idx_t>("sample_size");
|
55
|
+
auto max_depth = deserializer.ReadProperty<idx_t>("max_depth");
|
56
|
+
auto transform_options = deserializer.ReadProperty<JSONTransformOptions>("transform_options");
|
57
|
+
auto names = deserializer.ReadProperty<vector<string>>("names");
|
58
|
+
auto date_format = deserializer.ReadProperty<string>("date_format");
|
59
|
+
auto timestamp_format = deserializer.ReadProperty<string>("timestamp_format");
|
60
|
+
auto result = duckdb::unique_ptr<JSONScanData>(new JSONScanData(deserializer.Get<ClientContext &>(), std::move(files), std::move(date_format), std::move(timestamp_format)));
|
61
|
+
result->type = type;
|
62
|
+
result->options = options;
|
63
|
+
result->reader_bind = reader_bind;
|
64
|
+
result->ignore_errors = ignore_errors;
|
65
|
+
result->maximum_object_size = maximum_object_size;
|
66
|
+
result->auto_detect = auto_detect;
|
67
|
+
result->sample_size = sample_size;
|
68
|
+
result->max_depth = max_depth;
|
69
|
+
result->transform_options = transform_options;
|
70
|
+
result->names = std::move(names);
|
71
|
+
return result;
|
72
|
+
}
|
73
|
+
|
74
|
+
void JSONTransformOptions::FormatSerialize(FormatSerializer &serializer) const {
|
75
|
+
serializer.WriteProperty("strict_cast", strict_cast);
|
76
|
+
serializer.WriteProperty("error_duplicate_key", error_duplicate_key);
|
77
|
+
serializer.WriteProperty("error_missing_key", error_missing_key);
|
78
|
+
serializer.WriteProperty("error_unknown_key", error_unknown_key);
|
79
|
+
serializer.WriteProperty("delay_error", delay_error);
|
80
|
+
}
|
81
|
+
|
82
|
+
JSONTransformOptions JSONTransformOptions::FormatDeserialize(FormatDeserializer &deserializer) {
|
83
|
+
JSONTransformOptions result;
|
84
|
+
deserializer.ReadProperty("strict_cast", result.strict_cast);
|
85
|
+
deserializer.ReadProperty("error_duplicate_key", result.error_duplicate_key);
|
86
|
+
deserializer.ReadProperty("error_missing_key", result.error_missing_key);
|
87
|
+
deserializer.ReadProperty("error_unknown_key", result.error_unknown_key);
|
88
|
+
deserializer.ReadProperty("delay_error", result.delay_error);
|
89
|
+
return result;
|
90
|
+
}
|
91
|
+
|
92
|
+
} // namespace duckdb
|
@@ -76,6 +76,9 @@ struct ParquetOptions {
|
|
76
76
|
public:
|
77
77
|
void Serialize(FieldWriter &writer) const;
|
78
78
|
void Deserialize(FieldReader &reader);
|
79
|
+
|
80
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
81
|
+
static ParquetOptions FormatDeserialize(FormatDeserializer &deserializer);
|
79
82
|
};
|
80
83
|
|
81
84
|
class ParquetReader {
|
@@ -35,6 +35,8 @@
|
|
35
35
|
#include "duckdb/planner/operator/logical_get.hpp"
|
36
36
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
37
37
|
#include "duckdb/storage/table/row_group.hpp"
|
38
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
39
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
38
40
|
#endif
|
39
41
|
|
40
42
|
namespace duckdb {
|
@@ -181,6 +183,8 @@ public:
|
|
181
183
|
table_function.get_batch_index = ParquetScanGetBatchIndex;
|
182
184
|
table_function.serialize = ParquetScanSerialize;
|
183
185
|
table_function.deserialize = ParquetScanDeserialize;
|
186
|
+
table_function.format_serialize = ParquetScanFormatSerialize;
|
187
|
+
table_function.format_deserialize = ParquetScanFormatDeserialize;
|
184
188
|
table_function.get_batch_info = ParquetGetBatchInfo;
|
185
189
|
table_function.projection_pushdown = true;
|
186
190
|
table_function.filter_pushdown = true;
|
@@ -430,6 +434,25 @@ public:
|
|
430
434
|
return ParquetScanBindInternal(context, files, types, names, options);
|
431
435
|
}
|
432
436
|
|
437
|
+
static void ParquetScanFormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|
438
|
+
const TableFunction &function) {
|
439
|
+
auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
|
440
|
+
serializer.WriteProperty("files", bind_data.files);
|
441
|
+
serializer.WriteProperty("types", bind_data.types);
|
442
|
+
serializer.WriteProperty("names", bind_data.names);
|
443
|
+
serializer.WriteProperty("parquet_options", bind_data.parquet_options);
|
444
|
+
}
|
445
|
+
|
446
|
+
static unique_ptr<FunctionData> ParquetScanFormatDeserialize(FormatDeserializer &deserializer,
|
447
|
+
TableFunction &function) {
|
448
|
+
auto &context = deserializer.Get<ClientContext &>();
|
449
|
+
auto files = deserializer.ReadProperty<vector<string>>("files");
|
450
|
+
auto types = deserializer.ReadProperty<vector<LogicalType>>("types");
|
451
|
+
auto names = deserializer.ReadProperty<vector<string>>("names");
|
452
|
+
auto parquet_options = deserializer.ReadProperty<ParquetOptions>("parquet_options");
|
453
|
+
return ParquetScanBindInternal(context, files, types, names, parquet_options);
|
454
|
+
}
|
455
|
+
|
433
456
|
static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
434
457
|
if (!data_p.local_state) {
|
435
458
|
return;
|
@@ -72,6 +72,9 @@ static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, F
|
|
72
72
|
transport.read((uint8_t *)buf.ptr, 8);
|
73
73
|
|
74
74
|
if (memcmp(buf.ptr + 4, "PAR1", 4) != 0) {
|
75
|
+
if (memcmp(buf.ptr + 4, "PARE", 4) == 0) {
|
76
|
+
throw InvalidInputException("Encrypted Parquet files are not supported for file '%s'", file_handle.path);
|
77
|
+
}
|
75
78
|
throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path);
|
76
79
|
}
|
77
80
|
// read four-byte footer length from just before the end magic bytes
|
@@ -0,0 +1,26 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// This file is automatically generated by scripts/generate_serialization.py
|
3
|
+
// Do not edit this file manually, your changes will be overwritten
|
4
|
+
//===----------------------------------------------------------------------===//
|
5
|
+
|
6
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
7
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
8
|
+
#include "parquet_reader.hpp"
|
9
|
+
|
10
|
+
namespace duckdb {
|
11
|
+
|
12
|
+
void ParquetOptions::FormatSerialize(FormatSerializer &serializer) const {
|
13
|
+
serializer.WriteProperty("binary_as_string", binary_as_string);
|
14
|
+
serializer.WriteProperty("file_row_number", file_row_number);
|
15
|
+
serializer.WriteProperty("file_options", file_options);
|
16
|
+
}
|
17
|
+
|
18
|
+
ParquetOptions ParquetOptions::FormatDeserialize(FormatDeserializer &deserializer) {
|
19
|
+
ParquetOptions result;
|
20
|
+
deserializer.ReadProperty("binary_as_string", result.binary_as_string);
|
21
|
+
deserializer.ReadProperty("file_row_number", result.file_row_number);
|
22
|
+
deserializer.ReadProperty("file_options", result.file_options);
|
23
|
+
return result;
|
24
|
+
}
|
25
|
+
|
26
|
+
} // namespace duckdb
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "duckdb/common/arrow/arrow_appender.hpp"
|
2
|
+
#include "duckdb/common/arrow/appender/bool_data.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
void ArrowBoolData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
|
7
|
+
auto byte_count = (capacity + 7) / 8;
|
8
|
+
result.main_buffer.reserve(byte_count);
|
9
|
+
}
|
10
|
+
|
11
|
+
void ArrowBoolData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
|
12
|
+
idx_t size = to - from;
|
13
|
+
UnifiedVectorFormat format;
|
14
|
+
input.ToUnifiedFormat(input_size, format);
|
15
|
+
|
16
|
+
// we initialize both the validity and the bit set to 1's
|
17
|
+
ResizeValidity(append_data.validity, append_data.row_count + size);
|
18
|
+
ResizeValidity(append_data.main_buffer, append_data.row_count + size);
|
19
|
+
auto data = UnifiedVectorFormat::GetData<bool>(format);
|
20
|
+
|
21
|
+
auto result_data = append_data.main_buffer.GetData<uint8_t>();
|
22
|
+
auto validity_data = append_data.validity.GetData<uint8_t>();
|
23
|
+
uint8_t current_bit;
|
24
|
+
idx_t current_byte;
|
25
|
+
GetBitPosition(append_data.row_count, current_byte, current_bit);
|
26
|
+
for (idx_t i = from; i < to; i++) {
|
27
|
+
auto source_idx = format.sel->get_index(i);
|
28
|
+
// append the validity mask
|
29
|
+
if (!format.validity.RowIsValid(source_idx)) {
|
30
|
+
SetNull(append_data, validity_data, current_byte, current_bit);
|
31
|
+
} else if (!data[source_idx]) {
|
32
|
+
UnsetBit(result_data, current_byte, current_bit);
|
33
|
+
}
|
34
|
+
NextBit(current_byte, current_bit);
|
35
|
+
}
|
36
|
+
append_data.row_count += size;
|
37
|
+
}
|
38
|
+
|
39
|
+
void ArrowBoolData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) {
|
40
|
+
result->n_buffers = 2;
|
41
|
+
result->buffers[1] = append_data.main_buffer.data();
|
42
|
+
}
|
43
|
+
|
44
|
+
} // namespace duckdb
|