duckdb 0.8.2-dev2090.0 → 0.8.2-dev2208.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/binding.gyp +3 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
  5. package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
  6. package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
  7. package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
  8. package/src/duckdb/extension/json/json_enums.cpp +105 -0
  9. package/src/duckdb/extension/json/json_functions/json_transform.cpp +2 -0
  10. package/src/duckdb/extension/json/json_functions.cpp +2 -1
  11. package/src/duckdb/extension/json/json_scan.cpp +44 -0
  12. package/src/duckdb/extension/json/serialize_json.cpp +92 -0
  13. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +3 -0
  14. package/src/duckdb/extension/parquet/parquet_extension.cpp +25 -1
  15. package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -0
  16. package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
  17. package/src/duckdb/src/catalog/catalog.cpp +2 -6
  18. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -4
  19. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +0 -4
  20. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
  21. package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
  22. package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
  23. package/src/duckdb/src/common/extra_type_info.cpp +24 -46
  24. package/src/duckdb/src/common/field_writer.cpp +0 -1
  25. package/src/duckdb/src/common/file_system.cpp +6 -6
  26. package/src/duckdb/src/common/filename_pattern.cpp +1 -1
  27. package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
  28. package/src/duckdb/src/common/local_file_system.cpp +17 -14
  29. package/src/duckdb/src/common/multi_file_reader.cpp +8 -5
  30. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
  31. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  32. package/src/duckdb/src/common/types.cpp +12 -56
  33. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  34. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
  35. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +47 -0
  36. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +28 -0
  37. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
  38. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +22 -3
  39. package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
  40. package/src/duckdb/src/execution/column_binding_resolver.cpp +3 -7
  41. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
  42. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +0 -10
  43. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
  44. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
  45. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +1 -2
  46. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +14 -1
  47. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +3 -17
  48. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
  49. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -5
  50. package/src/duckdb/src/function/aggregate/distributive/count.cpp +0 -11
  51. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +1 -9
  52. package/src/duckdb/src/function/cast/string_cast.cpp +0 -1
  53. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +27 -0
  54. package/src/duckdb/src/function/scalar_function.cpp +5 -20
  55. package/src/duckdb/src/function/table/read_csv.cpp +20 -1
  56. package/src/duckdb/src/function/table/system/test_all_types.cpp +4 -4
  57. package/src/duckdb/src/function/table/table_scan.cpp +35 -0
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/function/table_function.cpp +4 -3
  60. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
  62. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
  63. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +2 -6
  64. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
  65. package/src/duckdb/src/include/duckdb/common/file_system.hpp +8 -8
  66. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -1
  68. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +3 -1
  69. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
  71. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +32 -0
  72. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +45 -15
  73. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +10 -0
  74. package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
  75. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +14 -10
  76. package/src/duckdb/src/include/duckdb/common/types.hpp +5 -10
  77. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +2 -0
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  79. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +0 -4
  80. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -2
  81. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +11 -2
  82. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +81 -0
  83. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +8 -0
  84. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
  85. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -0
  86. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  87. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
  88. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +2 -2
  89. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  90. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
  91. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
  92. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  93. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  94. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  95. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +0 -2
  96. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -2
  97. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -4
  98. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
  99. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +10 -7
  100. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +5 -0
  101. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +7 -1
  102. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
  103. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
  104. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -3
  105. package/src/duckdb/src/main/attached_database.cpp +2 -2
  106. package/src/duckdb/src/main/database.cpp +1 -1
  107. package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
  108. package/src/duckdb/src/main/extension/extension_helper.cpp +13 -0
  109. package/src/duckdb/src/main/extension/extension_install.cpp +1 -1
  110. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  111. package/src/duckdb/src/optimizer/compressed_materialization.cpp +0 -1
  112. package/src/duckdb/src/optimizer/deliminator.cpp +7 -7
  113. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
  114. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
  115. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  116. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +3 -3
  117. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -23
  118. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -24
  119. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +3 -5
  120. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +23 -0
  121. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
  122. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +47 -0
  123. package/src/duckdb/src/planner/logical_operator.cpp +1 -6
  124. package/src/duckdb/src/planner/logical_operator_visitor.cpp +3 -6
  125. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -14
  126. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
  127. package/src/duckdb/src/planner/operator/logical_create_index.cpp +21 -12
  128. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +24 -0
  129. package/src/duckdb/src/planner/operator/logical_get.cpp +69 -0
  130. package/src/duckdb/src/planner/planner.cpp +0 -1
  131. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -5
  132. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -9
  133. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -0
  134. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +9 -0
  135. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +33 -0
  136. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +190 -0
  137. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
  138. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  139. package/src/duckdb/src/storage/wal_replay.cpp +1 -3
  140. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  141. package/src/duckdb/ub_src_planner_operator.cpp +0 -4
  142. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  143. package/src/statement.cpp +0 -2
  144. package/test/columns.test.ts +1 -2
  145. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
  146. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
  147. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
  148. package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -25
package/binding.gyp CHANGED
@@ -207,6 +207,7 @@
207
207
  "src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp",
208
208
  "src/duckdb/extension/parquet/parquet_extension.cpp",
209
209
  "src/duckdb/extension/parquet/column_writer.cpp",
210
+ "src/duckdb/extension/parquet/serialize_parquet.cpp",
210
211
  "src/duckdb/extension/parquet/parquet_reader.cpp",
211
212
  "src/duckdb/extension/parquet/parquet_timestamp.cpp",
212
213
  "src/duckdb/extension/parquet/parquet_writer.cpp",
@@ -258,12 +259,14 @@
258
259
  "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
259
260
  "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
260
261
  "src/duckdb/extension/json/buffered_json_reader.cpp",
262
+ "src/duckdb/extension/json/json_enums.cpp",
261
263
  "src/duckdb/extension/json/json_extension.cpp",
262
264
  "src/duckdb/extension/json/json_common.cpp",
263
265
  "src/duckdb/extension/json/json_functions.cpp",
264
266
  "src/duckdb/extension/json/json_scan.cpp",
265
267
  "src/duckdb/extension/json/json_serializer.cpp",
266
268
  "src/duckdb/extension/json/json_deserializer.cpp",
269
+ "src/duckdb/extension/json/serialize_json.cpp",
267
270
  "src/duckdb/ub_extension_json_json_functions.cpp",
268
271
  "src/duckdb/extension/json/yyjson/yyjson.cpp"
269
272
  ],
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev2090.0",
5
+ "version": "0.8.2-dev2208.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED
@@ -3,6 +3,8 @@
3
3
  #include "duckdb/common/field_writer.hpp"
4
4
  #include "duckdb/common/file_opener.hpp"
5
5
  #include "duckdb/common/printer.hpp"
6
+ #include "duckdb/common/serializer/format_serializer.hpp"
7
+ #include "duckdb/common/serializer/format_deserializer.hpp"
6
8
 
7
9
  namespace duckdb {
8
10
 
package/src/duckdb/extension/json/include/buffered_json_reader.hpp CHANGED
@@ -14,28 +14,11 @@
14
14
  #include "duckdb/common/multi_file_reader.hpp"
15
15
  #include "duckdb/common/mutex.hpp"
16
16
  #include "json_common.hpp"
17
+ #include "json_enums.hpp"
18
+ #include "duckdb/common/enum_util.hpp"
17
19
 
18
20
  namespace duckdb {
19
21
 
20
- enum class JSONFormat : uint8_t {
21
- //! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
22
- AUTO_DETECT = 0,
23
- //! One unit after another, newlines can be anywhere
24
- UNSTRUCTURED = 1,
25
- //! Units are separated by newlines, newlines do not occur within Units (NDJSON)
26
- NEWLINE_DELIMITED = 2,
27
- //! File is one big array of units
28
- ARRAY = 3,
29
- };
30
-
31
- enum class JSONRecordType : uint8_t {
32
- AUTO_DETECT = 0,
33
- //! Sequential objects that are unpacked
34
- RECORDS = 1,
35
- //! Any other JSON type, e.g., ARRAY
36
- VALUES = 2,
37
- };
38
-
39
22
  struct BufferedJSONReaderOptions {
40
23
  public:
41
24
  //! The format of the JSON
@@ -50,6 +33,9 @@ public:
50
33
  public:
51
34
  void Serialize(FieldWriter &writer) const;
52
35
  void Deserialize(FieldReader &reader);
36
+
37
+ void FormatSerialize(FormatSerializer &serializer) const;
38
+ static BufferedJSONReaderOptions FormatDeserialize(FormatDeserializer &deserializer);
53
39
  };
54
40
 
55
41
  struct JSONBufferHandle {
package/src/duckdb/extension/json/include/json_enums.hpp ADDED
@@ -0,0 +1,60 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // This file is automatically generated by scripts/generate_enums.py
3
+ // Do not edit this file manually, your changes will be overwritten
4
+ //===----------------------------------------------------------------------===//
5
+
6
+ #pragma once
7
+
8
+ #include "duckdb/common/constants.hpp"
9
+ #include "duckdb/common/enum_util.hpp"
10
+
11
+ namespace duckdb {
12
+
13
+ enum class JSONScanType : uint8_t {
14
+ INVALID = 0,
15
+ //! Read JSON straight to columnar data
16
+ READ_JSON = 1,
17
+ //! Read JSON values as strings
18
+ READ_JSON_OBJECTS = 2,
19
+ //! Sample run for schema detection
20
+ SAMPLE = 3,
21
+ };
22
+
23
+ enum class JSONRecordType : uint8_t {
24
+ AUTO_DETECT = 0,
25
+ //! Sequential objects that are unpacked
26
+ RECORDS = 1,
27
+ //! Any other JSON type, e.g., ARRAY
28
+ VALUES = 2,
29
+ };
30
+
31
+ enum class JSONFormat : uint8_t {
32
+ //! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
33
+ AUTO_DETECT = 0,
34
+ //! One unit after another, newlines can be anywhere
35
+ UNSTRUCTURED = 1,
36
+ //! Units are separated by newlines, newlines do not occur within Units (NDJSON)
37
+ NEWLINE_DELIMITED = 2,
38
+ //! File is one big array of units
39
+ ARRAY = 3,
40
+ };
41
+
42
+ template<>
43
+ const char* EnumUtil::ToChars<JSONScanType>(JSONScanType value);
44
+
45
+ template<>
46
+ JSONScanType EnumUtil::FromString<JSONScanType>(const char *value);
47
+
48
+ template<>
49
+ const char* EnumUtil::ToChars<JSONRecordType>(JSONRecordType value);
50
+
51
+ template<>
52
+ JSONRecordType EnumUtil::FromString<JSONRecordType>(const char *value);
53
+
54
+ template<>
55
+ const char* EnumUtil::ToChars<JSONFormat>(JSONFormat value);
56
+
57
+ template<>
58
+ JSONFormat EnumUtil::FromString<JSONFormat>(const char *value);
59
+
60
+ } // namespace duckdb
package/src/duckdb/extension/json/include/json_scan.hpp CHANGED
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "buffered_json_reader.hpp"
12
+ #include "json_enums.hpp"
12
13
  #include "duckdb/common/multi_file_reader.hpp"
13
14
  #include "duckdb/common/mutex.hpp"
14
15
  #include "duckdb/common/pair.hpp"
@@ -19,16 +20,6 @@
19
20
 
20
21
  namespace duckdb {
21
22
 
22
- enum class JSONScanType : uint8_t {
23
- INVALID = 0,
24
- //! Read JSON straight to columnar data
25
- READ_JSON = 1,
26
- //! Read JSON values as strings
27
- READ_JSON_OBJECTS = 2,
28
- //! Sample run for schema detection
29
- SAMPLE = 3,
30
- };
31
-
32
23
  struct JSONString {
33
24
  public:
34
25
  JSONString() {
@@ -104,6 +95,9 @@ public:
104
95
  void Serialize(FieldWriter &writer) const;
105
96
  void Deserialize(ClientContext &context, FieldReader &reader);
106
97
 
98
+ void FormatSerialize(FormatSerializer &serializer) const;
99
+ static unique_ptr<JSONScanData> FormatDeserialize(FormatDeserializer &deserializer);
100
+
107
101
  public:
108
102
  //! Scan type
109
103
  JSONScanType type;
@@ -144,6 +138,12 @@ public:
144
138
 
145
139
  //! The inferred avg tuple size
146
140
  idx_t avg_tuple_size = 420;
141
+
142
+ private:
143
+ JSONScanData(ClientContext &context, vector<string> files, string date_format, string timestamp_format);
144
+
145
+ string GetDateFormat() const;
146
+ string GetTimestampFormat() const;
147
147
  };
148
148
 
149
149
  struct JSONScanInfo : public TableFunctionInfo {
@@ -295,6 +295,10 @@ public:
295
295
  static unique_ptr<FunctionData> Deserialize(PlanDeserializationState &state, FieldReader &reader,
296
296
  TableFunction &function);
297
297
 
298
+ static void FormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data,
299
+ const TableFunction &function);
300
+ static unique_ptr<FunctionData> FormatDeserialize(FormatDeserializer &deserializer, TableFunction &function);
301
+
298
302
  static void TableFunctionDefaults(TableFunction &table_function);
299
303
  };
300
304
 
package/src/duckdb/extension/json/include/json_transform.hpp CHANGED
@@ -44,6 +44,9 @@ public:
44
44
  public:
45
45
  void Serialize(FieldWriter &writer) const;
46
46
  void Deserialize(FieldReader &reader);
47
+
48
+ void FormatSerialize(FormatSerializer &serializer) const;
49
+ static JSONTransformOptions FormatDeserialize(FormatDeserializer &deserializer);
47
50
  };
48
51
 
49
52
  struct TryParseDate {
package/src/duckdb/extension/json/json_enums.cpp ADDED
@@ -0,0 +1,105 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // This file is automatically generated by scripts/generate_enums.py
3
+ // Do not edit this file manually, your changes will be overwritten
4
+ //===----------------------------------------------------------------------===//
5
+
6
+ #include "json_enums.hpp"
7
+ #include "duckdb/common/string_util.hpp"
8
+
9
+ namespace duckdb {
10
+
11
+ template<>
12
+ const char* EnumUtil::ToChars<JSONScanType>(JSONScanType value) {
13
+ switch(value) {
14
+ case JSONScanType::INVALID:
15
+ return "INVALID";
16
+ case JSONScanType::READ_JSON:
17
+ return "READ_JSON";
18
+ case JSONScanType::READ_JSON_OBJECTS:
19
+ return "READ_JSON_OBJECTS";
20
+ case JSONScanType::SAMPLE:
21
+ return "SAMPLE";
22
+ default:
23
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONScanType: '%d' not implemented", value));
24
+ }
25
+ }
26
+
27
+ template<>
28
+ JSONScanType EnumUtil::FromString<JSONScanType>(const char *value) {
29
+ if (StringUtil::Equals(value, "INVALID")) {
30
+ return JSONScanType::INVALID;
31
+ }
32
+ if (StringUtil::Equals(value, "READ_JSON")) {
33
+ return JSONScanType::READ_JSON;
34
+ }
35
+ if (StringUtil::Equals(value, "READ_JSON_OBJECTS")) {
36
+ return JSONScanType::READ_JSON_OBJECTS;
37
+ }
38
+ if (StringUtil::Equals(value, "SAMPLE")) {
39
+ return JSONScanType::SAMPLE;
40
+ }
41
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONScanType: '%s' not implemented", value));
42
+ }
43
+
44
+ template<>
45
+ const char* EnumUtil::ToChars<JSONRecordType>(JSONRecordType value) {
46
+ switch(value) {
47
+ case JSONRecordType::AUTO_DETECT:
48
+ return "AUTO_DETECT";
49
+ case JSONRecordType::RECORDS:
50
+ return "RECORDS";
51
+ case JSONRecordType::VALUES:
52
+ return "VALUES";
53
+ default:
54
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONRecordType: '%d' not implemented", value));
55
+ }
56
+ }
57
+
58
+ template<>
59
+ JSONRecordType EnumUtil::FromString<JSONRecordType>(const char *value) {
60
+ if (StringUtil::Equals(value, "AUTO_DETECT")) {
61
+ return JSONRecordType::AUTO_DETECT;
62
+ }
63
+ if (StringUtil::Equals(value, "RECORDS")) {
64
+ return JSONRecordType::RECORDS;
65
+ }
66
+ if (StringUtil::Equals(value, "VALUES")) {
67
+ return JSONRecordType::VALUES;
68
+ }
69
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONRecordType: '%s' not implemented", value));
70
+ }
71
+
72
+ template<>
73
+ const char* EnumUtil::ToChars<JSONFormat>(JSONFormat value) {
74
+ switch(value) {
75
+ case JSONFormat::AUTO_DETECT:
76
+ return "AUTO_DETECT";
77
+ case JSONFormat::UNSTRUCTURED:
78
+ return "UNSTRUCTURED";
79
+ case JSONFormat::NEWLINE_DELIMITED:
80
+ return "NEWLINE_DELIMITED";
81
+ case JSONFormat::ARRAY:
82
+ return "ARRAY";
83
+ default:
84
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONFormat: '%d' not implemented", value));
85
+ }
86
+ }
87
+
88
+ template<>
89
+ JSONFormat EnumUtil::FromString<JSONFormat>(const char *value) {
90
+ if (StringUtil::Equals(value, "AUTO_DETECT")) {
91
+ return JSONFormat::AUTO_DETECT;
92
+ }
93
+ if (StringUtil::Equals(value, "UNSTRUCTURED")) {
94
+ return JSONFormat::UNSTRUCTURED;
95
+ }
96
+ if (StringUtil::Equals(value, "NEWLINE_DELIMITED")) {
97
+ return JSONFormat::NEWLINE_DELIMITED;
98
+ }
99
+ if (StringUtil::Equals(value, "ARRAY")) {
100
+ return JSONFormat::ARRAY;
101
+ }
102
+ throw NotImplementedException(StringUtil::Format("Enum value of type JSONFormat: '%s' not implemented", value));
103
+ }
104
+
105
+ } // namespace duckdb
package/src/duckdb/extension/json/json_functions/json_transform.cpp CHANGED
@@ -8,6 +8,8 @@
8
8
  #include "duckdb/function/scalar/nested_functions.hpp"
9
9
  #include "json_functions.hpp"
10
10
  #include "json_scan.hpp"
11
+ #include "duckdb/common/serializer/format_serializer.hpp"
12
+ #include "duckdb/common/serializer/format_deserializer.hpp"
11
13
 
12
14
  namespace duckdb {
13
15
 
package/src/duckdb/extension/json/json_functions.cpp CHANGED
@@ -209,7 +209,8 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
209
209
  table_function->function = make_uniq<FunctionExpression>("read_json_auto", std::move(children));
210
210
 
211
211
  if (!FileSystem::HasGlob(table_name)) {
212
- table_function->alias = FileSystem::ExtractBaseName(table_name);
212
+ auto &fs = FileSystem::GetFileSystem(context);
213
+ table_function->alias = fs.ExtractBaseName(table_name);
213
214
  }
214
215
 
215
216
  return std::move(table_function);
package/src/duckdb/extension/json/json_scan.cpp CHANGED
@@ -5,12 +5,22 @@
5
5
  #include "duckdb/main/extension_helper.hpp"
6
6
  #include "duckdb/parallel/task_scheduler.hpp"
7
7
  #include "duckdb/storage/buffer_manager.hpp"
8
+ #include "duckdb/common/serializer/format_serializer.hpp"
9
+ #include "duckdb/common/serializer/format_deserializer.hpp"
8
10
 
9
11
  namespace duckdb {
10
12
 
11
13
  JSONScanData::JSONScanData() {
12
14
  }
13
15
 
16
+ JSONScanData::JSONScanData(ClientContext &context, vector<string> files_p, string date_format_p,
17
+ string timestamp_format_p)
18
+ : files(std::move(files_p)), date_format(std::move(date_format_p)),
19
+ timestamp_format(std::move(timestamp_format_p)) {
20
+ InitializeReaders(context);
21
+ InitializeFormats();
22
+ }
23
+
14
24
  void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
15
25
  auto &info = input.info->Cast<JSONScanInfo>();
16
26
  type = info.type;
@@ -164,6 +174,26 @@ void JSONScanData::Deserialize(ClientContext &context, FieldReader &reader) {
164
174
  transform_options.date_format_map = &date_format_map;
165
175
  }
166
176
 
177
+ string JSONScanData::GetDateFormat() const {
178
+ if (!date_format.empty()) {
179
+ return date_format;
180
+ } else if (date_format_map.HasFormats(LogicalTypeId::DATE)) {
181
+ return date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier;
182
+ } else {
183
+ return string();
184
+ }
185
+ }
186
+
187
+ string JSONScanData::GetTimestampFormat() const {
188
+ if (!timestamp_format.empty()) {
189
+ return timestamp_format;
190
+ } else if (date_format_map.HasFormats(LogicalTypeId::TIMESTAMP)) {
191
+ return date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier;
192
+ } else {
193
+ return string();
194
+ }
195
+ }
196
+
167
197
  JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, const JSONScanData &bind_data_p)
168
198
  : bind_data(bind_data_p), transform_options(bind_data.transform_options),
169
199
  allocator(BufferManager::GetBufferManager(context).GetBufferAllocator()),
@@ -966,6 +996,18 @@ unique_ptr<FunctionData> JSONScan::Deserialize(PlanDeserializationState &state,
966
996
  return std::move(result);
967
997
  }
968
998
 
999
+ void JSONScan::FormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
1000
+ const TableFunction &function) {
1001
+ auto &bind_data = bind_data_p->Cast<JSONScanData>();
1002
+ serializer.WriteProperty("scan_data", bind_data);
1003
+ }
1004
+
1005
+ unique_ptr<FunctionData> JSONScan::FormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
1006
+ unique_ptr<JSONScanData> result;
1007
+ deserializer.ReadProperty("scan_data", result);
1008
+ return std::move(result);
1009
+ }
1010
+
969
1011
  void JSONScan::TableFunctionDefaults(TableFunction &table_function) {
970
1012
  MultiFileReader::AddParameters(table_function);
971
1013
 
@@ -980,6 +1022,8 @@ void JSONScan::TableFunctionDefaults(TableFunction &table_function) {
980
1022
 
981
1023
  table_function.serialize = Serialize;
982
1024
  table_function.deserialize = Deserialize;
1025
+ table_function.format_serialize = FormatSerialize;
1026
+ table_function.format_deserialize = FormatDeserialize;
983
1027
 
984
1028
  table_function.projection_pushdown = true;
985
1029
  table_function.filter_pushdown = false;
package/src/duckdb/extension/json/serialize_json.cpp ADDED
@@ -0,0 +1,92 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // This file is automatically generated by scripts/generate_serialization.py
3
+ // Do not edit this file manually, your changes will be overwritten
4
+ //===----------------------------------------------------------------------===//
5
+
6
+ #include "duckdb/common/serializer/format_serializer.hpp"
7
+ #include "duckdb/common/serializer/format_deserializer.hpp"
8
+ #include "buffered_json_reader.hpp"
9
+ #include "json_transform.hpp"
10
+ #include "json_scan.hpp"
11
+
12
+ namespace duckdb {
13
+
14
+ void BufferedJSONReaderOptions::FormatSerialize(FormatSerializer &serializer) const {
15
+ serializer.WriteProperty("format", format);
16
+ serializer.WriteProperty("record_type", record_type);
17
+ serializer.WriteProperty("compression", compression);
18
+ serializer.WriteProperty("file_options", file_options);
19
+ }
20
+
21
+ BufferedJSONReaderOptions BufferedJSONReaderOptions::FormatDeserialize(FormatDeserializer &deserializer) {
22
+ BufferedJSONReaderOptions result;
23
+ deserializer.ReadProperty("format", result.format);
24
+ deserializer.ReadProperty("record_type", result.record_type);
25
+ deserializer.ReadProperty("compression", result.compression);
26
+ deserializer.ReadProperty("file_options", result.file_options);
27
+ return result;
28
+ }
29
+
30
+ void JSONScanData::FormatSerialize(FormatSerializer &serializer) const {
31
+ serializer.WriteProperty("json_type", type);
32
+ serializer.WriteProperty("options", options);
33
+ serializer.WriteProperty("reader_bind", reader_bind);
34
+ serializer.WriteProperty("files", files);
35
+ serializer.WriteProperty("ignore_errors", ignore_errors);
36
+ serializer.WriteProperty("maximum_object_size", maximum_object_size);
37
+ serializer.WriteProperty("auto_detect", auto_detect);
38
+ serializer.WriteProperty("sample_size", sample_size);
39
+ serializer.WriteProperty("max_depth", max_depth);
40
+ serializer.WriteProperty("transform_options", transform_options);
41
+ serializer.WriteProperty("names", names);
42
+ serializer.WriteProperty("date_format", GetDateFormat());
43
+ serializer.WriteProperty("timestamp_format", GetTimestampFormat());
44
+ }
45
+
46
+ unique_ptr<JSONScanData> JSONScanData::FormatDeserialize(FormatDeserializer &deserializer) {
47
+ auto type = deserializer.ReadProperty<JSONScanType>("json_type");
48
+ auto options = deserializer.ReadProperty<BufferedJSONReaderOptions>("options");
49
+ auto reader_bind = deserializer.ReadProperty<MultiFileReaderBindData>("reader_bind");
50
+ auto files = deserializer.ReadProperty<vector<string>>("files");
51
+ auto ignore_errors = deserializer.ReadProperty<bool>("ignore_errors");
52
+ auto maximum_object_size = deserializer.ReadProperty<idx_t>("maximum_object_size");
53
+ auto auto_detect = deserializer.ReadProperty<bool>("auto_detect");
54
+ auto sample_size = deserializer.ReadProperty<idx_t>("sample_size");
55
+ auto max_depth = deserializer.ReadProperty<idx_t>("max_depth");
56
+ auto transform_options = deserializer.ReadProperty<JSONTransformOptions>("transform_options");
57
+ auto names = deserializer.ReadProperty<vector<string>>("names");
58
+ auto date_format = deserializer.ReadProperty<string>("date_format");
59
+ auto timestamp_format = deserializer.ReadProperty<string>("timestamp_format");
60
+ auto result = duckdb::unique_ptr<JSONScanData>(new JSONScanData(deserializer.Get<ClientContext &>(), std::move(files), std::move(date_format), std::move(timestamp_format)));
61
+ result->type = type;
62
+ result->options = options;
63
+ result->reader_bind = reader_bind;
64
+ result->ignore_errors = ignore_errors;
65
+ result->maximum_object_size = maximum_object_size;
66
+ result->auto_detect = auto_detect;
67
+ result->sample_size = sample_size;
68
+ result->max_depth = max_depth;
69
+ result->transform_options = transform_options;
70
+ result->names = std::move(names);
71
+ return result;
72
+ }
73
+
74
+ void JSONTransformOptions::FormatSerialize(FormatSerializer &serializer) const {
75
+ serializer.WriteProperty("strict_cast", strict_cast);
76
+ serializer.WriteProperty("error_duplicate_key", error_duplicate_key);
77
+ serializer.WriteProperty("error_missing_key", error_missing_key);
78
+ serializer.WriteProperty("error_unknown_key", error_unknown_key);
79
+ serializer.WriteProperty("delay_error", delay_error);
80
+ }
81
+
82
+ JSONTransformOptions JSONTransformOptions::FormatDeserialize(FormatDeserializer &deserializer) {
83
+ JSONTransformOptions result;
84
+ deserializer.ReadProperty("strict_cast", result.strict_cast);
85
+ deserializer.ReadProperty("error_duplicate_key", result.error_duplicate_key);
86
+ deserializer.ReadProperty("error_missing_key", result.error_missing_key);
87
+ deserializer.ReadProperty("error_unknown_key", result.error_unknown_key);
88
+ deserializer.ReadProperty("delay_error", result.delay_error);
89
+ return result;
90
+ }
91
+
92
+ } // namespace duckdb
@@ -76,6 +76,9 @@ struct ParquetOptions {
76
76
  public:
77
77
  void Serialize(FieldWriter &writer) const;
78
78
  void Deserialize(FieldReader &reader);
79
+
80
+ void FormatSerialize(FormatSerializer &serializer) const;
81
+ static ParquetOptions FormatDeserialize(FormatDeserializer &deserializer);
79
82
  };
80
83
 
81
84
  class ParquetReader {
@@ -35,6 +35,8 @@
35
35
  #include "duckdb/planner/operator/logical_get.hpp"
36
36
  #include "duckdb/storage/statistics/base_statistics.hpp"
37
37
  #include "duckdb/storage/table/row_group.hpp"
38
+ #include "duckdb/common/serializer/format_serializer.hpp"
39
+ #include "duckdb/common/serializer/format_deserializer.hpp"
38
40
  #endif
39
41
 
40
42
  namespace duckdb {
@@ -181,6 +183,8 @@ public:
181
183
  table_function.get_batch_index = ParquetScanGetBatchIndex;
182
184
  table_function.serialize = ParquetScanSerialize;
183
185
  table_function.deserialize = ParquetScanDeserialize;
186
+ table_function.format_serialize = ParquetScanFormatSerialize;
187
+ table_function.format_deserialize = ParquetScanFormatDeserialize;
184
188
  table_function.get_batch_info = ParquetGetBatchInfo;
185
189
  table_function.projection_pushdown = true;
186
190
  table_function.filter_pushdown = true;
@@ -430,6 +434,25 @@ public:
430
434
  return ParquetScanBindInternal(context, files, types, names, options);
431
435
  }
432
436
 
437
// Serializes the bind data of a parquet scan through the FormatSerializer API.
// `bind_data_p` is cast to ParquetReadBindData and its `files`, `types`,
// `names` and `parquet_options` members are written as named properties.
// `function` is not used in the body.
// NOTE(review): `bind_data_p` is dereferenced without a visible null check;
// confirm that callers always pass bind data here.
+ static void ParquetScanFormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
438
+ const TableFunction &function) {
439
+ auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
440
+ serializer.WriteProperty("files", bind_data.files);
441
+ serializer.WriteProperty("types", bind_data.types);
442
+ serializer.WriteProperty("names", bind_data.names);
443
+ serializer.WriteProperty("parquet_options", bind_data.parquet_options);
444
+ }
445
+
446
// Counterpart of ParquetScanFormatSerialize: reconstructs parquet scan bind data.
// Fetches the ClientContext from the deserializer, reads the "files", "types",
// "names" and "parquet_options" properties, and passes them to
// ParquetScanBindInternal, whose result is returned.
// `function` is not used in the body.
+ static unique_ptr<FunctionData> ParquetScanFormatDeserialize(FormatDeserializer &deserializer,
447
+ TableFunction &function) {
448
+ auto &context = deserializer.Get<ClientContext &>();
449
+ auto files = deserializer.ReadProperty<vector<string>>("files");
450
+ auto types = deserializer.ReadProperty<vector<LogicalType>>("types");
451
+ auto names = deserializer.ReadProperty<vector<string>>("names");
452
+ auto parquet_options = deserializer.ReadProperty<ParquetOptions>("parquet_options");
453
+ return ParquetScanBindInternal(context, files, types, names, parquet_options);
454
+ }
455
+
433
456
  static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
434
457
  if (!data_p.local_state) {
435
458
  return;
@@ -938,7 +961,8 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
938
961
  table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
939
962
 
940
963
  if (!FileSystem::HasGlob(table_name)) {
941
- table_function->alias = FileSystem::ExtractBaseName(table_name);
964
+ auto &fs = FileSystem::GetFileSystem(context);
965
+ table_function->alias = fs.ExtractBaseName(table_name);
942
966
  }
943
967
 
944
968
  return std::move(table_function);
@@ -72,6 +72,9 @@ static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, F
72
72
  transport.read((uint8_t *)buf.ptr, 8);
73
73
 
74
74
  if (memcmp(buf.ptr + 4, "PAR1", 4) != 0) {
75
+ if (memcmp(buf.ptr + 4, "PARE", 4) == 0) {
76
+ throw InvalidInputException("Encrypted Parquet files are not supported for file '%s'", file_handle.path);
77
+ }
75
78
  throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path);
76
79
  }
77
80
  // read four-byte footer length from just before the end magic bytes
@@ -0,0 +1,26 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // This file is automatically generated by scripts/generate_serialization.py
3
+ // Do not edit this file manually, your changes will be overwritten
4
+ //===----------------------------------------------------------------------===//
5
+
6
+ #include "duckdb/common/serializer/format_serializer.hpp"
7
+ #include "duckdb/common/serializer/format_deserializer.hpp"
8
+ #include "parquet_reader.hpp"
9
+
10
+ namespace duckdb {
11
+
12
// Writes the ParquetOptions members `binary_as_string`, `file_row_number` and
// `file_options` to `serializer` as properties of the same names.
// Per this file's header it is generated by scripts/generate_serialization.py,
// so lasting changes belong in the generator input rather than here.
+ void ParquetOptions::FormatSerialize(FormatSerializer &serializer) const {
13
+ serializer.WriteProperty("binary_as_string", binary_as_string);
14
+ serializer.WriteProperty("file_row_number", file_row_number);
15
+ serializer.WriteProperty("file_options", file_options);
16
+ }
17
+
18
// Rebuilds a ParquetOptions from `deserializer`.
// Starts from a default-constructed object and reads "binary_as_string",
// "file_row_number" and "file_options" into the members of the same names,
// mirroring ParquetOptions::FormatSerialize. Returns the result by value.
+ ParquetOptions ParquetOptions::FormatDeserialize(FormatDeserializer &deserializer) {
19
+ ParquetOptions result;
20
+ deserializer.ReadProperty("binary_as_string", result.binary_as_string);
21
+ deserializer.ReadProperty("file_row_number", result.file_row_number);
22
+ deserializer.ReadProperty("file_options", result.file_options);
23
+ return result;
24
+ }
25
+
26
+ } // namespace duckdb
@@ -655,17 +655,13 @@ LogicalType Catalog::GetType(ClientContext &context, const string &schema, const
655
655
  if (!type_entry) {
656
656
  return LogicalType::INVALID;
657
657
  }
658
- auto result_type = type_entry->user_type;
659
- EnumType::SetCatalog(result_type, type_entry.get());
660
- return result_type;
658
+ return type_entry->user_type;
661
659
  }
662
660
 
663
661
  // Looks up the TypeCatalogEntry identified by (catalog_name, schema, name)
  // via Catalog::GetEntry and returns its `user_type`.
  // Diff note: the removed (-) rows copied `user_type` into a local and called
  // EnumType::SetCatalog on it before returning; the added (+) row returns
  // `type_entry.user_type` directly.
  LogicalType Catalog::GetType(ClientContext &context, const string &catalog_name, const string &schema,
664
662
  const string &name) {
665
663
  auto &type_entry = Catalog::GetEntry<TypeCatalogEntry>(context, catalog_name, schema, name);
666
- auto result_type = type_entry.user_type;
667
- EnumType::SetCatalog(result_type, &type_entry);
668
- return result_type;
664
+ return type_entry.user_type;
669
665
  }
670
666
 
671
667
  vector<reference<SchemaCatalogEntry>> Catalog::GetSchemas(ClientContext &context) {
@@ -527,10 +527,7 @@ unique_ptr<CatalogEntry> DuckTableEntry::DropNotNull(ClientContext &context, Dro
527
527
  }
528
528
 
529
529
  unique_ptr<CatalogEntry> DuckTableEntry::ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info) {
530
- if (info.target_type.id() == LogicalTypeId::USER) {
531
- info.target_type =
532
- Catalog::GetType(context, catalog.GetName(), schema.name, UserType::GetTypeName(info.target_type));
533
- }
530
+ Binder::BindLogicalType(context, info.target_type, &catalog, schema.name);
534
531
  auto change_idx = GetColumnIndex(info.column_name);
535
532
  auto create_info = make_uniq<CreateTableInfo>(schema, name);
536
533
  create_info->temporary = temporary;
@@ -172,10 +172,6 @@ const ColumnList &TableCatalogEntry::GetColumns() const {
172
172
  return columns;
173
173
  }
174
174
 
175
- ColumnList &TableCatalogEntry::GetColumnsMutable() {
176
- return columns;
177
- }
178
-
179
175
  // Returns a const reference to the column at logical index `idx`,
  // delegating the lookup to `columns.GetColumn`.
  const ColumnDefinition &TableCatalogEntry::GetColumn(LogicalIndex idx) {
180
176
  return columns.GetColumn(idx);
181
177
  }