duckdb 1.4.0 → 1.4.1-dev2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp +1 -4
  3. package/src/duckdb/extension/icu/icu-strptime.cpp +2 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +2 -4
  5. package/src/duckdb/extension/json/json_functions.cpp +5 -1
  6. package/src/duckdb/extension/parquet/column_writer.cpp +31 -21
  7. package/src/duckdb/extension/parquet/geo_parquet.cpp +21 -6
  8. package/src/duckdb/extension/parquet/include/column_writer.hpp +2 -2
  9. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +28 -1
  10. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -2
  11. package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +13 -0
  12. package/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp +4 -0
  13. package/src/duckdb/extension/parquet/parquet_extension.cpp +56 -1
  14. package/src/duckdb/extension/parquet/parquet_reader.cpp +4 -1
  15. package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -7
  16. package/src/duckdb/extension/parquet/parquet_writer.cpp +15 -8
  17. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +17 -4
  18. package/src/duckdb/extension/parquet/writer/array_column_writer.cpp +22 -28
  19. package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +17 -5
  20. package/src/duckdb/extension/parquet/writer/struct_column_writer.cpp +3 -2
  21. package/src/duckdb/src/catalog/catalog_search_path.cpp +2 -2
  22. package/src/duckdb/src/catalog/catalog_set.cpp +1 -2
  23. package/src/duckdb/src/common/enum_util.cpp +20 -0
  24. package/src/duckdb/src/common/file_system.cpp +0 -30
  25. package/src/duckdb/src/common/sorting/sort.cpp +25 -6
  26. package/src/duckdb/src/common/sorting/sorted_run_merger.cpp +1 -0
  27. package/src/duckdb/src/common/string_util.cpp +24 -0
  28. package/src/duckdb/src/common/virtual_file_system.cpp +59 -10
  29. package/src/duckdb/src/execution/index/art/art_merger.cpp +0 -3
  30. package/src/duckdb/src/execution/index/art/prefix.cpp +4 -0
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +2 -2
  33. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -1
  34. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +3 -3
  35. package/src/duckdb/src/function/table/system/duckdb_connection_count.cpp +45 -0
  36. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +11 -1
  37. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  38. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  39. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  40. package/src/duckdb/src/include/duckdb/common/string_util.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +4 -1
  42. package/src/duckdb/src/include/duckdb/function/scalar/variant_functions.hpp +1 -1
  43. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  44. package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +6 -6
  45. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +26 -3
  46. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +4 -0
  47. package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -0
  48. package/src/duckdb/src/include/duckdb/main/connection.hpp +0 -1
  49. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +0 -1
  50. package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +12 -1
  51. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  52. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -0
  53. package/src/duckdb/src/include/duckdb/main/relation/create_view_relation.hpp +2 -0
  54. package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -0
  55. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +2 -0
  56. package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -0
  60. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +2 -0
  61. package/src/duckdb/src/include/duckdb/main/relation.hpp +2 -1
  62. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +3 -1
  63. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -2
  64. package/src/duckdb/src/include/duckdb/planner/binder.hpp +62 -3
  65. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/planner/operator/logical_cte.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +3 -3
  68. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  69. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +2 -2
  71. package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -1
  73. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +5 -1
  74. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +9 -7
  75. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -0
  76. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +4 -4
  77. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -6
  78. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +4 -4
  79. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -4
  80. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
  81. package/src/duckdb/src/include/duckdb/storage/table/row_id_column_data.hpp +4 -4
  82. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -4
  83. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -4
  84. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  85. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +2 -1
  86. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +4 -1
  87. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +1 -1
  88. package/src/duckdb/src/logging/log_types.cpp +63 -0
  89. package/src/duckdb/src/main/attached_database.cpp +16 -3
  90. package/src/duckdb/src/main/client_context.cpp +27 -19
  91. package/src/duckdb/src/main/connection.cpp +2 -5
  92. package/src/duckdb/src/main/database_file_path_manager.cpp +23 -6
  93. package/src/duckdb/src/main/database_manager.cpp +18 -3
  94. package/src/duckdb/src/main/http/http_util.cpp +3 -1
  95. package/src/duckdb/src/main/relation/create_table_relation.cpp +8 -0
  96. package/src/duckdb/src/main/relation/create_view_relation.cpp +8 -0
  97. package/src/duckdb/src/main/relation/delete_relation.cpp +8 -0
  98. package/src/duckdb/src/main/relation/explain_relation.cpp +8 -0
  99. package/src/duckdb/src/main/relation/insert_relation.cpp +8 -0
  100. package/src/duckdb/src/main/relation/query_relation.cpp +4 -0
  101. package/src/duckdb/src/main/relation/update_relation.cpp +8 -0
  102. package/src/duckdb/src/main/relation/write_csv_relation.cpp +8 -0
  103. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +8 -0
  104. package/src/duckdb/src/main/relation.cpp +2 -2
  105. package/src/duckdb/src/optimizer/filter_combiner.cpp +7 -0
  106. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -3
  107. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +4 -1
  108. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +3 -7
  109. package/src/duckdb/src/parser/statement/relation_statement.cpp +1 -4
  110. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +2 -0
  111. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +8 -6
  112. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -5
  113. package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +10 -2
  114. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +20 -3
  115. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +8 -3
  116. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -2
  117. package/src/duckdb/src/planner/binder.cpp +2 -2
  118. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +9 -13
  119. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +4 -0
  120. package/src/duckdb/src/planner/expression_binder.cpp +3 -1
  121. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +2 -2
  122. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +12 -14
  123. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +1 -1
  124. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +2 -2
  125. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -2
  126. package/src/duckdb/src/storage/data_table.cpp +2 -2
  127. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  128. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +67 -25
  129. package/src/duckdb/src/storage/statistics/string_stats.cpp +8 -0
  130. package/src/duckdb/src/storage/table/array_column_data.cpp +6 -5
  131. package/src/duckdb/src/storage/table/column_data.cpp +23 -9
  132. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +15 -1
  133. package/src/duckdb/src/storage/table/list_column_data.cpp +5 -4
  134. package/src/duckdb/src/storage/table/row_group.cpp +8 -8
  135. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -8
  136. package/src/duckdb/src/storage/table/row_id_column_data.cpp +5 -4
  137. package/src/duckdb/src/storage/table/standard_column_data.cpp +9 -8
  138. package/src/duckdb/src/storage/table/struct_column_data.cpp +10 -9
  139. package/src/duckdb/src/storage/table/update_segment.cpp +12 -10
  140. package/src/duckdb/src/transaction/commit_state.cpp +18 -0
  141. package/src/duckdb/src/transaction/duck_transaction.cpp +2 -10
  142. package/src/duckdb/src/transaction/wal_write_state.cpp +5 -5
  143. package/src/duckdb/third_party/httplib/httplib.hpp +6 -1
  144. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "1.4.0",
+  "version": "1.4.1-dev2.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp CHANGED
@@ -53,10 +53,7 @@ unique_ptr<FunctionData> CurrentSettingBind(ClientContext &context, ScalarFuncti
     if (!context.TryGetCurrentSetting(key, val)) {
         auto extension_name = Catalog::AutoloadExtensionByConfigName(context, key);
         // If autoloader didn't throw, the config is now available
-        if (!context.TryGetCurrentSetting(key, val)) {
-            throw InternalException("Extension %s did not provide the '%s' config setting",
-                                    extension_name.ToStdString(), key);
-        }
+        context.TryGetCurrentSetting(key, val);
     }
 
     bound_function.return_type = val.type();

package/src/duckdb/extension/icu/icu-strptime.cpp CHANGED
@@ -221,8 +221,9 @@ struct ICUStrptime : public ICUDateFunc {
             if (!error.empty()) {
                 throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
             }
-            // If any format has UTC offsets, then we have to produce TSTZ
+            // If any format has UTC offsets or names, then we have to produce TSTZ
             has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME);
+            has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET);
             formats.emplace_back(format);
         }
         if (has_tz) {
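
In practical terms, a strptime format containing the UTC offset specifier %z should now yield TIMESTAMP WITH TIME ZONE, as the time zone name specifier %Z already did. A minimal sketch using the DuckDB C++ API (the literal is illustrative; an ICU-enabled build is assumed):

// Hedged sketch: %z in the format should now force a TIMESTAMPTZ result.
#include "duckdb.hpp"

int main() {
    duckdb::DuckDB db(nullptr); // in-memory database
    duckdb::Connection con(db);
    auto result = con.Query("SELECT strptime('2024-06-01 12:00:00+02', '%Y-%m-%d %H:%M:%S%z') AS ts");
    result->Print(); // expected column type: TIMESTAMP WITH TIME ZONE, not plain TIMESTAMP
    return 0;
}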

package/src/duckdb/extension/json/include/json_common.hpp CHANGED
@@ -13,6 +13,7 @@
 #include "duckdb/common/operator/string_cast.hpp"
 #include "duckdb/planner/expression/bound_function_expression.hpp"
 #include "yyjson.hpp"
+#include "duckdb/common/types/blob.hpp"
 
 using namespace duckdb_yyjson; // NOLINT
 
@@ -228,11 +229,8 @@ public:
 
     static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
         D_ASSERT(error.code != YYJSON_READ_SUCCESS);
-        // Go to blob so we can have a better error message for weird strings
-        auto blob = Value::BLOB(string(data, length));
         // Truncate, so we don't print megabytes worth of JSON
-        string input = blob.ToString();
-        input = input.length() > 50 ? string(input.c_str(), 47) + "..." : input;
+        auto input = length > 50 ? string(data, 47) + "..." : string(data, length);
         // Have to replace \r, otherwise output is unreadable
         input = StringUtil::Replace(input, "\r", "\\r");
         return StringUtil::Format("Malformed JSON at byte %lld of input: %s. %s Input: \"%s\"", error.pos, error.msg,

package/src/duckdb/extension/json/json_functions.cpp CHANGED
@@ -394,7 +394,11 @@ void JSONFunctions::RegisterSimpleCastFunctions(ExtensionLoader &loader) {
     loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalTypeId::VARCHAR, CastJSONListToVarchar,
                                 json_list_to_varchar_cost);
 
-    // VARCHAR to JSON[] (also needs a special case otherwise get a VARCHAR -> VARCHAR[] cast first)
+    // JSON[] to JSON is allowed implicitly
+    loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalType::JSON(), CastJSONListToVarchar,
+                                100);
+
+    // VARCHAR to JSON[] (also needs a special case otherwise we get a VARCHAR -> VARCHAR[] cast first)
     const auto varchar_to_json_list_cost =
         CastFunctionSet::ImplicitCastCost(db, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::JSON())) - 1;
     BoundCastInfo varchar_to_json_list_info(CastVarcharToJSONList, nullptr, JSONFunctionLocalState::InitCastLocalState);
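
A short sketch of what the newly registered implicit cast enables (the values are illustrative): a JSON[] value can now be converted directly to a single JSON value, reusing CastJSONListToVarchar, instead of failing to find a cast path:

#include "duckdb.hpp"

int main() {
    duckdb::DuckDB db(nullptr);
    duckdb::Connection con(db);
    // A list of JSON values cast to one JSON value
    auto result = con.Query("SELECT ['{\"a\": 1}', '2']::JSON[]::JSON AS j");
    result->Print(); // expected: a single JSON value rendering the list as an array
    return 0;
}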

package/src/duckdb/extension/parquet/column_writer.cpp CHANGED
@@ -187,9 +187,12 @@ void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterStat
         // no repeat levels without a parent node
         return;
     }
-    while (state.repetition_levels.size() < parent->repetition_levels.size()) {
-        state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]);
+    if (state.repetition_levels.size() >= parent->repetition_levels.size()) {
+        return;
     }
+    state.repetition_levels.insert(state.repetition_levels.end(),
+                                   parent->repetition_levels.begin() + state.repetition_levels.size(),
+                                   parent->repetition_levels.end());
 }
 
 void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
@@ -200,36 +203,41 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat
         while (state.definition_levels.size() < parent->definition_levels.size()) {
             idx_t current_index = state.definition_levels.size();
             if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) {
+                //! Inherit nulls from parent
                 state.definition_levels.push_back(parent->definition_levels[current_index]);
                 state.parent_null_count++;
             } else if (validity.RowIsValid(vector_index)) {
+                //! Produce a non-null define
                 state.definition_levels.push_back(define_value);
             } else {
+                //! Produce a null define
                 if (!can_have_nulls) {
                     throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
                 }
                 state.null_count++;
                 state.definition_levels.push_back(null_value);
             }
+            D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size());
             if (parent->is_empty.empty() || !parent->is_empty[current_index]) {
                 vector_index++;
             }
         }
+        return;
+    }
+
+    // no parent: set definition levels only from this validity mask
+    if (validity.AllValid()) {
+        state.definition_levels.insert(state.definition_levels.end(), count, define_value);
     } else {
-        // no parent: set definition levels only from this validity mask
-        if (validity.AllValid()) {
-            state.definition_levels.insert(state.definition_levels.end(), count, define_value);
-        } else {
-            for (idx_t i = 0; i < count; i++) {
-                const auto is_null = !validity.RowIsValid(i);
-                state.definition_levels.emplace_back(is_null ? null_value : define_value);
-                state.null_count += is_null;
-            }
-        }
-        if (!can_have_nulls && state.null_count != 0) {
-            throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
+        for (idx_t i = 0; i < count; i++) {
+            const auto is_null = !validity.RowIsValid(i);
+            state.definition_levels.emplace_back(is_null ? null_value : define_value);
+            state.null_count += is_null;
         }
     }
+    if (!can_have_nulls && state.null_count != 0) {
+        throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
+    }
 }
 
 //===--------------------------------------------------------------------===//
@@ -237,7 +245,7 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat
 //===--------------------------------------------------------------------===//
 
 ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
-                                                    const LogicalType &type, const string &name,
+                                                    const LogicalType &type, const string &name, bool allow_geometry,
                                                     optional_ptr<const ChildFieldIDs> field_ids, idx_t max_repeat,
                                                     idx_t max_define, bool can_have_nulls) {
     auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
@@ -277,7 +285,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         struct_column.children.reserve(child_types.size());
         for (auto &child_type : child_types) {
             struct_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
-                                                                  child_field_ids, max_repeat, max_define + 1));
+                                                                  allow_geometry, child_field_ids, max_repeat,
+                                                                  max_define + 1));
         }
         return struct_column;
     }
@@ -313,8 +322,8 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         schemas.push_back(std::move(repeated_element));
 
         ParquetColumnSchema list_column(name, type, max_define, max_repeat, schema_idx, 0);
-        list_column.children.push_back(
-            FillParquetSchema(schemas, child_type, "element", child_field_ids, max_repeat + 1, max_define + 2));
+        list_column.children.push_back(FillParquetSchema(schemas, child_type, "element", allow_geometry,
+                                                         child_field_ids, max_repeat + 1, max_define + 2));
         return list_column;
     }
     if (type.id() == LogicalTypeId::MAP) {
@@ -361,13 +370,14 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         for (idx_t i = 0; i < 2; i++) {
             // key needs to be marked as REQUIRED
             bool is_key = i == 0;
-            auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], child_field_ids, max_repeat + 1,
-                                                  max_define + 2, !is_key);
+            auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], allow_geometry, child_field_ids,
+                                                  max_repeat + 1, max_define + 2, !is_key);
 
             map_column.children.push_back(std::move(child_schema));
         }
         return map_column;
     }
+
     duckdb_parquet::SchemaElement schema_element;
     schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
     schema_element.repetition_type = null_type;
@@ -379,7 +389,7 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
         schema_element.__isset.field_id = true;
         schema_element.field_id = field_id->field_id;
     }
-    ParquetWriter::SetSchemaProperties(type, schema_element);
+    ParquetWriter::SetSchemaProperties(type, schema_element, allow_geometry);
     schemas.push_back(std::move(schema_element));
     return ParquetColumnSchema(name, type, max_define, max_repeat, schema_idx, 0);
 }

package/src/duckdb/extension/parquet/geo_parquet.cpp CHANGED
@@ -208,17 +208,19 @@ unique_ptr<GeoParquetFileMetadata> GeoParquetFileMetadata::TryRead(const duckdb_
         throw InvalidInputException("Geoparquet metadata is not an object");
     }
 
-    auto result = make_uniq<GeoParquetFileMetadata>();
+    // We dont actually care about the version for now, as we only support V1+native
+    auto result = make_uniq<GeoParquetFileMetadata>(GeoParquetVersion::BOTH);
 
     // Check and parse the version
     const auto version_val = yyjson_obj_get(root, "version");
     if (!yyjson_is_str(version_val)) {
         throw InvalidInputException("Geoparquet metadata does not have a version");
     }
-    result->version = yyjson_get_str(version_val);
-    if (StringUtil::StartsWith(result->version, "2")) {
-        // Guard against a breaking future 2.0 version
-        throw InvalidInputException("Geoparquet version %s is not supported", result->version);
+
+    auto version = yyjson_get_str(version_val);
+    if (StringUtil::StartsWith(version, "3")) {
+        // Guard against a breaking future 3.0 version
+        throw InvalidInputException("Geoparquet version %s is not supported", version);
     }
 
     // Check and parse the geometry columns
@@ -344,7 +346,20 @@ void GeoParquetFileMetadata::Write(duckdb_parquet::FileMetaData &file_meta_data)
     yyjson_mut_doc_set_root(doc, root);
 
     // Add the version
-    yyjson_mut_obj_add_strncpy(doc, root, "version", version.c_str(), version.size());
+    switch (version) {
+    case GeoParquetVersion::V1:
+    case GeoParquetVersion::BOTH:
+        yyjson_mut_obj_add_strcpy(doc, root, "version", "1.0.0");
+        break;
+    case GeoParquetVersion::V2:
+        yyjson_mut_obj_add_strcpy(doc, root, "version", "2.0.0");
+        break;
+    case GeoParquetVersion::NONE:
+    default:
+        // Should never happen, we should not be writing anything
+        yyjson_mut_doc_free(doc);
+        throw InternalException("GeoParquetVersion::NONE should not write metadata");
+    }
 
     // Add the primary column
     yyjson_mut_obj_add_strncpy(doc, root, "primary_column", primary_geometry_column.c_str(),

package/src/duckdb/extension/parquet/include/column_writer.hpp CHANGED
@@ -27,7 +27,7 @@ public:
 
     unsafe_vector<uint16_t> definition_levels;
     unsafe_vector<uint16_t> repetition_levels;
-    vector<bool> is_empty;
+    unsafe_vector<uint8_t> is_empty;
     idx_t parent_null_count = 0;
     idx_t null_count = 0;
 
@@ -94,7 +94,7 @@ public:
     }
 
     static ParquetColumnSchema FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
-                                                 const LogicalType &type, const string &name,
+                                                 const LogicalType &type, const string &name, bool allow_geometry,
                                                  optional_ptr<const ChildFieldIDs> field_ids, idx_t max_repeat = 0,
                                                  idx_t max_define = 1, bool can_have_nulls = true);
     //! Create the column writer for a specific type recursively

package/src/duckdb/extension/parquet/include/geo_parquet.hpp CHANGED
@@ -199,6 +199,31 @@ enum class GeoParquetColumnEncoding : uint8_t {
     MULTIPOLYGON,
 };
 
+enum class GeoParquetVersion : uint8_t {
+    // Write GeoParquet 1.0 metadata
+    // GeoParquet 1.0 has the widest support among readers and writers
+    V1,
+
+    // Write GeoParquet 2.0
+    // The GeoParquet 2.0 options is identical to GeoParquet 1.0 except the underlying storage
+    // of spatial columns is Parquet native geometry, where the Parquet writer will include
+    // native statistics according to the underlying Parquet options. Compared to 'BOTH', this will
+    // actually write the metadata as containing GeoParquet version 2.0.0
+    // However, V2 isnt standardized yet, so this option is still a bit experimental
+    V2,
+
+    // Write GeoParquet 1.0 metadata, with native Parquet geometry types
+    // This is a bit of a hold-over option for compatibility with systems that
+    // reject GeoParquet 2.0 metadata, but can read Parquet native geometry types as they simply ignore the extra
+    // logical type. DuckDB v1.4.0 falls into this category.
+    BOTH,
+
+    // Do not write GeoParquet metadata
+    // This option suppresses GeoParquet metadata; however, spatial types will be written as
+    // Parquet native Geometry/Geography.
+    NONE,
+};
+
 struct GeoParquetColumnMetadata {
     // The encoding of the geometry column
     GeoParquetColumnEncoding geometry_encoding;
@@ -215,6 +240,8 @@ struct GeoParquetColumnMetadata {
 
 class GeoParquetFileMetadata {
 public:
+    GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) {
+    }
     void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStats &stats);
     void Write(duckdb_parquet::FileMetaData &file_meta_data);
 
@@ -234,8 +261,8 @@ public:
 
 private:
     mutex write_lock;
-    string version = "1.1.0";
     unordered_map<string, GeoParquetColumnMetadata> geometry_columns;
+    GeoParquetVersion version;
 };
 
 } // namespace duckdb

package/src/duckdb/extension/parquet/include/parquet_writer.hpp CHANGED
@@ -85,7 +85,7 @@ public:
                  shared_ptr<ParquetEncryptionConfig> encryption_config, optional_idx dictionary_size_limit,
                  idx_t string_dictionary_page_size_limit, bool enable_bloom_filters,
                  double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
-                 ParquetVersion parquet_version);
+                 ParquetVersion parquet_version, GeoParquetVersion geoparquet_version);
     ~ParquetWriter();
 
 public:
@@ -95,7 +95,8 @@ public:
     void Finalize();
 
     static duckdb_parquet::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
-    static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele);
+    static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele,
+                                    bool allow_geometry);
 
     ClientContext &GetContext() {
         return context;
@@ -139,6 +140,9 @@ public:
     ParquetVersion GetParquetVersion() const {
         return parquet_version;
     }
+    GeoParquetVersion GetGeoParquetVersion() const {
+        return geoparquet_version;
+    }
     const string &GetFileName() const {
         return file_name;
     }
@@ -175,6 +179,7 @@ private:
     bool debug_use_openssl;
     shared_ptr<EncryptionUtil> encryption_util;
    ParquetVersion parquet_version;
+    GeoParquetVersion geoparquet_version;
     vector<ParquetColumnSchema> column_schemas;
 
     unique_ptr<BufferedFileWriter> writer;

package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp CHANGED
@@ -14,12 +14,25 @@
 namespace duckdb {
 
 class StringColumnReader : public ColumnReader {
+    enum class StringColumnType : uint8_t { VARCHAR, JSON, OTHER };
+
+    static StringColumnType GetStringColumnType(const LogicalType &type) {
+        if (type.IsJSONType()) {
+            return StringColumnType::JSON;
+        }
+        if (type.id() == LogicalTypeId::VARCHAR) {
+            return StringColumnType::VARCHAR;
+        }
+        return StringColumnType::OTHER;
+    }
+
 public:
     static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR;
 
 public:
     StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema);
     idx_t fixed_width_string_length;
+    const StringColumnType string_column_type;
 
 public:
     static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);

package/src/duckdb/extension/parquet/include/writer/array_column_writer.hpp CHANGED
@@ -25,6 +25,10 @@ public:
     void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
                  bool vector_can_span_multiple_pages) override;
     void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
+
+protected:
+    void WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level,
+                         idx_t define_value, const bool is_empty = false);
 };
 
 } // namespace duckdb

package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -238,6 +238,9 @@ struct ParquetWriteBindData : public TableFunctionData {
 
     //! Which encodings to include when writing
     ParquetVersion parquet_version = ParquetVersion::V1;
+
+    //! Which geo-parquet version to use when writing
+    GeoParquetVersion geoparquet_version = GeoParquetVersion::V1;
 };
 
 struct ParquetWriteGlobalState : public GlobalFunctionData {
@@ -291,6 +294,7 @@ static void ParquetListCopyOptions(ClientContext &context, CopyOptionsInput &inp
     copy_options["binary_as_string"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
     copy_options["file_row_number"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
     copy_options["can_have_nan"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
+    copy_options["geoparquet_version"] = CopyOption(LogicalType::VARCHAR, CopyOptionMode::WRITE_ONLY);
 }
 
 static unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBindInput &input,
@@ -426,6 +430,19 @@ static unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFun
         } else {
             throw BinderException("Expected parquet_version 'V1' or 'V2'");
         }
+    } else if (loption == "geoparquet_version") {
+        const auto roption = StringUtil::Upper(option.second[0].ToString());
+        if (roption == "NONE") {
+            bind_data->geoparquet_version = GeoParquetVersion::NONE;
+        } else if (roption == "V1") {
+            bind_data->geoparquet_version = GeoParquetVersion::V1;
+        } else if (roption == "V2") {
+            bind_data->geoparquet_version = GeoParquetVersion::V2;
+        } else if (roption == "BOTH") {
+            bind_data->geoparquet_version = GeoParquetVersion::BOTH;
+        } else {
+            throw BinderException("Expected geoparquet_version 'NONE', 'V1' or 'BOTH'");
+        }
     } else {
         throw InternalException("Unrecognized option for PARQUET: %s", option.first.c_str());
     }
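
A hedged usage sketch for the new geoparquet_version COPY option (the table, column, and file names are hypothetical, and the spatial types are assumed to come from the spatial extension): the option is parsed case-insensitively at bind time and threaded through to the ParquetWriter below.

#include "duckdb.hpp"

int main() {
    duckdb::DuckDB db(nullptr);
    duckdb::Connection con(db);
    con.Query("INSTALL spatial; LOAD spatial;");
    con.Query("CREATE TABLE places AS SELECT ST_Point(1, 2) AS geom;");
    // 'BOTH' keeps GeoParquet 1.0 metadata while storing native Parquet geometry
    auto result = con.Query("COPY places TO 'places.parquet' (FORMAT parquet, GEOPARQUET_VERSION 'BOTH');");
    if (result->HasError()) {
        result->Print();
    }
    return 0;
}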
@@ -457,7 +474,8 @@ static unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext
         parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
         parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit,
         parquet_bind.enable_bloom_filters, parquet_bind.bloom_filter_false_positive_ratio,
-        parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version);
+        parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version,
+        parquet_bind.geoparquet_version);
     return std::move(global_state);
 }
 
@@ -626,6 +644,39 @@ ParquetVersion EnumUtil::FromString<ParquetVersion>(const char *value) {
     throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template <>
+const char *EnumUtil::ToChars<GeoParquetVersion>(GeoParquetVersion value) {
+    switch (value) {
+    case GeoParquetVersion::NONE:
+        return "NONE";
+    case GeoParquetVersion::V1:
+        return "V1";
+    case GeoParquetVersion::V2:
+        return "V2";
+    case GeoParquetVersion::BOTH:
+        return "BOTH";
+    default:
+        throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+    }
+}
+
+template <>
+GeoParquetVersion EnumUtil::FromString<GeoParquetVersion>(const char *value) {
+    if (StringUtil::Equals(value, "NONE")) {
+        return GeoParquetVersion::NONE;
+    }
+    if (StringUtil::Equals(value, "V1")) {
+        return GeoParquetVersion::V1;
+    }
+    if (StringUtil::Equals(value, "V2")) {
+        return GeoParquetVersion::V2;
+    }
+    if (StringUtil::Equals(value, "BOTH")) {
+        return GeoParquetVersion::BOTH;
+    }
+    throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 static optional_idx SerializeCompressionLevel(const int64_t compression_level) {
     return compression_level < 0 ? NumericLimits<idx_t>::Maximum() - NumericCast<idx_t>(AbsValue(compression_level))
                                  : NumericCast<idx_t>(compression_level);
@@ -679,6 +730,8 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin
     serializer.WritePropertyWithDefault(115, "string_dictionary_page_size_limit",
                                         bind_data.string_dictionary_page_size_limit,
                                         default_value.string_dictionary_page_size_limit);
+    serializer.WritePropertyWithDefault(116, "geoparquet_version", bind_data.geoparquet_version,
+                                        default_value.geoparquet_version);
 }
 
 static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
@@ -711,6 +764,8 @@ static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserialize
         deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version);
     data->string_dictionary_page_size_limit = deserializer.ReadPropertyWithExplicitDefault(
         115, "string_dictionary_page_size_limit", default_value.string_dictionary_page_size_limit);
+    data->geoparquet_version =
+        deserializer.ReadPropertyWithExplicitDefault(116, "geoparquet_version", default_value.geoparquet_version);
 
     return std::move(data);
 }

package/src/duckdb/extension/parquet/parquet_reader.cpp CHANGED
@@ -570,7 +570,10 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
 
     auto file_meta_data = GetFileMetadata();
     D_ASSERT(file_meta_data);
-    D_ASSERT(next_schema_idx < file_meta_data->schema.size());
+    if (next_schema_idx >= file_meta_data->schema.size()) {
+        throw InvalidInputException("Malformed Parquet schema in file \"%s\": invalid schema index %d", file.path,
+                                    next_schema_idx);
+    }
     auto &s_ele = file_meta_data->schema[next_schema_idx];
     auto this_idx = next_schema_idx;
 

package/src/duckdb/extension/parquet/parquet_statistics.cpp CHANGED
@@ -395,23 +395,21 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
         }
         break;
     case LogicalTypeId::VARCHAR: {
-        auto string_stats = StringStats::CreateEmpty(type);
+        auto string_stats = StringStats::CreateUnknown(type);
         if (parquet_stats.__isset.min_value) {
             StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
-            StringStats::Update(string_stats, parquet_stats.min_value);
+            StringStats::SetMin(string_stats, parquet_stats.min_value);
         } else if (parquet_stats.__isset.min) {
             StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
-            StringStats::Update(string_stats, parquet_stats.min);
+            StringStats::SetMin(string_stats, parquet_stats.min);
         }
         if (parquet_stats.__isset.max_value) {
             StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
-            StringStats::Update(string_stats, parquet_stats.max_value);
+            StringStats::SetMax(string_stats, parquet_stats.max_value);
         } else if (parquet_stats.__isset.max) {
             StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
-            StringStats::Update(string_stats, parquet_stats.max);
+            StringStats::SetMax(string_stats, parquet_stats.max);
         }
-        StringStats::SetContainsUnicode(string_stats);
-        StringStats::ResetMaxStringLength(string_stats);
         row_group_stats = string_stats.ToUnique();
         break;
     }

package/src/duckdb/extension/parquet/parquet_writer.cpp CHANGED
@@ -166,7 +166,8 @@ Type::type ParquetWriter::DuckDBTypeToParquetType(const LogicalType &duckdb_type
     throw NotImplementedException("Unimplemented type for Parquet \"%s\"", duckdb_type.ToString());
 }
 
-void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele) {
+void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele,
+                                        bool allow_geometry) {
     if (duckdb_type.IsJSONType()) {
         schema_ele.converted_type = ConvertedType::JSON;
         schema_ele.__isset.converted_type = true;
@@ -174,7 +175,7 @@ void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type, duckdb_p
         schema_ele.logicalType.__set_JSON(duckdb_parquet::JsonType());
         return;
     }
-    if (duckdb_type.GetAlias() == "WKB_BLOB") {
+    if (duckdb_type.GetAlias() == "WKB_BLOB" && allow_geometry) {
         schema_ele.__isset.logicalType = true;
         schema_ele.logicalType.__isset.GEOMETRY = true;
         // TODO: Set CRS in the future
@@ -356,14 +357,16 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
                              shared_ptr<ParquetEncryptionConfig> encryption_config_p,
                              optional_idx dictionary_size_limit_p, idx_t string_dictionary_page_size_limit_p,
                              bool enable_bloom_filters_p, double bloom_filter_false_positive_ratio_p,
-                             int64_t compression_level_p, bool debug_use_openssl_p, ParquetVersion parquet_version)
+                             int64_t compression_level_p, bool debug_use_openssl_p, ParquetVersion parquet_version,
+                             GeoParquetVersion geoparquet_version)
     : context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
       column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)),
      encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p),
      string_dictionary_page_size_limit(string_dictionary_page_size_limit_p),
      enable_bloom_filters(enable_bloom_filters_p),
      bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p),
-      debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version), total_written(0), num_row_groups(0) {
+      debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version), geoparquet_version(geoparquet_version),
+      total_written(0), num_row_groups(0) {
 
     // initialize the file writer
     writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
@@ -416,10 +419,13 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
     auto &unique_names = column_names;
     VerifyUniqueNames(unique_names);
 
+    // V1 GeoParquet stores geometries as blobs, no logical type
+    auto allow_geometry = geoparquet_version != GeoParquetVersion::V1;
+
     // construct the child schemas
     for (idx_t i = 0; i < sql_types.size(); i++) {
-        auto child_schema =
-            ColumnWriter::FillParquetSchema(file_meta_data.schema, sql_types[i], unique_names[i], &field_ids);
+        auto child_schema = ColumnWriter::FillParquetSchema(file_meta_data.schema, sql_types[i], unique_names[i],
+                                                            allow_geometry, &field_ids);
         column_schemas.push_back(std::move(child_schema));
     }
     // now construct the writers based on the schemas
@@ -975,7 +981,8 @@ void ParquetWriter::Finalize() {
     }
 
     // Add geoparquet metadata to the file metadata
-    if (geoparquet_data && GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context)) {
+    if (geoparquet_data && GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context) &&
+        geoparquet_version != GeoParquetVersion::NONE) {
         geoparquet_data->Write(file_meta_data);
     }
 
@@ -1005,7 +1012,7 @@ void ParquetWriter::Finalize() {
 
 GeoParquetFileMetadata &ParquetWriter::GetGeoParquetData() {
     if (!geoparquet_data) {
-        geoparquet_data = make_uniq<GeoParquetFileMetadata>();
+        geoparquet_data = make_uniq<GeoParquetFileMetadata>(geoparquet_version);
     }
     return *geoparquet_data;
 }

package/src/duckdb/extension/parquet/reader/string_column_reader.cpp CHANGED
@@ -9,7 +9,7 @@ namespace duckdb {
 // String Column Reader
 //===--------------------------------------------------------------------===//
 StringColumnReader::StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
-    : ColumnReader(reader, schema) {
+    : ColumnReader(reader, schema), string_column_type(GetStringColumnType(Type())) {
     fixed_width_string_length = 0;
     if (schema.parquet_type == Type::FIXED_LEN_BYTE_ARRAY) {
         fixed_width_string_length = schema.type_length;
@@ -26,13 +26,26 @@ void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, co
     size_t pos;
     auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos);
     if (utf_type == UnicodeType::INVALID) {
-        throw InvalidInputException("Invalid string encoding found in Parquet file: value \"" +
-                                    Blob::ToString(string_t(str_data, str_len)) + "\" is not valid UTF8!");
+        throw InvalidInputException("Invalid string encoding found in Parquet file: value \"%s\" is not valid UTF8!",
+                                    Blob::ToString(string_t(str_data, str_len)));
     }
 }
 
 void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
-    VerifyString(str_data, str_len, Type().id() == LogicalTypeId::VARCHAR);
+    switch (string_column_type) {
+    case StringColumnType::VARCHAR:
+        VerifyString(str_data, str_len, true);
+        break;
+    case StringColumnType::JSON: {
+        const auto error = StringUtil::ValidateJSON(str_data, str_len);
+        if (!error.empty()) {
+            throw InvalidInputException("Invalid JSON found in Parquet file: %s", error);
+        }
+        break;
+    }
+    default:
+        break;
+    }
 }
 
 class ParquetStringVectorBuffer : public VectorBuffer {
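
With string_column_type computed once in the constructor, JSON-typed Parquet columns are now validated with the new StringUtil::ValidateJSON helper (added to src/common/string_util.cpp in this release, per the file list above) rather than only being checked as UTF-8. A hedged sketch of the observable behavior (the file name is hypothetical):

#include "duckdb.hpp"
#include <cstdio>

int main() {
    duckdb::DuckDB db(nullptr);
    duckdb::Connection con(db);
    // A Parquet file whose JSON logical-type column holds a malformed payload
    auto result = con.Query("SELECT * FROM read_parquet('broken_json.parquet')");
    if (result->HasError()) {
        // expected with this change: "Invalid JSON found in Parquet file: ..."
        std::printf("%s\n", result->GetError().c_str());
    }
    return 0;
}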