duckdb 1.4.1 → 1.4.3-dev0.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/.github/workflows/NodeJS.yml +9 -6
  2. package/package.json +2 -2
  3. package/scripts/node_version.sh +1 -1
  4. package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
  5. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
  6. package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
  7. package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
  8. package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
  9. package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
  10. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
  11. package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
  12. package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
  13. package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
  14. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  15. package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
  16. package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
  17. package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
  18. package/src/duckdb/src/common/csv_writer.cpp +1 -13
  19. package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
  20. package/src/duckdb/src/common/enum_util.cpp +19 -0
  21. package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
  22. package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
  23. package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
  24. package/src/duckdb/src/common/random_engine.cpp +10 -0
  25. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
  26. package/src/duckdb/src/execution/index/art/art.cpp +6 -3
  27. package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
  28. package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
  29. package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
  30. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
  31. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
  33. package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
  34. package/src/duckdb/src/function/macro_function.cpp +1 -1
  35. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
  36. package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
  37. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
  38. package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
  39. package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
  40. package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
  41. package/src/duckdb/src/function/table/read_file.cpp +65 -1
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  43. package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
  44. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  47. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
  48. package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
  49. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
  51. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
  54. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
  55. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
  56. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
  57. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
  58. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
  60. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
  62. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
  63. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  66. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  67. package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
  69. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
  75. package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
  76. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  77. package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
  78. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
  79. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
  80. package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
  81. package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
  82. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
  84. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
  87. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
  88. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
  89. package/src/duckdb/src/logging/log_manager.cpp +2 -1
  90. package/src/duckdb/src/logging/log_types.cpp +30 -1
  91. package/src/duckdb/src/main/attached_database.cpp +4 -7
  92. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
  93. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
  94. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
  95. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
  96. package/src/duckdb/src/main/config.cpp +6 -5
  97. package/src/duckdb/src/main/database.cpp +9 -3
  98. package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
  99. package/src/duckdb/src/main/database_manager.cpp +1 -1
  100. package/src/duckdb/src/main/http/http_util.cpp +19 -1
  101. package/src/duckdb/src/main/profiling_info.cpp +11 -0
  102. package/src/duckdb/src/main/query_profiler.cpp +16 -0
  103. package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
  104. package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
  105. package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
  106. package/src/duckdb/src/main/relation.cpp +28 -12
  107. package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
  108. package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
  109. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
  110. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
  111. package/src/duckdb/src/parallel/task_executor.cpp +4 -2
  112. package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
  113. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
  114. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
  115. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
  116. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
  117. package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
  118. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
  119. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
  120. package/src/duckdb/src/planner/binder.cpp +0 -1
  121. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
  122. package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
  123. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
  124. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
  125. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
  126. package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
  127. package/src/duckdb/src/storage/data_table.cpp +1 -1
  128. package/src/duckdb/src/storage/local_storage.cpp +15 -2
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
  130. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
  131. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  132. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
  133. package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
  134. package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
  135. package/src/duckdb/src/storage/storage_info.cpp +4 -0
  136. package/src/duckdb/src/storage/storage_manager.cpp +8 -0
  137. package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
  138. package/src/duckdb/src/storage/table/column_data.cpp +3 -2
  139. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
  140. package/src/duckdb/src/storage/table/row_group.cpp +41 -24
  141. package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
  142. package/src/duckdb/src/storage/table_index_list.cpp +18 -5
  143. package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
  144. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
  145. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
  146. package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
  147. package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
  148. package/src/duckdb/ub_src_common_types_row.cpp +0 -2
package/.github/workflows/NodeJS.yml CHANGED
@@ -13,29 +13,32 @@ env:
  GH_TOKEN: ${{ secrets.GH_TOKEN }}
  AWS_ACCESS_KEY_ID: ${{secrets.S3_DUCKDB_NODE_ID}}
  AWS_SECRET_ACCESS_KEY: ${{secrets.S3_DUCKDB_NODE_KEY}}
- AWS_DEFAULT_REGION: us-east-1
+ AWS_ENDPOINT_URL: ${{ secrets.S3_DUCKDB_NODE_ENDPOINT }}

  jobs:
  set-up-npm:
- name: Set up NPM
- runs-on: ubuntu-22.04
+ name: Upload to NPM
+ runs-on: ubuntu-latest
  env:
  DUCKDB_NODE_BUILD_CACHE: 0
+ permissions: # only this job has permission to upload to npm using trusted publishing
+ id-token: write # Required for OIDC
+ contents: read
  steps:
  - uses: actions/checkout@v3
  with:
  fetch-depth: 0

- - uses: actions/setup-python@v4
+ - uses: actions/setup-node@v4
  with:
- python-version: '3.11'
+ node-version: '24'
+ registry-url: 'https://registry.npmjs.org'

  - name: Setup NPM
  shell: bash
  run: ./scripts/node_version.sh upload
  env:
  DUCKDB_NODE_BUILD_CACHE: 0 # create a standalone package
- NODE_AUTH_TOKEN: ${{secrets.NODE_AUTH_TOKEN}}

  linux-nodejs:
  name: node.js Linux
package/package.json CHANGED
@@ -2,13 +2,13 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "1.4.1",
+ "version": "1.4.3-dev0.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
  "@mapbox/node-pre-gyp": "^2.0.0",
  "node-addon-api": "^7.0.0",
- "node-gyp": "^9.3.0"
+ "node-gyp": "^9.4.1"
  },
  "binary": {
  "module_name": "duckdb",
package/scripts/node_version.sh CHANGED
@@ -5,6 +5,7 @@ set -ex
  git config --global user.email "quack@duckdb.org"
  git config --global user.name "DuckDB Admin"

+ npm -v

  export TAG=''
  # for main do prereleases
@@ -28,6 +29,5 @@ npm pack --dry-run
  # upload to npm, maybe
  if [[ "$GITHUB_REF" =~ ^(refs/heads/main|refs/tags/v.+)$ && "$1" = "upload" ]] ; then
  npm version
- npm config set //registry.npmjs.org/:_authToken $NODE_AUTH_TOKEN
  npm publish --access public $TAG
  fi
package/src/duckdb/extension/icu/icu_extension.cpp CHANGED
@@ -5,11 +5,8 @@
  #include "duckdb/function/scalar_function.hpp"
  #include "duckdb/main/config.hpp"
  #include "duckdb/main/connection.hpp"
- #include "duckdb/main/database.hpp"
  #include "duckdb/main/extension/extension_loader.hpp"
  #include "duckdb/parser/parsed_data/create_collation_info.hpp"
- #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
- #include "duckdb/parser/parsed_data/create_table_function_info.hpp"
  #include "duckdb/planner/expression/bound_function_expression.hpp"
  #include "include/icu-current.hpp"
  #include "include/icu-dateadd.hpp"
@@ -25,8 +22,6 @@
  #include "include/icu_extension.hpp"
  #include "unicode/calendar.h"
  #include "unicode/coll.h"
- #include "unicode/errorcode.h"
- #include "unicode/sortkey.h"
  #include "unicode/stringpiece.h"
  #include "unicode/timezone.h"
  #include "unicode/ucol.h"
@@ -209,7 +204,7 @@ static ScalarFunction GetICUCollateFunction(const string &collation, const strin
  return result;
  }

- unique_ptr<icu::TimeZone> GetTimeZoneInternal(string &tz_str, vector<string> &candidates) {
+ unique_ptr<icu::TimeZone> GetKnownTimeZone(const string &tz_str) {
  icu::StringPiece tz_name_utf8(tz_str);
  const auto uid = icu::UnicodeString::fromUTF8(tz_name_utf8);
  duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createTimeZone(uid));
@@ -217,6 +212,66 @@ unique_ptr<icu::TimeZone> GetTimeZoneInternal(string &tz_str, vector<string> &ca
  return tz;
  }

+ return nullptr;
+ }
+
+ static string NormalizeTimeZone(const string &tz_str) {
+ if (GetKnownTimeZone(tz_str)) {
+ return tz_str;
+ }
+
+ // Map UTC±NN00 to Etc/UTC±N
+ do {
+ if (tz_str.size() <= 4) {
+ break;
+ }
+ if (tz_str.compare(0, 3, "UTC")) {
+ break;
+ }
+
+ idx_t pos = 3;
+ const auto sign = tz_str[pos++];
+ if (sign != '+' && sign != '-') {
+ break;
+ }
+
+ string mapped = "Etc/GMT";
+ mapped += sign;
+ const auto base_len = mapped.size();
+ for (; pos < tz_str.size(); ++pos) {
+ const auto digit = tz_str[pos];
+ // We could get fancy here and count colons and their locations, but I doubt anyone cares.
+ if (digit == '0' || digit == ':') {
+ continue;
+ }
+ if (!StringUtil::CharacterIsDigit(digit)) {
+ break;
+ }
+ mapped += digit;
+ }
+ if (pos < tz_str.size()) {
+ break;
+ }
+ // If we didn't add anything, then make it +0
+ if (mapped.size() == base_len) {
+ mapped.back() = '+';
+ mapped += '0';
+ }
+ // Final sanity check
+ if (GetKnownTimeZone(mapped)) {
+ return mapped;
+ }
+ } while (false);
+
+ return tz_str;
+ }
+
+ unique_ptr<icu::TimeZone> GetTimeZoneInternal(string &tz_str, vector<string> &candidates) {
+ auto tz = GetKnownTimeZone(tz_str);
+ if (tz) {
+ return tz;
+ }
+
  // Try to be friendlier
  // Go through all the zone names and look for a case insensitive match
  // If we don't find one, make a suggestion
@@ -269,6 +324,7 @@ unique_ptr<icu::TimeZone> ICUHelpers::GetTimeZone(string &tz_str, string *error_

  static void SetICUTimeZone(ClientContext &context, SetScope scope, Value &parameter) {
  auto tz_str = StringValue::Get(parameter);
+ tz_str = NormalizeTimeZone(tz_str);
  ICUHelpers::GetTimeZone(tz_str);
  parameter = Value(tz_str);
  }
@@ -405,6 +461,11 @@ static void LoadInternal(ExtensionLoader &loader) {
  icu::UnicodeString tz_id;
  std::string tz_string;
  tz->getID(tz_id).toUTF8String(tz_string);
+ // If the environment TZ is invalid, look for some alternatives
+ tz_string = NormalizeTimeZone(tz_string);
+ if (!GetKnownTimeZone(tz_string)) {
+ tz_string = "UTC";
+ }
  config.AddExtensionOption("TimeZone", "The current time zone", LogicalType::VARCHAR, Value(tz_string),
  SetICUTimeZone);

package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp CHANGED
@@ -1090,9 +1090,15 @@ uprv_tzname(int n)
  if (tzid[0] == ':') {
  tzid++;
  }
- /* This might be a good Olson ID. */
- skipZoneIDPrefix(&tzid);
- return tzid;
+ #if defined(TZDEFAULT)
+ if (uprv_strcmp(tzid, TZDEFAULT) != 0) {
+ #endif
+ /* This might be a good Olson ID. */
+ skipZoneIDPrefix(&tzid);
+ return tzid;
+ #if defined(TZDEFAULT)
+ }
+ #endif
  }
  /* else U_TZNAME will give a better result. */
  #endif
package/src/duckdb/extension/json/include/json_serializer.hpp CHANGED
@@ -39,6 +39,18 @@ public:
  return serializer.GetRootObject();
  }

+ template <class T>
+ static string SerializeToString(T &value) {
+ auto doc = yyjson_mut_doc_new(nullptr);
+ JsonSerializer serializer(doc, false, false, false);
+ value.Serialize(serializer);
+ auto result_obj = serializer.GetRootObject();
+ idx_t len = 0;
+ auto data = yyjson_mut_val_write_opts(result_obj, JSONCommon::WRITE_PRETTY_FLAG, nullptr,
+ reinterpret_cast<size_t *>(&len), nullptr);
+ return string(data, len);
+ }
+
  yyjson_mut_val *GetRootObject() {
  D_ASSERT(stack.size() == 1); // or we forgot to pop somewhere
  return stack.front();
package/src/duckdb/extension/json/json_functions/json_create.cpp CHANGED
@@ -111,11 +111,11 @@ static unique_ptr<FunctionData> JSONCreateBindParams(ScalarFunction &bound_funct
  auto &type = arguments[i]->return_type;
  if (arguments[i]->HasParameter()) {
  throw ParameterNotResolvedException();
- } else if (type == LogicalTypeId::SQLNULL) {
- // This is needed for macro's
- bound_function.arguments.push_back(type);
  } else if (object && i % 2 == 0) {
- // Key, must be varchar
+ if (type != LogicalType::VARCHAR) {
+ throw BinderException("json_object() keys must be VARCHAR, add an explicit cast to argument \"%s\"",
+ arguments[i]->GetName());
+ }
  bound_function.arguments.push_back(LogicalType::VARCHAR);
  } else {
  // Value, cast to types that we can put in JSON
@@ -128,7 +128,7 @@ static unique_ptr<FunctionData> JSONCreateBindParams(ScalarFunction &bound_funct
  static unique_ptr<FunctionData> JSONObjectBind(ClientContext &context, ScalarFunction &bound_function,
  vector<unique_ptr<Expression>> &arguments) {
  if (arguments.size() % 2 != 0) {
- throw InvalidInputException("json_object() requires an even number of arguments");
+ throw BinderException("json_object() requires an even number of arguments");
  }
  return JSONCreateBindParams(bound_function, arguments, true);
  }
@@ -141,7 +141,7 @@ static unique_ptr<FunctionData> JSONArrayBind(ClientContext &context, ScalarFunc
  static unique_ptr<FunctionData> ToJSONBind(ClientContext &context, ScalarFunction &bound_function,
  vector<unique_ptr<Expression>> &arguments) {
  if (arguments.size() != 1) {
- throw InvalidInputException("to_json() takes exactly one argument");
+ throw BinderException("to_json() takes exactly one argument");
  }
  return JSONCreateBindParams(bound_function, arguments, false);
  }
@@ -149,14 +149,14 @@ static unique_ptr<FunctionData> ToJSONBind(ClientContext &context, ScalarFunctio
  static unique_ptr<FunctionData> ArrayToJSONBind(ClientContext &context, ScalarFunction &bound_function,
  vector<unique_ptr<Expression>> &arguments) {
  if (arguments.size() != 1) {
- throw InvalidInputException("array_to_json() takes exactly one argument");
+ throw BinderException("array_to_json() takes exactly one argument");
  }
  auto arg_id = arguments[0]->return_type.id();
  if (arguments[0]->HasParameter()) {
  throw ParameterNotResolvedException();
  }
  if (arg_id != LogicalTypeId::LIST && arg_id != LogicalTypeId::SQLNULL) {
- throw InvalidInputException("array_to_json() argument type must be LIST");
+ throw BinderException("array_to_json() argument type must be LIST");
  }
  return JSONCreateBindParams(bound_function, arguments, false);
  }
@@ -164,14 +164,14 @@ static unique_ptr<FunctionData> ArrayToJSONBind(ClientContext &context, ScalarFu
  static unique_ptr<FunctionData> RowToJSONBind(ClientContext &context, ScalarFunction &bound_function,
  vector<unique_ptr<Expression>> &arguments) {
  if (arguments.size() != 1) {
- throw InvalidInputException("row_to_json() takes exactly one argument");
+ throw BinderException("row_to_json() takes exactly one argument");
  }
  auto arg_id = arguments[0]->return_type.id();
  if (arguments[0]->HasParameter()) {
  throw ParameterNotResolvedException();
  }
  if (arguments[0]->return_type.id() != LogicalTypeId::STRUCT && arg_id != LogicalTypeId::SQLNULL) {
- throw InvalidInputException("row_to_json() argument type must be STRUCT");
+ throw BinderException("row_to_json() argument type must be STRUCT");
  }
  return JSONCreateBindParams(bound_function, arguments, false);
  }
package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp CHANGED
@@ -34,13 +34,21 @@ void DeltaLengthByteArrayDecoder::InitializePage() {
  void DeltaLengthByteArrayDecoder::Read(shared_ptr<ResizeableBuffer> &block_ref, uint8_t *defines, idx_t read_count,
  Vector &result, idx_t result_offset) {
  if (defines) {
- ReadInternal<true>(block_ref, defines, read_count, result, result_offset);
+ if (reader.Type().IsJSONType()) {
+ ReadInternal<true, true>(block_ref, defines, read_count, result, result_offset);
+ } else {
+ ReadInternal<true, false>(block_ref, defines, read_count, result, result_offset);
+ }
  } else {
- ReadInternal<false>(block_ref, defines, read_count, result, result_offset);
+ if (reader.Type().IsJSONType()) {
+ ReadInternal<false, true>(block_ref, defines, read_count, result, result_offset);
+ } else {
+ ReadInternal<false, false>(block_ref, defines, read_count, result, result_offset);
+ }
  }
  }

- template <bool HAS_DEFINES>
+ template <bool HAS_DEFINES, bool VALIDATE_INDIVIDUAL_STRINGS>
  void DeltaLengthByteArrayDecoder::ReadInternal(shared_ptr<ResizeableBuffer> &block_ref, uint8_t *const defines,
  const idx_t read_count, Vector &result, const idx_t result_offset) {
  auto &block = *block_ref;
@@ -58,6 +66,8 @@ void DeltaLengthByteArrayDecoder::ReadInternal(shared_ptr<ResizeableBuffer> &blo
  }
  }

+ const auto &string_column_reader = reader.Cast<StringColumnReader>();
+
  const auto start_ptr = block.ptr;
  for (idx_t row_idx = 0; row_idx < read_count; row_idx++) {
  const auto result_idx = result_offset + row_idx;
@@ -75,11 +85,15 @@ void DeltaLengthByteArrayDecoder::ReadInternal(shared_ptr<ResizeableBuffer> &blo
  }
  const auto &str_len = length_data[length_idx++];
  result_data[result_idx] = string_t(char_ptr_cast(block.ptr), str_len);
+ if (VALIDATE_INDIVIDUAL_STRINGS) {
+ string_column_reader.VerifyString(char_ptr_cast(block.ptr), str_len);
+ }
  block.unsafe_inc(str_len);
  }

- // Verify that the strings we read are valid UTF-8
- reader.Cast<StringColumnReader>().VerifyString(char_ptr_cast(start_ptr), block.ptr - start_ptr);
+ if (!VALIDATE_INDIVIDUAL_STRINGS) {
+ string_column_reader.VerifyString(char_ptr_cast(start_ptr), NumericCast<uint32_t>(block.ptr - start_ptr));
+ }

  StringColumnReader::ReferenceBlock(result, block_ref);
  }
package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp CHANGED
@@ -27,7 +27,7 @@ public:
  void Skip(uint8_t *defines, idx_t skip_count);

  private:
- template <bool HAS_DEFINES>
+ template <bool HAS_DEFINES, bool VALIDATE_INDIVIDUAL_STRINGS>
  void ReadInternal(shared_ptr<ResizeableBuffer> &block, uint8_t *defines, idx_t read_count, Vector &result,
  idx_t result_offset);
  template <bool HAS_DEFINES>
package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp CHANGED
@@ -18,7 +18,7 @@ public:
  : buffer_(buffer, buffer_len),
  //<block size in values> <number of miniblocks in a block> <total value count> <first value>
  block_size_in_values(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_)),
- number_of_miniblocks_per_block(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_)),
+ number_of_miniblocks_per_block(DecodeNumberOfMiniblocksPerBlock(buffer_)),
  number_of_values_in_a_miniblock(block_size_in_values / number_of_miniblocks_per_block),
  total_value_count(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_)),
  previous_value(ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_))),
@@ -31,7 +31,7 @@ public:
  number_of_values_in_a_miniblock % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0)) {
  throw InvalidInputException("Parquet file has invalid block sizes for DELTA_BINARY_PACKED");
  }
- };
+ }

  ByteBuffer BufferPtr() const {
  return buffer_;
@@ -68,6 +68,15 @@ public:
  }

  private:
+ static idx_t DecodeNumberOfMiniblocksPerBlock(ByteBuffer &buffer) {
+ auto res = ParquetDecodeUtils::VarintDecode<uint64_t>(buffer);
+ if (res == 0) {
+ throw InvalidInputException(
+ "Parquet file has invalid number of miniblocks per block for DELTA_BINARY_PACKED");
+ }
+ return res;
+ }
+
  template <typename T, bool SKIP_READ = false>
  void GetBatchInternal(const data_ptr_t target_values_ptr, const idx_t batch_size) {
  if (batch_size == 0) {
package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp CHANGED
@@ -14,6 +14,7 @@
  namespace duckdb {

  class StringColumnReader : public ColumnReader {
+ public:
  enum class StringColumnType : uint8_t { VARCHAR, JSON, OTHER };

  static StringColumnType GetStringColumnType(const LogicalType &type) {
@@ -36,7 +37,7 @@

  public:
  static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
- void VerifyString(const char *str_data, uint32_t str_len);
+ void VerifyString(const char *str_data, uint32_t str_len) const;

  static void ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block);

package/src/duckdb/extension/parquet/parquet_reader.cpp CHANGED
@@ -740,7 +740,9 @@ unique_ptr<ParquetColumnSchema> ParquetReader::ParseSchema(ClientContext &contex
  throw InvalidInputException("Root element of Parquet file must be a struct");
  }
  D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1);
- D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size());
+ if (!file_meta_data->row_groups.empty() && next_file_idx != file_meta_data->row_groups[0].columns.size()) {
+ throw InvalidInputException("Parquet reader: row group does not have enough columns");
+ }
  if (parquet_options.file_row_number) {
  for (auto &column : root.children) {
  auto &name = column.name;
package/src/duckdb/extension/parquet/parquet_writer.cpp CHANGED
@@ -12,6 +12,7 @@
  #include "duckdb/common/serializer/write_stream.hpp"
  #include "duckdb/common/string_util.hpp"
  #include "duckdb/function/table_function.hpp"
+ #include "duckdb/main/extension_helper.hpp"
  #include "duckdb/main/client_context.hpp"
  #include "duckdb/main/connection.hpp"
  #include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
@@ -374,6 +375,12 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file

  if (encryption_config) {
  auto &config = DBConfig::GetConfig(context);
+
+ // To ensure we can write, we need to autoload httpfs
+ if (!config.encryption_util || !config.encryption_util->SupportsEncryption()) {
+ ExtensionHelper::TryAutoLoadExtension(context, "httpfs");
+ }
+
  if (config.encryption_util && debug_use_openssl) {
  // Use OpenSSL
  encryption_util = config.encryption_util;
@@ -562,7 +569,7 @@ void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) {
  row_group.__isset.total_compressed_size = true;

  if (encryption_config) {
- auto row_group_ordinal = num_row_groups.load();
+ const auto row_group_ordinal = file_meta_data.row_groups.size();
  if (row_group_ordinal > std::numeric_limits<int16_t>::max()) {
  throw InvalidInputException("RowGroup ordinal exceeds 32767 when encryption enabled");
  }
@@ -583,6 +590,14 @@ void ParquetWriter::Flush(ColumnDataCollection &buffer) {
  return;
  }

+ // "total_written" is only used for the FILE_SIZE_BYTES flag, and only when threads are writing in parallel.
+ // We pre-emptively increase it here to try to reduce overshooting when many threads are writing in parallel.
+ // However, waiting for the exact value (PrepareRowGroup) takes too long, and would cause overshoots to happen.
+ // So, we guess the compression ratio. We guess 3x, but this will be off depending on the data.
+ // "total_written" is restored to the exact number of written bytes at the end of FlushRowGroup.
+ // PhysicalCopyToFile should be reworked to use prepare/flush batch separately for better accuracy.
+ total_written += buffer.SizeInBytes() / 2;
+
  PreparedRowGroup prepared_row_group;
  PrepareRowGroup(buffer, prepared_row_group);
  buffer.Reset();
package/src/duckdb/extension/parquet/reader/string_column_reader.cpp CHANGED
@@ -31,7 +31,7 @@ void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, co
  }
  }

- void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
+ void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) const {
  switch (string_column_type) {
  case StringColumnType::VARCHAR:
  VerifyString(str_data, str_len, true);
package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp CHANGED
@@ -111,7 +111,7 @@ void PrimitiveColumnWriter::BeginWrite(ColumnWriterState &state_p) {
  hdr.type = PageType::DATA_PAGE;
  hdr.__isset.data_page_header = true;

- hdr.data_page_header.num_values = UnsafeNumericCast<int32_t>(page_info.row_count);
+ hdr.data_page_header.num_values = NumericCast<int32_t>(page_info.row_count);
  hdr.data_page_header.encoding = GetEncoding(state);
  hdr.data_page_header.definition_level_encoding = Encoding::RLE;
  hdr.data_page_header.repetition_level_encoding = Encoding::RLE;
package/src/duckdb/src/catalog/default/default_table_functions.cpp CHANGED
@@ -69,7 +69,7 @@ FROM histogram_values(source, col_name, bin_count := bin_count, technique := tec
  {DEFAULT_SCHEMA, "duckdb_logs_parsed", {"log_type"}, {}, R"(
  SELECT * EXCLUDE (message), UNNEST(parse_duckdb_log_message(log_type, message))
  FROM duckdb_logs(denormalized_table=1)
- WHERE type = log_type
+ WHERE type ILIKE log_type
  )"},
  {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}
  };
package/src/duckdb/src/common/adbc/adbc.cpp CHANGED
@@ -537,7 +537,8 @@ static int get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out)
  auto count = duckdb_column_count(&result_wrapper->result);
  std::vector<duckdb_logical_type> types(count);

- std::vector<std::string> owned_names(count);
+ std::vector<std::string> owned_names;
+ owned_names.reserve(count);
  duckdb::vector<const char *> names(count);
  for (idx_t i = 0; i < count; i++) {
  types[i] = duckdb_column_logical_type(&result_wrapper->result, i);
@@ -659,12 +660,12 @@ AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, cons
  std::ostringstream create_table;
  create_table << "CREATE TABLE ";
  if (schema) {
- create_table << schema << ".";
+ create_table << duckdb::KeywordHelper::WriteOptionallyQuoted(schema) << ".";
  }
- create_table << table_name << " (";
+ create_table << duckdb::KeywordHelper::WriteOptionallyQuoted(table_name) << " (";
  for (idx_t i = 0; i < types.size(); i++) {
- create_table << names[i] << " ";
- create_table << types[i].ToString();
+ create_table << duckdb::KeywordHelper::WriteOptionallyQuoted(names[i]);
+ create_table << " " << types[i].ToString();
  if (i + 1 < types.size()) {
  create_table << ", ";
  }
@@ -793,7 +794,8 @@ AdbcStatusCode StatementGetParameterSchema(struct AdbcStatement *statement, stru
  count = 1;
  }
  std::vector<duckdb_logical_type> types(count);
- std::vector<std::string> owned_names(count);
+ std::vector<std::string> owned_names;
+ owned_names.reserve(count);
  duckdb::vector<const char *> names(count);

  for (idx_t i = 0; i < count; i++) {
package/src/duckdb/src/common/csv_writer.cpp CHANGED
@@ -16,7 +16,7 @@ CSVWriterState::CSVWriterState()
  }

  CSVWriterState::CSVWriterState(ClientContext &context, idx_t flush_size_p)
- : flush_size(flush_size_p), stream(make_uniq<MemoryStream>(Allocator::Get(context))) {
+ : flush_size(flush_size_p), stream(make_uniq<MemoryStream>(Allocator::Get(context), flush_size)) {
  }

  CSVWriterState::CSVWriterState(DatabaseInstance &db, idx_t flush_size_p)
@@ -198,18 +198,6 @@ void CSVWriter::ResetInternal(optional_ptr<CSVWriterState> local_state) {
  bytes_written = 0;
  }

- unique_ptr<CSVWriterState> CSVWriter::InitializeLocalWriteState(ClientContext &context, idx_t flush_size) {
- auto res = make_uniq<CSVWriterState>(context, flush_size);
- res->stream = make_uniq<MemoryStream>();
- return res;
- }
-
- unique_ptr<CSVWriterState> CSVWriter::InitializeLocalWriteState(DatabaseInstance &db, idx_t flush_size) {
- auto res = make_uniq<CSVWriterState>(db, flush_size);
- res->stream = make_uniq<MemoryStream>();
- return res;
- }
-
  idx_t CSVWriter::BytesWritten() {
  if (shared) {
  lock_guard<mutex> flock(lock);
package/src/duckdb/src/common/encryption_key_manager.cpp CHANGED
@@ -19,7 +19,8 @@ EncryptionKey::EncryptionKey(data_ptr_t encryption_key_p) {
  D_ASSERT(memcmp(key, encryption_key_p, MainHeader::DEFAULT_ENCRYPTION_KEY_LENGTH) == 0);

  // zero out the encryption key in memory
- memset(encryption_key_p, 0, MainHeader::DEFAULT_ENCRYPTION_KEY_LENGTH);
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(encryption_key_p,
+ MainHeader::DEFAULT_ENCRYPTION_KEY_LENGTH);
  LockEncryptionKey(key);
  }

@@ -37,7 +38,7 @@ void EncryptionKey::LockEncryptionKey(data_ptr_t key, idx_t key_len) {
  }

  void EncryptionKey::UnlockEncryptionKey(data_ptr_t key, idx_t key_len) {
- memset(key, 0, key_len);
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(key, key_len);
  #if defined(_WIN32)
  VirtualUnlock(key, key_len);
  #else
@@ -64,7 +65,8 @@ EncryptionKeyManager &EncryptionKeyManager::Get(DatabaseInstance &db) {

  string EncryptionKeyManager::GenerateRandomKeyID() {
  uint8_t key_id[KEY_ID_BYTES];
- duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::GenerateRandomDataStatic(key_id, KEY_ID_BYTES);
+ RandomEngine engine;
+ engine.RandomData(key_id, KEY_ID_BYTES);
  string key_id_str(reinterpret_cast<const char *>(key_id), KEY_ID_BYTES);
  return key_id_str;
  }
@@ -72,7 +74,7 @@ string EncryptionKeyManager::GenerateRandomKeyID() {
  void EncryptionKeyManager::AddKey(const string &key_name, data_ptr_t key) {
  derived_keys.emplace(key_name, EncryptionKey(key));
  // Zero-out the encryption key
- std::memset(key, 0, DERIVED_KEY_LENGTH);
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(key, DERIVED_KEY_LENGTH);
  }

  bool EncryptionKeyManager::HasKey(const string &key_name) const {
@@ -107,7 +109,7 @@ string EncryptionKeyManager::Base64Decode(const string &key) {
  auto output = duckdb::unique_ptr<unsigned char[]>(new unsigned char[result_size]);
  Blob::FromBase64(key, output.get(), result_size);
  string decoded_key(reinterpret_cast<const char *>(output.get()), result_size);
- memset(output.get(), 0, result_size);
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(output.get(), result_size);
  return decoded_key;
  }

@@ -124,10 +126,9 @@ void EncryptionKeyManager::DeriveKey(string &user_key, data_ptr_t salt, data_ptr

  KeyDerivationFunctionSHA256(reinterpret_cast<const_data_ptr_t>(decoded_key.data()), decoded_key.size(), salt,
  derived_key);
-
- // wipe the original and decoded key
- std::fill(user_key.begin(), user_key.end(), 0);
- std::fill(decoded_key.begin(), decoded_key.end(), 0);
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(data_ptr_cast(&user_key[0]), user_key.size());
+ duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(data_ptr_cast(&decoded_key[0]),
+ decoded_key.size());
  user_key.clear();
  decoded_key.clear();
  }
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -100,6 +100,7 @@
  #include "duckdb/execution/index/art/art_scanner.hpp"
  #include "duckdb/execution/index/art/node.hpp"
  #include "duckdb/execution/index/bound_index.hpp"
+ #include "duckdb/execution/index/unbound_index.hpp"
  #include "duckdb/execution/operator/csv_scanner/csv_option.hpp"
  #include "duckdb/execution/operator/csv_scanner/csv_state.hpp"
  #include "duckdb/execution/reservoir_sample.hpp"
@@ -707,6 +708,24 @@ BlockState EnumUtil::FromString<BlockState>(const char *value) {
  return static_cast<BlockState>(StringUtil::StringToEnum(GetBlockStateValues(), 2, "BlockState", value));
  }

+ const StringUtil::EnumStringLiteral *GetBufferedIndexReplayValues() {
+ static constexpr StringUtil::EnumStringLiteral values[] {
+ { static_cast<uint32_t>(BufferedIndexReplay::INSERT_ENTRY), "INSERT_ENTRY" },
+ { static_cast<uint32_t>(BufferedIndexReplay::DEL_ENTRY), "DEL_ENTRY" }
+ };
+ return values;
+ }
+
+ template<>
+ const char* EnumUtil::ToChars<BufferedIndexReplay>(BufferedIndexReplay value) {
+ return StringUtil::EnumToString(GetBufferedIndexReplayValues(), 2, "BufferedIndexReplay", static_cast<uint32_t>(value));
+ }
+
+ template<>
+ BufferedIndexReplay EnumUtil::FromString<BufferedIndexReplay>(const char *value) {
+ return static_cast<BufferedIndexReplay>(StringUtil::StringToEnum(GetBufferedIndexReplayValues(), 2, "BufferedIndexReplay", value));
+ }
+
  const StringUtil::EnumStringLiteral *GetCAPIResultSetTypeValues() {
  static constexpr StringUtil::EnumStringLiteral values[] {
  { static_cast<uint32_t>(CAPIResultSetType::CAPI_RESULT_TYPE_NONE), "CAPI_RESULT_TYPE_NONE" },