duckdb 0.6.2-dev2115.0 → 0.6.2-dev2226.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -5
  3. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_scan.hpp +7 -0
  6. package/src/duckdb/extension/json/include/json_transform.hpp +25 -10
  7. package/src/duckdb/extension/json/json_common.cpp +6 -2
  8. package/src/duckdb/extension/json/json_functions/json_structure.cpp +47 -9
  9. package/src/duckdb/extension/json/json_functions/json_transform.cpp +183 -106
  10. package/src/duckdb/extension/json/json_functions/read_json.cpp +35 -22
  11. package/src/duckdb/extension/json/json_scan.cpp +26 -5
  12. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  13. package/src/duckdb/src/catalog/catalog.cpp +11 -12
  14. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  15. package/src/duckdb/src/common/box_renderer.cpp +9 -1
  16. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  17. package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
  18. package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
  19. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  20. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -2
  21. package/src/duckdb/src/common/types/column_data_allocator.cpp +2 -2
  22. package/src/duckdb/src/common/types/date.cpp +7 -2
  23. package/src/duckdb/src/common/types/vector.cpp +3 -2
  24. package/src/duckdb/src/common/virtual_file_system.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +5 -5
  26. package/src/duckdb/src/execution/join_hashtable.cpp +4 -5
  27. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +2 -0
  28. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +182 -123
  29. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +22 -18
  30. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +1 -1
  31. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -3
  32. package/src/duckdb/src/function/scalar/math/setseed.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/string/substring.cpp +8 -0
  34. package/src/duckdb/src/function/table/read_csv.cpp +1 -1
  35. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  36. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  37. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +4 -0
  38. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
  39. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/http_stats.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/common/limits.hpp +3 -0
  42. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -9
  43. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  45. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_unnest.hpp +5 -1
  47. package/src/duckdb/src/include/duckdb/main/client_context.hpp +3 -0
  48. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -4
  49. package/src/duckdb/src/include/duckdb/main/database.hpp +6 -0
  50. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  51. package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -1
  52. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +34 -0
  53. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -1
  54. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +2 -1
  55. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  58. package/src/duckdb/src/include/duckdb.h +7 -0
  59. package/src/duckdb/src/main/capi/threading-c.cpp +8 -0
  60. package/src/duckdb/src/main/client_context.cpp +7 -0
  61. package/src/duckdb/src/main/client_context_file_opener.cpp +14 -0
  62. package/src/duckdb/src/main/database.cpp +57 -40
  63. package/src/duckdb/src/main/extension/extension_load.cpp +20 -28
  64. package/src/duckdb/src/main/relation/write_csv_relation.cpp +4 -2
  65. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +37 -0
  66. package/src/duckdb/src/main/relation.cpp +12 -2
  67. package/src/duckdb/src/parallel/executor.cpp +4 -0
  68. package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -1
  69. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +4 -3
  70. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  71. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +24 -3
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  73. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
  74. package/src/duckdb/src/storage/compression/bitpacking.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
  76. package/src/duckdb/src/storage/index.cpp +1 -1
  77. package/src/duckdb/src/storage/meta_block_writer.cpp +1 -1
  78. package/src/duckdb/src/storage/table/column_segment.cpp +3 -3
  79. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1 -2
  80. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +539 -300
  81. package/src/duckdb/ub_src_main.cpp +0 -2
  82. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  83. package/src/duckdb/src/include/duckdb/function/replacement_open.hpp +0 -54
  84. package/src/duckdb/src/include/duckdb/main/replacement_opens.hpp +0 -20
  85. package/src/duckdb/src/main/extension_prefix_opener.cpp +0 -55
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev2115.0",
5
+ "version": "0.6.2-dev2226.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -185,8 +185,7 @@ void BufferedJSONReader::SetBufferLineOrObjectCount(idx_t index, idx_t count) {
185
185
  buffer_line_or_object_counts[index] = count;
186
186
  }
187
187
 
188
- void BufferedJSONReader::ThrowParseError(idx_t buf_index, idx_t line_or_object_in_buf, yyjson_read_err &err,
189
- const string &extra) {
188
+ idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf) {
190
189
  D_ASSERT(options.format == JSONFormat::UNSTRUCTURED || options.format == JSONFormat::NEWLINE_DELIMITED);
191
190
  while (true) {
192
191
  lock_guard<mutex> guard(lock);
@@ -203,13 +202,27 @@ void BufferedJSONReader::ThrowParseError(idx_t buf_index, idx_t line_or_object_i
203
202
  if (!can_throw) {
204
203
  continue;
205
204
  }
206
- string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "object";
207
205
  // SQL uses 1-based indexing so I guess we will do that in our exception here as well
208
- throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s", file_path,
209
- err.pos + 1, unit, line + 1, err.msg, extra);
206
+ return line + 1;
210
207
  }
211
208
  }
212
209
 
210
+ void BufferedJSONReader::ThrowParseError(idx_t buf_index, idx_t line_or_object_in_buf, yyjson_read_err &err,
211
+ const string &extra) {
212
+ string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "object";
213
+ auto line = GetLineNumber(buf_index, line_or_object_in_buf);
214
+ throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s", file_path,
215
+ err.pos + 1, unit, line + 1, err.msg, extra);
216
+ }
217
+
218
+ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf,
219
+ const string &error_message) {
220
+ string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "object";
221
+ auto line = GetLineNumber(buf_index, line_or_object_in_buf);
222
+ throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s", file_path, unit, line,
223
+ error_message);
224
+ }
225
+
213
226
  double BufferedJSONReader::GetProgress() const {
214
227
  if (file_handle) {
215
228
  return 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
@@ -112,12 +112,17 @@ public:
112
112
  idx_t GetBufferIndex();
113
113
  //! Set line count for a buffer that is done (grabs the lock)
114
114
  void SetBufferLineOrObjectCount(idx_t index, idx_t count);
115
- //! Throws an error that mentions the file name and line number
115
+ //! Throws a parse error that mentions the file name and line number
116
116
  void ThrowParseError(idx_t buf_index, idx_t line_or_object_in_buf, yyjson_read_err &err, const string &extra = "");
117
+ //! Throws a transform error that mentions the file name and line number
118
+ void ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf, const string &error_message);
117
119
 
118
120
  double GetProgress() const;
119
121
  void Reset();
120
122
 
123
+ private:
124
+ idx_t GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf);
125
+
121
126
  public:
122
127
  mutex lock;
123
128
 
@@ -216,6 +216,7 @@ public:
216
216
  auto data = WriteVal<YYJSON_VAL_T>(val, alc, len);
217
217
  return string_t(data, len);
218
218
  }
219
+ static string ValToString(yyjson_val *val);
219
220
  //! Throw an error with the printed yyjson_val
220
221
  static void ThrowValFormatError(string error_string, yyjson_val *val);
221
222
 
@@ -97,6 +97,8 @@ public:
97
97
  vector<string> names;
98
98
  //! Max depth we go to detect nested JSON schema (defaults to unlimited)
99
99
  idx_t max_depth = NumericLimits<idx_t>::Maximum();
100
+ //! Whether we're parsing objects (usually), or something else like arrays
101
+ bool objects = true;
100
102
 
101
103
  //! Stored readers for when we're detecting the schema
102
104
  vector<unique_ptr<BufferedJSONReader>> stored_readers;
@@ -167,12 +169,17 @@ public:
167
169
  public:
168
170
  idx_t ReadNext(JSONScanGlobalState &gstate);
169
171
  yyjson_alc *GetAllocator();
172
+ void ThrowTransformError(idx_t count, idx_t object_index, const string &error_message);
170
173
 
171
174
  JSONLine lines[STANDARD_VECTOR_SIZE];
172
175
  yyjson_val *objects[STANDARD_VECTOR_SIZE];
173
176
 
174
177
  idx_t batch_index;
175
178
 
179
+ //! Options when transforming the JSON to columnar data
180
+ DateFormatMap date_format_map;
181
+ JSONTransformOptions transform_options;
182
+
176
183
  private:
177
184
  yyjson_val *ParseLine(char *line_start, idx_t line_size, idx_t remaining, JSONLine &line);
178
185
 
@@ -14,19 +14,32 @@
14
14
  namespace duckdb {
15
15
 
16
16
  struct DateFormatMap;
17
+ class BufferedJSONReader;
17
18
 
19
+ //! Options for error handling while transforming JSON
18
20
  struct JSONTransformOptions {
21
+ public:
22
+ JSONTransformOptions();
23
+ JSONTransformOptions(bool strict_cast, bool error_duplicate_key, bool error_missing_key, bool error_unkown_key);
24
+
19
25
  public:
20
26
  //! Throws an error if the cast doesn't work (instead of NULL-ing it)
21
- bool strict_cast;
27
+ bool strict_cast = false;
22
28
  //! Throws an error if there is a duplicate key (instead of ignoring it)
23
- bool error_duplicate_key;
29
+ bool error_duplicate_key = false;
24
30
  //! Throws an error if a key is missing (instead of NULL-ing it)
25
- bool error_missing_key;
31
+ bool error_missing_key = false;
26
32
  //! Throws an error if an object has a key we didn't know about
27
- bool error_unknown_key;
28
- //! Date format used for parsing
29
- DateFormatMap *date_format_map;
33
+ bool error_unknown_key = false;
34
+
35
+ //! JSON reader for adding line number information to transform errors (can be NULL)
36
+ bool from_file = false;
37
+ //! Date format used for parsing (can be NULL)
38
+ DateFormatMap *date_format_map = nullptr;
39
+ //! String to store errors in
40
+ string error_message;
41
+ //! Index of the object where the error occurred
42
+ idx_t object_index = DConstants::INVALID_INDEX;
30
43
 
31
44
  public:
32
45
  void Serialize(FieldWriter &writer);
@@ -48,10 +61,12 @@ struct TryParseTimeStamp {
48
61
  };
49
62
 
50
63
  struct JSONTransform {
51
- static void TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count, const vector<string> &names,
52
- const vector<Vector *> &result_vectors, const JSONTransformOptions &options);
53
- static void GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target, Vector &string_vector,
54
- const bool strict);
64
+ static bool Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
65
+ JSONTransformOptions &options);
66
+ static bool TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count, const vector<string> &names,
67
+ const vector<Vector *> &result_vectors, JSONTransformOptions &options);
68
+ static bool GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target, Vector &string_vector,
69
+ JSONTransformOptions &options);
55
70
  };
56
71
 
57
72
  } // namespace duckdb
@@ -2,11 +2,15 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- void JSONCommon::ThrowValFormatError(string error_string, yyjson_val *val) {
5
+ string JSONCommon::ValToString(yyjson_val *val) {
6
6
  JSONAllocator json_allocator(Allocator::DefaultAllocator());
7
7
  idx_t len;
8
8
  auto data = JSONCommon::WriteVal<yyjson_val>(val, json_allocator.GetYYJSONAllocator(), len);
9
- error_string = StringUtil::Format(error_string, string(data, len));
9
+ return string(data, len);
10
+ }
11
+
12
+ void JSONCommon::ThrowValFormatError(string error_string, yyjson_val *val) {
13
+ error_string = StringUtil::Format(error_string, JSONCommon::ValToString(val));
10
14
  throw InvalidInputException(error_string);
11
15
  }
12
16
 
@@ -28,10 +28,27 @@ JSONStructureNode::JSONStructureNode(yyjson_val *key_p, yyjson_val *val_p)
28
28
  }
29
29
 
30
30
  JSONStructureDescription &JSONStructureNode::GetOrCreateDescription(LogicalTypeId type) {
31
+ if (descriptions.empty()) {
32
+ // Empty, just put this type in there
33
+ descriptions.emplace_back(type);
34
+ return descriptions.back();
35
+ }
36
+
37
+ if (descriptions.size() == 1 && descriptions[0].type == LogicalTypeId::SQLNULL) {
38
+ // Only a NULL in there, override
39
+ descriptions[0].type = type;
40
+ return descriptions[0];
41
+ }
42
+
43
+ if (type == LogicalTypeId::SQLNULL) {
44
+ // 'descriptions' is non-empty, so let's not add NULL
45
+ return descriptions.back();
46
+ }
47
+
31
48
  // Check if type is already in there or if we can merge numerics
32
49
  const auto is_numeric = IsNumeric(type);
33
50
  for (auto &description : descriptions) {
34
- if (type == LogicalTypeId::SQLNULL || type == description.type) {
51
+ if (type == description.type) {
35
52
  return description;
36
53
  } else if (is_numeric && IsNumeric(description.type)) {
37
54
  description.type = MaxNumericType(type, description.type);
@@ -111,19 +128,19 @@ void JSONStructureNode::RefineCandidateTypesArray(yyjson_val *vals[], idx_t coun
111
128
 
112
129
  idx_t total_list_size = 0;
113
130
  for (idx_t i = 0; i < count; i++) {
114
- if (vals[i] && !yyjson_is_null(vals[i])) {
131
+ if (vals[i] && !unsafe_yyjson_is_null(vals[i])) {
115
132
  D_ASSERT(yyjson_is_arr(vals[i]));
116
133
  total_list_size += unsafe_yyjson_get_len(vals[i]);
117
134
  }
118
135
  }
119
136
 
120
137
  idx_t offset = 0;
121
- auto child_vals = (yyjson_val **)allocator.Allocate(total_list_size * sizeof(yyjson_val *));
138
+ auto child_vals = (yyjson_val **)allocator.AllocateAligned(total_list_size * sizeof(yyjson_val *));
122
139
 
123
140
  size_t idx, max;
124
141
  yyjson_val *child_val;
125
142
  for (idx_t i = 0; i < count; i++) {
126
- if (vals[i] && !yyjson_is_null(vals[i])) {
143
+ if (vals[i] && !unsafe_yyjson_is_null(vals[i])) {
127
144
  yyjson_arr_foreach(vals[i], idx, max, child_val) {
128
145
  child_vals[offset++] = child_val;
129
146
  }
@@ -141,14 +158,20 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
141
158
  vector<yyjson_val **> child_vals;
142
159
  child_vals.reserve(child_count);
143
160
  for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
144
- child_vals.emplace_back((yyjson_val **)allocator.Allocate(count * sizeof(yyjson_val *)));
161
+ child_vals.emplace_back((yyjson_val **)allocator.AllocateAligned(count * sizeof(yyjson_val *)));
145
162
  }
146
163
 
164
+ idx_t found_key_count;
165
+ auto found_keys = (bool *)allocator.AllocateAligned(sizeof(bool) * child_count);
166
+
147
167
  const auto &key_map = desc.key_map;
148
168
  size_t idx, max;
149
169
  yyjson_val *child_key, *child_val;
150
170
  for (idx_t i = 0; i < count; i++) {
151
- if (vals[i] && !yyjson_is_null(vals[i])) {
171
+ if (vals[i] && !unsafe_yyjson_is_null(vals[i])) {
172
+ found_key_count = 0;
173
+ memset(found_keys, false, child_count);
174
+
152
175
  D_ASSERT(yyjson_is_obj(vals[i]));
153
176
  yyjson_obj_foreach(vals[i], idx, max, child_key, child_val) {
154
177
  D_ASSERT(yyjson_is_str(child_key));
@@ -156,7 +179,19 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
156
179
  auto key_len = unsafe_yyjson_get_len(child_key);
157
180
  auto it = key_map.find({key_ptr, key_len});
158
181
  D_ASSERT(it != key_map.end());
159
- child_vals[it->second][i] = child_val;
182
+ const auto child_idx = it->second;
183
+ child_vals[child_idx][i] = child_val;
184
+ found_keys[child_idx] = true;
185
+ found_key_count++;
186
+ }
187
+
188
+ if (found_key_count != child_count) {
189
+ // Set child val to nullptr so recursion doesn't break
190
+ for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
191
+ if (!found_keys[child_idx]) {
192
+ child_vals[child_idx][i] = nullptr;
193
+ }
194
+ }
160
195
  }
161
196
  } else {
162
197
  for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
@@ -180,7 +215,8 @@ void JSONStructureNode::RefineCandidateTypesString(yyjson_val *vals[], idx_t cou
180
215
  if (descriptions[0].candidate_types.empty()) {
181
216
  return;
182
217
  }
183
- JSONTransform::GetStringVector(vals, count, LogicalType::SQLNULL, string_vector, false);
218
+ static JSONTransformOptions OPTIONS;
219
+ JSONTransform::GetStringVector(vals, count, LogicalType::SQLNULL, string_vector, OPTIONS);
184
220
  EliminateCandidateTypes(count, string_vector, date_format_map);
185
221
  }
186
222
 
@@ -203,7 +239,7 @@ void JSONStructureNode::EliminateCandidateTypes(idx_t count, Vector &string_vect
203
239
  }
204
240
  } else {
205
241
  string error_message;
206
- if (!VectorOperations::DefaultTryCast(string_vector, result_vector, count, &error_message)) {
242
+ if (!VectorOperations::DefaultTryCast(string_vector, result_vector, count, &error_message, true)) {
207
243
  candidate_types.pop_back();
208
244
  } else {
209
245
  return;
@@ -467,6 +503,8 @@ LogicalType JSONStructure::StructureToType(ClientContext &context, const JSONStr
467
503
  return StructureToTypeObject(context, node, max_depth, depth);
468
504
  case LogicalTypeId::VARCHAR:
469
505
  return StructureToTypeString(node);
506
+ case LogicalTypeId::SQLNULL:
507
+ return LogicalTypeId::INTEGER;
470
508
  default:
471
509
  return desc.type;
472
510
  }