duckdb 0.8.2-dev5120.0 → 0.8.2-dev5216.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-dateadd.cpp +11 -19
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +44 -53
  4. package/src/duckdb/extension/icu/icu-datesub.cpp +10 -15
  5. package/src/duckdb/extension/icu/icu-datetrunc.cpp +6 -8
  6. package/src/duckdb/extension/icu/icu-list-range.cpp +6 -8
  7. package/src/duckdb/extension/icu/icu-makedate.cpp +8 -10
  8. package/src/duckdb/extension/icu/icu-strptime.cpp +30 -32
  9. package/src/duckdb/extension/icu/icu-table-range.cpp +6 -9
  10. package/src/duckdb/extension/icu/icu-timebucket.cpp +5 -7
  11. package/src/duckdb/extension/icu/icu-timezone.cpp +18 -29
  12. package/src/duckdb/extension/icu/icu_extension.cpp +18 -25
  13. package/src/duckdb/extension/icu/include/icu-dateadd.hpp +1 -1
  14. package/src/duckdb/extension/icu/include/icu-datepart.hpp +1 -1
  15. package/src/duckdb/extension/icu/include/icu-datesub.hpp +1 -1
  16. package/src/duckdb/extension/icu/include/icu-datetrunc.hpp +1 -1
  17. package/src/duckdb/extension/icu/include/icu-list-range.hpp +1 -1
  18. package/src/duckdb/extension/icu/include/icu-makedate.hpp +1 -1
  19. package/src/duckdb/extension/icu/include/icu-strptime.hpp +1 -1
  20. package/src/duckdb/extension/icu/include/icu-table-range.hpp +1 -1
  21. package/src/duckdb/extension/icu/include/icu-timebucket.hpp +1 -1
  22. package/src/duckdb/extension/icu/include/icu-timezone.hpp +1 -1
  23. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -2
  24. package/src/duckdb/extension/json/json_functions/read_json.cpp +15 -0
  25. package/src/duckdb/src/catalog/catalog.cpp +6 -1
  26. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +14 -0
  27. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +22 -22
  28. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +4 -1
  29. package/src/duckdb/src/function/table/read_csv.cpp +3 -1
  30. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  31. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +34 -0
  32. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +14 -0
  33. package/src/duckdb/src/include/duckdb/main/settings.hpp +1 -1
  34. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
  35. package/src/duckdb/src/main/extension/extension_util.cpp +56 -0
  36. package/src/duckdb/src/planner/binder.cpp +5 -1
  37. package/src/duckdb/src/storage/checkpoint_manager.cpp +162 -138
  38. package/src/duckdb/src/storage/storage_info.cpp +1 -1
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/common/types/time.hpp"
3
3
  #include "duckdb/common/types/timestamp.hpp"
4
4
  #include "duckdb/function/cast/cast_function_set.hpp"
5
+ #include "duckdb/main/extension_util.hpp"
5
6
  #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
6
7
  #include "duckdb/parser/parsed_data/create_table_function_info.hpp"
7
8
  #include "include/icu-datefunc.hpp"
@@ -143,8 +144,8 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc {
143
144
  return BoundCastInfo(CastFromNaive, std::move(cast_data));
144
145
  }
145
146
 
146
- static void AddCasts(ClientContext &context) {
147
- auto &config = DBConfig::GetConfig(context);
147
+ static void AddCasts(DatabaseInstance &db) {
148
+ auto &config = DBConfig::GetConfig(db);
148
149
  auto &casts = config.GetCastFunctions();
149
150
 
150
151
  casts.RegisterCastFunction(LogicalType::TIMESTAMP, LogicalType::TIMESTAMP_TZ, BindCastFromNaive);
@@ -206,8 +207,8 @@ struct ICUToNaiveTimestamp : public ICUDateFunc {
206
207
  return BoundCastInfo(CastToNaive, std::move(cast_data));
207
208
  }
208
209
 
209
- static void AddCasts(ClientContext &context) {
210
- auto &config = DBConfig::GetConfig(context);
210
+ static void AddCasts(DatabaseInstance &db) {
211
+ auto &config = DBConfig::GetConfig(db);
211
212
  auto &casts = config.GetCastFunctions();
212
213
 
213
214
  casts.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP, BindCastToNaive);
@@ -262,18 +263,14 @@ struct ICULocalTimestampFunc : public ICUDateFunc {
262
263
  rdata[0] = GetLocalTimestamp(state);
263
264
  }
264
265
 
265
- static void AddFunction(const string &name, ClientContext &context) {
266
+ static void AddFunction(const string &name, DatabaseInstance &db) {
266
267
  ScalarFunctionSet set(name);
267
268
  set.AddFunction(ScalarFunction({}, LogicalType::TIMESTAMP, Execute, BindNow));
268
-
269
- CreateScalarFunctionInfo func_info(set);
270
- auto &catalog = Catalog::GetSystemCatalog(context);
271
- catalog.AddFunction(context, func_info);
269
+ ExtensionUtil::RegisterFunction(db, set);
272
270
  }
273
271
  };
274
272
 
275
273
  struct ICULocalTimeFunc : public ICUDateFunc {
276
-
277
274
  static void Execute(DataChunk &input, ExpressionState &state, Vector &result) {
278
275
  D_ASSERT(input.ColumnCount() == 0);
279
276
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
@@ -282,13 +279,10 @@ struct ICULocalTimeFunc : public ICUDateFunc {
282
279
  rdata[0] = Timestamp::GetTime(local);
283
280
  }
284
281
 
285
- static void AddFunction(const string &name, ClientContext &context) {
282
+ static void AddFunction(const string &name, DatabaseInstance &db) {
286
283
  ScalarFunctionSet set(name);
287
284
  set.AddFunction(ScalarFunction({}, LogicalType::TIME, Execute, ICULocalTimestampFunc::BindNow));
288
-
289
- CreateScalarFunctionInfo func_info(set);
290
- auto &catalog = Catalog::GetSystemCatalog(context);
291
- catalog.AddFunction(context, func_info);
285
+ ExtensionUtil::RegisterFunction(db, set);
292
286
  }
293
287
  };
294
288
 
@@ -326,16 +320,13 @@ struct ICUTimeZoneFunc : public ICUDateFunc {
326
320
  }
327
321
  }
328
322
 
329
- static void AddFunction(const string &name, ClientContext &context) {
323
+ static void AddFunction(const string &name, DatabaseInstance &db) {
330
324
  ScalarFunctionSet set(name);
331
325
  set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP_TZ,
332
326
  Execute<ICUFromNaiveTimestamp>, Bind));
333
327
  set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ}, LogicalType::TIMESTAMP,
334
328
  Execute<ICUToNaiveTimestamp>, Bind));
335
-
336
- CreateScalarFunctionInfo func_info(set);
337
- auto &catalog = Catalog::GetSystemCatalog(context);
338
- catalog.AddFunction(context, func_info);
329
+ ExtensionUtil::AddFunctionOverload(db, set);
339
330
  }
340
331
  };
341
332
 
@@ -343,21 +334,19 @@ timestamp_t ICUDateFunc::FromNaive(icu::Calendar *calendar, timestamp_t naive) {
343
334
  return ICUFromNaiveTimestamp::Operation(calendar, naive);
344
335
  }
345
336
 
346
- void RegisterICUTimeZoneFunctions(ClientContext &context) {
337
+ void RegisterICUTimeZoneFunctions(DatabaseInstance &db) {
347
338
  // Table functions
348
- auto &catalog = Catalog::GetSystemCatalog(context);
349
339
  TableFunction tz_names("pg_timezone_names", {}, ICUTimeZoneFunction, ICUTimeZoneBind, ICUTimeZoneInit);
350
- CreateTableFunctionInfo tz_names_info(std::move(tz_names));
351
- catalog.CreateTableFunction(context, tz_names_info);
340
+ ExtensionUtil::RegisterFunction(db, tz_names);
352
341
 
353
342
  // Scalar functions
354
- ICUTimeZoneFunc::AddFunction("timezone", context);
355
- ICULocalTimestampFunc::AddFunction("current_localtimestamp", context);
356
- ICULocalTimeFunc::AddFunction("current_localtime", context);
343
+ ICUTimeZoneFunc::AddFunction("timezone", db);
344
+ ICULocalTimestampFunc::AddFunction("current_localtimestamp", db);
345
+ ICULocalTimeFunc::AddFunction("current_localtime", db);
357
346
 
358
347
  // Casts
359
- ICUFromNaiveTimestamp::AddCasts(context);
360
- ICUToNaiveTimestamp::AddCasts(context);
348
+ ICUFromNaiveTimestamp::AddCasts(db);
349
+ ICUToNaiveTimestamp::AddCasts(db);
361
350
  }
362
351
 
363
352
  } // namespace duckdb
@@ -8,6 +8,7 @@
8
8
  #include "duckdb/main/config.hpp"
9
9
  #include "duckdb/main/connection.hpp"
10
10
  #include "duckdb/main/database.hpp"
11
+ #include "duckdb/main/extension_util.hpp"
11
12
  #include "duckdb/parser/parsed_data/create_collation_info.hpp"
12
13
  #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
13
14
  #include "duckdb/parser/parsed_data/create_table_function_info.hpp"
@@ -220,11 +221,9 @@ static void SetICUCalendar(ClientContext &context, SetScope scope, Value &parame
220
221
  }
221
222
  }
222
223
 
223
- void IcuExtension::Load(DuckDB &db) {
224
- Connection con(db);
225
- con.BeginTransaction();
226
-
227
- auto &catalog = Catalog::GetSystemCatalog(*con.context);
224
+ void IcuExtension::Load(DuckDB &ddb) {
225
+ auto &db = *ddb.instance;
226
+ auto &catalog = Catalog::GetSystemCatalog(db);
228
227
 
229
228
  // iterate over all the collations
230
229
  int32_t count;
@@ -241,17 +240,14 @@ void IcuExtension::Load(DuckDB &db) {
241
240
  collation = StringUtil::Lower(collation);
242
241
 
243
242
  CreateCollationInfo info(collation, GetICUFunction(collation), false, true);
244
- info.on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
245
- catalog.CreateCollation(*con.context, info);
243
+ ExtensionUtil::RegisterCollation(db, info);
246
244
  }
247
245
  ScalarFunction sort_key("icu_sort_key", {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
248
246
  ICUCollateFunction, ICUSortKeyBind);
249
-
250
- CreateScalarFunctionInfo sort_key_info(std::move(sort_key));
251
- catalog.CreateFunction(*con.context, sort_key_info);
247
+ ExtensionUtil::RegisterFunction(db, sort_key);
252
248
 
253
249
  // Time Zones
254
- auto &config = DBConfig::GetConfig(*db.instance);
250
+ auto &config = DBConfig::GetConfig(db);
255
251
  duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createDefault());
256
252
  icu::UnicodeString tz_id;
257
253
  std::string tz_string;
@@ -259,16 +255,16 @@ void IcuExtension::Load(DuckDB &db) {
259
255
  config.AddExtensionOption("TimeZone", "The current time zone", LogicalType::VARCHAR, Value(tz_string),
260
256
  SetICUTimeZone);
261
257
 
262
- RegisterICUDateAddFunctions(*con.context);
263
- RegisterICUDatePartFunctions(*con.context);
264
- RegisterICUDateSubFunctions(*con.context);
265
- RegisterICUDateTruncFunctions(*con.context);
266
- RegisterICUMakeDateFunctions(*con.context);
267
- RegisterICUTableRangeFunctions(*con.context);
268
- RegisterICUListRangeFunctions(*con.context);
269
- RegisterICUStrptimeFunctions(*con.context);
270
- RegisterICUTimeBucketFunctions(*con.context);
271
- RegisterICUTimeZoneFunctions(*con.context);
258
+ RegisterICUDateAddFunctions(db);
259
+ RegisterICUDatePartFunctions(db);
260
+ RegisterICUDateSubFunctions(db);
261
+ RegisterICUDateTruncFunctions(db);
262
+ RegisterICUMakeDateFunctions(db);
263
+ RegisterICUTableRangeFunctions(db);
264
+ RegisterICUListRangeFunctions(db);
265
+ RegisterICUStrptimeFunctions(db);
266
+ RegisterICUTimeBucketFunctions(db);
267
+ RegisterICUTimeZoneFunctions(db);
272
268
 
273
269
  // Calendars
274
270
  UErrorCode status = U_ZERO_ERROR;
@@ -277,10 +273,7 @@ void IcuExtension::Load(DuckDB &db) {
277
273
  SetICUCalendar);
278
274
 
279
275
  TableFunction cal_names("icu_calendar_names", {}, ICUCalendarFunction, ICUCalendarBind, ICUCalendarInit);
280
- CreateTableFunctionInfo cal_names_info(std::move(cal_names));
281
- catalog.CreateTableFunction(*con.context, cal_names_info);
282
-
283
- con.Commit();
276
+ ExtensionUtil::RegisterFunction(db, cal_names);
284
277
  }
285
278
 
286
279
  std::string IcuExtension::Name() {
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUDateAddFunctions(ClientContext &context);
15
+ void RegisterICUDateAddFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUDatePartFunctions(ClientContext &context);
15
+ void RegisterICUDatePartFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUDateSubFunctions(ClientContext &context);
15
+ void RegisterICUDateSubFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUDateTruncFunctions(ClientContext &context);
15
+ void RegisterICUDateTruncFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUListRangeFunctions(ClientContext &context);
15
+ void RegisterICUListRangeFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUMakeDateFunctions(ClientContext &context);
15
+ void RegisterICUMakeDateFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUStrptimeFunctions(ClientContext &context);
15
+ void RegisterICUStrptimeFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUTableRangeFunctions(ClientContext &context);
15
+ void RegisterICUTableRangeFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUTimeBucketFunctions(ClientContext &context);
15
+ void RegisterICUTimeBucketFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -12,6 +12,6 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
- void RegisterICUTimeZoneFunctions(ClientContext &context);
15
+ void RegisterICUTimeZoneFunctions(DatabaseInstance &db);
16
16
 
17
17
  } // namespace duckdb
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
23
23
  }
24
24
 
25
25
  void JSONFileHandle::Close() {
26
- if (IsOpen() && plain_file_source) {
26
+ if (IsOpen() && file_handle->OnDiskFile()) {
27
27
  file_handle->Close();
28
28
  file_handle = nullptr;
29
29
  }
@@ -34,7 +34,7 @@ void JSONFileHandle::Reset() {
34
34
  read_position = 0;
35
35
  requested_reads = 0;
36
36
  actual_reads = 0;
37
- if (IsOpen() && plain_file_source) {
37
+ if (IsOpen() && CanSeek()) {
38
38
  file_handle->Reset();
39
39
  }
40
40
  }
@@ -229,6 +229,21 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
229
229
  transform_options.error_unknown_key = bind_data->auto_detect && !bind_data->ignore_errors;
230
230
  transform_options.delay_error = true;
231
231
 
232
+ if (bind_data->auto_detect) {
233
+ // JSON may contain columns such as "id" and "Id", which are duplicates for us due to case-insensitivity
234
+ // We rename them so we can parse the file anyway. Note that we can't change bind_data->names,
235
+ // because the JSON reader gets columns by exact name, not position
236
+ case_insensitive_map_t<idx_t> name_count_map;
237
+ for (auto &name : names) {
238
+ auto it = name_count_map.find(name);
239
+ if (it == name_count_map.end()) {
240
+ name_count_map[name] = 1;
241
+ } else {
242
+ name = StringUtil::Format("%s_%llu", name, it->second++);
243
+ }
244
+ }
245
+ }
246
+
232
247
  return std::move(bind_data);
233
248
  }
234
249
 
@@ -432,7 +432,8 @@ void FindMinimalQualification(ClientContext &context, const string &catalog_name
432
432
  qualify_schema = true;
433
433
  }
434
434
 
435
- bool Catalog::TryAutoLoad(ClientContext &context, const string &extension_name) noexcept {
435
+ bool Catalog::TryAutoLoad(ClientContext &context, const string &original_name) noexcept {
436
+ string extension_name = ExtensionHelper::ApplyExtensionAlias(original_name);
436
437
  if (context.db->ExtensionIsLoaded(extension_name)) {
437
438
  return true;
438
439
  }
@@ -479,6 +480,8 @@ bool Catalog::AutoLoadExtensionByCatalogEntry(ClientContext &context, CatalogTyp
479
480
  extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_COPY_FUNCTIONS);
480
481
  } else if (type == CatalogType::TYPE_ENTRY) {
481
482
  extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_TYPES);
483
+ } else if (type == CatalogType::COLLATION_ENTRY) {
484
+ extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_COLLATIONS);
482
485
  }
483
486
 
484
487
  if (!extension_name.empty() && ExtensionHelper::CanAutoloadExtension(extension_name)) {
@@ -536,6 +539,8 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
536
539
  extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_TYPES);
537
540
  } else if (type == CatalogType::COPY_FUNCTION_ENTRY) {
538
541
  extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_COPY_FUNCTIONS);
542
+ } else if (type == CatalogType::COLLATION_ENTRY) {
543
+ extension_name = ExtensionHelper::FindExtensionInEntries(entry_name, EXTENSION_COLLATIONS);
539
544
  }
540
545
 
541
546
  // if we found an extension that can handle this catalog entry, create an error hinting the user
@@ -333,9 +333,23 @@ normal : {
333
333
  for (; position_buffer < end_buffer; position_buffer++) {
334
334
  auto c = (*buffer)[position_buffer];
335
335
  if (c == options.dialect_options.state_machine_options.delimiter) {
336
+ // Check if previous character is a quote, if yes, this means we are in a non-initialized quoted value
337
+ // This only matters for when trying to figure out where csv lines start
338
+ if (position_buffer > 0 && try_add_line) {
339
+ if ((*buffer)[position_buffer - 1] == options.dialect_options.state_machine_options.quote) {
340
+ return false;
341
+ }
342
+ }
336
343
  // delimiter: end the value and add it to the chunk
337
344
  goto add_value;
338
345
  } else if (StringUtil::CharacterIsNewline(c)) {
346
+ // Check if previous character is a quote, if yes, this means we are in a non-initialized quoted value
347
+ // This only matters for when trying to figure out where csv lines start
348
+ if (position_buffer > 0 && try_add_line) {
349
+ if ((*buffer)[position_buffer - 1] == options.dialect_options.state_machine_options.quote) {
350
+ return false;
351
+ }
352
+ }
339
353
  // newline: add row
340
354
  if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
341
355
  goto add_row;
@@ -290,37 +290,37 @@ void CSVSniffer::DetectTypes() {
290
290
  vector<TupleSniffing> tuples(STANDARD_VECTOR_SIZE);
291
291
  candidate->csv_buffer_iterator.Process<SniffValue>(*candidate, tuples);
292
292
  // Potentially Skip empty rows (I find this dirty, but it is what the original code does)
293
- idx_t true_start = 0;
293
+ // The true line where parsing starts in reference to the csv file
294
+ idx_t true_line_start = 0;
294
295
  idx_t true_pos = 0;
295
- idx_t values_start = 0;
296
- while (true_start < tuples.size()) {
297
- if (tuples[true_start].values.empty() ||
298
- (tuples[true_start].values.size() == 1 && tuples[true_start].values[0].IsNull())) {
299
- true_start = tuples[true_start].line_number;
300
- if (true_start < tuples.size()) {
301
- true_pos = tuples[true_start].position;
302
- }
303
- values_start++;
296
+ // The start point of the tuples
297
+ idx_t tuple_true_start = 0;
298
+ while (tuple_true_start < tuples.size()) {
299
+ if (tuples[tuple_true_start].values.empty() ||
300
+ (tuples[tuple_true_start].values.size() == 1 && tuples[tuple_true_start].values[0].IsNull())) {
301
+ true_line_start = tuples[tuple_true_start].line_number;
302
+ true_pos = tuples[tuple_true_start].position;
303
+ tuple_true_start++;
304
304
  } else {
305
305
  break;
306
306
  }
307
307
  }
308
308
 
309
309
  // Potentially Skip Notes (I also find this dirty, but it is what the original code does)
310
- while (true_start < tuples.size()) {
311
- if (tuples[true_start].values.size() < max_columns_found && !options.null_padding) {
312
-
313
- true_start = tuples[true_start].line_number;
314
- if (true_start < tuples.size()) {
315
- true_pos = tuples[true_start].position;
316
- }
317
- values_start++;
310
+ while (tuple_true_start < tuples.size()) {
311
+ if (tuples[tuple_true_start].values.size() < max_columns_found && !options.null_padding) {
312
+ true_line_start = tuples[tuple_true_start].line_number;
313
+ true_pos = tuples[tuple_true_start].position;
314
+ tuple_true_start++;
318
315
  } else {
319
316
  break;
320
317
  }
321
318
  }
322
- if (values_start > 0) {
323
- tuples.erase(tuples.begin(), tuples.begin() + values_start);
319
+ if (tuple_true_start < tuples.size()) {
320
+ true_pos = tuples[tuple_true_start].position;
321
+ }
322
+ if (tuple_true_start > 0) {
323
+ tuples.erase(tuples.begin(), tuples.begin() + tuple_true_start);
324
324
  }
325
325
 
326
326
  idx_t row_idx = 0;
@@ -390,9 +390,9 @@ void CSVSniffer::DetectTypes() {
390
390
  // it's good if the dialect creates more non-varchar columns, but only if we sacrifice < 30% of best_num_cols.
391
391
  if (varchar_cols < min_varchar_cols && info_sql_types_candidates.size() > (max_columns_found * 0.7)) {
392
392
  // we have a new best_options candidate
393
- if (true_start > 0) {
393
+ if (true_line_start > 0) {
394
394
  // Add empty rows to skip_rows
395
- candidate->dialect_options.skip_rows += true_start;
395
+ candidate->dialect_options.skip_rows += true_line_start;
396
396
  }
397
397
  best_candidate = std::move(candidate);
398
398
  min_varchar_cols = varchar_cols;
@@ -56,7 +56,10 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
56
56
 
57
57
  // if we are loading a database type from an extension - check if that extension is loaded
58
58
  if (!type.empty()) {
59
- if (!db.ExtensionIsLoaded(type)) {
59
+ if (!Catalog::TryAutoLoad(context.client, type)) {
60
+ // FIXME: Here it might be preferable to use an AutoLoadOrThrow kind of function
61
+ // so that either there will be success or a message to throw, and load will be
62
+ // attempted only once respecting the autoloading options
60
63
  ExtensionHelper::LoadExternalExtension(context.client, type);
61
64
  }
62
65
  }
@@ -178,7 +178,8 @@ public:
178
178
  current_file_path = files_path_p[0];
179
179
  CSVFileHandle *file_handle_ptr;
180
180
 
181
- if (!buffer_manager || (options.skip_rows_set && options.dialect_options.skip_rows > 0)) {
181
+ if (!buffer_manager || (options.skip_rows_set && options.dialect_options.skip_rows > 0) ||
182
+ buffer_manager->file_handle->GetFilePath() != current_file_path) {
182
183
  // If our buffers are too small, and we skip too many rows there is a chance things will go over-buffer
183
184
  // for now don't reuse the buffer manager
184
185
  buffer_manager.reset();
@@ -210,6 +211,7 @@ public:
210
211
  line_info.lines_read[0][0]++;
211
212
  }
212
213
  first_position = options.dialect_options.true_start;
214
+ next_byte = options.dialect_options.true_start;
213
215
  }
214
216
  explicit ParallelCSVGlobalState(idx_t system_threads_p)
215
217
  : system_threads(system_threads_p), line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev5120"
2
+ #define DUCKDB_VERSION "0.8.2-dev5216"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "fc2e4b26a6"
5
+ #define DUCKDB_SOURCE_ID "7ffdb9fd0e"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -117,8 +117,10 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
117
117
  {"st_dwithin", "spatial"},
118
118
  {"st_dwithin_spheroid", "spatial"},
119
119
  {"st_envelope", "spatial"},
120
+ {"st_envelope_agg", "spatial"},
120
121
  {"st_equals", "spatial"},
121
122
  {"st_extent", "spatial"},
123
+ {"st_exteriorring", "spatial"},
122
124
  {"st_flipcoordinates", "spatial"},
123
125
  {"st_geometrytype", "spatial"},
124
126
  {"st_geomfromgeojson", "spatial"},
@@ -129,6 +131,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
129
131
  {"st_intersection", "spatial"},
130
132
  {"st_intersection_agg", "spatial"},
131
133
  {"st_intersects", "spatial"},
134
+ {"st_intersects_extent", "spatial"},
132
135
  {"st_isclosed", "spatial"},
133
136
  {"st_isempty", "spatial"},
134
137
  {"st_isring", "spatial"},
@@ -139,8 +142,12 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
139
142
  {"st_linestring2dfromwkb", "spatial"},
140
143
  {"st_list_proj_crs", "spatial"},
141
144
  {"st_makeline", "spatial"},
145
+ {"st_ngeometries", "spatial"},
146
+ {"st_ninteriorrings", "spatial"},
142
147
  {"st_normalize", "spatial"},
143
148
  {"st_npoints", "spatial"},
149
+ {"st_numgeometries", "spatial"},
150
+ {"st_numinteriorrings", "spatial"},
144
151
  {"st_numpoints", "spatial"},
145
152
  {"st_overlaps", "spatial"},
146
153
  {"st_perimeter", "spatial"},
@@ -150,6 +157,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
150
157
  {"st_point2dfromwkb", "spatial"},
151
158
  {"st_point3d", "spatial"},
152
159
  {"st_point4d", "spatial"},
160
+ {"st_pointn", "spatial"},
153
161
  {"st_pointonsurface", "spatial"},
154
162
  {"st_polygon2dfromwkb", "spatial"},
155
163
  {"st_read", "spatial"},
@@ -218,6 +226,32 @@ static constexpr ExtensionEntry EXTENSION_COPY_FUNCTIONS[] = {{"parquet", "parqu
218
226
  static constexpr ExtensionEntry EXTENSION_TYPES[] = {
219
227
  {"json", "json"}, {"inet", "inet"}, {"geometry", "spatial"}}; // END_OF_EXTENSION_TYPES
220
228
 
229
+ // Note: these are currently hardcoded in scripts/generate_extensions_function.py
230
+ // TODO: automate by passing through to script via duckdb
231
+ static constexpr ExtensionEntry EXTENSION_COLLATIONS[] = {
232
+ {"af", "icu"}, {"am", "icu"}, {"ar", "icu"}, {"ar_sa", "icu"}, {"as", "icu"}, {"az", "icu"},
233
+ {"be", "icu"}, {"bg", "icu"}, {"bn", "icu"}, {"bo", "icu"}, {"br", "icu"}, {"bs", "icu"},
234
+ {"ca", "icu"}, {"ceb", "icu"}, {"chr", "icu"}, {"cs", "icu"}, {"cy", "icu"}, {"da", "icu"},
235
+ {"de", "icu"}, {"de_at", "icu"}, {"dsb", "icu"}, {"dz", "icu"}, {"ee", "icu"}, {"el", "icu"},
236
+ {"en", "icu"}, {"en_us", "icu"}, {"eo", "icu"}, {"es", "icu"}, {"et", "icu"}, {"fa", "icu"},
237
+ {"fa_af", "icu"}, {"ff", "icu"}, {"fi", "icu"}, {"fil", "icu"}, {"fo", "icu"}, {"fr", "icu"},
238
+ {"fr_ca", "icu"}, {"fy", "icu"}, {"ga", "icu"}, {"gl", "icu"}, {"gu", "icu"}, {"ha", "icu"},
239
+ {"haw", "icu"}, {"he", "icu"}, {"he_il", "icu"}, {"hi", "icu"}, {"hr", "icu"}, {"hsb", "icu"},
240
+ {"hu", "icu"}, {"hy", "icu"}, {"id", "icu"}, {"id_id", "icu"}, {"ig", "icu"}, {"is", "icu"},
241
+ {"it", "icu"}, {"ja", "icu"}, {"ka", "icu"}, {"kk", "icu"}, {"kl", "icu"}, {"km", "icu"},
242
+ {"kn", "icu"}, {"ko", "icu"}, {"kok", "icu"}, {"ku", "icu"}, {"ky", "icu"}, {"lb", "icu"},
243
+ {"lkt", "icu"}, {"ln", "icu"}, {"lo", "icu"}, {"lt", "icu"}, {"lv", "icu"}, {"mk", "icu"},
244
+ {"ml", "icu"}, {"mn", "icu"}, {"mr", "icu"}, {"ms", "icu"}, {"mt", "icu"}, {"my", "icu"},
245
+ {"nb", "icu"}, {"nb_no", "icu"}, {"ne", "icu"}, {"nl", "icu"}, {"nn", "icu"}, {"om", "icu"},
246
+ {"or", "icu"}, {"pa", "icu"}, {"pa_in", "icu"}, {"pl", "icu"}, {"ps", "icu"}, {"pt", "icu"},
247
+ {"ro", "icu"}, {"ru", "icu"}, {"sa", "icu"}, {"se", "icu"}, {"si", "icu"}, {"sk", "icu"},
248
+ {"sl", "icu"}, {"smn", "icu"}, {"sq", "icu"}, {"sr", "icu"}, {"sr_ba", "icu"}, {"sr_me", "icu"},
249
+ {"sr_rs", "icu"}, {"sv", "icu"}, {"sw", "icu"}, {"ta", "icu"}, {"te", "icu"}, {"th", "icu"},
250
+ {"tk", "icu"}, {"to", "icu"}, {"tr", "icu"}, {"ug", "icu"}, {"uk", "icu"}, {"ur", "icu"},
251
+ {"uz", "icu"}, {"vi", "icu"}, {"wae", "icu"}, {"wo", "icu"}, {"xh", "icu"}, {"yi", "icu"},
252
+ {"yo", "icu"}, {"yue", "icu"}, {"yue_cn", "icu"}, {"zh", "icu"}, {"zh_cn", "icu"}, {"zh_hk", "icu"},
253
+ {"zh_mo", "icu"}, {"zh_sg", "icu"}, {"zh_tw", "icu"}, {"zu", "icu"}}; // END_OF_EXTENSION_COLLATIONS
254
+
221
255
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
222
256
  // TODO: automate by passing through to script via duckdb
223
257
  static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
@@ -14,6 +14,7 @@
14
14
 
15
15
  namespace duckdb {
16
16
  struct CreateMacroInfo;
17
+ struct CreateCollationInfo;
17
18
  class DatabaseInstance;
18
19
 
19
20
  //! The ExtensionUtil class contains methods that are useful for extensions
@@ -40,6 +41,19 @@ public:
40
41
  //! Register a new macro function - throw an exception if the function already exists
41
42
  DUCKDB_API static void RegisterFunction(DatabaseInstance &db, CreateMacroInfo &info);
42
43
 
44
+ //! Register a new collation
45
+ DUCKDB_API static void RegisterCollation(DatabaseInstance &db, CreateCollationInfo &info);
46
+
47
+ //! Returns a reference to the function in the catalog - throws an exception if it does not exist
48
+ DUCKDB_API static ScalarFunctionCatalogEntry &GetFunction(DatabaseInstance &db, const string &name);
49
+ DUCKDB_API static TableFunctionCatalogEntry &GetTableFunction(DatabaseInstance &db, const string &name);
50
+
51
+ //! Add a function overload
52
+ DUCKDB_API static void AddFunctionOverload(DatabaseInstance &db, ScalarFunction function);
53
+ DUCKDB_API static void AddFunctionOverload(DatabaseInstance &db, ScalarFunctionSet function);
54
+
55
+ DUCKDB_API static void AddFunctionOverload(DatabaseInstance &db, TableFunctionSet function);
56
+
43
57
  //! Registers a new type
44
58
  DUCKDB_API static void RegisterType(DatabaseInstance &db, string type_name, LogicalType type);
45
59
 
@@ -508,7 +508,7 @@ struct SchemaSetting {
508
508
  struct SearchPathSetting {
509
509
  static constexpr const char *Name = "search_path";
510
510
  static constexpr const char *Description =
511
- "Sets the default search search path as a comma-separated list of values";
511
+ "Sets the default catalog search path as a comma-separated list of values";
512
512
  static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
513
513
  static void SetLocal(ClientContext &context, const Value &parameter);
514
514
  static void ResetLocal(ClientContext &context);
@@ -38,6 +38,7 @@ public:
38
38
  virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) = 0;
39
39
 
40
40
  protected:
41
+ virtual void WriteEntry(CatalogEntry &entry, Serializer &serializer);
41
42
  virtual void WriteSchema(SchemaCatalogEntry &schema, Serializer &serializer);
42
43
  virtual void WriteTable(TableCatalogEntry &table, Serializer &serializer);
43
44
  virtual void WriteView(ViewCatalogEntry &table, Serializer &serializer);
@@ -60,6 +61,7 @@ protected:
60
61
 
61
62
  protected:
62
63
  virtual void LoadCheckpoint(ClientContext &context, MetadataReader &reader);
64
+ virtual void ReadEntry(ClientContext &context, Deserializer &deserializer);
63
65
  virtual void ReadSchema(ClientContext &context, Deserializer &deserializer);
64
66
  virtual void ReadTable(ClientContext &context, Deserializer &deserializer);
65
67
  virtual void ReadView(ClientContext &context, Deserializer &deserializer);