duckdb 0.9.1-dev0.0 → 0.9.1-dev143.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +26 -1
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +2 -0
  4. package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +49 -0
  5. package/src/duckdb/src/common/enum_util.cpp +1 -1
  6. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +4 -2
  7. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  8. package/src/duckdb/src/core_functions/scalar/map/map.cpp +66 -32
  9. package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
  10. package/src/duckdb/src/execution/expression_executor_state.cpp +8 -2
  11. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +41 -48
  12. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +13 -9
  13. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +22 -24
  14. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +6 -11
  15. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +8 -3
  16. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +5 -9
  17. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +8 -13
  18. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +2 -2
  19. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -4
  20. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +2 -4
  21. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +4 -6
  22. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
  23. package/src/duckdb/src/function/table/read_csv.cpp +1 -1
  24. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  25. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  27. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +12 -10
  28. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +28 -0
  29. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +9 -14
  30. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +20 -6
  31. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -1
  32. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  33. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -2
  34. package/src/duckdb/src/include/duckdb.h +5 -5
  35. package/src/duckdb/src/main/config.cpp +14 -0
  36. package/src/duckdb/src/main/extension/extension_helper.cpp +7 -0
  37. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
  38. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +5 -7
  39. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +4 -2
  40. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +17 -14
  41. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +5 -12
  42. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +3 -0
  43. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +13 -9
  44. package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
@@ -97,9 +97,14 @@ void CSVSniffer::DetectHeader() {
 	bool first_row_consistent = true;
 	// check if header row is all null and/or consistent with detected column data types
 	bool first_row_nulls = true;
-	// This case will fail in dialect detection, so we assert here just for sanity
-	D_ASSERT(best_candidate->options.null_padding ||
-	         best_sql_types_candidates_per_column_idx.size() == best_header_row.size());
+	// If null-padding is not allowed and there is a mismatch between our header candidate and the number of columns
+	// We can't detect the dialect/type options properly
+	if (!best_candidate->options.null_padding &&
+	    best_sql_types_candidates_per_column_idx.size() != best_header_row.size()) {
+		throw InvalidInputException(
+		    "Error in file \"%s\": CSV options could not be auto-detected. Consider setting parser options manually.",
+		    options.file_path);
+	}
 	for (idx_t col = 0; col < best_header_row.size(); col++) {
 		auto dummy_val = best_header_row[col];
 		if (!dummy_val.IsNull()) {
@@ -143,20 +143,17 @@ struct SniffValue {
 			machine.rows_read++;
 		}
 
-		if ((machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
+		if ((machine.previous_state == CSVState::RECORD_SEPARATOR) ||
 		    (machine.state != CSVState::RECORD_SEPARATOR && machine.previous_state == CSVState::CARRIAGE_RETURN)) {
 			sniffed_values[machine.cur_rows].position = machine.line_start_pos;
 			sniffed_values[machine.cur_rows].set = true;
 			machine.line_start_pos = current_pos;
 		}
-		machine.pre_previous_state = machine.previous_state;
-		machine.previous_state = machine.state;
-		machine.state = static_cast<CSVState>(
-		    machine.transition_array[static_cast<uint8_t>(machine.state)][static_cast<uint8_t>(current_char)]);
+
+		machine.Transition(current_char);
 
 		bool carriage_return = machine.previous_state == CSVState::CARRIAGE_RETURN;
-		if (machine.previous_state == CSVState::DELIMITER ||
-		    (machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
+		if (machine.previous_state == CSVState::DELIMITER || (machine.previous_state == CSVState::RECORD_SEPARATOR) ||
 		    (machine.state != CSVState::RECORD_SEPARATOR && carriage_return)) {
 			// Started a new value
 			// Check if it's UTF-8
@@ -175,8 +172,7 @@ struct SniffValue {
 		    (machine.state == CSVState::QUOTED && machine.previous_state == CSVState::QUOTED)) {
 			machine.value += current_char;
 		}
-		machine.cur_rows +=
-		    machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE;
+		machine.cur_rows += machine.previous_state == CSVState::RECORD_SEPARATOR;
 		// It means our carriage return is actually a record separator
 		machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return;
 		if (machine.cur_rows >= sniffed_values.size()) {
@@ -3,9 +3,9 @@
 namespace duckdb {
 struct Parse {
 	inline static void Initialize(CSVStateMachine &machine) {
-		machine.state = CSVState::STANDARD;
-		machine.previous_state = CSVState::STANDARD;
-		machine.pre_previous_state = CSVState::STANDARD;
+		machine.state = CSVState::EMPTY_LINE;
+		machine.previous_state = CSVState::EMPTY_LINE;
+		machine.pre_previous_state = CSVState::EMPTY_LINE;
 
 		machine.cur_rows = 0;
 		machine.column_count = 0;
@@ -14,22 +14,18 @@ struct Parse {
 
 	inline static bool Process(CSVStateMachine &machine, DataChunk &parse_chunk, char current_char, idx_t current_pos) {
 
-		machine.pre_previous_state = machine.previous_state;
-		machine.previous_state = machine.state;
-		machine.state = static_cast<CSVState>(
-		    machine.transition_array[static_cast<uint8_t>(machine.state)][static_cast<uint8_t>(current_char)]);
+		machine.Transition(current_char);
 
 		bool carriage_return = machine.previous_state == CSVState::CARRIAGE_RETURN;
-		if (machine.previous_state == CSVState::DELIMITER ||
-		    (machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
+		if (machine.previous_state == CSVState::DELIMITER || (machine.previous_state == CSVState::RECORD_SEPARATOR) ||
 		    (machine.state != CSVState::RECORD_SEPARATOR && carriage_return)) {
 			// Started a new value
 			// Check if it's UTF-8 (Or not?)
 			machine.VerifyUTF8();
 			auto &v = parse_chunk.data[machine.column_count++];
 			auto parse_data = FlatVector::GetData<string_t>(v);
-			auto &validity_mask = FlatVector::Validity(v);
 			if (machine.value.empty()) {
+				auto &validity_mask = FlatVector::Validity(v);
 				validity_mask.SetInvalid(machine.cur_rows);
 			} else {
 				parse_data[machine.cur_rows] = StringVector::AddStringOrBlob(v, string_t(machine.value));
@@ -50,12 +46,11 @@ struct Parse {
 		    (machine.state == CSVState::QUOTED && machine.previous_state == CSVState::QUOTED)) {
 			machine.value += current_char;
 		}
-		machine.cur_rows +=
-		    machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE;
+		machine.cur_rows += machine.previous_state == CSVState::RECORD_SEPARATOR && machine.column_count > 0;
 		machine.column_count -= machine.column_count * (machine.previous_state == CSVState::RECORD_SEPARATOR);
 
 		// It means our carriage return is actually a record separator
-		machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return;
+		machine.cur_rows += machine.state != CSVState::RECORD_SEPARATOR && carriage_return && machine.column_count > 0;
 		machine.column_count -= machine.column_count * (machine.state != CSVState::RECORD_SEPARATOR && carriage_return);
 
 		if (machine.cur_rows >= STANDARD_VECTOR_SIZE) {
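
The Parse hunks above now gate the row counter on `machine.column_count > 0` and keep relying on bool-to-integer arithmetic instead of branches for the counter updates. Below is a minimal, stand-alone sketch of that branchless pattern only; the variable names are hypothetical stand-ins for the `machine.previous_state` and `machine.column_count` checks and this is not DuckDB code.

```cpp
#include <cstddef>
#include <cstdio>

int main() {
	std::size_t cur_rows = 0;
	std::size_t column_count = 3;
	bool saw_record_separator = true;
	bool has_columns = column_count > 0;

	// Adding a bool adds 0 or 1: the row counter only advances on a record
	// separator when the row actually contained columns (empty lines are skipped).
	cur_rows += saw_record_separator && has_columns;

	// Multiplying by a bool either subtracts 0 (keep column_count) or subtracts
	// column_count itself (reset it to 0 for the next row).
	column_count -= column_count * saw_record_separator;

	std::printf("rows=%zu columns=%zu\n", cur_rows, column_count);
	return 0;
}
```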
@@ -14,7 +14,7 @@ void CSVSniffer::ReplaceTypes() {
 	for (idx_t i = 0; i < names.size(); i++) {
 		auto it = best_candidate->options.sql_types_per_column.find(names[i]);
 		if (it != best_candidate->options.sql_types_per_column.end()) {
-			best_sql_types_candidates_per_column_idx[i] = {best_candidate->options.sql_type_list[it->second]};
+			detected_types[i] = best_candidate->options.sql_type_list[it->second];
 			found++;
 		}
 	}
@@ -33,7 +33,7 @@ void CSVSniffer::ReplaceTypes() {
 		                    best_candidate->options.sql_type_list.size(), names.size());
 	}
 	for (idx_t i = 0; i < best_candidate->options.sql_type_list.size(); i++) {
-		best_sql_types_candidates_per_column_idx[i] = {best_candidate->options.sql_type_list[i]};
+		detected_types[i] = best_candidate->options.sql_type_list[i];
 	}
 }
 } // namespace duckdb
@@ -21,10 +21,7 @@ void PhysicalReset::ResetExtensionVariable(ExecutionContext &context, DBConfig &
 
 SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
 	auto &config = DBConfig::GetConfig(context.client);
-	if (config.options.lock_configuration) {
-		throw InvalidInputException("Cannot reset configuration option \"%s\" - the configuration has been locked",
-		                            name);
-	}
+	config.CheckLock(name);
 	auto option = DBConfig::GetOptionByName(name);
 	if (!option) {
 		// check if this is an extra extension variable
@@ -24,10 +24,8 @@ void PhysicalSet::SetExtensionVariable(ClientContext &context, ExtensionOption &
 
 SourceResultType PhysicalSet::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
 	auto &config = DBConfig::GetConfig(context.client);
-	if (config.options.lock_configuration) {
-		throw InvalidInputException("Cannot change configuration option \"%s\" - the configuration has been locked",
-		                            name);
-	}
+	// check if we are allowed to change the configuration option
+	config.CheckLock(name);
 	auto option = DBConfig::GetOptionByName(name);
 	if (!option) {
 		// check if this is an extra extension variable
@@ -298,12 +298,10 @@ void PerfectAggregateHashTable::Destroy() {
 	RowOperationsState row_state(*aggregate_allocator);
 	data_ptr_t payload_ptr = data;
 	for (idx_t i = 0; i < total_groups; i++) {
-		if (group_is_set[i]) {
-			data_pointers[count++] = payload_ptr;
-			if (count == STANDARD_VECTOR_SIZE) {
-				RowOperations::DestroyStates(row_state, layout, addresses, count);
-				count = 0;
-			}
+		data_pointers[count++] = payload_ptr;
+		if (count == STANDARD_VECTOR_SIZE) {
+			RowOperations::DestroyStates(row_state, layout, addresses, count);
+			count = 0;
 		}
 		payload_ptr += tuple_size;
 	}
@@ -261,7 +261,7 @@ idx_t RadixHTConfig::ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p) {
 idx_t RadixHTConfig::SinkCapacity(ClientContext &context) {
 	// Get active and maximum number of threads
 	const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
-	const auto max_threads = DBConfig::GetSystemMaxThreads(FileSystem::GetFileSystem(context));
+	const auto max_threads = DBConfig::GetConfig(context).options.maximum_threads;
 
 	// Compute cache size per active thread (assuming cache is shared)
 	const auto total_shared_cache_size = max_threads * L3_CACHE_SIZE;
@@ -38,7 +38,7 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
 	auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
 	//! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
 	bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
-	if (options.parallel_mode != ParallelMode::PARALLEL && many_csv_files) {
+	if (options.parallel_mode != ParallelMode::PARALLEL && (many_csv_files || number_of_threads == 1)) {
 		single_threaded = true;
 	}
 	if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.9.0"
+#define DUCKDB_VERSION "v0.9.1-dev143"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "0d84ccf478"
+#define DUCKDB_SOURCE_ID "e2649c46be"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
@@ -1,6 +1,7 @@
 #pragma once
 #include <type_traits>
 #include <cstdint>
+#include <atomic>
 
 #include "duckdb/common/vector.hpp"
 #include "duckdb/common/unordered_map.hpp"
@@ -33,7 +33,7 @@ struct ExpressionState {
 
 public:
 	void AddChild(Expression *expr);
-	void Finalize();
+	void Finalize(bool empty = false);
 	Allocator &GetAllocator();
 	bool HasContext();
 	DUCKDB_API ClientContext &GetContext();
@@ -34,9 +34,9 @@ public:
 	//! CSV Sniffing consists of five steps:
 	//! 1. Dialect Detection: Generate the CSV Options (delimiter, quote, escape, etc.)
 	//! 2. Type Detection: Figures out the types of the columns (For one chunk)
-	//! 3. Header Detection: Figures out if the CSV file has a header and produces the names of the columns
-	//! 4. Type Replacement: Replaces the types of the columns if the user specified them
-	//! 5. Type Refinement: Refines the types of the columns for the remaining chunks
+	//! 3. Type Refinement: Refines the types of the columns for the remaining chunks
+	//! 4. Header Detection: Figures out if the CSV file has a header and produces the names of the columns
+	//! 5. Type Replacement: Replaces the types of the columns if the user specified them
 	SnifferResult SniffCSV();
 
 private:
@@ -50,6 +50,8 @@ private:
 	CSVReaderOptions &options;
 	//! Buffer being used on sniffer
 	shared_ptr<CSVBufferManager> buffer_manager;
+	//! Sets the result options
+	void SetResultOptions();
 
 	//! ------------------------------------------------------//
 	//! ----------------- Dialect Detection ----------------- //
@@ -105,6 +107,13 @@ private:
 	idx_t best_start_without_header = 0;
 	vector<Value> best_header_row;
 
+	//! ------------------------------------------------------//
+	//! ------------------ Type Refinement ------------------ //
+	//! ------------------------------------------------------//
+	void RefineTypes();
+	bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
+	vector<LogicalType> detected_types;
+
 	//! ------------------------------------------------------//
 	//! ------------------ Header Detection ----------------- //
 	//! ------------------------------------------------------//
@@ -117,13 +126,6 @@ private:
 	//! ------------------ Type Replacement ----------------- //
 	//! ------------------------------------------------------//
 	void ReplaceTypes();
-
-	//! ------------------------------------------------------//
-	//! ------------------ Type Refinement ------------------ //
-	//! ------------------------------------------------------//
-	void RefineTypes();
-	bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
-	vector<LogicalType> detected_types;
 };
 
 } // namespace duckdb
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/execution/operator/scan/csv/csv_state.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstdint>
+
+namespace duckdb {
+
+//! All States of CSV Parsing
+enum class CSVState : uint8_t {
+	STANDARD = 0,         //! Regular unquoted field state
+	DELIMITER = 1,        //! State after encountering a field separator (e.g., ;)
+	RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n)
+	CARRIAGE_RETURN = 3,  //! State after encountering a carriage return(i.e., \r)
+	QUOTED = 4,           //! State when inside a quoted field
+	UNQUOTED = 5,         //! State when leaving a quoted field
+	ESCAPE = 6,           //! State when encountering an escape character (e.g., \)
+	EMPTY_LINE = 7,       //! State when encountering an empty line (i.e., \r\r \n\n, \n\r)
+	INVALID = 8           //! Got to an Invalid State, this should error.
+};
+
+} // namespace duckdb
@@ -14,19 +14,6 @@
 
 namespace duckdb {
 
-//! All States of CSV Parsing
-enum class CSVState : uint8_t {
-	STANDARD = 0,         //! Regular unquoted field state
-	DELIMITER = 1,        //! State after encountering a field separator (e.g., ;)
-	RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n)
-	CARRIAGE_RETURN = 3,  //! State after encountering a carriage return(i.e., \r)
-	QUOTED = 4,           //! State when inside a quoted field
-	UNQUOTED = 5,         //! State when leaving a quoted field
-	ESCAPE = 6,           //! State when encountering an escape character (e.g., \)
-	EMPTY_LINE = 7,       //! State when encountering an empty line (i.e., \r\r \n\n, \n\r)
-	INVALID = 8           //! Got to an Invalid State, this should error.
-};
-
 //! The CSV State Machine comprises a state transition array (STA).
 //! The STA indicates the current state of parsing based on both the current and preceding characters.
 //! This reveals whether we are dealing with a Field, a New Line, a Delimiter, and so forth.
@@ -38,6 +25,14 @@ public:
 	explicit CSVStateMachine(CSVReaderOptions &options_p, const CSVStateMachineOptions &state_machine_options,
 	                         shared_ptr<CSVBufferManager> buffer_manager_p,
 	                         CSVStateMachineCache &csv_state_machine_cache_p);
+
+	//! Transition all states to next state, that depends on the current char
+	inline void Transition(char current_char) {
+		pre_previous_state = previous_state;
+		previous_state = state;
+		state = transition_array[state][static_cast<uint8_t>(current_char)];
+	}
+
 	//! Resets the state machine, so it can be used again
 	void Reset();
 
@@ -52,7 +47,7 @@ public:
 	idx_t start_row = 0;
 	//! The Transition Array is a Finite State Machine
 	//! It holds the transitions of all states, on all 256 possible different characters
-	const state_machine_t &transition_array;
+	const StateMachine &transition_array;
 
 	//! Both these variables are used for new line identifier detection
 	bool single_record_separator = false;
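
The new `CSVStateMachine::Transition` helper centralizes the three-line state update that was previously repeated in the SniffValue and Parse hunks earlier in this diff. Below is a hedged, self-contained sketch of the same table-driven idea; the toy `State` enum, the 3x256 table, and `ToyMachine` are invented for illustration and are much smaller than DuckDB's real 9-state transition array.

```cpp
#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Toy states standing in for CSVState; only three are needed for the demo.
enum class State : uint8_t { STANDARD = 0, DELIMITER = 1, RECORD_SEPARATOR = 2 };

struct ToyMachine {
	State pre_previous_state = State::STANDARD;
	State previous_state = State::STANDARD;
	State state = State::STANDARD;
	// transition_table[current_state][input_char] -> next state
	State transition_table[3][256];

	// Same shape as CSVStateMachine::Transition: shift the history and look up
	// the next state from the table using the incoming character.
	void Transition(char current_char) {
		pre_previous_state = previous_state;
		previous_state = state;
		state = transition_table[static_cast<uint8_t>(state)][static_cast<uint8_t>(current_char)];
	}
};

int main() {
	ToyMachine machine;
	// Default: stay in STANDARD; ',' moves to DELIMITER, '\n' to RECORD_SEPARATOR.
	for (auto &row : machine.transition_table) {
		for (auto &next : row) {
			next = State::STANDARD;
		}
		row[static_cast<uint8_t>(',')] = State::DELIMITER;
		row[static_cast<uint8_t>('\n')] = State::RECORD_SEPARATOR;
	}
	for (char c : {'a', ',', 'b', '\n'}) {
		machine.Transition(c);
	}
	std::printf("final state: %d\n", static_cast<int>(machine.state));
	return 0;
}
```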
@@ -8,14 +8,28 @@
 
 #pragma once
 
-#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
+#include "duckdb/execution/operator/scan/csv/csv_state.hpp"
 #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
+#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
 #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
 
 namespace duckdb {
-static constexpr uint32_t NUM_STATES = 9;
-static constexpr uint32_t NUM_TRANSITIONS = 256;
-typedef uint8_t state_machine_t[NUM_STATES][NUM_TRANSITIONS];
+
+//! Class to wrap the state machine matrix
+class StateMachine {
+public:
+	static constexpr uint32_t NUM_STATES = 9;
+	static constexpr uint32_t NUM_TRANSITIONS = 256;
+	CSVState state_machine[NUM_STATES][NUM_TRANSITIONS];
+
+	const CSVState *operator[](CSVState state) const {
+		return state_machine[static_cast<uint8_t>(state)];
+	}
+
+	CSVState *operator[](CSVState state) {
+		return state_machine[static_cast<uint8_t>(state)];
+	}
+};
 
 //! Hash function used in out state machine cache, it hashes and combines all options used to generate a state machine
 struct HashCSVStateMachineConfig {
@@ -36,12 +50,12 @@ public:
 	~CSVStateMachineCache() {};
 	//! Gets a state machine from the cache, if it's not from one the default options
 	//! It first caches it, then returns it.
-	const state_machine_t &Get(const CSVStateMachineOptions &state_machine_options);
+	const StateMachine &Get(const CSVStateMachineOptions &state_machine_options);
 
 private:
 	void Insert(const CSVStateMachineOptions &state_machine_options);
 	//! Cache on delimiter|quote|escape
-	unordered_map<CSVStateMachineOptions, state_machine_t, HashCSVStateMachineConfig> state_machine_cache;
+	unordered_map<CSVStateMachineOptions, StateMachine, HashCSVStateMachineConfig> state_machine_cache;
 	//! Default value for options used to intialize CSV State Machine Cache
 	const vector<char> default_delimiter = {',', '|', ';', '\t'};
 	const vector<vector<char>> default_quote = {{'\"'}, {'\"', '\''}, {'\0'}};
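
The hunk above replaces the raw `uint8_t[9][256]` typedef with a `StateMachine` class whose matrix holds `CSVState` values and whose `operator[]` lets callers index a row with the enum directly instead of casting by hand. A rough stand-alone sketch of that wrapper idea follows; the small `CSVState` enum is redeclared locally, the matrix is shrunk to three states, and a plain string key stands in for `CSVStateMachineOptions`, so this is an illustration rather than the DuckDB header.

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>

// Toy version of the CSV states; only the ones used below.
enum class CSVState : uint8_t { STANDARD = 0, DELIMITER = 1, RECORD_SEPARATOR = 2 };

// Wrapper over the transition matrix: rows are indexed by CSVState via operator[],
// columns by the raw input character.
class StateMachine {
public:
	static constexpr uint32_t NUM_STATES = 3;
	static constexpr uint32_t NUM_TRANSITIONS = 256;
	CSVState state_machine[NUM_STATES][NUM_TRANSITIONS] = {};

	const CSVState *operator[](CSVState state) const {
		return state_machine[static_cast<uint8_t>(state)];
	}
	CSVState *operator[](CSVState state) {
		return state_machine[static_cast<uint8_t>(state)];
	}
};

int main() {
	// The real cache is keyed on CSVStateMachineOptions; a string key is used here.
	std::unordered_map<std::string, StateMachine> cache;
	auto &machine = cache["delim=,|quote=\"|escape=\""];

	machine[CSVState::STANDARD][static_cast<uint8_t>(',')] = CSVState::DELIMITER;
	auto next = machine[CSVState::STANDARD][static_cast<uint8_t>(',')];
	std::printf("next state: %d\n", static_cast<int>(next));
	return 0;
}
```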
@@ -148,7 +148,7 @@ private:
 	//! Sets Position depending on the byte_start of this thread
 	bool SetPosition();
 	//! Called when scanning the 1st buffer, skips empty lines
-	void SkipEmptyLines();
+	bool SkipEmptyLines();
 	//! When a buffer finishes reading its piece, it still can try to scan up to the real end of the buffer
 	//! Up to finding a new line. This function sets the buffer_end and marks a boolean variable
 	//! when changing the buffer end the first time.
@@ -240,6 +240,8 @@ public:
 	DUCKDB_API void SetOption(const string &name, Value value);
 	DUCKDB_API void ResetOption(const string &name);
 
+	DUCKDB_API void CheckLock(const string &name);
+
 
 	DUCKDB_API static idx_t ParseMemoryLimit(const string &arg);
 	//! Return the list of possible compression functions for the specific physical type
@@ -90,8 +90,8 @@ public:
 	void QualifyColumnNames(unique_ptr<ParsedExpression> &expr);
 	static void QualifyColumnNames(Binder &binder, unique_ptr<ParsedExpression> &expr);
 
-	static unique_ptr<Expression> PushCollation(ClientContext &context, unique_ptr<Expression> source,
-	                                            const string &collation, bool equality_only = false);
+	static bool PushCollation(ClientContext &context, unique_ptr<Expression> &source, const LogicalType &sql_type,
+	                          bool equality_only = false);
 	static void TestCollation(ClientContext &context, const string &collation);
 
 	bool BindCorrelatedColumns(unique_ptr<ParsedExpression> &expr);
@@ -317,7 +317,7 @@ typedef enum {
 //===--------------------------------------------------------------------===//
 
 /*!
-Creates a new database or opens an existing database file stored at the the given path.
+Creates a new database or opens an existing database file stored at the given path.
 If no path is given a new in-memory database is created instead.
 The instantiated database should be closed with 'duckdb_close'
 
@@ -328,7 +328,7 @@ The instantiated database should be closed with 'duckdb_close'
 DUCKDB_API duckdb_state duckdb_open(const char *path, duckdb_database *out_database);
 
 /*!
-Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the the given path.
+Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the given path.
 
 * path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database.
 * out_database: The result database object.
@@ -1009,7 +1009,7 @@ Binds an int64_t value to the prepared statement at the specified index.
 DUCKDB_API duckdb_state duckdb_bind_int64(duckdb_prepared_statement prepared_statement, idx_t param_idx, int64_t val);
 
 /*!
-Binds an duckdb_hugeint value to the prepared statement at the specified index.
+Binds a duckdb_hugeint value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_hugeint(duckdb_prepared_statement prepared_statement, idx_t param_idx,
                                             duckdb_hugeint val);
@@ -1040,12 +1040,12 @@ Binds an uint64_t value to the prepared statement at the specified index.
 DUCKDB_API duckdb_state duckdb_bind_uint64(duckdb_prepared_statement prepared_statement, idx_t param_idx, uint64_t val);
 
 /*!
-Binds an float value to the prepared statement at the specified index.
+Binds a float value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_float(duckdb_prepared_statement prepared_statement, idx_t param_idx, float val);
 
 /*!
-Binds an double value to the prepared statement at the specified index.
+Binds a double value to the prepared statement at the specified index.
 */
 DUCKDB_API duckdb_state duckdb_bind_double(duckdb_prepared_statement prepared_statement, idx_t param_idx, double val);
 
@@ -233,6 +233,20 @@ void DBConfig::SetDefaultMaxMemory() {
 	}
 }
 
+void DBConfig::CheckLock(const string &name) {
+	if (!options.lock_configuration) {
+		// not locked
+		return;
+	}
+	case_insensitive_set_t allowed_settings {"schema", "search_path"};
+	if (allowed_settings.find(name) != allowed_settings.end()) {
+		// we are always allowed to change these settings
+		return;
+	}
+	// not allowed!
+	throw InvalidInputException("Cannot change configuration option \"%s\" - the configuration has been locked", name);
+}
+
 idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
 	static constexpr const char *CPU_MAX = "/sys/fs/cgroup/cpu.max";
 	static constexpr const char *CFS_QUOTA = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
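
The new `DBConfig::CheckLock` above centralizes the lock check that `PhysicalSet::GetData` and `PhysicalReset::GetData` previously duplicated, and carves out `schema` and `search_path` as always-changeable settings. Below is a rough, self-contained sketch of that allow-list pattern only; the `ToyConfig` type, `std::set`, and `std::invalid_argument` stand in for DuckDB's `DBConfig`, `case_insensitive_set_t`, and `InvalidInputException`.

```cpp
#include <set>
#include <stdexcept>
#include <string>

struct ToyConfig {
	bool lock_configuration = false;

	void CheckLock(const std::string &name) const {
		if (!lock_configuration) {
			return; // not locked: any option may be changed
		}
		// schema/search_path stay changeable even when the configuration is locked
		static const std::set<std::string> allowed_settings {"schema", "search_path"};
		if (allowed_settings.count(name)) {
			return;
		}
		throw std::invalid_argument("Cannot change configuration option \"" + name +
		                            "\" - the configuration has been locked");
	}
};

int main() {
	ToyConfig config;
	config.lock_configuration = true;
	config.CheckLock("search_path"); // allowed even while locked
	try {
		config.CheckLock("memory_limit"); // rejected while locked
	} catch (const std::invalid_argument &) {
		return 0;
	}
	return 1;
}
```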
@@ -196,6 +196,9 @@ string ExtensionHelper::AddExtensionInstallHintToErrorMsg(ClientContext &context
 }
 
 bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string &extension_name) noexcept {
+	if (context.db->ExtensionIsLoaded(extension_name)) {
+		return true;
+	}
 	auto &dbconfig = DBConfig::GetConfig(context);
 	try {
 		if (dbconfig.options.autoinstall_known_extensions) {
@@ -211,6 +214,10 @@ bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string
 }
 
 void ExtensionHelper::AutoLoadExtension(ClientContext &context, const string &extension_name) {
+	if (context.db->ExtensionIsLoaded(extension_name)) {
+		// Avoid downloading again
+		return;
+	}
 	auto &dbconfig = DBConfig::GetConfig(context);
 	try {
 #ifndef DUCKDB_WASM
@@ -38,8 +38,8 @@ void CommonAggregateOptimizer::ExtractCommonAggregates(LogicalAggregate &aggr) {
 			// aggregate does not exist yet: add it to the map
 			aggregate_remap[*aggr.expressions[i]] = i;
 			if (i != original_index) {
-				// this aggregate is not erased, however an agregate BEFORE it has been erased
-				// so we need to remap this aggregaet
+				// this aggregate is not erased, however an aggregate BEFORE it has been erased
+				// so we need to remap this aggregate
 				ColumnBinding original_binding(aggr.aggregate_index, original_index);
 				ColumnBinding new_binding(aggr.aggregate_index, i);
 				aggregate_map[original_binding] = new_binding;
@@ -34,13 +34,11 @@ BindResult ExpressionBinder::BindExpression(BetweenExpression &expr, idx_t depth
 	input = BoundCastExpression::AddCastToType(context, std::move(input), input_type);
 	lower = BoundCastExpression::AddCastToType(context, std::move(lower), input_type);
 	upper = BoundCastExpression::AddCastToType(context, std::move(upper), input_type);
-	if (input_type.id() == LogicalTypeId::VARCHAR) {
-		// handle collation
-		auto collation = StringType::GetCollation(input_type);
-		input = PushCollation(context, std::move(input), collation, false);
-		lower = PushCollation(context, std::move(lower), collation, false);
-		upper = PushCollation(context, std::move(upper), collation, false);
-	}
+	// handle collation
+	PushCollation(context, input, input_type, false);
+	PushCollation(context, lower, input_type, false);
+	PushCollation(context, upper, input_type, false);
+
 	if (!input->HasSideEffects() && !input->HasParameter() && !input->HasSubquery()) {
 		// the expression does not have side effects and can be copied: create two comparisons
 		// the reason we do this is that individual comparisons are easier to handle in optimizers
@@ -18,8 +18,10 @@ BindResult ExpressionBinder::BindExpression(CollateExpression &expr, idx_t depth
 		throw BinderException("collations are only supported for type varchar");
 	}
 	// Validate the collation, but don't use it
-	PushCollation(context, child->Copy(), expr.collation, false);
-	child->return_type = LogicalType::VARCHAR_COLLATION(expr.collation);
+	auto child_copy = child->Copy();
+	auto collation_type = LogicalType::VARCHAR_COLLATION(expr.collation);
+	PushCollation(context, child_copy, collation_type, false);
+	child->return_type = collation_type;
 	return BindResult(std::move(child));
 }
 
@@ -18,20 +18,25 @@
 
 namespace duckdb {
 
-unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> source,
-                                                       const string &collation_p, bool equality_only) {
+bool ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> &source,
+                                     const LogicalType &sql_type, bool equality_only) {
+	if (sql_type.id() != LogicalTypeId::VARCHAR) {
+		// only VARCHAR columns require collation
+		return false;
+	}
 	// replace default collation with system collation
+	auto str_collation = StringType::GetCollation(sql_type);
 	string collation;
-	if (collation_p.empty()) {
+	if (str_collation.empty()) {
 		collation = DBConfig::GetConfig(context).options.collation;
 	} else {
-		collation = collation_p;
+		collation = str_collation;
 	}
 	collation = StringUtil::Lower(collation);
 	// bind the collation
 	if (collation.empty() || collation == "binary" || collation == "c" || collation == "posix") {
-		// binary collation: just skip
-		return source;
+		// no collation or binary collation: skip
+		return false;
 	}
 	auto &catalog = Catalog::GetSystemCatalog(context);
 	auto splits = StringUtil::Split(StringUtil::Lower(collation), ".");
@@ -60,11 +65,12 @@ unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, u
 		auto function = function_binder.BindScalarFunction(collation_entry.function, std::move(children));
 		source = std::move(function);
 	}
-	return source;
+	return true;
 }
 
 void ExpressionBinder::TestCollation(ClientContext &context, const string &collation) {
-	PushCollation(context, make_uniq<BoundConstantExpression>(Value("")), collation);
+	auto expr = make_uniq_base<Expression, BoundConstantExpression>(Value(""));
+	PushCollation(context, expr, LogicalType::VARCHAR_COLLATION(collation));
}
 
 LogicalType BoundComparisonExpression::BindComparison(LogicalType left_type, LogicalType right_type) {
@@ -134,12 +140,9 @@ BindResult ExpressionBinder::BindExpression(ComparisonExpression &expr, idx_t de
 	right = BoundCastExpression::AddCastToType(context, std::move(right), input_type,
 	                                           input_type.id() == LogicalTypeId::ENUM);
 
-	if (input_type.id() == LogicalTypeId::VARCHAR) {
-		// handle collation
-		auto collation = StringType::GetCollation(input_type);
-		left = PushCollation(context, std::move(left), collation, expr.type == ExpressionType::COMPARE_EQUAL);
-		right = PushCollation(context, std::move(right), collation, expr.type == ExpressionType::COMPARE_EQUAL);
-	}
+	PushCollation(context, left, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
+	PushCollation(context, right, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
+
 	// now create the bound comparison expression
 	return BindResult(make_uniq<BoundComparisonExpression>(expr.type, std::move(left), std::move(right)));
 }
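
Across the binder hunks above, `PushCollation` now takes the expression by reference, decides internally whether the type is VARCHAR and whether a non-binary collation applies, rewrites the expression in place when it does, and reports that via its bool return; call sites in the BETWEEN, COLLATE, and comparison binders drop their own VARCHAR checks. The sketch below illustrates only that call-site pattern; the `Expression` struct and `PushCollationSketch` function are invented stand-ins, not DuckDB's binder API.

```cpp
#include <cstdio>
#include <memory>
#include <string>
#include <utility>

struct Expression {
	std::string name;
};

// Modify-in-place variant: wrap the expression only when it is VARCHAR with a
// real (non-binary) collation, and report whether a wrapper was pushed.
static bool PushCollationSketch(std::unique_ptr<Expression> &source, bool is_varchar,
                                const std::string &collation) {
	if (!is_varchar) {
		return false; // only VARCHAR expressions require collation
	}
	if (collation.empty() || collation == "binary") {
		return false; // binary collation: nothing to push
	}
	// wrap the expression in place (the real code binds a collation function here)
	auto wrapped = std::make_unique<Expression>();
	wrapped->name = collation + "(" + source->name + ")";
	source = std::move(wrapped);
	return true;
}

int main() {
	auto left = std::make_unique<Expression>();
	left->name = "col";
	// callers no longer special-case VARCHAR themselves
	bool pushed = PushCollationSketch(left, true, "nocase");
	std::printf("%s (pushed=%d)\n", left->name.c_str(), pushed);
	return 0;
}
```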