duckdb 0.7.2-dev2995.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  5. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  6. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  7. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  8. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  9. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  10. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  11. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  12. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  13. package/src/duckdb/src/common/exception.cpp +1 -1
  14. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  15. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  16. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  17. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  18. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  19. package/src/duckdb/src/common/types.cpp +11 -10
  20. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  21. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  22. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  23. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  24. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  25. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  26. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -2
  27. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  28. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -1
  29. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  30. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  31. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +9 -7
  33. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  34. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  35. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  36. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
  37. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  38. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  39. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  40. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  41. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  44. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  46. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  47. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  48. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  49. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  50. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  51. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  52. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  53. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  57. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  58. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  59. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
  61. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  62. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
  63. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  66. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  67. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  69. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  70. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  77. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  78. package/src/duckdb/src/main/client_context.cpp +1 -0
  79. package/src/duckdb/src/main/client_verify.cpp +1 -0
  80. package/src/duckdb/src/main/database.cpp +11 -23
  81. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  82. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  83. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  84. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  85. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  86. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  87. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  88. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  89. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  90. package/src/duckdb/src/parser/tableref.cpp +1 -1
  91. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  92. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  93. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  94. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  95. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -1
  96. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  97. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  98. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  99. package/src/duckdb/src/verification/statement_verifier.cpp +3 -0
  100. package/src/duckdb/ub_src_common.cpp +2 -2
  101. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  102. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  103. package/src/duckdb/ub_src_main.cpp +2 -0
  104. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  105. package/src/duckdb/src/common/vector.cpp +0 -12
  106. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -32,7 +32,7 @@ public:
32
32
  vector<LogicalType> group_types_p, vector<LogicalType> payload_types_p,
33
33
  vector<BoundAggregateExpression *> bindings_p);
34
34
 
35
- idx_t AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition, const vector<idx_t> &filter);
35
+ idx_t AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition, const unsafe_vector<idx_t> &filter);
36
36
  void Partition();
37
37
  bool IsPartitioned();
38
38
 
@@ -51,7 +51,7 @@ private:
51
51
  bool is_partitioned;
52
52
  RadixPartitionInfo &partition_info;
53
53
  vector<SelectionVector> sel_vectors;
54
- vector<idx_t> sel_vector_sizes;
54
+ unsafe_vector<idx_t> sel_vector_sizes;
55
55
  DataChunk group_subset, payload_subset;
56
56
  Vector hashes, hashes_subset;
57
57
  AggregateHTAppendState append_state;
@@ -62,7 +62,7 @@ private:
62
62
 
63
63
  private:
64
64
  idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
65
- const vector<idx_t> &filter);
65
+ const unsafe_vector<idx_t> &filter);
66
66
  //! Returns the HT entry size used for intermediate hash tables
67
67
  HtEntryType GetHTEntrySize();
68
68
  };
@@ -26,7 +26,7 @@ public:
26
26
 
27
27
  GroupingSet &grouping_set;
28
28
  //! The indices specified in the groups_count that do not appear in the grouping_set
29
- vector<idx_t> null_groups;
29
+ unsafe_vector<idx_t> null_groups;
30
30
  const GroupedAggregateData &op;
31
31
 
32
32
  vector<LogicalType> group_types;
@@ -42,7 +42,7 @@ public:
42
42
  unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const;
43
43
 
44
44
  void Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input, DataChunk &aggregate_input_chunk,
45
- const vector<idx_t> &filter) const;
45
+ const unsafe_vector<idx_t> &filter) const;
46
46
  void Combine(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate) const;
47
47
  bool Finalize(ClientContext &context, GlobalSinkState &gstate_p) const;
48
48
 
@@ -17,6 +17,7 @@ namespace duckdb {
17
17
 
18
18
  class Binder;
19
19
  struct BoundStatement;
20
+ class ColumnDataCollection;
20
21
  class ExecutionContext;
21
22
 
22
23
  struct LocalFunctionData {
@@ -51,6 +52,24 @@ struct GlobalFunctionData {
51
52
  }
52
53
  };
53
54
 
55
+ struct PreparedBatchData { //!< Base type for the per-batch data produced by a copy function's prepare_batch callback and consumed by flush_batch
56
+ virtual ~PreparedBatchData() { // virtual: instances are held through unique_ptr<PreparedBatchData>
57
+ }
58
+
59
+ template <class TARGET>
60
+ TARGET &Cast() { //!< Returns this object viewed as TARGET
61
+ D_ASSERT(dynamic_cast<TARGET *>(this)); // checks the dynamic type really is TARGET (presumably only in debug builds - confirm D_ASSERT)
62
+ return (TARGET &)*this;
63
+ }
64
+ template <class TARGET>
65
+ const TARGET &Cast() const { //!< Const overload of Cast()
66
+ D_ASSERT(dynamic_cast<const TARGET *>(this)); // same dynamic-type check as the non-const overload
67
+ return (const TARGET &)*this;
68
+ }
69
+ };
70
+
71
+ enum class CopyFunctionExecutionMode { REGULAR_COPY_TO_FILE, PARALLEL_COPY_TO_FILE, BATCH_COPY_TO_FILE }; //!< Execution mode reported by a copy function's copy_to_execution_mode_t callback
72
+
54
73
  typedef BoundStatement (*copy_to_plan_t)(Binder &binder, CopyStatement &stmt);
55
74
  typedef unique_ptr<FunctionData> (*copy_to_bind_t)(ClientContext &context, CopyInfo &info, vector<string> &names,
56
75
  vector<LogicalType> &sql_types);
@@ -71,15 +90,21 @@ typedef unique_ptr<FunctionData> (*copy_to_deserialize_t)(ClientContext &context
71
90
  typedef unique_ptr<FunctionData> (*copy_from_bind_t)(ClientContext &context, CopyInfo &info,
72
91
  vector<string> &expected_names,
73
92
  vector<LogicalType> &expected_types);
74
- typedef bool (*copy_to_is_parallel_t)(ClientContext &context, FunctionData &bind_data);
93
+ typedef CopyFunctionExecutionMode (*copy_to_execution_mode_t)(bool preserve_insertion_order, bool supports_batch_index);
94
+
95
+ typedef unique_ptr<PreparedBatchData> (*copy_prepare_batch_t)(ClientContext &context, FunctionData &bind_data,
96
+ GlobalFunctionData &gstate,
97
+ unique_ptr<ColumnDataCollection> collection);
98
+ typedef void (*copy_flush_batch_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
99
+ PreparedBatchData &batch);
75
100
 
76
101
  class CopyFunction : public Function {
77
102
  public:
78
103
  explicit CopyFunction(string name)
79
104
  : Function(name), plan(nullptr), copy_to_bind(nullptr), copy_to_initialize_local(nullptr),
80
105
  copy_to_initialize_global(nullptr), copy_to_sink(nullptr), copy_to_combine(nullptr),
81
- copy_to_finalize(nullptr), parallel(nullptr), serialize(nullptr), deserialize(nullptr),
82
- copy_from_bind(nullptr) {
106
+ copy_to_finalize(nullptr), execution_mode(nullptr), prepare_batch(nullptr), flush_batch(nullptr),
107
+ serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
83
108
  }
84
109
 
85
110
  //! Plan rewrite copy function
@@ -91,7 +116,10 @@ public:
91
116
  copy_to_sink_t copy_to_sink;
92
117
  copy_to_combine_t copy_to_combine;
93
118
  copy_to_finalize_t copy_to_finalize;
94
- copy_to_is_parallel_t parallel;
119
+ copy_to_execution_mode_t execution_mode;
120
+
121
+ copy_prepare_batch_t prepare_batch;
122
+ copy_flush_batch_t flush_batch;
95
123
 
96
124
  copy_to_serialize_t serialize;
97
125
  copy_to_deserialize_t deserialize;
@@ -54,6 +54,8 @@ struct WriteCSVData : public BaseCSVData {
54
54
  bool is_simple;
55
55
  //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
56
56
  idx_t flush_size = 4096 * 8;
57
+ //! For each byte whether or not the CSV file requires quotes when containing the byte
58
+ unique_ptr<bool[]> requires_quotes;
57
59
  };
58
60
 
59
61
  struct ColumnInfo {
@@ -97,8 +99,8 @@ struct ReadCSVData : public BaseCSVData {
97
99
  bool single_threaded = false;
98
100
  //! Reader bind data
99
101
  MultiFileReaderBindData reader_bind;
100
- //! If all files are On-Disk file (e.g., not a pipe)
101
- bool file_exists = true;
102
+ //! If any file is a pipe
103
+ bool is_pipe = false;
102
104
  vector<ColumnInfo> column_info;
103
105
 
104
106
  void Initialize(unique_ptr<BufferedCSVReader> &reader) {
@@ -84,6 +84,8 @@ struct ExtensionOption {
84
84
  struct DBConfigOptions {
85
85
  //! Database file path. May be empty for in-memory mode
86
86
  string database_path;
87
+ //! Database type. If empty, automatically extracted from `database_path`, where a `type:path` syntax is expected
88
+ string database_type;
87
89
  //! Access mode of the database (AUTOMATIC, READ_ONLY or READ_WRITE)
88
90
  AccessMode access_mode = AccessMode::AUTOMATIC;
89
91
  //! Checkpoint when WAL reaches this size (default: 16MB)
@@ -54,13 +54,11 @@ public:
54
54
 
55
55
  DUCKDB_API bool TryGetCurrentSetting(const std::string &key, Value &result);
56
56
 
57
- //! Get the database extension type from a given path
58
- string ExtractDatabaseType(string &path);
59
57
  unique_ptr<AttachedDatabase> CreateAttachedDatabase(AttachInfo &info, const string &type, AccessMode access_mode);
60
58
 
61
59
  private:
62
60
  void Initialize(const char *path, DBConfig *config);
63
- void CreateDatabase(const string &database_type);
61
+ void CreateMainDatabase();
64
62
 
65
63
  void Configure(DBConfig &config);
66
64
 
@@ -0,0 +1,24 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/main/database_path_and_type.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include <string>
12
+ #include "duckdb/main/config.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ struct DBPathAndType { //!< A database path together with the database (extension) type that should open it
17
+
18
+ //! Split a combined "type:path" string into its database (extension) type and the remaining path
19
+ static DBPathAndType Parse(const string &combined_path, const DBConfig &config);
20
+
21
+ const string path; //!< Database path with any leading "type:" prefix removed
22
+ const string type; //!< Database (extension) type; empty when Parse detected none
23
+ };
24
+ } // namespace duckdb
@@ -20,6 +20,7 @@ public:
20
20
  shared_ptr<Relation> left;
21
21
  shared_ptr<Relation> right;
22
22
  SetOperationType setop_type;
23
+ vector<ColumnDefinition> columns;
23
24
 
24
25
  public:
25
26
  unique_ptr<QueryNode> GetQueryNode() override;
@@ -17,6 +17,7 @@ namespace duckdb {
17
17
 
18
18
  enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, RESERVOIR_SAMPLE = 2 };
19
19
 
20
+ // **DEPRECATED**: Use EnumUtil directly instead.
20
21
  string SampleMethodToString(SampleMethod method);
21
22
 
22
23
  struct SampleOptions {
@@ -59,6 +59,8 @@ protected:
59
59
 
60
60
  idx_t TryBindGroup(ParsedExpression &expr, idx_t depth);
61
61
  BindResult BindGroup(ParsedExpression &expr, idx_t depth, idx_t group_index);
62
+
63
+ bool QualifyColumnAlias(const ColumnRefExpression &colref) override;
62
64
  };
63
65
 
64
66
  } // namespace duckdb
@@ -101,6 +101,8 @@ public:
101
101
  static bool ContainsType(const LogicalType &type, LogicalTypeId target);
102
102
  static LogicalType ExchangeType(const LogicalType &type, LogicalTypeId target, LogicalType new_type);
103
103
 
104
+ virtual bool QualifyColumnAlias(const ColumnRefExpression &colref);
105
+
104
106
  //! Bind the given expression. Unlike Bind(), this does *not* mutate the given ParsedExpression.
105
107
  //! Exposed to be used from sub-binders that aren't subclasses of ExpressionBinder.
106
108
  virtual BindResult BindExpression(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth,
@@ -35,7 +35,7 @@ public:
35
35
  //! The set of grouping sets (optional).
36
36
  vector<GroupingSet> grouping_sets;
37
37
  //! The list of grouping function calls (optional)
38
- vector<vector<idx_t>> grouping_functions;
38
+ vector<unsafe_vector<idx_t>> grouping_functions;
39
39
  //! Group statistics (optional)
40
40
  vector<unique_ptr<BaseStatistics>> group_stats;
41
41
 
@@ -78,7 +78,7 @@ public:
78
78
  vector<unique_ptr<Expression>> aggregates;
79
79
 
80
80
  //! GROUPING function calls
81
- vector<vector<idx_t>> grouping_functions;
81
+ vector<unsafe_vector<idx_t>> grouping_functions;
82
82
 
83
83
  //! Map from aggregate function to aggregate index (used to eliminate duplicate aggregates)
84
84
  expression_map_t<idx_t> aggregate_map;
@@ -0,0 +1,26 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/verification/deserialized_statement_verifier_v2.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/verification/statement_verifier.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ //------------------------------------------------------------------------------
16
+ // This is a temporary statement verifier that uses the new de/serialization
17
+ // infrastructure to verify the correctness of the de/serialization process.
18
+ // This verifier will be removed once the new de/serialization infrastructure
19
+ // (FormatDe/Serializer) replaces the old one.
20
+ class DeserializedStatementVerifierV2 : public StatementVerifier {
21
+ public:
22
+ explicit DeserializedStatementVerifierV2(unique_ptr<SQLStatement> statement_p); //!< Takes ownership of statement_p
23
+ static unique_ptr<StatementVerifier> Create(const SQLStatement &statement); //!< Builds a verifier for `statement`; presumably round-trips it through the new FormatSerializer/FormatDeserializer - confirm in the .cpp
24
+ };
25
+
26
+ } // namespace duckdb
@@ -18,6 +18,7 @@ enum class VerificationType : uint8_t {
18
18
  ORIGINAL,
19
19
  COPIED,
20
20
  DESERIALIZED,
21
+ DESERIALIZED_V2,
21
22
  PARSED,
22
23
  UNOPTIMIZED,
23
24
  NO_OPERATOR_CACHING,
@@ -495,6 +495,7 @@ unique_ptr<LogicalOperator> ClientContext::ExtractPlan(const string &query) {
495
495
  }
496
496
 
497
497
  unique_ptr<LogicalOperator> plan;
498
+ client_data->http_state = make_uniq<HTTPState>();
498
499
  RunFunctionInTransactionInternal(*lock, [&]() {
499
500
  Planner planner(*this);
500
501
  planner.CreatePlan(std::move(statements[0]));
@@ -26,6 +26,7 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
26
26
  if (config.query_verification_enabled) {
27
27
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::COPIED, stmt));
28
28
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED, stmt));
29
+ statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED_V2, stmt));
29
30
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::UNOPTIMIZED, stmt));
30
31
  prepared_statement_verifier = StatementVerifier::Create(VerificationType::PREPARED, stmt);
31
32
  #ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
@@ -13,9 +13,9 @@
13
13
  #include "duckdb/main/extension_helper.hpp"
14
14
  #include "duckdb/parallel/task_scheduler.hpp"
15
15
  #include "duckdb/parser/parsed_data/attach_info.hpp"
16
- #include "duckdb/storage/magic_bytes.hpp"
17
16
  #include "duckdb/storage/object_cache.hpp"
18
17
  #include "duckdb/storage/standard_buffer_manager.hpp"
18
+ #include "duckdb/main/database_path_and_type.hpp"
19
19
  #include "duckdb/storage/storage_extension.hpp"
20
20
  #include "duckdb/storage/storage_manager.hpp"
21
21
  #include "duckdb/transaction/transaction_manager.hpp"
@@ -129,22 +129,6 @@ ConnectionManager &ConnectionManager::Get(ClientContext &context) {
129
129
  return ConnectionManager::Get(DatabaseInstance::GetDatabase(context));
130
130
  }
131
131
 
132
- string DatabaseInstance::ExtractDatabaseType(string &path) {
133
- // first check if there is an existing prefix
134
- auto extension = ExtensionHelper::ExtractExtensionPrefixFromPath(path);
135
- if (!extension.empty()) {
136
- // path is prefixed with an extension - remove it
137
- path = StringUtil::Replace(path, extension + ":", "");
138
- return extension;
139
- }
140
- // if there isn't - check the magic bytes of the file (if any)
141
- auto file_type = MagicBytes::CheckMagicBytes(config.file_system.get(), path);
142
- if (file_type == DataFileType::SQLITE_FILE) {
143
- return "sqlite";
144
- }
145
- return string();
146
- }
147
-
148
132
  duckdb::unique_ptr<AttachedDatabase> DatabaseInstance::CreateAttachedDatabase(AttachInfo &info, const string &type,
149
133
  AccessMode access_mode) {
150
134
  duckdb::unique_ptr<AttachedDatabase> attached_database;
@@ -172,12 +156,12 @@ duckdb::unique_ptr<AttachedDatabase> DatabaseInstance::CreateAttachedDatabase(At
172
156
  return attached_database;
173
157
  }
174
158
 
175
- void DatabaseInstance::CreateDatabase(const string &database_type) {
159
+ void DatabaseInstance::CreateMainDatabase() {
176
160
  AttachInfo info;
177
161
  info.name = AttachedDatabase::ExtractDatabaseName(config.options.database_path);
178
162
  info.path = config.options.database_path;
179
163
 
180
- auto attached_database = CreateAttachedDatabase(info, database_type, config.options.access_mode);
164
+ auto attached_database = CreateAttachedDatabase(info, config.options.database_type, config.options.access_mode);
181
165
  auto initial_database = attached_database.get();
182
166
  {
183
167
  Connection con(*this);
@@ -235,14 +219,18 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
235
219
  connection_manager = make_uniq<ConnectionManager>();
236
220
 
237
221
  // check if we are opening a standard DuckDB database or an extension database
238
- auto database_type = ExtractDatabaseType(config.options.database_path);
222
+ if (config.options.database_type.empty()) {
223
+ auto path_and_type = DBPathAndType::Parse(config.options.database_path, config);
224
+ config.options.database_type = path_and_type.type;
225
+ config.options.database_path = path_and_type.path;
226
+ }
239
227
 
240
228
  // initialize the system catalog
241
229
  db_manager->InitializeSystemCatalog();
242
230
 
243
- if (!database_type.empty()) {
231
+ if (!config.options.database_type.empty()) {
244
232
  // if we are opening an extension database - load the extension
245
- ExtensionHelper::LoadExternalExtension(*this, nullptr, database_type);
233
+ ExtensionHelper::LoadExternalExtension(*this, nullptr, config.options.database_type);
246
234
  }
247
235
 
248
236
  if (!config.options.unrecognized_options.empty()) {
@@ -250,7 +238,7 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
250
238
  }
251
239
 
252
240
  if (!db_manager->HasDefaultDatabase()) {
253
- CreateDatabase(database_type);
241
+ CreateMainDatabase();
254
242
  }
255
243
 
256
244
  // only increase thread count after storage init because we get races on catalog otherwise
@@ -0,0 +1,23 @@
1
+ #include "duckdb/main/database_path_and_type.hpp"
2
+
3
+ #include "duckdb/main/extension_helper.hpp"
4
+ #include "duckdb/storage/magic_bytes.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ DBPathAndType DBPathAndType::Parse(const string &combined_path, const DBConfig &config) { // splits "type:path" into {path, type}; config supplies the file system for the magic-bytes probe
9
+ auto extension = ExtensionHelper::ExtractExtensionPrefixFromPath(combined_path);
10
+ if (!extension.empty()) {
11
+ // path is prefixed with an extension - remove only the leading "<extension>:" (later occurrences belong to the path)
12
+ auto path = combined_path.substr(extension.size() + 1); // assumes the extracted prefix is immediately followed by ':' - confirm against ExtractExtensionPrefixFromPath
13
+ auto type = ExtensionHelper::ApplyExtensionAlias(extension);
14
+ return {path, type};
15
+ }
16
+ // if there isn't - check the magic bytes of the file (if any)
17
+ auto file_type = MagicBytes::CheckMagicBytes(config.file_system.get(), combined_path);
18
+ if (file_type == DataFileType::SQLITE_FILE) {
19
+ return {combined_path, "sqlite"};
20
+ }
21
+ return {combined_path, string()}; // no prefix and no SQLite magic bytes: path unchanged, empty type
22
+ }
23
+ } // namespace duckdb
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/parser/query_node/select_node.hpp"
4
4
  #include "duckdb/parser/expression/star_expression.hpp"
5
5
  #include "duckdb/parser/tableref/joinref.hpp"
6
+ #include "duckdb/common/enum_util.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
@@ -51,7 +52,7 @@ const vector<ColumnDefinition> &JoinRelation::Columns() {
51
52
 
52
53
  string JoinRelation::ToString(idx_t depth) {
53
54
  string str = RenderWhitespace(depth);
54
- str += "Join " + JoinTypeToString(join_type);
55
+ str += "Join " + EnumUtil::ToString(join_type);
55
56
  if (condition) {
56
57
  str += " " + condition->GetName();
57
58
  }
@@ -11,8 +11,7 @@ SetOpRelation::SetOpRelation(shared_ptr<Relation> left_p, shared_ptr<Relation> r
11
11
  if (left->context.GetContext() != right->context.GetContext()) {
12
12
  throw Exception("Cannot combine LEFT and RIGHT relations of different connections!");
13
13
  }
14
- vector<ColumnDefinition> dummy_columns;
15
- context.GetContext()->TryBindRelation(*this, dummy_columns);
14
+ context.GetContext()->TryBindRelation(*this, this->columns);
16
15
  }
17
16
 
18
17
  unique_ptr<QueryNode> SetOpRelation::GetQueryNode() {
@@ -31,7 +30,7 @@ string SetOpRelation::GetAlias() {
31
30
  }
32
31
 
33
32
  const vector<ColumnDefinition> &SetOpRelation::Columns() {
34
- return left->Columns();
33
+ return this->columns;
35
34
  }
36
35
 
37
36
  string SetOpRelation::ToString(idx_t depth) {
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/field_writer.hpp"
5
5
  #include "duckdb/common/string_util.hpp"
6
6
 
7
- #include "duckdb/common/serializer/enum_serializer.hpp"
7
+ #include "duckdb/common/enum_util.hpp"
8
8
  #include "duckdb/common/serializer/format_serializer.hpp"
9
9
  #include "duckdb/common/serializer/format_deserializer.hpp"
10
10
 
@@ -1,13 +1,13 @@
1
1
  #include "duckdb/parser/parsed_data/sample_options.hpp"
2
2
  #include "duckdb/common/field_writer.hpp"
3
- #include "duckdb/common/serializer/enum_serializer.hpp"
4
3
  #include "duckdb/common/serializer/format_serializer.hpp"
5
4
  #include "duckdb/common/serializer/format_deserializer.hpp"
6
5
 
7
6
  namespace duckdb {
8
7
 
8
+ // **DEPRECATED**: Use EnumUtil directly instead.
9
9
  string SampleMethodToString(SampleMethod method) {
10
- return EnumSerializer::EnumToString(method);
10
+ return EnumUtil::ToString(method);
11
11
  }
12
12
 
13
13
  void SampleOptions::Serialize(Serializer &serializer) {
@@ -97,7 +97,7 @@ string SelectNode::ToString() const {
97
97
  if (sample->is_percentage) {
98
98
  result += "%";
99
99
  }
100
- result += " (" + SampleMethodToString(sample->method);
100
+ result += " (" + EnumUtil::ToString(sample->method);
101
101
  if (sample->seed >= 0) {
102
102
  result += ", " + std::to_string(sample->seed);
103
103
  }
@@ -297,8 +297,8 @@ void LimitPercentModifier::Serialize(FieldWriter &writer) const {
297
297
 
298
298
  void LimitPercentModifier::FormatSerialize(FormatSerializer &serializer) const {
299
299
  ResultModifier::FormatSerialize(serializer);
300
- serializer.WriteProperty("limit", limit);
301
- serializer.WriteProperty("offset", offset);
300
+ serializer.WriteOptionalProperty("limit", limit);
301
+ serializer.WriteOptionalProperty("offset", offset);
302
302
  }
303
303
 
304
304
  unique_ptr<ResultModifier> LimitPercentModifier::Deserialize(FieldReader &reader) {
@@ -46,47 +46,3 @@ string SelectStatement::ToString() const {
46
46
  }
47
47
 
48
48
  } // namespace duckdb
49
-
50
- /*
51
- json_serialize_sql('SELECT BLOB ''\x01\x10'';', format := CAST('t' AS BOOLEAN)){
52
- "error": false,
53
- "statements": [
54
- {
55
- "node": {
56
- "type": "SELECT_NODE",
57
- "modifiers": [],
58
- "cte_map": {
59
- "map": []
60
- },
61
- "select_list": [
62
- {
63
- "class": "CONSTANT",
64
- "type": "CONSTANT",
65
- "alias": "",
66
- "value": {
67
- "type": {
68
- "id": "BLOB",
69
- "type_info": null
70
- },
71
- "is_null": false,
72
- "value": "\u0001\u0010"
73
- }
74
- }
75
- ],
76
- "from_table": {
77
- "type": "EMPTY",
78
- "alias": "",
79
- "sample": null
80
- },
81
- "where_clause": null,
82
- "group_expressions": [],
83
- "group_sets": [],
84
- "aggregate_handling": "STANDARD_HANDLING",
85
- "having": null,
86
- "sample": null,
87
- "qualify": null
88
- }
89
- }
90
- ]
91
- }
92
- */
@@ -12,15 +12,15 @@ string JoinRef::ToString() const {
12
12
  result = left->ToString() + " ";
13
13
  switch (ref_type) {
14
14
  case JoinRefType::REGULAR:
15
- result += JoinTypeToString(type) + " JOIN ";
15
+ result += EnumUtil::ToString(type) + " JOIN ";
16
16
  break;
17
17
  case JoinRefType::NATURAL:
18
18
  result += "NATURAL ";
19
- result += JoinTypeToString(type) + " JOIN ";
19
+ result += EnumUtil::ToString(type) + " JOIN ";
20
20
  break;
21
21
  case JoinRefType::ASOF:
22
22
  result += "ASOF ";
23
- result += JoinTypeToString(type) + " JOIN ";
23
+ result += EnumUtil::ToString(type) + " JOIN ";
24
24
  break;
25
25
  case JoinRefType::CROSS:
26
26
  result += ", ";
@@ -30,7 +30,7 @@ string TableRef::BaseToString(string result, const vector<string> &column_name_a
30
30
  result += ")";
31
31
  }
32
32
  if (sample) {
33
- result += " TABLESAMPLE " + SampleMethodToString(sample->method);
33
+ result += " TABLESAMPLE " + EnumUtil::ToString(sample->method);
34
34
  result += "(" + sample->sample_size.ToString() + " " + string(sample->is_percentage ? "PERCENT" : "ROWS") + ")";
35
35
  if (sample->seed >= 0) {
36
36
  result += "REPEATABLE (" + to_string(sample->seed) + ")";
@@ -1,4 +1,4 @@
1
- #include "duckdb/common/serializer/enum_serializer.hpp"
1
+ #include "duckdb/common/enum_util.hpp"
2
2
  #include "duckdb/common/string_util.hpp"
3
3
  #include "duckdb/common/to_string.hpp"
4
4
  #include "duckdb/parser/expression/case_expression.hpp"
@@ -305,8 +305,8 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
305
305
  auto arg_expr = children[0].get();
306
306
  auto &order_by = order_bys->orders[0];
307
307
  if (arg_expr->Equals(order_by.expression.get())) {
308
- auto sense = make_uniq<ConstantExpression>(EnumSerializer::EnumToString(order_by.type));
309
- auto nulls = make_uniq<ConstantExpression>(EnumSerializer::EnumToString(order_by.null_order));
308
+ auto sense = make_uniq<ConstantExpression>(EnumUtil::ToChars(order_by.type));
309
+ auto nulls = make_uniq<ConstantExpression>(EnumUtil::ToChars(order_by.null_order));
310
310
  order_bys = nullptr;
311
311
  auto unordered = make_uniq<FunctionExpression>(catalog, schema, lowercase_name.c_str(), std::move(children),
312
312
  std::move(filter_expr), std::move(order_bys),
@@ -388,4 +388,10 @@ BindResult ExpressionBinder::BindExpression(ColumnRefExpression &colref_p, idx_t
388
388
  return result;
389
389
  }
390
390
 
391
+ bool ExpressionBinder::QualifyColumnAlias(const ColumnRefExpression &colref) {
392
+ // Base implementation: only BaseSelectBinder (which overrides this) has a valid column
393
+ // alias map, so here a column reference is never reported as an alias
394
+ return false;
395
+ }
396
+
391
397
  } // namespace duckdb
@@ -45,7 +45,10 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t
45
45
  colref = make_uniq<ColumnRefExpression>(function.schema, function.catalog);
46
46
  }
47
47
  auto new_colref = QualifyColumnName(*colref, error);
48
- if (error.empty()) {
48
+ bool is_col = error.empty() ? true : false;
49
+ bool is_col_alias = QualifyColumnAlias(*colref);
50
+
51
+ if (is_col || is_col_alias) {
49
52
  // we can! transform this into a function call on the column
50
53
  // i.e. "x.lower()" becomes "lower(x)"
51
54
  function.children.insert(function.children.begin(), std::move(colref));
@@ -143,4 +143,11 @@ BindResult BaseSelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_
143
143
  ColumnBinding(node.group_index, group_index), depth));
144
144
  }
145
145
 
146
+ bool BaseSelectBinder::QualifyColumnAlias(const ColumnRefExpression &colref) { // true iff colref is an unqualified name present in alias_map
147
+ if (colref.IsQualified()) {
148
+ return false; // a qualified reference is never looked up as an alias
149
+ }
150
+ return alias_map.find(colref.column_names[0]) != alias_map.end();
151
+ }
152
+
146
153
  } // namespace duckdb
@@ -89,7 +89,7 @@ unique_ptr<LogicalOperator> LogicalAggregate::Deserialize(LogicalDeserialization
89
89
  for (idx_t i = 0; i < grouping_sets_size; i++) {
90
90
  grouping_sets.push_back(reader.ReadRequiredSet<idx_t>());
91
91
  }
92
- vector<vector<idx_t>> grouping_functions;
92
+ vector<unsafe_vector<idx_t>> grouping_functions;
93
93
  auto grouping_functions_size = reader.ReadRequired<idx_t>();
94
94
  for (idx_t i = 0; i < grouping_functions_size; i++) {
95
95
  grouping_functions.push_back(reader.ReadRequiredList<idx_t>());