duckdb 0.8.2-dev4711.0 → 0.8.2-dev5002.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/operator/cast_operators.cpp +18 -0
  12. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  13. package/src/duckdb/src/common/row_operations/row_matcher.cpp +375 -0
  14. package/src/duckdb/src/common/types/data_chunk.cpp +48 -11
  15. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  16. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
  17. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  18. package/src/duckdb/src/common/types/vector.cpp +0 -1
  19. package/src/duckdb/src/common/types.cpp +1 -1
  20. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  21. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  22. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  23. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  24. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +5 -1
  25. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +10 -1
  26. package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +0 -2
  27. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  28. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  29. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  30. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  31. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +20 -27
  32. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +21 -9
  33. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  34. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  36. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  37. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  38. package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
  39. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  40. package/src/duckdb/src/function/function_binder.cpp +10 -9
  41. package/src/duckdb/src/function/pragma/pragma_queries.cpp +3 -0
  42. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  43. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  44. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  45. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +27 -0
  47. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +38 -2
  48. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  49. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
  50. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  51. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  52. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +4 -4
  53. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +4 -4
  54. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +5 -5
  55. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +7 -7
  56. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +12 -12
  57. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +12 -12
  58. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -3
  59. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +33 -33
  60. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +2 -2
  61. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -3
  62. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +13 -13
  63. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +2 -2
  64. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  66. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  67. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  68. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -0
  69. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +1 -2
  70. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
  75. package/src/duckdb/src/main/config.cpp +1 -1
  76. package/src/duckdb/src/main/relation.cpp +10 -0
  77. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  78. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +28 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +3 -0
  80. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  81. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +14 -6
  82. package/src/duckdb/src/planner/operator/logical_create_table.cpp +3 -3
  83. package/src/duckdb/src/planner/operator/logical_delete.cpp +3 -2
  84. package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -2
  85. package/src/duckdb/src/planner/operator/logical_update.cpp +3 -2
  86. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  87. package/src/duckdb/src/storage/data_table.cpp +18 -8
  88. package/src/duckdb/src/storage/local_storage.cpp +2 -3
  89. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +64 -80
  90. package/src/duckdb/src/storage/storage_manager.cpp +6 -2
  91. package/src/duckdb/src/storage/table/row_group.cpp +6 -0
  92. package/src/duckdb/src/storage/table/row_group_collection.cpp +4 -3
  93. package/src/duckdb/src/storage/table/struct_column_data.cpp +2 -0
  94. package/src/duckdb/src/transaction/duck_transaction.cpp +1 -0
  95. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  96. package/src/statement.cpp +2 -4
  97. package/test/database_fail.test.ts +6 -0
  98. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -18,7 +18,7 @@ namespace duckdb {
18
18
  struct StartsWithOperatorFun {
19
19
  static constexpr const char *Name = "^@";
20
20
  static constexpr const char *Parameters = "string,search_string";
21
- static constexpr const char *Description = "Return true if string begins with search_string";
21
+ static constexpr const char *Description = "Returns true if string begins with search_string";
22
22
  static constexpr const char *Example = "starts_with('abc','a')";
23
23
 
24
24
  static ScalarFunction GetFunction();
@@ -33,7 +33,7 @@ struct StartsWithFun {
33
33
  struct ASCIIFun {
34
34
  static constexpr const char *Name = "ascii";
35
35
  static constexpr const char *Parameters = "string";
36
- static constexpr const char *Description = "Returns an integer that represents the Unicode code point of the first character of the string.";
36
+ static constexpr const char *Description = "Returns an integer that represents the Unicode code point of the first character of the string";
37
37
  static constexpr const char *Example = "ascii('Ω')";
38
38
 
39
39
  static ScalarFunction GetFunction();
@@ -42,7 +42,7 @@ struct ASCIIFun {
42
42
  struct BarFun {
43
43
  static constexpr const char *Name = "bar";
44
44
  static constexpr const char *Parameters = "x,min,max,width";
45
- static constexpr const char *Description = "Draw a band whose width is proportional to (x - min) and equal to width characters when x = max. width defaults to 80.";
45
+ static constexpr const char *Description = "Draws a band whose width is proportional to (x - min) and equal to width characters when x = max. width defaults to 80";
46
46
  static constexpr const char *Example = "bar(5, 0, 20, 10)";
47
47
 
48
48
  static ScalarFunctionSet GetFunctions();
@@ -66,7 +66,7 @@ struct ToBinaryFun {
66
66
  struct ChrFun {
67
67
  static constexpr const char *Name = "chr";
68
68
  static constexpr const char *Parameters = "code_point";
69
- static constexpr const char *Description = "returns a character which is corresponding the ASCII code value or Unicode code point";
69
+ static constexpr const char *Description = "Returns a character which is corresponding the ASCII code value or Unicode code point";
70
70
  static constexpr const char *Example = "chr(65)";
71
71
 
72
72
  static ScalarFunction GetFunction();
@@ -75,7 +75,7 @@ struct ChrFun {
75
75
  struct DamerauLevenshteinFun {
76
76
  static constexpr const char *Name = "damerau_levenshtein";
77
77
  static constexpr const char *Parameters = "str1,str2";
78
- static constexpr const char *Description = "Extension of Levenshtein distance to also include transposition of adjacent characters as an allowed edit operation. In other words, the minimum number of edit operations (insertions, deletions, substitutions or transpositions) required to change one string to another. Different case is considered different.";
78
+ static constexpr const char *Description = "Extension of Levenshtein distance to also include transposition of adjacent characters as an allowed edit operation. In other words, the minimum number of edit operations (insertions, deletions, substitutions or transpositions) required to change one string to another. Different case is considered different";
79
79
  static constexpr const char *Example = "damerau_levenshtein('hello', 'world')";
80
80
 
81
81
  static ScalarFunction GetFunction();
@@ -108,7 +108,7 @@ struct FormatreadabledecimalsizeFun {
108
108
  struct HammingFun {
109
109
  static constexpr const char *Name = "hamming";
110
110
  static constexpr const char *Parameters = "str1,str2";
111
- static constexpr const char *Description = "The number of positions with different characters for 2 strings of equal length. Different case is considered different.";
111
+ static constexpr const char *Description = "The number of positions with different characters for 2 strings of equal length. Different case is considered different";
112
112
  static constexpr const char *Example = "hamming('duck','luck')";
113
113
 
114
114
  static ScalarFunction GetFunction();
@@ -138,7 +138,7 @@ struct ToHexFun {
138
138
  struct InstrFun {
139
139
  static constexpr const char *Name = "instr";
140
140
  static constexpr const char *Parameters = "haystack,needle";
141
- static constexpr const char *Description = "Return location of first occurrence of needle in haystack, counting from 1. Returns 0 if no match found.";
141
+ static constexpr const char *Description = "Returns location of first occurrence of needle in haystack, counting from 1. Returns 0 if no match found";
142
142
  static constexpr const char *Example = "instr('test test','es')";
143
143
 
144
144
  static ScalarFunction GetFunction();
@@ -159,7 +159,7 @@ struct PositionFun {
159
159
  struct JaccardFun {
160
160
  static constexpr const char *Name = "jaccard";
161
161
  static constexpr const char *Parameters = "str1,str2";
162
- static constexpr const char *Description = "The Jaccard similarity between two strings. Different case is considered different. Returns a number between 0 and 1.";
162
+ static constexpr const char *Description = "The Jaccard similarity between two strings. Different case is considered different. Returns a number between 0 and 1";
163
163
  static constexpr const char *Example = "jaccard('duck','luck')";
164
164
 
165
165
  static ScalarFunction GetFunction();
@@ -168,7 +168,7 @@ struct JaccardFun {
168
168
  struct JaroSimilarityFun {
169
169
  static constexpr const char *Name = "jaro_similarity";
170
170
  static constexpr const char *Parameters = "str1,str2";
171
- static constexpr const char *Description = "The Jaro similarity between two strings. Different case is considered different. Returns a number between 0 and 1.";
171
+ static constexpr const char *Description = "The Jaro similarity between two strings. Different case is considered different. Returns a number between 0 and 1";
172
172
  static constexpr const char *Example = "jaro_similarity('duck','duckdb')";
173
173
 
174
174
  static ScalarFunction GetFunction();
@@ -177,7 +177,7 @@ struct JaroSimilarityFun {
177
177
  struct JaroWinklerSimilarityFun {
178
178
  static constexpr const char *Name = "jaro_winkler_similarity";
179
179
  static constexpr const char *Parameters = "str1,str2";
180
- static constexpr const char *Description = "The Jaro-Winkler similarity between two strings. Different case is considered different. Returns a number between 0 and 1.";
180
+ static constexpr const char *Description = "The Jaro-Winkler similarity between two strings. Different case is considered different. Returns a number between 0 and 1";
181
181
  static constexpr const char *Example = "jaro_winkler_similarity('duck','duckdb')";
182
182
 
183
183
  static ScalarFunction GetFunction();
@@ -204,7 +204,7 @@ struct LeftGraphemeFun {
204
204
  struct LevenshteinFun {
205
205
  static constexpr const char *Name = "levenshtein";
206
206
  static constexpr const char *Parameters = "str1,str2";
207
- static constexpr const char *Description = "The minimum number of single-character edits (insertions, deletions or substitutions) required to change one string to the other. Different case is considered different.";
207
+ static constexpr const char *Description = "The minimum number of single-character edits (insertions, deletions or substitutions) required to change one string to the other. Different case is considered different";
208
208
  static constexpr const char *Example = "levenshtein('duck','db')";
209
209
 
210
210
  static ScalarFunction GetFunction();
@@ -285,7 +285,7 @@ struct RepeatFun {
285
285
  static constexpr const char *Description = "Repeats the string count number of times";
286
286
  static constexpr const char *Example = "repeat('A', 5)";
287
287
 
288
- static ScalarFunction GetFunction();
288
+ static ScalarFunctionSet GetFunctions();
289
289
  };
290
290
 
291
291
  struct ReplaceFun {
@@ -402,7 +402,7 @@ struct RegexpSplitToArrayFun {
402
402
  struct TranslateFun {
403
403
  static constexpr const char *Name = "translate";
404
404
  static constexpr const char *Parameters = "string,from,to";
405
- static constexpr const char *Description = "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.";
405
+ static constexpr const char *Description = "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted";
406
406
  static constexpr const char *Example = "translate('12345', '143', 'ax')";
407
407
 
408
408
  static ScalarFunction GetFunction();
@@ -18,7 +18,7 @@ namespace duckdb {
18
18
  struct StructInsertFun {
19
19
  static constexpr const char *Name = "struct_insert";
20
20
  static constexpr const char *Parameters = "struct,any";
21
- static constexpr const char *Description = "Add field(s)/value(s) to an existing STRUCT with the argument values. The entry name(s) will be the bound variable name(s).";
21
+ static constexpr const char *Description = "Adds field(s)/value(s) to an existing STRUCT with the argument values. The entry name(s) will be the bound variable name(s)";
22
22
  static constexpr const char *Example = "struct_insert({'a': 1}, b := 2)";
23
23
 
24
24
  static ScalarFunction GetFunction();
@@ -27,7 +27,7 @@ struct StructInsertFun {
27
27
  struct StructPackFun {
28
28
  static constexpr const char *Name = "struct_pack";
29
29
  static constexpr const char *Parameters = "any";
30
- static constexpr const char *Description = "Create a STRUCT containing the argument values. The entry name will be the bound variable name";
30
+ static constexpr const char *Description = "Creates a STRUCT containing the argument values. The entry name will be the bound variable name";
31
31
  static constexpr const char *Example = "struct_pack(i := 4, s := 'string')";
32
32
 
33
33
  static ScalarFunction GetFunction();
@@ -27,7 +27,7 @@ struct UnionExtractFun {
27
27
  struct UnionTagFun {
28
28
  static constexpr const char *Name = "union_tag";
29
29
  static constexpr const char *Parameters = "union";
30
- static constexpr const char *Description = "Retrieve the currently selected tag of the union as an Enum.";
30
+ static constexpr const char *Description = "Retrieve the currently selected tag of the union as an ENUM";
31
31
  static constexpr const char *Example = "union_tag(union_value(k := 'foo'))";
32
32
 
33
33
  static ScalarFunction GetFunction();
@@ -36,7 +36,7 @@ struct UnionTagFun {
36
36
  struct UnionValueFun {
37
37
  static constexpr const char *Name = "union_value";
38
38
  static constexpr const char *Parameters = "tag";
39
- static constexpr const char *Description = "Create a single member UNION containing the argument value. The tag of the value will be the bound variable name.";
39
+ static constexpr const char *Description = "Create a single member UNION containing the argument value. The tag of the value will be the bound variable name";
40
40
  static constexpr const char *Example = "union_value(k := 'hello')";
41
41
 
42
42
  static ScalarFunction GetFunction();
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/common/row_operations/row_matcher.hpp"
11
12
  #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
12
13
  #include "duckdb/execution/base_aggregate_hashtable.hpp"
13
14
  #include "duckdb/storage/arena_allocator.hpp"
@@ -143,6 +144,9 @@ public:
143
144
  void UnpinData();
144
145
 
145
146
  private:
147
+ //! Efficiently matches groups
148
+ RowMatcher row_matcher;
149
+
146
150
  //! Append state
147
151
  struct AggregateHTAppendState {
148
152
  AggregateHTAppendState();
@@ -65,7 +65,7 @@ public:
65
65
  //! returned by the JoinHashTable::Scan function and can be used to resume a
66
66
  //! probe.
67
67
  struct ScanStructure {
68
- unsafe_unique_array<UnifiedVectorFormat> key_data;
68
+ TupleDataChunkState &key_state;
69
69
  Vector pointers;
70
70
  idx_t count;
71
71
  SelectionVector sel_vector;
@@ -74,7 +74,7 @@ public:
74
74
  JoinHashTable &ht;
75
75
  bool finished;
76
76
 
77
- explicit ScanStructure(JoinHashTable &ht);
77
+ explicit ScanStructure(JoinHashTable &ht, TupleDataChunkState &key_state);
78
78
  //! Get the next batch of data from the scan structure
79
79
  void Next(DataChunk &keys, DataChunk &left, DataChunk &result);
80
80
 
@@ -130,7 +130,8 @@ public:
130
130
  //! ever called.
131
131
  void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
132
132
  //! Probe the HT with the given input chunk, resulting in the given result
133
- unique_ptr<ScanStructure> Probe(DataChunk &keys, Vector *precomputed_hashes = nullptr);
133
+ unique_ptr<ScanStructure> Probe(DataChunk &keys, TupleDataChunkState &key_state,
134
+ Vector *precomputed_hashes = nullptr);
134
135
  //! Scan the HT to construct the full outer join result
135
136
  void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
136
137
 
@@ -166,6 +167,9 @@ public:
166
167
  vector<ExpressionType> predicates;
167
168
  //! Data column layout
168
169
  TupleDataLayout layout;
170
+ //! Efficiently matches rows
171
+ RowMatcher row_matcher;
172
+ RowMatcher row_matcher_no_match_sel;
169
173
  //! The size of an entry as stored in the HashTable
170
174
  idx_t entry_size;
171
175
  //! The total tuple size
@@ -201,7 +205,8 @@ public:
201
205
  } correlated_mark_join_info;
202
206
 
203
207
  private:
204
- unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel);
208
+ unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
209
+ const SelectionVector *&current_sel);
205
210
  void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes);
206
211
 
207
212
  //! Apply a bitmask to the hashes
@@ -212,8 +217,8 @@ private:
212
217
  //! Insert the given set of locations into the HT with the given set of hashes
213
218
  void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
214
219
 
215
- idx_t PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
216
- const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
220
+ idx_t PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data, const SelectionVector *&current_sel,
221
+ SelectionVector &sel, bool build_side);
217
222
 
218
223
  //! Lock for combining data_collection when merging HTs
219
224
  mutex data_lock;
@@ -316,8 +321,9 @@ public:
316
321
  //! Build HT for the next partitioned probe round
317
322
  bool PrepareExternalFinalize();
318
323
  //! Probe whatever we can, sink the rest into a thread-local HT
319
- unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
320
- ProbeSpillLocalAppendState &spill_state, DataChunk &spill_chunk);
324
+ unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state, DataChunk &payload,
325
+ ProbeSpill &probe_spill, ProbeSpillLocalAppendState &spill_state,
326
+ DataChunk &spill_chunk);
321
327
 
322
328
  private:
323
329
  //! First and last partition of the current probe round
@@ -103,6 +103,8 @@ public:
103
103
  // JOIN operation
104
104
  DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
105
105
  JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
+ shared_ptr<Relation> Join(const shared_ptr<Relation> &other, vector<unique_ptr<ParsedExpression>> condition,
107
+ JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
108
 
107
109
  // CROSS PRODUCT operation
108
110
  DUCKDB_API shared_ptr<Relation> CrossProduct(const shared_ptr<Relation> &other,
@@ -121,6 +123,8 @@ public:
121
123
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates);
122
124
  DUCKDB_API shared_ptr<Relation> Aggregate(const string &aggregate_list, const string &group_list);
123
125
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates, const vector<string> &groups);
126
+ DUCKDB_API shared_ptr<Relation> Aggregate(vector<unique_ptr<ParsedExpression>> expressions,
127
+ const string &group_list);
124
128
 
125
129
  // ALIAS
126
130
  DUCKDB_API shared_ptr<Relation> Alias(const string &alias);
@@ -21,6 +21,7 @@ class BoundSelectNode;
21
21
  struct BoundGroupInformation {
22
22
  parsed_expression_map_t<idx_t> map;
23
23
  case_insensitive_map_t<idx_t> alias_map;
24
+ unordered_map<idx_t, idx_t> collated_groups;
24
25
  };
25
26
 
26
27
  //! The BaseSelectBinder is the base binder of the SELECT, HAVING and QUALIFY binders. It can bind aggregates and window
@@ -35,7 +35,6 @@ protected:
35
35
  void ResolveTypes() override;
36
36
 
37
37
  private:
38
- LogicalCreateTable(ClientContext &context, const string &catalog, const string &schema,
39
- unique_ptr<CreateInfo> info);
38
+ LogicalCreateTable(ClientContext &context, unique_ptr<CreateInfo> info);
40
39
  };
41
40
  } // namespace duckdb
@@ -37,6 +37,6 @@ protected:
37
37
  void ResolveTypes() override;
38
38
 
39
39
  private:
40
- LogicalDelete(ClientContext &context, const string &catalog, const string &schema, const string &table);
40
+ LogicalDelete(ClientContext &context, const unique_ptr<CreateInfo> &table_info);
41
41
  };
42
42
  } // namespace duckdb
@@ -72,6 +72,6 @@ protected:
72
72
  string GetName() const override;
73
73
 
74
74
  private:
75
- LogicalInsert(ClientContext &context, const string &catalog, const string &schema, const string &table);
75
+ LogicalInsert(ClientContext &context, const unique_ptr<CreateInfo> table_info);
76
76
  };
77
77
  } // namespace duckdb
@@ -42,6 +42,6 @@ protected:
42
42
  void ResolveTypes() override;
43
43
 
44
44
  private:
45
- LogicalUpdate(ClientContext &context, const string &catalog, const string &schema, const string &table);
45
+ LogicalUpdate(ClientContext &context, const unique_ptr<CreateInfo> &table_info);
46
46
  };
47
47
  } // namespace duckdb
@@ -144,7 +144,7 @@ public:
144
144
  //! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during
145
145
  //! commit (e.g. because of an I/O exception)
146
146
  void RevertAppend(idx_t start_row, idx_t count);
147
- void RevertAppendInternal(idx_t start_row, idx_t count);
147
+ void RevertAppendInternal(idx_t start_row);
148
148
 
149
149
  void ScanTableSegment(idx_t start_row, idx_t count, const std::function<void(DataChunk &chunk)> &function);
150
150
 
@@ -73,7 +73,7 @@ public:
73
73
  //! FinalizeAppend flushes an append with a variable number of rows.
74
74
  void FinalizeAppend(TransactionData transaction, TableAppendState &state);
75
75
  void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
76
- void RevertAppendInternal(idx_t start_row, idx_t count);
76
+ void RevertAppendInternal(idx_t start_row);
77
77
 
78
78
  void MergeStorage(RowGroupCollection &data);
79
79
 
@@ -177,7 +177,7 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
177
177
  void DBConfig::SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value) {
178
178
  lock_guard<mutex> l(config_lock);
179
179
  if (!option.set_global) {
180
- throw InternalException("Could not set option \"%s\" as a global option", option.name);
180
+ throw InvalidInputException("Could not set option \"%s\" as a global option", option.name);
181
181
  }
182
182
  D_ASSERT(option.reset_global);
183
183
  Value input = value.DefaultCastAs(option.parameter_type);
@@ -130,7 +130,12 @@ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const str
130
130
  JoinRefType ref_type) {
131
131
  auto expression_list = Parser::ParseExpressionList(condition, context.GetContext()->GetParserOptions());
132
132
  D_ASSERT(!expression_list.empty());
133
+ return Join(other, std::move(expression_list), type, ref_type);
134
+ }
133
135
 
136
+ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other,
137
+ vector<unique_ptr<ParsedExpression>> expression_list, JoinType type,
138
+ JoinRefType ref_type) {
134
139
  if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) {
135
140
  // multiple columns or single column ref: the condition is a USING list
136
141
  vector<string> using_columns;
@@ -197,6 +202,11 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const
197
202
  return this->Aggregate(aggregate_list, group_list);
198
203
  }
199
204
 
205
+ shared_ptr<Relation> Relation::Aggregate(vector<unique_ptr<ParsedExpression>> expressions, const string &group_list) {
206
+ auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
207
+ return make_shared<AggregateRelation>(shared_from_this(), std::move(expressions), std::move(groups));
208
+ }
209
+
200
210
  string Relation::GetAlias() {
201
211
  return "relation";
202
212
  }
@@ -70,9 +70,6 @@ unique_ptr<Expression> DatePartSimplificationRule::Apply(LogicalOperator &op, ve
70
70
  case DatePartSpecifier::DOY:
71
71
  new_function_name = "dayofyear";
72
72
  break;
73
- case DatePartSpecifier::EPOCH:
74
- new_function_name = "epoch";
75
- break;
76
73
  case DatePartSpecifier::MICROSECONDS:
77
74
  new_function_name = "microsecond";
78
75
  break;
@@ -1,25 +1,29 @@
1
1
  #include "duckdb/common/limits.hpp"
2
2
  #include "duckdb/common/string_util.hpp"
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
+ #include "duckdb/function/aggregate/distributive_functions.hpp"
5
+ #include "duckdb/function/function_binder.hpp"
4
6
  #include "duckdb/main/config.hpp"
5
7
  #include "duckdb/parser/expression/columnref_expression.hpp"
6
8
  #include "duckdb/parser/expression/comparison_expression.hpp"
9
+ #include "duckdb/parser/expression/conjunction_expression.hpp"
7
10
  #include "duckdb/parser/expression/constant_expression.hpp"
8
- #include "duckdb/parser/expression/subquery_expression.hpp"
9
11
  #include "duckdb/parser/expression/star_expression.hpp"
12
+ #include "duckdb/parser/expression/subquery_expression.hpp"
10
13
  #include "duckdb/parser/query_node/select_node.hpp"
11
14
  #include "duckdb/parser/tableref/joinref.hpp"
12
15
  #include "duckdb/planner/binder.hpp"
16
+ #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
13
17
  #include "duckdb/planner/expression_binder/column_alias_binder.hpp"
14
18
  #include "duckdb/planner/expression_binder/constant_binder.hpp"
15
19
  #include "duckdb/planner/expression_binder/group_binder.hpp"
16
20
  #include "duckdb/planner/expression_binder/having_binder.hpp"
17
- #include "duckdb/planner/expression_binder/qualify_binder.hpp"
18
21
  #include "duckdb/planner/expression_binder/order_binder.hpp"
22
+ #include "duckdb/planner/expression_binder/qualify_binder.hpp"
19
23
  #include "duckdb/planner/expression_binder/select_binder.hpp"
20
24
  #include "duckdb/planner/expression_binder/where_binder.hpp"
25
+ #include "duckdb/planner/expression_iterator.hpp"
21
26
  #include "duckdb/planner/query_node/bound_select_node.hpp"
22
- #include "duckdb/parser/expression/conjunction_expression.hpp"
23
27
 
24
28
  namespace duckdb {
25
29
 
@@ -380,10 +384,28 @@ unique_ptr<BoundQueryNode> Binder::BindSelectNode(SelectNode &statement, unique_
380
384
  auto bound_expr = group_binder.Bind(group_expressions[i], &group_type);
381
385
  D_ASSERT(bound_expr->return_type.id() != LogicalTypeId::INVALID);
382
386
 
387
+ // find out whether the expression contains a subquery, it can't be copied if so
388
+ auto &bound_expr_ref = *bound_expr;
389
+ bool contains_subquery = bound_expr_ref.HasSubquery();
390
+
383
391
  // push a potential collation, if necessary
384
- bound_expr = ExpressionBinder::PushCollation(context, std::move(bound_expr),
385
- StringType::GetCollation(group_type), true);
386
- result->groups.group_expressions.push_back(std::move(bound_expr));
392
+ auto collated_expr = ExpressionBinder::PushCollation(context, std::move(bound_expr),
393
+ StringType::GetCollation(group_type), true);
394
+ if (!contains_subquery && !collated_expr->Equals(bound_expr_ref)) {
395
+ // if there is a collation on a group x, we should group by the collated expr,
396
+ // but also push a first(x) aggregate in case x is selected (uncollated)
397
+ info.collated_groups[i] = result->aggregates.size();
398
+
399
+ auto first_fun = FirstFun::GetFunction(LogicalType::VARCHAR);
400
+ vector<unique_ptr<Expression>> first_children;
401
+ // FIXME: would be better to just refer to this expression, but for now we copy
402
+ first_children.push_back(bound_expr_ref.Copy());
403
+
404
+ FunctionBinder function_binder(context);
405
+ auto function = function_binder.BindAggregateFunction(first_fun, std::move(first_children));
406
+ result->aggregates.push_back(std::move(function));
407
+ }
408
+ result->groups.group_expressions.push_back(std::move(collated_expr));
387
409
 
388
410
  // in the unbound expression we DO bind the table names of any ColumnRefs
389
411
  // we do this to make sure that "table.a" and "a" are treated the same
@@ -39,6 +39,9 @@ BoundStatement Binder::Bind(DropStatement &stmt) {
39
39
  if (!entry) {
40
40
  break;
41
41
  }
42
+ if (entry->internal) {
43
+ throw CatalogException("Cannot drop internal catalog entry \"%s\"!", entry->name);
44
+ }
42
45
  stmt.info->catalog = entry->ParentCatalog().GetName();
43
46
  if (!entry->temporary) {
44
47
  // we can only drop temporary tables in read-only mode
@@ -51,6 +51,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
51
51
  unique_ptr<LogicalOperator> &right_child, const unordered_set<idx_t> &left_bindings,
52
52
  const unordered_set<idx_t> &right_bindings, vector<unique_ptr<Expression>> &expressions,
53
53
  vector<JoinCondition> &conditions, vector<unique_ptr<Expression>> &arbitrary_expressions) {
54
+
54
55
  for (auto &expr : expressions) {
55
56
  auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
56
57
  if (total_side != JoinSide::BOTH) {
@@ -77,10 +78,17 @@ void LogicalComparisonJoin::ExtractJoinConditions(
77
78
  continue;
78
79
  }
79
80
  }
80
- } else if ((expr->type >= ExpressionType::COMPARE_EQUAL &&
81
- expr->type <= ExpressionType::COMPARE_GREATERTHANOREQUALTO) ||
82
- expr->type == ExpressionType::COMPARE_DISTINCT_FROM ||
83
- expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
81
+ } else if (expr->type == ExpressionType::COMPARE_EQUAL || expr->type == ExpressionType::COMPARE_NOTEQUAL ||
82
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
83
+ expr->type == ExpressionType::COMPARE_LESSTHAN ||
84
+ expr->type == ExpressionType::COMPARE_GREATERTHAN ||
85
+ expr->type == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
86
+ expr->type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
87
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
88
+ expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
89
+ expr->type == ExpressionType::COMPARE_DISTINCT_FROM)
90
+
91
+ {
84
92
  // comparison, check if we can create a comparison JoinCondition
85
93
  if (CreateJoinCondition(*expr, left_bindings, right_bindings, conditions)) {
86
94
  // successfully created the join condition
@@ -1,15 +1,15 @@
1
1
  #include "duckdb/planner/expression_binder/base_select_binder.hpp"
2
2
 
3
+ #include "duckdb/common/string_util.hpp"
3
4
  #include "duckdb/parser/expression/columnref_expression.hpp"
5
+ #include "duckdb/parser/expression/operator_expression.hpp"
4
6
  #include "duckdb/parser/expression/window_expression.hpp"
5
7
  #include "duckdb/parser/parsed_expression_iterator.hpp"
8
+ #include "duckdb/planner/binder.hpp"
6
9
  #include "duckdb/planner/expression/bound_columnref_expression.hpp"
7
10
  #include "duckdb/planner/expression/bound_window_expression.hpp"
8
11
  #include "duckdb/planner/expression_binder/aggregate_binder.hpp"
9
12
  #include "duckdb/planner/query_node/bound_select_node.hpp"
10
- #include "duckdb/parser/expression/operator_expression.hpp"
11
- #include "duckdb/common/string_util.hpp"
12
- #include "duckdb/planner/binder.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -138,9 +138,17 @@ BindResult BaseSelectBinder::BindGroupingFunction(OperatorExpression &op, idx_t
138
138
  }
139
139
 
140
140
  BindResult BaseSelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_t group_index) {
141
- auto &group = node.groups.group_expressions[group_index];
142
- return BindResult(make_uniq<BoundColumnRefExpression>(expr.GetName(), group->return_type,
143
- ColumnBinding(node.group_index, group_index), depth));
141
+ auto it = info.collated_groups.find(group_index);
142
+ if (it != info.collated_groups.end()) {
143
+ // This is an implicitly collated group, so we need to refer to the first() aggregate
144
+ const auto &aggr_index = it->second;
145
+ return BindResult(make_uniq<BoundColumnRefExpression>(expr.GetName(), node.aggregates[aggr_index]->return_type,
146
+ ColumnBinding(node.aggregate_index, aggr_index), depth));
147
+ } else {
148
+ auto &group = node.groups.group_expressions[group_index];
149
+ return BindResult(make_uniq<BoundColumnRefExpression>(expr.GetName(), group->return_type,
150
+ ColumnBinding(node.group_index, group_index), depth));
151
+ }
144
152
  }
145
153
 
146
154
  bool BaseSelectBinder::QualifyColumnAlias(const ColumnRefExpression &colref) {
@@ -6,9 +6,9 @@ LogicalCreateTable::LogicalCreateTable(SchemaCatalogEntry &schema, unique_ptr<Bo
6
6
  : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_TABLE), schema(schema), info(std::move(info)) {
7
7
  }
8
8
 
9
- LogicalCreateTable::LogicalCreateTable(ClientContext &context, const string &catalog, const string &schema,
10
- unique_ptr<CreateInfo> unbound_info)
11
- : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_TABLE), schema(Catalog::GetSchema(context, catalog, schema)) {
9
+ LogicalCreateTable::LogicalCreateTable(ClientContext &context, unique_ptr<CreateInfo> unbound_info)
10
+ : LogicalOperator(LogicalOperatorType::LOGICAL_CREATE_TABLE),
11
+ schema(Catalog::GetSchema(context, unbound_info->catalog, unbound_info->schema)) {
12
12
  D_ASSERT(unbound_info->type == CatalogType::TABLE_ENTRY);
13
13
  auto binder = Binder::CreateBinder(context);
14
14
  info = binder->BindCreateTableInfo(unique_ptr_cast<CreateInfo, CreateTableInfo>(std::move(unbound_info)));
@@ -11,9 +11,10 @@ LogicalDelete::LogicalDelete(TableCatalogEntry &table, idx_t table_index)
11
11
  return_chunk(false) {
12
12
  }
13
13
 
14
- LogicalDelete::LogicalDelete(ClientContext &context, const string &catalog, const string &schema, const string &table)
14
+ LogicalDelete::LogicalDelete(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
15
15
  : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE),
16
- table(Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table)) {
16
+ table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
17
+ dynamic_cast<CreateTableInfo &>(*table_info).table)) {
17
18
  }
18
19
 
19
20
  idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {
@@ -11,9 +11,10 @@ LogicalInsert::LogicalInsert(TableCatalogEntry &table, idx_t table_index)
11
11
  action_type(OnConflictAction::THROW) {
12
12
  }
13
13
 
14
- LogicalInsert::LogicalInsert(ClientContext &context, const string &catalog, const string &schema, const string &table)
14
+ LogicalInsert::LogicalInsert(ClientContext &context, const unique_ptr<CreateInfo> table_info)
15
15
  : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT),
16
- table(Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table)) {
16
+ table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
17
+ dynamic_cast<CreateTableInfo &>(*table_info).table)) {
17
18
  }
18
19
 
19
20
  idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {
@@ -9,9 +9,10 @@ LogicalUpdate::LogicalUpdate(TableCatalogEntry &table)
9
9
  : LogicalOperator(LogicalOperatorType::LOGICAL_UPDATE), table(table), table_index(0), return_chunk(false) {
10
10
  }
11
11
 
12
- LogicalUpdate::LogicalUpdate(ClientContext &context, const string &catalog, const string &schema, const string &table)
12
+ LogicalUpdate::LogicalUpdate(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
13
13
  : LogicalOperator(LogicalOperatorType::LOGICAL_UPDATE),
14
- table(Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table)) {
14
+ table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
15
+ dynamic_cast<CreateTableInfo &>(*table_info).table)) {
15
16
  }
16
17
 
17
18
  idx_t LogicalUpdate::EstimateCardinality(ClientContext &context) {
@@ -449,11 +449,10 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
449
449
  if (start_bit % 8 != 0) {
450
450
  // handle sub-bit stuff (yay)
451
451
  idx_t byte_pos = start_bit / 8;
452
- idx_t bit_start = byte_pos * 8;
453
452
  idx_t bit_end = (byte_pos + 1) * 8;
454
- ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr() + byte_pos));
453
+ ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()));
455
454
  for (idx_t i = start_bit; i < bit_end; i++) {
456
- mask.SetValid(i - bit_start);
455
+ mask.SetValid(i);
457
456
  }
458
457
  revert_start = bit_end / 8;
459
458
  } else {