duckdb 0.8.2-dev4653.0 → 0.8.2-dev4871.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  12. package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
  13. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  14. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +35 -17
  15. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  17. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  18. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  19. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  20. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  21. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  22. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  23. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  24. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
  25. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +5 -1
  26. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +18 -9
  27. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +11 -27
  28. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +1 -2
  29. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +4 -0
  30. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
  31. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +8 -8
  32. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +7 -6
  33. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +27 -6
  34. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  35. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  36. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -41
  37. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  38. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  39. package/src/duckdb/src/function/function_binder.cpp +10 -9
  40. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  41. package/src/duckdb/src/function/table/read_csv.cpp +12 -9
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  44. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  45. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +8 -2
  46. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  48. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
  49. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  50. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  51. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +4 -0
  52. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +4 -0
  54. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +2 -4
  55. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +1 -2
  59. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
  60. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  61. package/src/duckdb/src/main/config.cpp +1 -1
  62. package/src/duckdb/src/main/query_result.cpp +16 -10
  63. package/src/duckdb/src/main/relation.cpp +10 -0
  64. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  65. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  66. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  67. package/src/duckdb/src/storage/data_table.cpp +10 -0
  68. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +42 -44
  69. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  70. package/src/statement.cpp +2 -4
  71. package/test/database_fail.test.ts +6 -0
  72. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -65,7 +65,7 @@ public:
65
65
  //! returned by the JoinHashTable::Scan function and can be used to resume a
66
66
  //! probe.
67
67
  struct ScanStructure {
68
- unsafe_unique_array<UnifiedVectorFormat> key_data;
68
+ TupleDataChunkState &key_state;
69
69
  Vector pointers;
70
70
  idx_t count;
71
71
  SelectionVector sel_vector;
@@ -74,7 +74,7 @@ public:
74
74
  JoinHashTable &ht;
75
75
  bool finished;
76
76
 
77
- explicit ScanStructure(JoinHashTable &ht);
77
+ explicit ScanStructure(JoinHashTable &ht, TupleDataChunkState &key_state);
78
78
  //! Get the next batch of data from the scan structure
79
79
  void Next(DataChunk &keys, DataChunk &left, DataChunk &result);
80
80
 
@@ -130,7 +130,8 @@ public:
130
130
  //! ever called.
131
131
  void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
132
132
  //! Probe the HT with the given input chunk, resulting in the given result
133
- unique_ptr<ScanStructure> Probe(DataChunk &keys, Vector *precomputed_hashes = nullptr);
133
+ unique_ptr<ScanStructure> Probe(DataChunk &keys, TupleDataChunkState &key_state,
134
+ Vector *precomputed_hashes = nullptr);
134
135
  //! Scan the HT to construct the full outer join result
135
136
  void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
136
137
 
@@ -166,6 +167,9 @@ public:
166
167
  vector<ExpressionType> predicates;
167
168
  //! Data column layout
168
169
  TupleDataLayout layout;
170
+ //! Efficiently matches rows
171
+ RowMatcher row_matcher;
172
+ RowMatcher row_matcher_no_match_sel;
169
173
  //! The size of an entry as stored in the HashTable
170
174
  idx_t entry_size;
171
175
  //! The total tuple size
@@ -201,7 +205,8 @@ public:
201
205
  } correlated_mark_join_info;
202
206
 
203
207
  private:
204
- unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel);
208
+ unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
209
+ const SelectionVector *&current_sel);
205
210
  void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes);
206
211
 
207
212
  //! Apply a bitmask to the hashes
@@ -212,8 +217,8 @@ private:
212
217
  //! Insert the given set of locations into the HT with the given set of hashes
213
218
  void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
214
219
 
215
- idx_t PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
216
- const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
220
+ idx_t PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data, const SelectionVector *&current_sel,
221
+ SelectionVector &sel, bool build_side);
217
222
 
218
223
  //! Lock for combining data_collection when merging HTs
219
224
  mutex data_lock;
@@ -316,8 +321,9 @@ public:
316
321
  //! Build HT for the next partitioned probe round
317
322
  bool PrepareExternalFinalize();
318
323
  //! Probe whatever we can, sink the rest into a thread-local HT
319
- unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
320
- ProbeSpillLocalAppendState &spill_state, DataChunk &spill_chunk);
324
+ unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state, DataChunk &payload,
325
+ ProbeSpill &probe_spill, ProbeSpillLocalAppendState &spill_state,
326
+ DataChunk &spill_chunk);
321
327
 
322
328
  private:
323
329
  //! First and last partition of the current probe round
@@ -78,6 +78,10 @@ public:
78
78
  return line_error + 1;
79
79
  };
80
80
 
81
+ virtual void Increment(idx_t buffer_idx) {
82
+ return;
83
+ }
84
+
81
85
  //! Initialize projection indices to select all columns
82
86
  void InitializeProjection();
83
87
 
@@ -89,7 +89,7 @@ public:
89
89
  private:
90
90
  ClientContext &context;
91
91
  //! Actual size can be smaller than the buffer size in case we allocate it too optimistically.
92
- idx_t file_size;
92
+ idx_t actual_buffer_size;
93
93
  //! We need to check for Byte Order Mark, to define the start position of this buffer
94
94
  //! https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
95
95
  idx_t start_position = 0;
@@ -20,10 +20,14 @@ public:
20
20
  //! Return the 1-indexed line number
21
21
  idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true,
22
22
  bool stop_at_first = true);
23
+ //! In case an error happened we have to increment the lines read of that batch
24
+ void Increment(idx_t file_idx, idx_t batch_idx);
23
25
  //! Verify if the CSV File was read correctly from [0,batch_idx] batches.
24
26
  void Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos);
25
27
  //! Lines read per batch, <file_index, <batch_index, count>>
26
28
  vector<unordered_map<idx_t, idx_t>> lines_read;
29
+ //! Lines errored per batch, <file_index, <batch_index, count>>
30
+ vector<unordered_map<idx_t, idx_t>> lines_errored;
27
31
  //! Set of batches that have been initialized but are not yet finished.
28
32
  vector<set<idx_t>> current_batches;
29
33
  //! Pointer to CSV Reader Mutex
@@ -126,12 +126,10 @@ struct CSVReaderOptions {
126
126
  bool normalize_names = false;
127
127
  //! True, if column with that index must skip null check
128
128
  vector<bool> force_not_null;
129
+ //! Number of sample chunks used in auto-detection
130
+ idx_t sample_size_chunks = 20480 / STANDARD_VECTOR_SIZE;
129
131
  //! Consider all columns to be of type varchar
130
132
  bool all_varchar = false;
131
- //! Size of sample chunk used for dialect and type detection
132
- idx_t sample_chunk_size = STANDARD_VECTOR_SIZE;
133
- //! Number of sample chunks used for type detection
134
- idx_t sample_chunks = 10;
135
133
  //! Whether or not to automatically detect dialect and datatypes
136
134
  bool auto_detect = false;
137
135
  //! The file path of the CSV file to read
@@ -28,7 +28,7 @@ struct SnifferResult {
28
28
  class CSVSniffer {
29
29
  public:
30
30
  explicit CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
31
- CSVStateMachineCache &state_machine_cache);
31
+ CSVStateMachineCache &state_machine_cache, bool explicit_set_columns = false);
32
32
 
33
33
  //! Main method that sniffs the CSV file, returns the types, names and options as a result
34
34
  //! CSV Sniffing consists of five steps:
@@ -110,6 +110,8 @@ private:
110
110
  //! ------------------------------------------------------//
111
111
  void DetectHeader();
112
112
  vector<string> names;
113
+ //! If Column Names and Types have been explicitly set
114
+ const bool explicit_set_columns;
113
115
 
114
116
  //! ------------------------------------------------------//
115
117
  //! ------------------ Type Replacement ----------------- //
@@ -13,7 +13,7 @@
13
13
  #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
14
14
 
15
15
  namespace duckdb {
16
- static constexpr uint32_t NUM_STATES = 8;
16
+ static constexpr uint32_t NUM_STATES = 9;
17
17
  static constexpr uint32_t NUM_TRANSITIONS = 256;
18
18
  typedef uint8_t state_machine_t[NUM_STATES][NUM_TRANSITIONS];
19
19
 
@@ -134,6 +134,7 @@ public:
134
134
  void ParseCSV(DataChunk &insert_chunk);
135
135
 
136
136
  idx_t GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first = true) override;
137
+ void Increment(idx_t buffer_idx) override;
137
138
 
138
139
  private:
139
140
  //! Initialize Parser
@@ -51,13 +51,12 @@ public:
51
51
  OperatorSourceInput &input) const;
52
52
 
53
53
  const TupleDataLayout &GetLayout() const;
54
- idx_t Count(GlobalSinkState &sink) const;
54
+ idx_t NumberOfPartitions(GlobalSinkState &sink) const;
55
55
  static void SetMultiScan(GlobalSinkState &sink);
56
56
 
57
57
  private:
58
58
  void SetGroupingValues();
59
59
  void PopulateGroupChunk(DataChunk &group_chunk, DataChunk &input_chunk) const;
60
- idx_t CountInternal(GlobalSinkState &sink) const;
61
60
 
62
61
  TupleDataLayout layout;
63
62
  };
@@ -240,6 +240,7 @@ static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {{".parquet?", "parq
240
240
 
241
241
  static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
242
242
  // "azure",
243
+ "arrow",
243
244
  "aws",
244
245
  "autocomplete",
245
246
  "excel",
@@ -249,7 +250,9 @@ static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
249
250
  // "icu",
250
251
  "json",
251
252
  "parquet",
253
+ "postgres_scanner",
252
254
  "sqlsmith",
255
+ "sqlite_scanner",
253
256
  "tpcds",
254
257
  "tpch",
255
258
  "visualizer",
@@ -103,6 +103,8 @@ public:
103
103
  // JOIN operation
104
104
  DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
105
105
  JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
+ shared_ptr<Relation> Join(const shared_ptr<Relation> &other, vector<unique_ptr<ParsedExpression>> condition,
107
+ JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
108
 
107
109
  // CROSS PRODUCT operation
108
110
  DUCKDB_API shared_ptr<Relation> CrossProduct(const shared_ptr<Relation> &other,
@@ -121,6 +123,8 @@ public:
121
123
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates);
122
124
  DUCKDB_API shared_ptr<Relation> Aggregate(const string &aggregate_list, const string &group_list);
123
125
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates, const vector<string> &groups);
126
+ DUCKDB_API shared_ptr<Relation> Aggregate(vector<unique_ptr<ParsedExpression>> expressions,
127
+ const string &group_list);
124
128
 
125
129
  // ALIAS
126
130
  DUCKDB_API shared_ptr<Relation> Alias(const string &alias);
@@ -177,7 +177,7 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
177
177
  void DBConfig::SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value) {
178
178
  lock_guard<mutex> l(config_lock);
179
179
  if (!option.set_global) {
180
- throw InternalException("Could not set option \"%s\" as a global option", option.name);
180
+ throw InvalidInputException("Could not set option \"%s\" as a global option", option.name);
181
181
  }
182
182
  D_ASSERT(option.reset_global);
183
183
  Value input = value.DefaultCastAs(option.parameter_type);
@@ -1,8 +1,9 @@
1
1
  #include "duckdb/main/query_result.hpp"
2
+
3
+ #include "duckdb/common/box_renderer.hpp"
2
4
  #include "duckdb/common/printer.hpp"
3
5
  #include "duckdb/common/vector.hpp"
4
6
  #include "duckdb/main/client_context.hpp"
5
- #include "duckdb/common/box_renderer.hpp"
6
7
  namespace duckdb {
7
8
 
8
9
  BaseQueryResult::BaseQueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties_p,
@@ -100,9 +101,17 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
100
101
  }
101
102
  // now compare the actual values
102
103
  // fetch chunks
104
+ unique_ptr<DataChunk> lchunk, rchunk;
105
+ idx_t lindex = 0, rindex = 0;
103
106
  while (true) {
104
- auto lchunk = Fetch();
105
- auto rchunk = other.Fetch();
107
+ if (!lchunk || lindex == lchunk->size()) {
108
+ lchunk = Fetch();
109
+ lindex = 0;
110
+ }
111
+ if (!rchunk || rindex == rchunk->size()) {
112
+ rchunk = other.Fetch();
113
+ rindex = 0;
114
+ }
106
115
  if (!lchunk && !rchunk) {
107
116
  return true;
108
117
  }
@@ -112,14 +121,11 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
112
121
  if (lchunk->size() == 0 && rchunk->size() == 0) {
113
122
  return true;
114
123
  }
115
- if (lchunk->size() != rchunk->size()) {
116
- return false;
117
- }
118
124
  D_ASSERT(lchunk->ColumnCount() == rchunk->ColumnCount());
119
- for (idx_t col = 0; col < rchunk->ColumnCount(); col++) {
120
- for (idx_t row = 0; row < rchunk->size(); row++) {
121
- auto lvalue = lchunk->GetValue(col, row);
122
- auto rvalue = rchunk->GetValue(col, row);
125
+ for (; lindex < lchunk->size() && rindex < rchunk->size(); lindex++, rindex++) {
126
+ for (idx_t col = 0; col < rchunk->ColumnCount(); col++) {
127
+ auto lvalue = lchunk->GetValue(col, lindex);
128
+ auto rvalue = rchunk->GetValue(col, rindex);
123
129
  if (lvalue.IsNull() && rvalue.IsNull()) {
124
130
  continue;
125
131
  }
@@ -130,7 +130,12 @@ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const str
130
130
  JoinRefType ref_type) {
131
131
  auto expression_list = Parser::ParseExpressionList(condition, context.GetContext()->GetParserOptions());
132
132
  D_ASSERT(!expression_list.empty());
133
+ return Join(other, std::move(expression_list), type, ref_type);
134
+ }
133
135
 
136
+ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other,
137
+ vector<unique_ptr<ParsedExpression>> expression_list, JoinType type,
138
+ JoinRefType ref_type) {
134
139
  if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) {
135
140
  // multiple columns or single column ref: the condition is a USING list
136
141
  vector<string> using_columns;
@@ -197,6 +202,11 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const
197
202
  return this->Aggregate(aggregate_list, group_list);
198
203
  }
199
204
 
205
+ shared_ptr<Relation> Relation::Aggregate(vector<unique_ptr<ParsedExpression>> expressions, const string &group_list) {
206
+ auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
207
+ return make_shared<AggregateRelation>(shared_from_this(), std::move(expressions), std::move(groups));
208
+ }
209
+
200
210
  string Relation::GetAlias() {
201
211
  return "relation";
202
212
  }
@@ -70,9 +70,6 @@ unique_ptr<Expression> DatePartSimplificationRule::Apply(LogicalOperator &op, ve
70
70
  case DatePartSpecifier::DOY:
71
71
  new_function_name = "dayofyear";
72
72
  break;
73
- case DatePartSpecifier::EPOCH:
74
- new_function_name = "epoch";
75
- break;
76
73
  case DatePartSpecifier::MICROSECONDS:
77
74
  new_function_name = "microsecond";
78
75
  break;
@@ -51,6 +51,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
51
51
  unique_ptr<LogicalOperator> &right_child, const unordered_set<idx_t> &left_bindings,
52
52
  const unordered_set<idx_t> &right_bindings, vector<unique_ptr<Expression>> &expressions,
53
53
  vector<JoinCondition> &conditions, vector<unique_ptr<Expression>> &arbitrary_expressions) {
54
+
54
55
  for (auto &expr : expressions) {
55
56
  auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
56
57
  if (total_side != JoinSide::BOTH) {
@@ -77,10 +78,17 @@ void LogicalComparisonJoin::ExtractJoinConditions(
77
78
  continue;
78
79
  }
79
80
  }
80
- } else if ((expr->type >= ExpressionType::COMPARE_EQUAL &&
81
- expr->type <= ExpressionType::COMPARE_GREATERTHANOREQUALTO) ||
82
- expr->type == ExpressionType::COMPARE_DISTINCT_FROM ||
83
- expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
81
+ } else if (expr->type == ExpressionType::COMPARE_EQUAL || expr->type == ExpressionType::COMPARE_NOTEQUAL ||
82
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
83
+ expr->type == ExpressionType::COMPARE_LESSTHAN ||
84
+ expr->type == ExpressionType::COMPARE_GREATERTHAN ||
85
+ expr->type == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
86
+ expr->type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
87
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
88
+ expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
89
+ expr->type == ExpressionType::COMPARE_DISTINCT_FROM)
90
+
91
+ {
84
92
  // comparison, check if we can create a comparison JoinCondition
85
93
  if (CreateJoinCondition(*expr, left_bindings, right_bindings, conditions)) {
86
94
  // successfully created the join condition
@@ -449,11 +449,10 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
449
449
  if (start_bit % 8 != 0) {
450
450
  // handle sub-bit stuff (yay)
451
451
  idx_t byte_pos = start_bit / 8;
452
- idx_t bit_start = byte_pos * 8;
453
452
  idx_t bit_end = (byte_pos + 1) * 8;
454
- ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr() + byte_pos));
453
+ ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()));
455
454
  for (idx_t i = start_bit; i < bit_end; i++) {
456
- mask.SetValid(i - bit_start);
455
+ mask.SetValid(i);
457
456
  }
458
457
  revert_start = bit_end / 8;
459
458
  } else {
@@ -832,6 +832,7 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) {
832
832
  void DataTable::RevertAppend(idx_t start_row, idx_t count) {
833
833
  lock_guard<mutex> lock(append_lock);
834
834
 
835
+ // revert any appends to indexes
835
836
  if (!info->indexes.Empty()) {
836
837
  idx_t current_row_base = start_row;
837
838
  row_t row_data[STANDARD_VECTOR_SIZE];
@@ -847,6 +848,15 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) {
847
848
  current_row_base += chunk.size();
848
849
  });
849
850
  }
851
+
852
+ // we need to vacuum the indexes to remove any buffers that are now empty
853
+ // due to reverting the appends
854
+ info->indexes.Scan([&](Index &index) {
855
+ index.Vacuum();
856
+ return false;
857
+ });
858
+
859
+ // revert the data table append
850
860
  RevertAppendInternal(start_row, count);
851
861
  }
852
862
 
@@ -101,28 +101,27 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const {
101
101
  serializer.WriteProperty(111, "normalize_names", normalize_names);
102
102
  serializer.WriteProperty(112, "force_not_null", force_not_null);
103
103
  serializer.WriteProperty(113, "all_varchar", all_varchar);
104
- serializer.WriteProperty(114, "sample_chunk_size", sample_chunk_size);
105
- serializer.WriteProperty(115, "sample_chunks", sample_chunks);
106
- serializer.WriteProperty(116, "auto_detect", auto_detect);
107
- serializer.WriteProperty(117, "file_path", file_path);
108
- serializer.WriteProperty(118, "decimal_separator", decimal_separator);
109
- serializer.WriteProperty(119, "null_padding", null_padding);
110
- serializer.WriteProperty(120, "buffer_size", buffer_size);
111
- serializer.WriteProperty(121, "file_options", file_options);
112
- serializer.WriteProperty(122, "force_quote", force_quote);
113
- serializer.WriteProperty(123, "rejects_table_name", rejects_table_name);
114
- serializer.WriteProperty(124, "rejects_limit", rejects_limit);
115
- serializer.WriteProperty(125, "rejects_recovery_columns", rejects_recovery_columns);
116
- serializer.WriteProperty(126, "rejects_recovery_column_ids", rejects_recovery_column_ids);
117
- serializer.WriteProperty(127, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter);
118
- serializer.WriteProperty(128, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote);
119
- serializer.WriteProperty(129, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape);
120
- serializer.WriteProperty(130, "dialect_options.header", dialect_options.header);
121
- serializer.WriteProperty(131, "dialect_options.num_cols", dialect_options.num_cols);
122
- serializer.WriteProperty(132, "dialect_options.new_line", dialect_options.new_line);
123
- serializer.WriteProperty(133, "dialect_options.skip_rows", dialect_options.skip_rows);
124
- serializer.WriteProperty(134, "dialect_options.date_format", dialect_options.date_format);
125
- serializer.WriteProperty(135, "dialect_options.has_format", dialect_options.has_format);
104
+ serializer.WriteProperty(114, "sample_size_chunks", sample_size_chunks);
105
+ serializer.WriteProperty(115, "auto_detect", auto_detect);
106
+ serializer.WriteProperty(116, "file_path", file_path);
107
+ serializer.WriteProperty(117, "decimal_separator", decimal_separator);
108
+ serializer.WriteProperty(118, "null_padding", null_padding);
109
+ serializer.WriteProperty(119, "buffer_size", buffer_size);
110
+ serializer.WriteProperty(120, "file_options", file_options);
111
+ serializer.WriteProperty(121, "force_quote", force_quote);
112
+ serializer.WriteProperty(122, "rejects_table_name", rejects_table_name);
113
+ serializer.WriteProperty(123, "rejects_limit", rejects_limit);
114
+ serializer.WriteProperty(124, "rejects_recovery_columns", rejects_recovery_columns);
115
+ serializer.WriteProperty(125, "rejects_recovery_column_ids", rejects_recovery_column_ids);
116
+ serializer.WriteProperty(126, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter);
117
+ serializer.WriteProperty(127, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote);
118
+ serializer.WriteProperty(128, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape);
119
+ serializer.WriteProperty(129, "dialect_options.header", dialect_options.header);
120
+ serializer.WriteProperty(130, "dialect_options.num_cols", dialect_options.num_cols);
121
+ serializer.WriteProperty(131, "dialect_options.new_line", dialect_options.new_line);
122
+ serializer.WriteProperty(132, "dialect_options.skip_rows", dialect_options.skip_rows);
123
+ serializer.WriteProperty(133, "dialect_options.date_format", dialect_options.date_format);
124
+ serializer.WriteProperty(134, "dialect_options.has_format", dialect_options.has_format);
126
125
  }
127
126
 
128
127
  CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
@@ -141,28 +140,27 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
141
140
  deserializer.ReadProperty(111, "normalize_names", result.normalize_names);
142
141
  deserializer.ReadProperty(112, "force_not_null", result.force_not_null);
143
142
  deserializer.ReadProperty(113, "all_varchar", result.all_varchar);
144
- deserializer.ReadProperty(114, "sample_chunk_size", result.sample_chunk_size);
145
- deserializer.ReadProperty(115, "sample_chunks", result.sample_chunks);
146
- deserializer.ReadProperty(116, "auto_detect", result.auto_detect);
147
- deserializer.ReadProperty(117, "file_path", result.file_path);
148
- deserializer.ReadProperty(118, "decimal_separator", result.decimal_separator);
149
- deserializer.ReadProperty(119, "null_padding", result.null_padding);
150
- deserializer.ReadProperty(120, "buffer_size", result.buffer_size);
151
- deserializer.ReadProperty(121, "file_options", result.file_options);
152
- deserializer.ReadProperty(122, "force_quote", result.force_quote);
153
- deserializer.ReadProperty(123, "rejects_table_name", result.rejects_table_name);
154
- deserializer.ReadProperty(124, "rejects_limit", result.rejects_limit);
155
- deserializer.ReadProperty(125, "rejects_recovery_columns", result.rejects_recovery_columns);
156
- deserializer.ReadProperty(126, "rejects_recovery_column_ids", result.rejects_recovery_column_ids);
157
- deserializer.ReadProperty(127, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter);
158
- deserializer.ReadProperty(128, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote);
159
- deserializer.ReadProperty(129, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape);
160
- deserializer.ReadProperty(130, "dialect_options.header", result.dialect_options.header);
161
- deserializer.ReadProperty(131, "dialect_options.num_cols", result.dialect_options.num_cols);
162
- deserializer.ReadProperty(132, "dialect_options.new_line", result.dialect_options.new_line);
163
- deserializer.ReadProperty(133, "dialect_options.skip_rows", result.dialect_options.skip_rows);
164
- deserializer.ReadProperty(134, "dialect_options.date_format", result.dialect_options.date_format);
165
- deserializer.ReadProperty(135, "dialect_options.has_format", result.dialect_options.has_format);
143
+ deserializer.ReadProperty(114, "sample_size_chunks", result.sample_size_chunks);
144
+ deserializer.ReadProperty(115, "auto_detect", result.auto_detect);
145
+ deserializer.ReadProperty(116, "file_path", result.file_path);
146
+ deserializer.ReadProperty(117, "decimal_separator", result.decimal_separator);
147
+ deserializer.ReadProperty(118, "null_padding", result.null_padding);
148
+ deserializer.ReadProperty(119, "buffer_size", result.buffer_size);
149
+ deserializer.ReadProperty(120, "file_options", result.file_options);
150
+ deserializer.ReadProperty(121, "force_quote", result.force_quote);
151
+ deserializer.ReadProperty(122, "rejects_table_name", result.rejects_table_name);
152
+ deserializer.ReadProperty(123, "rejects_limit", result.rejects_limit);
153
+ deserializer.ReadProperty(124, "rejects_recovery_columns", result.rejects_recovery_columns);
154
+ deserializer.ReadProperty(125, "rejects_recovery_column_ids", result.rejects_recovery_column_ids);
155
+ deserializer.ReadProperty(126, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter);
156
+ deserializer.ReadProperty(127, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote);
157
+ deserializer.ReadProperty(128, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape);
158
+ deserializer.ReadProperty(129, "dialect_options.header", result.dialect_options.header);
159
+ deserializer.ReadProperty(130, "dialect_options.num_cols", result.dialect_options.num_cols);
160
+ deserializer.ReadProperty(131, "dialect_options.new_line", result.dialect_options.new_line);
161
+ deserializer.ReadProperty(132, "dialect_options.skip_rows", result.dialect_options.skip_rows);
162
+ deserializer.ReadProperty(133, "dialect_options.date_format", result.dialect_options.date_format);
163
+ deserializer.ReadProperty(134, "dialect_options.has_format", result.dialect_options.has_format);
166
164
  return result;
167
165
  }
168
166
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  #include "src/common/row_operations/row_gather.cpp"
6
6
 
7
- #include "src/common/row_operations/row_match.cpp"
7
+ #include "src/common/row_operations/row_matcher.cpp"
8
8
 
9
9
  #include "src/common/row_operations/row_external.cpp"
10
10
 
package/src/statement.cpp CHANGED
@@ -93,11 +93,9 @@ Statement::Statement(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Statemen
93
93
  int length = info.Length();
94
94
 
95
95
  if (length <= 0 || !Connection::HasInstance(info[0])) {
96
- Napi::TypeError::New(env, "Connection object expected").ThrowAsJavaScriptException();
97
- return;
96
+ throw Napi::TypeError::New(env, "Connection object expected");
98
97
  } else if (length <= 1 || !info[1].IsString()) {
99
- Napi::TypeError::New(env, "SQL query expected").ThrowAsJavaScriptException();
100
- return;
98
+ throw Napi::TypeError::New(env, "SQL query expected");
101
99
  }
102
100
 
103
101
  connection_ref = Napi::ObjectWrap<Connection>::Unwrap(info[0].As<Napi::Object>());
@@ -2,6 +2,7 @@ import * as sqlite3 from '..';
2
2
  import * as assert from 'assert';
3
3
  import {DuckDbError, RowData} from "..";
4
4
  import {Worker} from 'worker_threads';
5
+ import {expect} from 'chai';
5
6
 
6
7
  describe('error handling', function() {
7
8
  var db: sqlite3.Database;
@@ -163,4 +164,9 @@ describe('error handling', function() {
163
164
  await run_worker(); // first should always succeed
164
165
  await run_worker(); // second fails without thread safety
165
166
  })
167
+
168
+ it("shouldn't crash on an exception", () => {
169
+ expect(() => new sqlite3.Database(':memory:', {file_search_path: '/'})).to.throw('Could not set option "file_search_path" as a global option');
170
+ });
166
171
  });
172
+