duckdb 0.8.2-dev4653.0 → 0.8.2-dev4871.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +35 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +18 -9
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +11 -27
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +1 -2
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +8 -8
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +7 -6
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +27 -6
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -41
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/read_csv.cpp +12 -9
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/query_result.cpp +16 -10
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +10 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +42 -44
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -65,7 +65,7 @@ public:
|
|
65
65
|
//! returned by the JoinHashTable::Scan function and can be used to resume a
|
66
66
|
//! probe.
|
67
67
|
struct ScanStructure {
|
68
|
-
|
68
|
+
TupleDataChunkState &key_state;
|
69
69
|
Vector pointers;
|
70
70
|
idx_t count;
|
71
71
|
SelectionVector sel_vector;
|
@@ -74,7 +74,7 @@ public:
|
|
74
74
|
JoinHashTable &ht;
|
75
75
|
bool finished;
|
76
76
|
|
77
|
-
explicit ScanStructure(JoinHashTable &ht);
|
77
|
+
explicit ScanStructure(JoinHashTable &ht, TupleDataChunkState &key_state);
|
78
78
|
//! Get the next batch of data from the scan structure
|
79
79
|
void Next(DataChunk &keys, DataChunk &left, DataChunk &result);
|
80
80
|
|
@@ -130,7 +130,8 @@ public:
|
|
130
130
|
//! ever called.
|
131
131
|
void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
|
132
132
|
//! Probe the HT with the given input chunk, resulting in the given result
|
133
|
-
unique_ptr<ScanStructure> Probe(DataChunk &keys,
|
133
|
+
unique_ptr<ScanStructure> Probe(DataChunk &keys, TupleDataChunkState &key_state,
|
134
|
+
Vector *precomputed_hashes = nullptr);
|
134
135
|
//! Scan the HT to construct the full outer join result
|
135
136
|
void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
|
136
137
|
|
@@ -166,6 +167,9 @@ public:
|
|
166
167
|
vector<ExpressionType> predicates;
|
167
168
|
//! Data column layout
|
168
169
|
TupleDataLayout layout;
|
170
|
+
//! Efficiently matches rows
|
171
|
+
RowMatcher row_matcher;
|
172
|
+
RowMatcher row_matcher_no_match_sel;
|
169
173
|
//! The size of an entry as stored in the HashTable
|
170
174
|
idx_t entry_size;
|
171
175
|
//! The total tuple size
|
@@ -201,7 +205,8 @@ public:
|
|
201
205
|
} correlated_mark_join_info;
|
202
206
|
|
203
207
|
private:
|
204
|
-
unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys,
|
208
|
+
unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
|
209
|
+
const SelectionVector *&current_sel);
|
205
210
|
void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes);
|
206
211
|
|
207
212
|
//! Apply a bitmask to the hashes
|
@@ -212,8 +217,8 @@ private:
|
|
212
217
|
//! Insert the given set of locations into the HT with the given set of hashes
|
213
218
|
void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
|
214
219
|
|
215
|
-
idx_t PrepareKeys(DataChunk &keys,
|
216
|
-
|
220
|
+
idx_t PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data, const SelectionVector *&current_sel,
|
221
|
+
SelectionVector &sel, bool build_side);
|
217
222
|
|
218
223
|
//! Lock for combining data_collection when merging HTs
|
219
224
|
mutex data_lock;
|
@@ -316,8 +321,9 @@ public:
|
|
316
321
|
//! Build HT for the next partitioned probe round
|
317
322
|
bool PrepareExternalFinalize();
|
318
323
|
//! Probe whatever we can, sink the rest into a thread-local HT
|
319
|
-
unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys,
|
320
|
-
|
324
|
+
unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state, DataChunk &payload,
|
325
|
+
ProbeSpill &probe_spill, ProbeSpillLocalAppendState &spill_state,
|
326
|
+
DataChunk &spill_chunk);
|
321
327
|
|
322
328
|
private:
|
323
329
|
//! First and last partition of the current probe round
|
@@ -89,7 +89,7 @@ public:
|
|
89
89
|
private:
|
90
90
|
ClientContext &context;
|
91
91
|
//! Actual size can be smaller than the buffer size in case we allocate it too optimistically.
|
92
|
-
idx_t
|
92
|
+
idx_t actual_buffer_size;
|
93
93
|
//! We need to check for Byte Order Mark, to define the start position of this buffer
|
94
94
|
//! https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
|
95
95
|
idx_t start_position = 0;
|
@@ -20,10 +20,14 @@ public:
|
|
20
20
|
//! Return the 1-indexed line number
|
21
21
|
idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true,
|
22
22
|
bool stop_at_first = true);
|
23
|
+
//! In case an error happened we have to increment the lines read of that batch
|
24
|
+
void Increment(idx_t file_idx, idx_t batch_idx);
|
23
25
|
//! Verify if the CSV File was read correctly from [0,batch_idx] batches.
|
24
26
|
void Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos);
|
25
27
|
//! Lines read per batch, <file_index, <batch_index, count>>
|
26
28
|
vector<unordered_map<idx_t, idx_t>> lines_read;
|
29
|
+
//! Lines read per batch, <file_index, <batch_index, count>>
|
30
|
+
vector<unordered_map<idx_t, idx_t>> lines_errored;
|
27
31
|
//! Set of batches that have been initialized but are not yet finished.
|
28
32
|
vector<set<idx_t>> current_batches;
|
29
33
|
//! Pointer to CSV Reader Mutex
|
@@ -126,12 +126,10 @@ struct CSVReaderOptions {
|
|
126
126
|
bool normalize_names = false;
|
127
127
|
//! True, if column with that index must skip null check
|
128
128
|
vector<bool> force_not_null;
|
129
|
+
//! Number of sample chunks used in auto-detection
|
130
|
+
idx_t sample_size_chunks = 20480 / STANDARD_VECTOR_SIZE;
|
129
131
|
//! Consider all columns to be of type varchar
|
130
132
|
bool all_varchar = false;
|
131
|
-
//! Size of sample chunk used for dialect and type detection
|
132
|
-
idx_t sample_chunk_size = STANDARD_VECTOR_SIZE;
|
133
|
-
//! Number of sample chunks used for type detection
|
134
|
-
idx_t sample_chunks = 10;
|
135
133
|
//! Whether or not to automatically detect dialect and datatypes
|
136
134
|
bool auto_detect = false;
|
137
135
|
//! The file path of the CSV file to read
|
@@ -28,7 +28,7 @@ struct SnifferResult {
|
|
28
28
|
class CSVSniffer {
|
29
29
|
public:
|
30
30
|
explicit CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
|
31
|
-
CSVStateMachineCache &state_machine_cache);
|
31
|
+
CSVStateMachineCache &state_machine_cache, bool explicit_set_columns = false);
|
32
32
|
|
33
33
|
//! Main method that sniffs the CSV file, returns the types, names and options as a result
|
34
34
|
//! CSV Sniffing consists of five steps:
|
@@ -110,6 +110,8 @@ private:
|
|
110
110
|
//! ------------------------------------------------------//
|
111
111
|
void DetectHeader();
|
112
112
|
vector<string> names;
|
113
|
+
//! If Column Names and Types have been explicitly set
|
114
|
+
const bool explicit_set_columns;
|
113
115
|
|
114
116
|
//! ------------------------------------------------------//
|
115
117
|
//! ------------------ Type Replacement ----------------- //
|
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp
CHANGED
@@ -13,7 +13,7 @@
|
|
13
13
|
#include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
|
14
14
|
|
15
15
|
namespace duckdb {
|
16
|
-
static constexpr uint32_t NUM_STATES =
|
16
|
+
static constexpr uint32_t NUM_STATES = 9;
|
17
17
|
static constexpr uint32_t NUM_TRANSITIONS = 256;
|
18
18
|
typedef uint8_t state_machine_t[NUM_STATES][NUM_TRANSITIONS];
|
19
19
|
|
@@ -51,13 +51,12 @@ public:
|
|
51
51
|
OperatorSourceInput &input) const;
|
52
52
|
|
53
53
|
const TupleDataLayout &GetLayout() const;
|
54
|
-
idx_t
|
54
|
+
idx_t NumberOfPartitions(GlobalSinkState &sink) const;
|
55
55
|
static void SetMultiScan(GlobalSinkState &sink);
|
56
56
|
|
57
57
|
private:
|
58
58
|
void SetGroupingValues();
|
59
59
|
void PopulateGroupChunk(DataChunk &group_chunk, DataChunk &input_chunk) const;
|
60
|
-
idx_t CountInternal(GlobalSinkState &sink) const;
|
61
60
|
|
62
61
|
TupleDataLayout layout;
|
63
62
|
};
|
@@ -240,6 +240,7 @@ static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {{".parquet?", "parq
|
|
240
240
|
|
241
241
|
static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
|
242
242
|
// "azure",
|
243
|
+
"arrow",
|
243
244
|
"aws",
|
244
245
|
"autocomplete",
|
245
246
|
"excel",
|
@@ -249,7 +250,9 @@ static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
|
|
249
250
|
// "icu",
|
250
251
|
"json",
|
251
252
|
"parquet",
|
253
|
+
"postgres_scanner",
|
252
254
|
"sqlsmith",
|
255
|
+
"sqlite_scanner",
|
253
256
|
"tpcds",
|
254
257
|
"tpch",
|
255
258
|
"visualizer",
|
@@ -103,6 +103,8 @@ public:
|
|
103
103
|
// JOIN operation
|
104
104
|
DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
|
105
105
|
JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
|
106
|
+
shared_ptr<Relation> Join(const shared_ptr<Relation> &other, vector<unique_ptr<ParsedExpression>> condition,
|
107
|
+
JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
|
106
108
|
|
107
109
|
// CROSS PRODUCT operation
|
108
110
|
DUCKDB_API shared_ptr<Relation> CrossProduct(const shared_ptr<Relation> &other,
|
@@ -121,6 +123,8 @@ public:
|
|
121
123
|
DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates);
|
122
124
|
DUCKDB_API shared_ptr<Relation> Aggregate(const string &aggregate_list, const string &group_list);
|
123
125
|
DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates, const vector<string> &groups);
|
126
|
+
DUCKDB_API shared_ptr<Relation> Aggregate(vector<unique_ptr<ParsedExpression>> expressions,
|
127
|
+
const string &group_list);
|
124
128
|
|
125
129
|
// ALIAS
|
126
130
|
DUCKDB_API shared_ptr<Relation> Alias(const string &alias);
|
@@ -177,7 +177,7 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
|
|
177
177
|
void DBConfig::SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value) {
|
178
178
|
lock_guard<mutex> l(config_lock);
|
179
179
|
if (!option.set_global) {
|
180
|
-
throw
|
180
|
+
throw InvalidInputException("Could not set option \"%s\" as a global option", option.name);
|
181
181
|
}
|
182
182
|
D_ASSERT(option.reset_global);
|
183
183
|
Value input = value.DefaultCastAs(option.parameter_type);
|
@@ -1,8 +1,9 @@
|
|
1
1
|
#include "duckdb/main/query_result.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/box_renderer.hpp"
|
2
4
|
#include "duckdb/common/printer.hpp"
|
3
5
|
#include "duckdb/common/vector.hpp"
|
4
6
|
#include "duckdb/main/client_context.hpp"
|
5
|
-
#include "duckdb/common/box_renderer.hpp"
|
6
7
|
namespace duckdb {
|
7
8
|
|
8
9
|
BaseQueryResult::BaseQueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties_p,
|
@@ -100,9 +101,17 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
|
|
100
101
|
}
|
101
102
|
// now compare the actual values
|
102
103
|
// fetch chunks
|
104
|
+
unique_ptr<DataChunk> lchunk, rchunk;
|
105
|
+
idx_t lindex = 0, rindex = 0;
|
103
106
|
while (true) {
|
104
|
-
|
105
|
-
|
107
|
+
if (!lchunk || lindex == lchunk->size()) {
|
108
|
+
lchunk = Fetch();
|
109
|
+
lindex = 0;
|
110
|
+
}
|
111
|
+
if (!rchunk || rindex == rchunk->size()) {
|
112
|
+
rchunk = other.Fetch();
|
113
|
+
rindex = 0;
|
114
|
+
}
|
106
115
|
if (!lchunk && !rchunk) {
|
107
116
|
return true;
|
108
117
|
}
|
@@ -112,14 +121,11 @@ bool QueryResult::Equals(QueryResult &other) { // LCOV_EXCL_START
|
|
112
121
|
if (lchunk->size() == 0 && rchunk->size() == 0) {
|
113
122
|
return true;
|
114
123
|
}
|
115
|
-
if (lchunk->size() != rchunk->size()) {
|
116
|
-
return false;
|
117
|
-
}
|
118
124
|
D_ASSERT(lchunk->ColumnCount() == rchunk->ColumnCount());
|
119
|
-
for (
|
120
|
-
for (idx_t
|
121
|
-
auto lvalue = lchunk->GetValue(col,
|
122
|
-
auto rvalue = rchunk->GetValue(col,
|
125
|
+
for (; lindex < lchunk->size() && rindex < rchunk->size(); lindex++, rindex++) {
|
126
|
+
for (idx_t col = 0; col < rchunk->ColumnCount(); col++) {
|
127
|
+
auto lvalue = lchunk->GetValue(col, lindex);
|
128
|
+
auto rvalue = rchunk->GetValue(col, rindex);
|
123
129
|
if (lvalue.IsNull() && rvalue.IsNull()) {
|
124
130
|
continue;
|
125
131
|
}
|
@@ -130,7 +130,12 @@ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const str
|
|
130
130
|
JoinRefType ref_type) {
|
131
131
|
auto expression_list = Parser::ParseExpressionList(condition, context.GetContext()->GetParserOptions());
|
132
132
|
D_ASSERT(!expression_list.empty());
|
133
|
+
return Join(other, std::move(expression_list), type, ref_type);
|
134
|
+
}
|
133
135
|
|
136
|
+
shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other,
|
137
|
+
vector<unique_ptr<ParsedExpression>> expression_list, JoinType type,
|
138
|
+
JoinRefType ref_type) {
|
134
139
|
if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) {
|
135
140
|
// multiple columns or single column ref: the condition is a USING list
|
136
141
|
vector<string> using_columns;
|
@@ -197,6 +202,11 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const
|
|
197
202
|
return this->Aggregate(aggregate_list, group_list);
|
198
203
|
}
|
199
204
|
|
205
|
+
shared_ptr<Relation> Relation::Aggregate(vector<unique_ptr<ParsedExpression>> expressions, const string &group_list) {
|
206
|
+
auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
|
207
|
+
return make_shared<AggregateRelation>(shared_from_this(), std::move(expressions), std::move(groups));
|
208
|
+
}
|
209
|
+
|
200
210
|
string Relation::GetAlias() {
|
201
211
|
return "relation";
|
202
212
|
}
|
@@ -70,9 +70,6 @@ unique_ptr<Expression> DatePartSimplificationRule::Apply(LogicalOperator &op, ve
|
|
70
70
|
case DatePartSpecifier::DOY:
|
71
71
|
new_function_name = "dayofyear";
|
72
72
|
break;
|
73
|
-
case DatePartSpecifier::EPOCH:
|
74
|
-
new_function_name = "epoch";
|
75
|
-
break;
|
76
73
|
case DatePartSpecifier::MICROSECONDS:
|
77
74
|
new_function_name = "microsecond";
|
78
75
|
break;
|
@@ -51,6 +51,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
|
|
51
51
|
unique_ptr<LogicalOperator> &right_child, const unordered_set<idx_t> &left_bindings,
|
52
52
|
const unordered_set<idx_t> &right_bindings, vector<unique_ptr<Expression>> &expressions,
|
53
53
|
vector<JoinCondition> &conditions, vector<unique_ptr<Expression>> &arbitrary_expressions) {
|
54
|
+
|
54
55
|
for (auto &expr : expressions) {
|
55
56
|
auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
|
56
57
|
if (total_side != JoinSide::BOTH) {
|
@@ -77,10 +78,17 @@ void LogicalComparisonJoin::ExtractJoinConditions(
|
|
77
78
|
continue;
|
78
79
|
}
|
79
80
|
}
|
80
|
-
} else if (
|
81
|
-
|
82
|
-
expr->type == ExpressionType::
|
83
|
-
expr->type == ExpressionType::
|
81
|
+
} else if (expr->type == ExpressionType::COMPARE_EQUAL || expr->type == ExpressionType::COMPARE_NOTEQUAL ||
|
82
|
+
expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
|
83
|
+
expr->type == ExpressionType::COMPARE_LESSTHAN ||
|
84
|
+
expr->type == ExpressionType::COMPARE_GREATERTHAN ||
|
85
|
+
expr->type == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
|
86
|
+
expr->type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
|
87
|
+
expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
|
88
|
+
expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
|
89
|
+
expr->type == ExpressionType::COMPARE_DISTINCT_FROM)
|
90
|
+
|
91
|
+
{
|
84
92
|
// comparison, check if we can create a comparison JoinCondition
|
85
93
|
if (CreateJoinCondition(*expr, left_bindings, right_bindings, conditions)) {
|
86
94
|
// successfully created the join condition
|
@@ -449,11 +449,10 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
|
|
449
449
|
if (start_bit % 8 != 0) {
|
450
450
|
// handle sub-bit stuff (yay)
|
451
451
|
idx_t byte_pos = start_bit / 8;
|
452
|
-
idx_t bit_start = byte_pos * 8;
|
453
452
|
idx_t bit_end = (byte_pos + 1) * 8;
|
454
|
-
ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()
|
453
|
+
ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()));
|
455
454
|
for (idx_t i = start_bit; i < bit_end; i++) {
|
456
|
-
mask.SetValid(i
|
455
|
+
mask.SetValid(i);
|
457
456
|
}
|
458
457
|
revert_start = bit_end / 8;
|
459
458
|
} else {
|
@@ -832,6 +832,7 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) {
|
|
832
832
|
void DataTable::RevertAppend(idx_t start_row, idx_t count) {
|
833
833
|
lock_guard<mutex> lock(append_lock);
|
834
834
|
|
835
|
+
// revert any appends to indexes
|
835
836
|
if (!info->indexes.Empty()) {
|
836
837
|
idx_t current_row_base = start_row;
|
837
838
|
row_t row_data[STANDARD_VECTOR_SIZE];
|
@@ -847,6 +848,15 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) {
|
|
847
848
|
current_row_base += chunk.size();
|
848
849
|
});
|
849
850
|
}
|
851
|
+
|
852
|
+
// we need to vacuum the indexes to remove any buffers that are now empty
|
853
|
+
// due to reverting the appends
|
854
|
+
info->indexes.Scan([&](Index &index) {
|
855
|
+
index.Vacuum();
|
856
|
+
return false;
|
857
|
+
});
|
858
|
+
|
859
|
+
// revert the data table append
|
850
860
|
RevertAppendInternal(start_row, count);
|
851
861
|
}
|
852
862
|
|
@@ -101,28 +101,27 @@ void CSVReaderOptions::Serialize(Serializer &serializer) const {
|
|
101
101
|
serializer.WriteProperty(111, "normalize_names", normalize_names);
|
102
102
|
serializer.WriteProperty(112, "force_not_null", force_not_null);
|
103
103
|
serializer.WriteProperty(113, "all_varchar", all_varchar);
|
104
|
-
serializer.WriteProperty(114, "
|
105
|
-
serializer.WriteProperty(115, "
|
106
|
-
serializer.WriteProperty(116, "
|
107
|
-
serializer.WriteProperty(117, "
|
108
|
-
serializer.WriteProperty(118, "
|
109
|
-
serializer.WriteProperty(119, "
|
110
|
-
serializer.WriteProperty(120, "
|
111
|
-
serializer.WriteProperty(121, "
|
112
|
-
serializer.WriteProperty(122, "
|
113
|
-
serializer.WriteProperty(123, "
|
114
|
-
serializer.WriteProperty(124, "
|
115
|
-
serializer.WriteProperty(125, "
|
116
|
-
serializer.WriteProperty(126, "
|
117
|
-
serializer.WriteProperty(127, "dialect_options.state_machine_options.
|
118
|
-
serializer.WriteProperty(128, "dialect_options.state_machine_options.
|
119
|
-
serializer.WriteProperty(129, "dialect_options.
|
120
|
-
serializer.WriteProperty(130, "dialect_options.
|
121
|
-
serializer.WriteProperty(131, "dialect_options.
|
122
|
-
serializer.WriteProperty(132, "dialect_options.
|
123
|
-
serializer.WriteProperty(133, "dialect_options.
|
124
|
-
serializer.WriteProperty(134, "dialect_options.
|
125
|
-
serializer.WriteProperty(135, "dialect_options.has_format", dialect_options.has_format);
|
104
|
+
serializer.WriteProperty(114, "sample_size_chunks", sample_size_chunks);
|
105
|
+
serializer.WriteProperty(115, "auto_detect", auto_detect);
|
106
|
+
serializer.WriteProperty(116, "file_path", file_path);
|
107
|
+
serializer.WriteProperty(117, "decimal_separator", decimal_separator);
|
108
|
+
serializer.WriteProperty(118, "null_padding", null_padding);
|
109
|
+
serializer.WriteProperty(119, "buffer_size", buffer_size);
|
110
|
+
serializer.WriteProperty(120, "file_options", file_options);
|
111
|
+
serializer.WriteProperty(121, "force_quote", force_quote);
|
112
|
+
serializer.WriteProperty(122, "rejects_table_name", rejects_table_name);
|
113
|
+
serializer.WriteProperty(123, "rejects_limit", rejects_limit);
|
114
|
+
serializer.WriteProperty(124, "rejects_recovery_columns", rejects_recovery_columns);
|
115
|
+
serializer.WriteProperty(125, "rejects_recovery_column_ids", rejects_recovery_column_ids);
|
116
|
+
serializer.WriteProperty(126, "dialect_options.state_machine_options.delimiter", dialect_options.state_machine_options.delimiter);
|
117
|
+
serializer.WriteProperty(127, "dialect_options.state_machine_options.quote", dialect_options.state_machine_options.quote);
|
118
|
+
serializer.WriteProperty(128, "dialect_options.state_machine_options.escape", dialect_options.state_machine_options.escape);
|
119
|
+
serializer.WriteProperty(129, "dialect_options.header", dialect_options.header);
|
120
|
+
serializer.WriteProperty(130, "dialect_options.num_cols", dialect_options.num_cols);
|
121
|
+
serializer.WriteProperty(131, "dialect_options.new_line", dialect_options.new_line);
|
122
|
+
serializer.WriteProperty(132, "dialect_options.skip_rows", dialect_options.skip_rows);
|
123
|
+
serializer.WriteProperty(133, "dialect_options.date_format", dialect_options.date_format);
|
124
|
+
serializer.WriteProperty(134, "dialect_options.has_format", dialect_options.has_format);
|
126
125
|
}
|
127
126
|
|
128
127
|
CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
|
@@ -141,28 +140,27 @@ CSVReaderOptions CSVReaderOptions::Deserialize(Deserializer &deserializer) {
|
|
141
140
|
deserializer.ReadProperty(111, "normalize_names", result.normalize_names);
|
142
141
|
deserializer.ReadProperty(112, "force_not_null", result.force_not_null);
|
143
142
|
deserializer.ReadProperty(113, "all_varchar", result.all_varchar);
|
144
|
-
deserializer.ReadProperty(114, "
|
145
|
-
deserializer.ReadProperty(115, "
|
146
|
-
deserializer.ReadProperty(116, "
|
147
|
-
deserializer.ReadProperty(117, "
|
148
|
-
deserializer.ReadProperty(118, "
|
149
|
-
deserializer.ReadProperty(119, "
|
150
|
-
deserializer.ReadProperty(120, "
|
151
|
-
deserializer.ReadProperty(121, "
|
152
|
-
deserializer.ReadProperty(122, "
|
153
|
-
deserializer.ReadProperty(123, "
|
154
|
-
deserializer.ReadProperty(124, "
|
155
|
-
deserializer.ReadProperty(125, "
|
156
|
-
deserializer.ReadProperty(126, "
|
157
|
-
deserializer.ReadProperty(127, "dialect_options.state_machine_options.
|
158
|
-
deserializer.ReadProperty(128, "dialect_options.state_machine_options.
|
159
|
-
deserializer.ReadProperty(129, "dialect_options.
|
160
|
-
deserializer.ReadProperty(130, "dialect_options.
|
161
|
-
deserializer.ReadProperty(131, "dialect_options.
|
162
|
-
deserializer.ReadProperty(132, "dialect_options.
|
163
|
-
deserializer.ReadProperty(133, "dialect_options.
|
164
|
-
deserializer.ReadProperty(134, "dialect_options.
|
165
|
-
deserializer.ReadProperty(135, "dialect_options.has_format", result.dialect_options.has_format);
|
143
|
+
deserializer.ReadProperty(114, "sample_size_chunks", result.sample_size_chunks);
|
144
|
+
deserializer.ReadProperty(115, "auto_detect", result.auto_detect);
|
145
|
+
deserializer.ReadProperty(116, "file_path", result.file_path);
|
146
|
+
deserializer.ReadProperty(117, "decimal_separator", result.decimal_separator);
|
147
|
+
deserializer.ReadProperty(118, "null_padding", result.null_padding);
|
148
|
+
deserializer.ReadProperty(119, "buffer_size", result.buffer_size);
|
149
|
+
deserializer.ReadProperty(120, "file_options", result.file_options);
|
150
|
+
deserializer.ReadProperty(121, "force_quote", result.force_quote);
|
151
|
+
deserializer.ReadProperty(122, "rejects_table_name", result.rejects_table_name);
|
152
|
+
deserializer.ReadProperty(123, "rejects_limit", result.rejects_limit);
|
153
|
+
deserializer.ReadProperty(124, "rejects_recovery_columns", result.rejects_recovery_columns);
|
154
|
+
deserializer.ReadProperty(125, "rejects_recovery_column_ids", result.rejects_recovery_column_ids);
|
155
|
+
deserializer.ReadProperty(126, "dialect_options.state_machine_options.delimiter", result.dialect_options.state_machine_options.delimiter);
|
156
|
+
deserializer.ReadProperty(127, "dialect_options.state_machine_options.quote", result.dialect_options.state_machine_options.quote);
|
157
|
+
deserializer.ReadProperty(128, "dialect_options.state_machine_options.escape", result.dialect_options.state_machine_options.escape);
|
158
|
+
deserializer.ReadProperty(129, "dialect_options.header", result.dialect_options.header);
|
159
|
+
deserializer.ReadProperty(130, "dialect_options.num_cols", result.dialect_options.num_cols);
|
160
|
+
deserializer.ReadProperty(131, "dialect_options.new_line", result.dialect_options.new_line);
|
161
|
+
deserializer.ReadProperty(132, "dialect_options.skip_rows", result.dialect_options.skip_rows);
|
162
|
+
deserializer.ReadProperty(133, "dialect_options.date_format", result.dialect_options.date_format);
|
163
|
+
deserializer.ReadProperty(134, "dialect_options.has_format", result.dialect_options.has_format);
|
166
164
|
return result;
|
167
165
|
}
|
168
166
|
|
package/src/statement.cpp
CHANGED
@@ -93,11 +93,9 @@ Statement::Statement(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Statemen
|
|
93
93
|
int length = info.Length();
|
94
94
|
|
95
95
|
if (length <= 0 || !Connection::HasInstance(info[0])) {
|
96
|
-
Napi::TypeError::New(env, "Connection object expected")
|
97
|
-
return;
|
96
|
+
throw Napi::TypeError::New(env, "Connection object expected");
|
98
97
|
} else if (length <= 1 || !info[1].IsString()) {
|
99
|
-
Napi::TypeError::New(env, "SQL query expected")
|
100
|
-
return;
|
98
|
+
throw Napi::TypeError::New(env, "SQL query expected");
|
101
99
|
}
|
102
100
|
|
103
101
|
connection_ref = Napi::ObjectWrap<Connection>::Unwrap(info[0].As<Napi::Object>());
|
@@ -2,6 +2,7 @@ import * as sqlite3 from '..';
|
|
2
2
|
import * as assert from 'assert';
|
3
3
|
import {DuckDbError, RowData} from "..";
|
4
4
|
import {Worker} from 'worker_threads';
|
5
|
+
import {expect} from 'chai';
|
5
6
|
|
6
7
|
describe('error handling', function() {
|
7
8
|
var db: sqlite3.Database;
|
@@ -163,4 +164,9 @@ describe('error handling', function() {
|
|
163
164
|
await run_worker(); // first should always succeed
|
164
165
|
await run_worker(); // second fails without thread safety
|
165
166
|
})
|
167
|
+
|
168
|
+
it("shouldn't crash on an exception", () => {
|
169
|
+
expect(() => new sqlite3.Database(':memory:', {file_search_path: '/'})).to.throw('Could not set option "file_search_path" as a global option');
|
170
|
+
});
|
166
171
|
});
|
172
|
+
|