duckdb 0.9.1-dev43.0 → 0.9.1-dev69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +19 -21
- package/src/duckdb/src/function/table/read_csv.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +2 -0
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
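package/package.json
CHANGED
(diff body for this file is not present in this extract)
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp
CHANGED
@@ -22,39 +22,37 @@ CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager>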
|
|
22
22
|
}
|
23
23
|
}
|
24
24
|
|
25
|
+
void CSVSniffer::SetResultOptions() {
|
26
|
+
options.dialect_options = best_candidate->dialect_options;
|
27
|
+
options.dialect_options.new_line = best_candidate->dialect_options.new_line;
|
28
|
+
options.has_header = best_candidate->dialect_options.header;
|
29
|
+
options.skip_rows_set = options.dialect_options.skip_rows > 0;
|
30
|
+
if (options.has_header) {
|
31
|
+
options.dialect_options.true_start = best_start_with_header;
|
32
|
+
} else {
|
33
|
+
options.dialect_options.true_start = best_start_without_header;
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
25
37
|
SnifferResult CSVSniffer::SniffCSV() {
|
26
38
|
// 1. Dialect Detection
|
27
39
|
DetectDialect();
|
28
|
-
if (explicit_set_columns) {
|
29
|
-
if (!candidates.empty()) {
|
30
|
-
options.dialect_options.state_machine_options = candidates[0]->dialect_options.state_machine_options;
|
31
|
-
options.dialect_options.new_line = candidates[0]->dialect_options.new_line;
|
32
|
-
}
|
33
|
-
// We do not need to run type and header detection as these were defined by the user
|
34
|
-
return SnifferResult(detected_types, names);
|
35
|
-
}
|
36
40
|
// 2. Type Detection
|
37
41
|
DetectTypes();
|
38
42
|
// 3. Header Detection
|
39
43
|
DetectHeader();
|
44
|
+
if (explicit_set_columns) {
|
45
|
+
SetResultOptions();
|
46
|
+
// We do not need to run type refinement, since the types have been given by the user
|
47
|
+
return SnifferResult({}, {});
|
48
|
+
}
|
40
49
|
D_ASSERT(best_sql_types_candidates_per_column_idx.size() == names.size());
|
41
50
|
// 4. Type Replacement
|
42
51
|
ReplaceTypes();
|
43
52
|
// 5. Type Refinement
|
44
53
|
RefineTypes();
|
45
|
-
// We are done,
|
46
|
-
|
47
|
-
// Set the CSV Options in the reference
|
48
|
-
options.dialect_options = best_candidate->dialect_options;
|
49
|
-
options.has_header = best_candidate->dialect_options.header;
|
50
|
-
options.skip_rows_set = options.dialect_options.skip_rows > 0;
|
51
|
-
if (options.has_header) {
|
52
|
-
options.dialect_options.true_start = best_start_with_header;
|
53
|
-
} else {
|
54
|
-
options.dialect_options.true_start = best_start_without_header;
|
55
|
-
}
|
56
|
-
|
57
|
-
// Return the types and names
|
54
|
+
// We are done, Set the CSV Options in the reference. Construct and return the result.
|
55
|
+
SetResultOptions();
|
58
56
|
return SnifferResult(detected_types, names);
|
59
57
|
}
|
60
58
|
|
package/src/duckdb/src/function/table/read_csv.cpp
CHANGED
@@ -38,7 +38,7 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
|
|
38
38
|
auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
39
39
|
//! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
|
40
40
|
bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
|
41
|
-
if (options.parallel_mode != ParallelMode::PARALLEL && many_csv_files) {
|
41
|
+
if (options.parallel_mode != ParallelMode::PARALLEL && (many_csv_files || number_of_threads == 1)) {
|
42
42
|
single_threaded = true;
|
43
43
|
}
|
44
44
|
if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
|
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "v0.9.1-dev43"
|
2
|
+
#define DUCKDB_VERSION "v0.9.1-dev69"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "5ba1abd81a"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp
CHANGED
@@ -50,6 +50,8 @@ private:
|
|
50
50
|
CSVReaderOptions &options;
|
51
51
|
//! Buffer being used on sniffer
|
52
52
|
shared_ptr<CSVBufferManager> buffer_manager;
|
53
|
+
//! Sets the result options
|
54
|
+
void SetResultOptions();
|
53
55
|
|
54
56
|
//! ------------------------------------------------------//
|
55
57
|
//! ----------------- Dialect Detection ----------------- //
|
package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp
CHANGED
@@ -38,8 +38,8 @@ void CommonAggregateOptimizer::ExtractCommonAggregates(LogicalAggregate &aggr) {
|
|
38
38
|
// aggregate does not exist yet: add it to the map
|
39
39
|
aggregate_remap[*aggr.expressions[i]] = i;
|
40
40
|
if (i != original_index) {
|
41
|
-
// this aggregate is not erased, however an
|
42
|
-
// so we need to remap this
|
41
|
+
// this aggregate is not erased, however an aggregate BEFORE it has been erased
|
42
|
+
// so we need to remap this aggregate
|
43
43
|
ColumnBinding original_binding(aggr.aggregate_index, original_index);
|
44
44
|
ColumnBinding new_binding(aggr.aggregate_index, i);
|
45
45
|
aggregate_map[original_binding] = new_binding;
|