duckdb 0.3.5-dev1125.0 → 0.3.5-dev1167.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +12 -2
- package/src/duckdb.hpp +5 -2
- package/src/parquet-amalgamation.cpp +36102 -35867
- package/src/parquet-amalgamation.hpp +186 -16
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -69189,6 +69189,10 @@ public:
|
|
|
69189
69189
|
return plain_file_source;
|
|
69190
69190
|
}
|
|
69191
69191
|
|
|
69192
|
+
bool OnDiskFile() {
|
|
69193
|
+
return file_handle->OnDiskFile();
|
|
69194
|
+
}
|
|
69195
|
+
|
|
69192
69196
|
idx_t FileSize() {
|
|
69193
69197
|
return file_size;
|
|
69194
69198
|
}
|
|
@@ -70771,13 +70775,19 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start) {
|
|
|
70771
70775
|
|
|
70772
70776
|
// the remaining part of the last buffer
|
|
70773
70777
|
idx_t remaining = buffer_size - start;
|
|
70774
|
-
|
|
70778
|
+
|
|
70779
|
+
bool large_buffers = mode == ParserMode::PARSING && !file_handle->OnDiskFile() && file_handle->CanSeek();
|
|
70780
|
+
idx_t buffer_read_size = large_buffers ? INITIAL_BUFFER_SIZE_LARGE : INITIAL_BUFFER_SIZE;
|
|
70781
|
+
|
|
70775
70782
|
while (remaining > buffer_read_size) {
|
|
70776
70783
|
buffer_read_size *= 2;
|
|
70777
70784
|
}
|
|
70778
|
-
|
|
70785
|
+
|
|
70786
|
+
// Check line length
|
|
70787
|
+
if (remaining > options.maximum_line_size) {
|
|
70779
70788
|
throw InvalidInputException("Maximum line size of %llu bytes exceeded!", options.maximum_line_size);
|
|
70780
70789
|
}
|
|
70790
|
+
|
|
70781
70791
|
buffer = unique_ptr<char[]>(new char[buffer_read_size + remaining + 1]);
|
|
70782
70792
|
buffer_size = remaining + buffer_read_size;
|
|
70783
70793
|
if (remaining > 0) {
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "9c978fb3c"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev1167"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -22591,6 +22591,7 @@ struct BufferedCSVReaderOptions {
|
|
|
22591
22591
|
//! How many leading rows to skip
|
|
22592
22592
|
idx_t skip_rows = 0;
|
|
22593
22593
|
//! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB)
|
|
22594
|
+
//! note that this is the guaranteed line length that will succeed, longer lines may be accepted if slightly above
|
|
22594
22595
|
idx_t maximum_line_size = 2097152;
|
|
22595
22596
|
//! Whether or not header names shall be normalized
|
|
22596
22597
|
bool normalize_names = false;
|
|
@@ -22644,6 +22645,8 @@ enum class ParserMode : uint8_t { PARSING = 0, SNIFFING_DIALECT = 1, SNIFFING_DA
|
|
|
22644
22645
|
class BufferedCSVReader {
|
|
22645
22646
|
//! Initial buffer read size; can be extended for long lines
|
|
22646
22647
|
static constexpr idx_t INITIAL_BUFFER_SIZE = 16384;
|
|
22648
|
+
//! Larger buffer size for non disk files
|
|
22649
|
+
static constexpr idx_t INITIAL_BUFFER_SIZE_LARGE = 10000000; // 10MB
|
|
22647
22650
|
ParserMode mode;
|
|
22648
22651
|
|
|
22649
22652
|
public:
|