duckdb 0.8.1-dev425.0 → 0.8.1-dev480.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/package.json +3 -1
  2. package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -12
  3. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +4 -3
  4. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +1 -0
  5. package/src/duckdb/src/function/table/read_csv.cpp +1 -0
  6. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  7. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  8. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +2 -3
  9. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +8 -1
  10. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +0 -1
  11. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
  12. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  13. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +1 -0
  14. package/src/duckdb/src/include/duckdb/main/query_result.hpp +6 -0
  15. package/src/duckdb/src/include/duckdb/main/settings.hpp +10 -0
  16. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +2 -2
  17. package/src/duckdb/src/main/capi/arrow-c.cpp +4 -4
  18. package/src/duckdb/src/main/client_context.cpp +4 -3
  19. package/src/duckdb/src/main/config.cpp +1 -0
  20. package/src/duckdb/src/main/database_manager.cpp +16 -0
  21. package/src/duckdb/src/main/query_result.cpp +6 -2
  22. package/src/duckdb/src/main/settings/settings.cpp +19 -0
  23. package/src/duckdb/src/parser/parser.cpp +94 -34
  24. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +2 -0
  25. package/test/extension.test.ts +11 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
- "version": "0.8.1-dev425.0",
+ "version": "0.8.1-dev480.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -28,11 +28,13 @@
   },
   "devDependencies": {
   "@types/chai": "^4.3.4",
+ "@types/chai-as-promised": "^7.1.5",
   "@types/mocha": "^10.0.0",
   "@types/node": "^18.11.0",
   "apache-arrow": "^9.0.0",
   "aws-sdk": "^2.790.0",
   "chai": "^4.3.6",
+ "chai-as-promised": "^7.1.1",
   "jsdoc3-parser": "^2.0.0",
   "mocha": "^8.3.0",
   "ts-node": "^10.9.1",
package/src/duckdb/src/common/arrow/arrow_converter.cpp CHANGED
@@ -16,7 +16,7 @@
 namespace duckdb {

 void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options) {
- ArrowAppender appender(input.GetTypes(), input.size(), options);
+ ArrowAppender appender(input.GetTypes(), input.size(), std::move(options));
 appender.Append(input, 0, input.size(), input.size());
 *out_array = appender.Finalize();
 }
@@ -59,10 +59,10 @@ void InitializeChild(ArrowSchema &child, const string &name = "") {
 child.dictionary = nullptr;
 }
 void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const string &config_timezone, ArrowOptions options);
+ const ArrowOptions &options);

 void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const string &config_timezone, ArrowOptions options) {
+ const ArrowOptions &options) {
 child.format = "+m";
 //! Map has one child which is a struct
 child.n_children = 1;
@@ -73,11 +73,11 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
 InitializeChild(root_holder.nested_children.back()[0]);
 child.children = &root_holder.nested_children_ptr.back()[0];
 child.children[0]->name = "entries";
- SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), config_timezone, options);
+ SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
 }

 void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
- const string &config_timezone, ArrowOptions options) {
+ const ArrowOptions &options) {
 switch (type.id()) {
 case LogicalTypeId::BOOLEAN:
 child.format = "b";
@@ -134,7 +134,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 child.format = "tsu:";
 break;
 case LogicalTypeId::TIMESTAMP_TZ: {
- string format = "tsu:" + config_timezone;
+ string format = "tsu:" + options.time_zone;
 auto format_ptr = make_unsafe_uniq_array<char>(format.size() + 1);
 for (size_t i = 0; i < format.size(); i++) {
 format_ptr[i] = format[i];
@@ -192,7 +192,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 InitializeChild(root_holder.nested_children.back()[0]);
 child.children = &root_holder.nested_children_ptr.back()[0];
 child.children[0]->name = "l";
- SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), config_timezone, options);
+ SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options);
 break;
 }
 case LogicalTypeId::STRUCT: {
@@ -220,13 +220,12 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 root_holder.owned_type_names.push_back(std::move(name_ptr));

 child.children[type_idx]->name = root_holder.owned_type_names.back().get();
- SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, config_timezone,
- options);
+ SetArrowFormat(root_holder, *child.children[type_idx], child_types[type_idx].second, options);
 }
 break;
 }
 case LogicalTypeId::MAP: {
- SetArrowMapFormat(root_holder, child, type, config_timezone, options);
+ SetArrowMapFormat(root_holder, child, type, options);
 break;
 }
 case LogicalTypeId::ENUM: {
@@ -259,7 +258,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 }

 void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
- const vector<string> &names, const string &config_timezone, ArrowOptions options) {
+ const vector<string> &names, const ArrowOptions &options) {
 D_ASSERT(out_schema);
 D_ASSERT(types.size() == names.size());
 idx_t column_count = types.size();
@@ -287,7 +286,7 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector<Logical

 auto &child = root_holder->children[col_idx];
 InitializeChild(child, names[col_idx]);
- SetArrowFormat(*root_holder, child, types[col_idx], config_timezone, options);
+ SetArrowFormat(*root_holder, child, types[col_idx], options);
 }

 // Release ownership to caller
package/src/duckdb/src/common/arrow/arrow_wrapper.cpp CHANGED
@@ -80,7 +80,7 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
 auto my_stream = reinterpret_cast<ResultArrowArrayStreamWrapper *>(stream->private_data);
 if (!my_stream->column_types.empty()) {
 ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
- my_stream->timezone_config);
+ QueryResult::GetArrowOptions(*my_stream->result));
 return 0;
 }

@@ -100,7 +100,8 @@ int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *st
 my_stream->column_types = result.types;
 my_stream->column_names = result.names;
 }
- ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names, my_stream->timezone_config);
+ ArrowConverter::ToArrowSchema(out, my_stream->column_types, my_stream->column_names,
+ QueryResult::GetArrowOptions(*my_stream->result));
 return 0;
 }

@@ -186,7 +187,7 @@ bool ArrowUtil::TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &chunk,
 bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray *out, idx_t &count,
 PreservedError &error) {
 count = 0;
- ArrowAppender appender(result->types, chunk_size);
+ ArrowAppender appender(result->types, chunk_size, QueryResult::GetArrowOptions(*result));
 auto &current_chunk = result->current_chunk;
 if (current_chunk.Valid()) {
 // We start by scanning the non-finished current chunk
package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp CHANGED
@@ -138,6 +138,7 @@ AggregateFunctionSet ApproxCountDistinctFun::GetFunctions() {
 approx_count.AddFunction(GetApproxCountDistinctFunction(LogicalType::VARCHAR));
 approx_count.AddFunction(GetApproxCountDistinctFunction(LogicalType::TIMESTAMP));
 approx_count.AddFunction(GetApproxCountDistinctFunction(LogicalType::TIMESTAMP_TZ));
+ approx_count.AddFunction(GetApproxCountDistinctFunction(LogicalType::BLOB));
 return approx_count;
 }

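The approx_count.cpp hunk above adds a BLOB overload to approx_count_distinct. Below is a minimal sketch of exercising it through this package's Node.js API; the inline VALUES data and the cast to BLOB are illustrative assumptions rather than anything taken from the diff.

import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
// approx_count_distinct over a BLOB expression; 'a' appears twice, so the estimate should be close to 2.
db.all(
    "SELECT approx_count_distinct(s::BLOB) AS approx_unique FROM (VALUES ('a'), ('b'), ('a')) t(s);",
    function (err: Error | null, rows: duckdb.TableData) {
        if (err) throw err;
        console.log(rows);
    }
);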
package/src/duckdb/src/function/table/read_csv.cpp CHANGED
@@ -424,6 +424,7 @@ bool ParallelCSVGlobalState::Finished() {

 void ParallelCSVGlobalState::Verify() {
 // All threads are done, we run some magic sweet verification code
+ lock_guard<mutex> parallel_lock(main_mutex);
 if (running_threads == 0) {
 D_ASSERT(tuple_end.size() == tuple_start.size());
 for (idx_t i = 0; i < tuple_start.size(); i++) {
package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.1-dev425"
+ #define DUCKDB_VERSION "0.8.1-dev480"
 #endif
 #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "f519703700"
+ #define DUCKDB_SOURCE_ID "da89803681"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp CHANGED
@@ -19,7 +19,7 @@ struct ArrowAppendData;
 //! The ArrowAppender class can be used to incrementally construct an arrow array by appending data chunks into it
 class ArrowAppender {
 public:
- DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ArrowOptions options = ArrowOptions());
+ DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ArrowOptions options);
 DUCKDB_API ~ArrowAppender();

 //! Append a data chunk to the underlying arrow array
package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp CHANGED
@@ -18,9 +18,8 @@ namespace duckdb {

 struct ArrowConverter {
 DUCKDB_API static void ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
- const vector<string> &names, const string &config_timezone,
- ArrowOptions options = ArrowOptions());
- DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options = ArrowOptions());
+ const vector<string> &names, const ArrowOptions &options);
+ DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options);
 };

 } // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp CHANGED
@@ -13,6 +13,13 @@ namespace duckdb {
 enum ArrowOffsetSize { REGULAR, LARGE };

 struct ArrowOptions {
- ArrowOffsetSize offset_size = ArrowOffsetSize::LARGE;
+ explicit ArrowOptions(ArrowOffsetSize offset_size_p) : offset_size(offset_size_p) {
+ }
+ ArrowOptions(ArrowOffsetSize offset_size_p, string timezone_p) : offset_size(offset_size_p), time_zone(timezone_p) {
+ }
+ ArrowOptions() {
+ }
+ ArrowOffsetSize offset_size = ArrowOffsetSize::REGULAR;
+ string time_zone = "UTC";
 };
 } // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp CHANGED
@@ -21,7 +21,6 @@ public:
 idx_t batch_size;
 vector<LogicalType> column_types;
 vector<string> column_names;
- string timezone_config;

 private:
 static int MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp CHANGED
@@ -13,6 +13,8 @@
 #include "duckdb/common/types.hpp"
 #include "duckdb/common/types/data_chunk.hpp"
 #include "duckdb/main/appender.hpp"
+ #include "duckdb/common/arrow/arrow_options.hpp"
+
 #include <cstring>
 #include <cassert>

@@ -46,7 +48,7 @@ struct PendingStatementWrapper {
 struct ArrowResultWrapper {
 unique_ptr<MaterializedQueryResult> result;
 unique_ptr<DataChunk> current_chunk;
- string timezone_config;
+ ArrowOptions options;
 };

 struct AppenderWrapper {
package/src/duckdb/src/include/duckdb/main/config.hpp CHANGED
@@ -28,6 +28,7 @@
 #include "duckdb/optimizer/optimizer_extension.hpp"
 #include "duckdb/parser/parser_extension.hpp"
 #include "duckdb/planner/operator_extension.hpp"
+ #include "duckdb/common/arrow/arrow_options.hpp"

 namespace duckdb {
 class BufferPool;
@@ -135,6 +136,8 @@ struct DBConfigOptions {
 WindowAggregationMode window_mode = WindowAggregationMode::WINDOW;
 //! Whether or not preserving insertion order should be preserved
 bool preserve_insertion_order = true;
+ //! Whether Arrow Arrays use Large or Regular buffers
+ ArrowOffsetSize arrow_offset_size = ArrowOffsetSize::REGULAR;
 //! Database configuration variables as controlled by SET
 case_insensitive_map_t<Value> set_variables;
 //! Database configuration variable default values;
package/src/duckdb/src/include/duckdb/main/database_manager.hpp CHANGED
@@ -44,6 +44,7 @@ public:
 //! Returns a reference to the system catalog
 Catalog &GetSystemCatalog();
 static const string &GetDefaultDatabase(ClientContext &context);
+ void SetDefaultDatabase(ClientContext &context, const string &new_value);

 optional_ptr<AttachedDatabase> GetDatabaseFromPath(ClientContext &context, const string &path);
 vector<reference<AttachedDatabase>> GetDatabases(ClientContext &context);
package/src/duckdb/src/include/duckdb/main/query_result.hpp CHANGED
@@ -12,6 +12,7 @@
 #include "duckdb/common/types/data_chunk.hpp"
 #include "duckdb/common/winapi.hpp"
 #include "duckdb/common/preserved_error.hpp"
+ #include "duckdb/common/arrow/arrow_options.hpp"

 namespace duckdb {
 struct BoxRendererConfig;
@@ -20,7 +21,11 @@ enum class QueryResultType : uint8_t { MATERIALIZED_RESULT, STREAM_RESULT, PENDI

 //! A set of properties from the client context that can be used to interpret the query result
 struct ClientProperties {
+ ClientProperties(string time_zone_p, ArrowOffsetSize arrow_offset_size_p)
+ : time_zone(std::move(time_zone_p)), arrow_offset_size(arrow_offset_size_p) {
+ }
 string time_zone;
+ ArrowOffsetSize arrow_offset_size;
 };

 class BaseQueryResult {
@@ -141,6 +146,7 @@ public:
 }
 }

+ static ArrowOptions GetArrowOptions(QueryResult &query_result);
 static string GetConfigTimezone(QueryResult &query_result);

 private:
package/src/duckdb/src/include/duckdb/main/settings.hpp CHANGED
@@ -397,6 +397,16 @@ struct PreserveInsertionOrder {
 static Value GetSetting(ClientContext &context);
 };

+ struct ExportLargeBufferArrow {
+ static constexpr const char *Name = "arrow_large_buffer_size";
+ static constexpr const char *Description =
+ "If arrow buffers for strings, blobs, uuids and bits should be exported using large buffers";
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
+ static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
+ static Value GetSetting(ClientContext &context);
+ };
+
 struct ProfilerHistorySize {
 static constexpr const char *Name = "profiler_history_size";
 static constexpr const char *Description = "Sets the profiler history size";
package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp CHANGED
@@ -93,9 +93,9 @@ public:

 unique_ptr<TableRef> Copy() override;

- //! Serializes a blob into a JoinRef
+ //! Serializes a blob into a PivotRef
 void Serialize(FieldWriter &serializer) const override;
- //! Deserializes a blob back into a JoinRef
+ //! Deserializes a blob back into a PivotRef
 static unique_ptr<TableRef> Deserialize(FieldReader &source);

 void FormatSerialize(FormatSerializer &serializer) const override;
package/src/duckdb/src/main/capi/arrow-c.cpp CHANGED
@@ -25,7 +25,7 @@ duckdb_state duckdb_query_arrow_schema(duckdb_arrow result, duckdb_arrow_schema
 }
 auto wrapper = reinterpret_cast<ArrowResultWrapper *>(result);
 ArrowConverter::ToArrowSchema((ArrowSchema *)*out_schema, wrapper->result->types, wrapper->result->names,
- wrapper->timezone_config);
+ wrapper->options);
 return DuckDBSuccess;
 }

@@ -41,7 +41,7 @@ duckdb_state duckdb_query_arrow_array(duckdb_arrow result, duckdb_arrow_array *o
 if (!wrapper->current_chunk || wrapper->current_chunk->size() == 0) {
 return DuckDBSuccess;
 }
- ArrowConverter::ToArrowArray(*wrapper->current_chunk, reinterpret_cast<ArrowArray *>(*out_array));
+ ArrowConverter::ToArrowArray(*wrapper->current_chunk, reinterpret_cast<ArrowArray *>(*out_array), wrapper->options);
 return DuckDBSuccess;
 }

@@ -96,9 +96,9 @@ duckdb_state duckdb_execute_prepared_arrow(duckdb_prepared_statement prepared_st
 auto arrow_wrapper = new ArrowResultWrapper();
 if (wrapper->statement->context->config.set_variables.find("TimeZone") ==
 wrapper->statement->context->config.set_variables.end()) {
- arrow_wrapper->timezone_config = "UTC";
+ arrow_wrapper->options.time_zone = "UTC";
 } else {
- arrow_wrapper->timezone_config =
+ arrow_wrapper->options.time_zone =
 wrapper->statement->context->config.set_variables["TimeZone"].GetValue<std::string>();
 }

package/src/duckdb/src/main/client_context.cpp CHANGED
@@ -319,6 +319,8 @@ shared_ptr<PreparedStatementData> ClientContext::CreatePreparedStatement(ClientC
 planner.parameter_data.emplace_back(value);
 }
 }
+
+ client_data->http_state = make_shared<HTTPState>();
 planner.CreatePlan(std::move(statement));
 D_ASSERT(planner.plan || !planner.properties.bound_all_parameters);
 profiler.EndPhase();
@@ -1153,9 +1155,8 @@ ParserOptions ClientContext::GetParserOptions() const {
 }

 ClientProperties ClientContext::GetClientProperties() const {
- ClientProperties properties;
- properties.time_zone = ClientConfig::GetConfig(*this).ExtractTimezone();
- return properties;
+ auto client_context = ClientConfig::GetConfig(*this);
+ return {client_context.ExtractTimezone(), db->config.options.arrow_offset_size};
 }

 bool ClientContext::ExecutionIsFinished() {
package/src/duckdb/src/main/config.cpp CHANGED
@@ -105,6 +105,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
 DUCKDB_GLOBAL(TempDirectorySetting),
 DUCKDB_GLOBAL(ThreadsSetting),
 DUCKDB_GLOBAL(UsernameSetting),
+ DUCKDB_GLOBAL(ExportLargeBufferArrow),
 DUCKDB_GLOBAL_ALIAS("user", UsernameSetting),
 DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting),
 DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting),
package/src/duckdb/src/main/database_manager.cpp CHANGED
@@ -87,6 +87,22 @@ const string &DatabaseManager::GetDefaultDatabase(ClientContext &context) {
 return default_entry.catalog;
 }

+ // LCOV_EXCL_START
+ void DatabaseManager::SetDefaultDatabase(ClientContext &context, const string &new_value) {
+ auto db_entry = GetDatabase(context, new_value);
+
+ if (!db_entry) {
+ throw InternalException("Database \"%s\" not found", new_value);
+ } else if (db_entry->IsTemporary()) {
+ throw InternalException("Cannot set the default database to a temporary database");
+ } else if (db_entry->IsSystem()) {
+ throw InternalException("Cannot set the default database to a system database");
+ }
+
+ default_database = new_value;
+ }
+ // LCOV_EXCL_STOP
+
 vector<reference<AttachedDatabase>> DatabaseManager::GetDatabases(ClientContext &context) {
 vector<reference<AttachedDatabase>> result;
 databases->Scan(context, [&](CatalogEntry &entry) { result.push_back(entry.Cast<AttachedDatabase>()); });
package/src/duckdb/src/main/query_result.cpp CHANGED
@@ -3,7 +3,6 @@
 #include "duckdb/common/vector.hpp"
 #include "duckdb/main/client_context.hpp"
 #include "duckdb/common/box_renderer.hpp"
-
 namespace duckdb {

 BaseQueryResult::BaseQueryResult(QueryResultType type, StatementType statement_type, StatementProperties properties_p,
@@ -71,7 +70,8 @@ idx_t CurrentChunk::RemainingSize() {
 return data_chunk->size() - position;
 }

- QueryResult::QueryResult(QueryResultType type, PreservedError error) : BaseQueryResult(type, std::move(error)) {
+ QueryResult::QueryResult(QueryResultType type, PreservedError error)
+ : BaseQueryResult(type, std::move(error)), client_properties("UTC", ArrowOffsetSize::REGULAR) {
 }

 QueryResult::~QueryResult() {
@@ -164,6 +164,10 @@ string QueryResult::HeaderToString() {
 return result;
 }

+ ArrowOptions QueryResult::GetArrowOptions(QueryResult &query_result) {
+ return {query_result.client_properties.arrow_offset_size, query_result.client_properties.time_zone};
+ }
+
 string QueryResult::GetConfigTimezone(QueryResult &query_result) {
 return query_result.client_properties.time_zone;
 }
package/src/duckdb/src/main/settings/settings.cpp CHANGED
@@ -895,6 +895,25 @@ Value PreserveInsertionOrder::GetSetting(ClientContext &context) {
 return Value::BOOLEAN(config.options.preserve_insertion_order);
 }

+ //===--------------------------------------------------------------------===//
+ // ExportLargeBufferArrow
+ //===--------------------------------------------------------------------===//
+ void ExportLargeBufferArrow::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
+ auto export_large_buffers_arrow = input.GetValue<bool>();
+
+ config.options.arrow_offset_size = export_large_buffers_arrow ? ArrowOffsetSize::LARGE : ArrowOffsetSize::REGULAR;
+ }
+
+ void ExportLargeBufferArrow::ResetGlobal(DatabaseInstance *db, DBConfig &config) {
+ config.options.arrow_offset_size = DBConfig().options.arrow_offset_size;
+ }
+
+ Value ExportLargeBufferArrow::GetSetting(ClientContext &context) {
+ auto &config = DBConfig::GetConfig(context);
+ bool export_large_buffers_arrow = config.options.arrow_offset_size == ArrowOffsetSize::LARGE;
+ return Value::BOOLEAN(export_large_buffers_arrow);
+ }
+
 //===--------------------------------------------------------------------===//
 // Profiler History Size
 //===--------------------------------------------------------------------===//
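The ExportLargeBufferArrow setting registered above is exposed under the name arrow_large_buffer_size and flips DBConfigOptions::arrow_offset_size between REGULAR and LARGE. Below is a hedged sketch of toggling and reading it back through this package's Node.js API; using SET and current_setting() for this is an assumption about how the global setting surfaces in SQL, not something shown in this diff.

import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');
// Ask DuckDB to export Arrow string/blob buffers with 64-bit (large) offsets.
db.exec("SET arrow_large_buffer_size = true;", function (err: Error | null) {
    if (err) throw err;
    // Read the setting back; expected to report true once the SET above succeeded.
    db.all("SELECT current_setting('arrow_large_buffer_size') AS large_buffers;",
        function (err2: Error | null, rows: duckdb.TableData) {
            if (err2) throw err2;
            console.log(rows);
        });
});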
package/src/duckdb/src/parser/parser.cpp CHANGED
@@ -1,18 +1,17 @@
 #include "duckdb/parser/parser.hpp"

- #include "duckdb/parser/transformer.hpp"
 #include "duckdb/parser/parsed_data/create_table_info.hpp"
+ #include "duckdb/parser/parser_extension.hpp"
+ #include "duckdb/parser/query_error_context.hpp"
+ #include "duckdb/parser/query_node/select_node.hpp"
 #include "duckdb/parser/statement/create_statement.hpp"
 #include "duckdb/parser/statement/extension_statement.hpp"
 #include "duckdb/parser/statement/select_statement.hpp"
 #include "duckdb/parser/statement/update_statement.hpp"
- #include "duckdb/parser/query_node/select_node.hpp"
 #include "duckdb/parser/tableref/expressionlistref.hpp"
- #include "postgres_parser.hpp"
- #include "duckdb/parser/query_error_context.hpp"
- #include "duckdb/parser/parser_extension.hpp"
-
+ #include "duckdb/parser/transformer.hpp"
 #include "parser/parser.hpp"
+ #include "postgres_parser.hpp"

 namespace duckdb {

@@ -124,6 +123,29 @@ end:
 return ReplaceUnicodeSpaces(query_str, new_query, unicode_spaces);
 }

+ vector<string> SplitQueryStringIntoStatements(const string &query) {
+ // Break sql string down into sql statements using the tokenizer
+ vector<string> query_statements;
+ auto tokens = Parser::Tokenize(query);
+ auto next_statement_start = 0;
+ for (idx_t i = 1; i < tokens.size(); ++i) {
+ auto &t_prev = tokens[i - 1];
+ auto &t = tokens[i];
+ if (t_prev.type == SimplifiedTokenType::SIMPLIFIED_TOKEN_OPERATOR) {
+ // LCOV_EXCL_START
+ for (idx_t c = t_prev.start; c <= t.start; ++c) {
+ if (query.c_str()[c] == ';') {
+ query_statements.emplace_back(query.substr(next_statement_start, t.start - next_statement_start));
+ next_statement_start = tokens[i].start;
+ }
+ }
+ // LCOV_EXCL_STOP
+ }
+ }
+ query_statements.emplace_back(query.substr(next_statement_start, query.size() - next_statement_start));
+ return query_statements;
+ }
+
 void Parser::ParseQuery(const string &query) {
 Transformer transformer(options);
 string parser_error;
@@ -138,39 +160,77 @@ void Parser::ParseQuery(const string &query) {
 }
 {
 PostgresParser::SetPreserveIdentifierCase(options.preserve_identifier_case);
- PostgresParser parser;
- parser.Parse(query);
- if (parser.success) {
- if (!parser.parse_tree) {
- // empty statement
- return;
- }
-
- // if it succeeded, we transform the Postgres parse tree into a list of
- // SQLStatements
- transformer.TransformParseTree(parser.parse_tree, statements);
- } else {
- parser_error = QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1);
- }
- }
- if (!parser_error.empty()) {
- if (options.extensions) {
- for (auto &ext : *options.extensions) {
- D_ASSERT(ext.parse_function);
- auto result = ext.parse_function(ext.parser_info.get(), query);
- if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
- auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
- statement->stmt_length = query.size();
- statement->stmt_location = 0;
- statements.push_back(std::move(statement));
+ bool parsing_succeed = false;
+ // Creating a new scope to prevent multiple PostgresParser destructors being called
+ // which led to some memory issues
+ {
+ PostgresParser parser;
+ parser.Parse(query);
+ if (parser.success) {
+ if (!parser.parse_tree) {
+ // empty statement
 return;
 }
- if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
- throw ParserException(result.error);
+
+ // if it succeeded, we transform the Postgres parse tree into a list of
+ // SQLStatements
+ transformer.TransformParseTree(parser.parse_tree, statements);
+ parsing_succeed = true;
+ } else {
+ parser_error = QueryErrorContext::Format(query, parser.error_message, parser.error_location - 1);
+ }
+ }
+ // If DuckDB fails to parse the entire sql string, break the string down into individual statements
+ // using ';' as the delimiter so that parser extensions can parse the statement
+ if (parsing_succeed) {
+ // no-op
+ // return here would require refactoring into another function. o.w. will just no-op in order to run wrap up
+ // code at the end of this function
+ } else if (!options.extensions || options.extensions->empty()) {
+ throw ParserException(parser_error);
+ } else {
+ // split sql string into statements and re-parse using extension
+ auto query_statements = SplitQueryStringIntoStatements(query);
+ for (auto const &query_statement : query_statements) {
+ PostgresParser another_parser;
+ another_parser.Parse(query_statement);
+ // LCOV_EXCL_START
+ // first see if DuckDB can parse this individual query statement
+ if (another_parser.success) {
+ if (!another_parser.parse_tree) {
+ // empty statement
+ continue;
+ }
+ transformer.TransformParseTree(another_parser.parse_tree, statements);
+ } else {
+ // let extensions parse the statement which DuckDB failed to parse
+ bool parsed_single_statement = false;
+ for (auto &ext : *options.extensions) {
+ D_ASSERT(!parsed_single_statement);
+ D_ASSERT(ext.parse_function);
+ auto result = ext.parse_function(ext.parser_info.get(), query_statement);
+ if (result.type == ParserExtensionResultType::PARSE_SUCCESSFUL) {
+ auto statement = make_uniq<ExtensionStatement>(ext, std::move(result.parse_data));
+ statement->stmt_length = query_statement.size();
+ statement->stmt_location = 0;
+ statements.push_back(std::move(statement));
+ parsed_single_statement = true;
+ break;
+ } else if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) {
+ throw ParserException(result.error);
+ } else {
+ // We move to the next one!
+ }
+ }
+ if (!parsed_single_statement) {
+ parser_error = QueryErrorContext::Format(query, another_parser.error_message,
+ another_parser.error_location - 1);
+ throw ParserException(parser_error);
+ }
 }
+ // LCOV_EXCL_STOP
 }
 }
- throw ParserException(parser_error);
 }
 if (!statements.empty()) {
 auto &last_statement = statements.back();
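For orientation, the splitting strategy introduced by SplitQueryStringIntoStatements (cut the query at ';' characters found between tokens and keep the trailing fragment) can be approximated in TypeScript. This is a deliberately simplified stand-in: it splits on every semicolon, whereas the C++ helper walks Parser::Tokenize() output and therefore does not split on semicolons that appear inside quoted literals.

// Simplified illustration of the statement-splitting idea; not the real tokenizer-based helper.
function splitQueryStringIntoStatements(query: string): string[] {
    const statements: string[] = [];
    let start = 0;
    for (let i = 0; i < query.length; i++) {
        if (query[i] === ';') {
            statements.push(query.slice(start, i + 1));
            start = i + 1;
        }
    }
    const tail = query.slice(start).trim();
    if (tail.length > 0) {
        statements.push(tail);
    }
    return statements;
}

// A statement DuckDB parses, followed by one only a parser extension could handle.
console.log(splitQueryStringIntoStatements("SELECT 1; CREATE TABLE t(i INTEGER); CUSTOM SYNTAX HERE"));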
package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp CHANGED
@@ -97,6 +97,8 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi
 subquery = make_uniq<BoundSubqueryRef>(std::move(binder), std::move(node));
 seen_subquery = true;
 arguments.emplace_back(LogicalTypeId::TABLE);
+ parameters.emplace_back(
+ Value(LogicalType::INVALID)); // this is a dummy value so the lengths of arguments and parameter match
 continue;
 }

package/test/extension.test.ts CHANGED
@@ -3,6 +3,10 @@ import {Database, DuckDbError, HttpError, TableData} from '..';
 import * as fs from 'fs';
 import * as assert from 'assert';
 import * as path from 'path';
+ import chaiAsPromised from 'chai-as-promised';
+ import chai, {expect} from "chai";
+
+ chai.use(chaiAsPromised);

 const extension_base_path = "../../../build/release/extension";

@@ -26,6 +30,13 @@ function isHTTPException(err: DuckDbError): err is HttpError {

 // Note: test will pass on http request failing due to connection issues.
 const test_httpfs = async function (db: duckdb.Database) {
+ const promise = new Promise<void>((resolve, reject) =>
+ db.exec(`SELECT *
+ FROM parquet_scan('http://localhost:1234/whatever.parquet')`, function (err: DuckDbError | null) {
+ err ? reject(err) : resolve()
+ }));
+ await chai.assert.isRejected(promise, 'IO Error: Connection error for HTTP HEAD');
+
 await new Promise<void>((resolve, reject) => db.all("SELECT id, first_name, last_name FROM PARQUET_SCAN('https://raw.githubusercontent.com/cwida/duckdb/master/data/parquet-testing/userdata1.parquet') LIMIT 3;", function (err: null | Error, rows: TableData) {
 if (err) {
 if (err.message.startsWith("Unable to connect to URL")) {
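The chai-as-promised devDependency added in package.json is what provides chai.assert.isRejected once chai.use(chaiAsPromised) has run; the expect form imported above works the same way. Below is a small self-contained sketch of both assertion styles; the rejected promise and its error message are stand-ins, not the ones produced by this test.

import chai, {expect} from 'chai';
import chaiAsPromised from 'chai-as-promised';

chai.use(chaiAsPromised);

it('rejects with an IO error', async function () {
    // Stand-in for the wrapped db.exec() call above: a promise that is known to reject.
    const failing = Promise.reject(new Error('IO Error: Connection error for HTTP HEAD'));
    // Both forms come from chai-as-promised and must be awaited.
    await chai.assert.isRejected(failing, 'IO Error');
    await expect(failing).to.be.rejectedWith('IO Error');
});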