duckdb 0.9.1-dev97.0 → 0.9.2-dev10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/.github/workflows/HighPriorityIssues.yml +36 -0
  2. package/.github/workflows/NodeJS.yml +250 -0
  3. package/LICENSE +7 -0
  4. package/Makefile +3 -9
  5. package/README.md +2 -2
  6. package/binding.gyp +8 -8
  7. package/lib/duckdb.d.ts +18 -0
  8. package/lib/duckdb.js +12 -0
  9. package/package.json +4 -4
  10. package/scripts/install_node.sh +21 -0
  11. package/scripts/node_build.sh +40 -0
  12. package/scripts/node_build_win.sh +21 -0
  13. package/scripts/node_version.sh +33 -0
  14. package/src/database.cpp +30 -1
  15. package/src/duckdb/extension/icu/icu-makedate.cpp +1 -1
  16. package/src/duckdb/extension/icu/icu-strptime.cpp +0 -2
  17. package/src/duckdb/extension/icu/icu_extension.cpp +0 -1
  18. package/src/duckdb/extension/json/json_functions/json_create.cpp +27 -14
  19. package/src/duckdb/extension/json/json_functions/json_transform.cpp +26 -14
  20. package/src/duckdb/extension/json/json_functions.cpp +1 -10
  21. package/src/duckdb/extension/parquet/column_reader.cpp +26 -1
  22. package/src/duckdb/extension/parquet/column_writer.cpp +10 -1
  23. package/src/duckdb/extension/parquet/include/column_reader.hpp +2 -0
  24. package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +49 -0
  25. package/src/duckdb/extension/parquet/parquet_extension.cpp +3 -4
  26. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +3 -4
  27. package/src/duckdb/src/common/arrow/appender/list_data.cpp +2 -2
  28. package/src/duckdb/src/common/arrow/appender/map_data.cpp +15 -10
  29. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +2 -2
  30. package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -2
  31. package/src/duckdb/src/common/arrow/arrow_appender.cpp +26 -7
  32. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +3 -3
  33. package/src/duckdb/src/common/exception.cpp +60 -84
  34. package/src/duckdb/src/common/preserved_error.cpp +20 -0
  35. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  36. package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
  37. package/src/duckdb/src/execution/expression_executor_state.cpp +8 -2
  38. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +1 -1
  39. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +2 -0
  40. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +5 -5
  41. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +2 -2
  43. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -4
  44. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +2 -4
  45. package/src/duckdb/src/function/function_binder.cpp +1 -1
  46. package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -1
  47. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  48. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +3 -1
  50. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +2 -1
  51. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +3 -0
  52. package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -3
  54. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -4
  56. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +10 -10
  57. package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -0
  58. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  60. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -0
  61. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +3 -0
  62. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -2
  63. package/src/duckdb/src/main/capi/arrow-c.cpp +7 -4
  64. package/src/duckdb/src/main/config.cpp +14 -0
  65. package/src/duckdb/src/main/extension/extension_install.cpp +14 -12
  66. package/src/duckdb/src/optimizer/filter_pushdown.cpp +1 -0
  67. package/src/duckdb/src/optimizer/pushdown/pushdown_distinct.cpp +19 -0
  68. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +4 -2
  69. package/src/duckdb/src/parser/transform/statement/transform_create_sequence.cpp +10 -5
  70. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +5 -7
  71. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +4 -2
  72. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +17 -14
  73. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +5 -12
  74. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +15 -1
  75. package/src/duckdb/src/planner/bound_parameter_map.cpp +16 -5
  76. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -5
  77. package/src/duckdb/src/planner/planner.cpp +1 -1
  78. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +13 -9
  79. package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
  80. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  81. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  82. package/src/duckdb_node.cpp +29 -8
  83. package/src/duckdb_node.hpp +2 -0
  84. package/src/statement.cpp +13 -4
  85. package/test/arrow.test.ts +3 -1
  86. package/test/parquet.test.ts +1 -1
  87. package/test/test_all_types.test.ts +13 -12
  88. package/test/tokenize.test.ts +74 -0
  89. package/test/userdata1.parquet +0 -0
  90. package/{configure → vendor} +1 -1
  91. package/{configure.py → vendor.py} +12 -1
  92. package/duckdb_extension_config.cmake +0 -10
@@ -17,6 +17,8 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
17
17
  info->name = qname.name;
18
18
 
19
19
  if (stmt.options) {
20
+ int64_t default_start_value = info->start_value;
21
+ bool has_start_value = false;
20
22
  unordered_set<SequenceInfo, EnumClassHash> used;
21
23
  duckdb_libpgquery::PGListCell *cell = nullptr;
22
24
  for_each_cell(cell, stmt.options->head) {
@@ -51,10 +53,10 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
51
53
  throw ParserException("Increment must not be zero");
52
54
  }
53
55
  if (info->increment < 0) {
54
- info->start_value = info->max_value = -1;
56
+ default_start_value = info->max_value = -1;
55
57
  info->min_value = NumericLimits<int64_t>::Minimum();
56
58
  } else {
57
- info->start_value = info->min_value = 1;
59
+ default_start_value = info->min_value = 1;
58
60
  info->max_value = NumericLimits<int64_t>::Maximum();
59
61
  }
60
62
  } else if (opt_name == "minvalue") {
@@ -68,7 +70,7 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
68
70
 
69
71
  info->min_value = opt_value;
70
72
  if (info->increment > 0) {
71
- info->start_value = info->min_value;
73
+ default_start_value = info->min_value;
72
74
  }
73
75
  } else if (opt_name == "maxvalue") {
74
76
  if (used.find(SequenceInfo::SEQ_MAX) != used.end()) {
@@ -81,7 +83,7 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
81
83
 
82
84
  info->max_value = opt_value;
83
85
  if (info->increment < 0) {
84
- info->start_value = info->max_value;
86
+ default_start_value = info->max_value;
85
87
  }
86
88
  } else if (opt_name == "start") {
87
89
  if (used.find(SequenceInfo::SEQ_START) != used.end()) {
@@ -91,7 +93,7 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
91
93
  if (nodef) {
92
94
  continue;
93
95
  }
94
-
96
+ has_start_value = true;
95
97
  info->start_value = opt_value;
96
98
  } else if (opt_name == "cycle") {
97
99
  if (used.find(SequenceInfo::SEQ_CYCLE) != used.end()) {
@@ -107,6 +109,9 @@ unique_ptr<CreateStatement> Transformer::TransformCreateSequence(duckdb_libpgque
107
109
  throw ParserException("Unrecognized option \"%s\" for CREATE SEQUENCE", opt_name);
108
110
  }
109
111
  }
112
+ if (!has_start_value) {
113
+ info->start_value = default_start_value;
114
+ }
110
115
  }
111
116
  info->temporary = !stmt.sequence->relpersistence;
112
117
  info->on_conflict = TransformOnConflict(stmt.onconflict);
@@ -34,13 +34,11 @@ BindResult ExpressionBinder::BindExpression(BetweenExpression &expr, idx_t depth
34
34
  input = BoundCastExpression::AddCastToType(context, std::move(input), input_type);
35
35
  lower = BoundCastExpression::AddCastToType(context, std::move(lower), input_type);
36
36
  upper = BoundCastExpression::AddCastToType(context, std::move(upper), input_type);
37
- if (input_type.id() == LogicalTypeId::VARCHAR) {
38
- // handle collation
39
- auto collation = StringType::GetCollation(input_type);
40
- input = PushCollation(context, std::move(input), collation, false);
41
- lower = PushCollation(context, std::move(lower), collation, false);
42
- upper = PushCollation(context, std::move(upper), collation, false);
43
- }
37
+ // handle collation
38
+ PushCollation(context, input, input_type, false);
39
+ PushCollation(context, lower, input_type, false);
40
+ PushCollation(context, upper, input_type, false);
41
+
44
42
  if (!input->HasSideEffects() && !input->HasParameter() && !input->HasSubquery()) {
45
43
  // the expression does not have side effects and can be copied: create two comparisons
46
44
  // the reason we do this is that individual comparisons are easier to handle in optimizers
@@ -18,8 +18,10 @@ BindResult ExpressionBinder::BindExpression(CollateExpression &expr, idx_t depth
18
18
  throw BinderException("collations are only supported for type varchar");
19
19
  }
20
20
  // Validate the collation, but don't use it
21
- PushCollation(context, child->Copy(), expr.collation, false);
22
- child->return_type = LogicalType::VARCHAR_COLLATION(expr.collation);
21
+ auto child_copy = child->Copy();
22
+ auto collation_type = LogicalType::VARCHAR_COLLATION(expr.collation);
23
+ PushCollation(context, child_copy, collation_type, false);
24
+ child->return_type = collation_type;
23
25
  return BindResult(std::move(child));
24
26
  }
25
27
 
@@ -18,20 +18,25 @@
18
18
 
19
19
  namespace duckdb {
20
20
 
21
- unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> source,
22
- const string &collation_p, bool equality_only) {
21
+ bool ExpressionBinder::PushCollation(ClientContext &context, unique_ptr<Expression> &source,
22
+ const LogicalType &sql_type, bool equality_only) {
23
+ if (sql_type.id() != LogicalTypeId::VARCHAR) {
24
+ // only VARCHAR columns require collation
25
+ return false;
26
+ }
23
27
  // replace default collation with system collation
28
+ auto str_collation = StringType::GetCollation(sql_type);
24
29
  string collation;
25
- if (collation_p.empty()) {
30
+ if (str_collation.empty()) {
26
31
  collation = DBConfig::GetConfig(context).options.collation;
27
32
  } else {
28
- collation = collation_p;
33
+ collation = str_collation;
29
34
  }
30
35
  collation = StringUtil::Lower(collation);
31
36
  // bind the collation
32
37
  if (collation.empty() || collation == "binary" || collation == "c" || collation == "posix") {
33
- // binary collation: just skip
34
- return source;
38
+ // no collation or binary collation: skip
39
+ return false;
35
40
  }
36
41
  auto &catalog = Catalog::GetSystemCatalog(context);
37
42
  auto splits = StringUtil::Split(StringUtil::Lower(collation), ".");
@@ -60,11 +65,12 @@ unique_ptr<Expression> ExpressionBinder::PushCollation(ClientContext &context, u
60
65
  auto function = function_binder.BindScalarFunction(collation_entry.function, std::move(children));
61
66
  source = std::move(function);
62
67
  }
63
- return source;
68
+ return true;
64
69
  }
65
70
 
66
71
  void ExpressionBinder::TestCollation(ClientContext &context, const string &collation) {
67
- PushCollation(context, make_uniq<BoundConstantExpression>(Value("")), collation);
72
+ auto expr = make_uniq_base<Expression, BoundConstantExpression>(Value(""));
73
+ PushCollation(context, expr, LogicalType::VARCHAR_COLLATION(collation));
68
74
  }
69
75
 
70
76
  LogicalType BoundComparisonExpression::BindComparison(LogicalType left_type, LogicalType right_type) {
@@ -134,12 +140,9 @@ BindResult ExpressionBinder::BindExpression(ComparisonExpression &expr, idx_t de
134
140
  right = BoundCastExpression::AddCastToType(context, std::move(right), input_type,
135
141
  input_type.id() == LogicalTypeId::ENUM);
136
142
 
137
- if (input_type.id() == LogicalTypeId::VARCHAR) {
138
- // handle collation
139
- auto collation = StringType::GetCollation(input_type);
140
- left = PushCollation(context, std::move(left), collation, expr.type == ExpressionType::COMPARE_EQUAL);
141
- right = PushCollation(context, std::move(right), collation, expr.type == ExpressionType::COMPARE_EQUAL);
142
- }
143
+ PushCollation(context, left, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
144
+ PushCollation(context, right, input_type, expr.type == ExpressionType::COMPARE_EQUAL);
145
+
143
146
  // now create the bound comparison expression
144
147
  return BindResult(make_uniq<BoundComparisonExpression>(expr.type, std::move(left), std::move(right)));
145
148
  }
@@ -222,10 +222,7 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector<LogicalType>
222
222
  for (auto &target_distinct : distinct.target_distincts) {
223
223
  auto &bound_colref = target_distinct->Cast<BoundColumnRefExpression>();
224
224
  const auto &sql_type = sql_types[bound_colref.binding.column_index];
225
- if (sql_type.id() == LogicalTypeId::VARCHAR) {
226
- target_distinct = ExpressionBinder::PushCollation(context, std::move(target_distinct),
227
- StringType::GetCollation(sql_type), true);
228
- }
225
+ ExpressionBinder::PushCollation(context, target_distinct, sql_type, true);
229
226
  }
230
227
  break;
231
228
  }
@@ -253,10 +250,7 @@ void Binder::BindModifierTypes(BoundQueryNode &result, const vector<LogicalType>
253
250
  D_ASSERT(bound_colref.binding.column_index < sql_types.size());
254
251
  const auto &sql_type = sql_types[bound_colref.binding.column_index];
255
252
  bound_colref.return_type = sql_types[bound_colref.binding.column_index];
256
- if (sql_type.id() == LogicalTypeId::VARCHAR) {
257
- order_node.expression = ExpressionBinder::PushCollation(context, std::move(order_node.expression),
258
- StringType::GetCollation(sql_type));
259
- }
253
+ ExpressionBinder::PushCollation(context, order_node.expression, sql_type);
260
254
  }
261
255
  break;
262
256
  }
@@ -389,9 +383,8 @@ unique_ptr<BoundQueryNode> Binder::BindSelectNode(SelectNode &statement, unique_
389
383
  bool contains_subquery = bound_expr_ref.HasSubquery();
390
384
 
391
385
  // push a potential collation, if necessary
392
- auto collated_expr = ExpressionBinder::PushCollation(context, std::move(bound_expr),
393
- StringType::GetCollation(group_type), true);
394
- if (!contains_subquery && !collated_expr->Equals(bound_expr_ref)) {
386
+ bool requires_collation = ExpressionBinder::PushCollation(context, bound_expr, group_type, true);
387
+ if (!contains_subquery && requires_collation) {
395
388
  // if there is a collation on a group x, we should group by the collated expr,
396
389
  // but also push a first(x) aggregate in case x is selected (uncollated)
397
390
  info.collated_groups[i] = result->aggregates.size();
@@ -405,7 +398,7 @@ unique_ptr<BoundQueryNode> Binder::BindSelectNode(SelectNode &statement, unique_
405
398
  auto function = function_binder.BindAggregateFunction(first_fun, std::move(first_children));
406
399
  result->aggregates.push_back(std::move(function));
407
400
  }
408
- result->groups.group_expressions.push_back(std::move(collated_expr));
401
+ result->groups.group_expressions.push_back(std::move(bound_expr));
409
402
 
410
403
  // in the unbound expression we DO bind the table names of any ColumnRefs
411
404
  // we do this to make sure that "table.a" and "a" are treated the same
@@ -68,6 +68,16 @@ void Binder::BindSchemaOrCatalog(string &catalog, string &schema) {
68
68
  BindSchemaOrCatalog(context, catalog, schema);
69
69
  }
70
70
 
71
+ const string Binder::BindCatalog(string &catalog) {
72
+ auto &db_manager = DatabaseManager::Get(context);
73
+ optional_ptr<AttachedDatabase> database = db_manager.GetDatabase(context, catalog);
74
+ if (database) {
75
+ return db_manager.GetDatabase(context, catalog).get()->GetName();
76
+ } else {
77
+ return db_manager.GetDefaultDatabase(context);
78
+ }
79
+ }
80
+
71
81
  SchemaCatalogEntry &Binder::BindSchema(CreateInfo &info) {
72
82
  BindSchemaOrCatalog(info.catalog, info.schema);
73
83
  if (IsInvalidCatalog(info.catalog) && info.temporary) {
@@ -456,9 +466,13 @@ BoundStatement Binder::Bind(CreateStatement &stmt) {
456
466
 
457
467
  auto catalog_type = stmt.info->type;
458
468
  switch (catalog_type) {
459
- case CatalogType::SCHEMA_ENTRY:
469
+ case CatalogType::SCHEMA_ENTRY: {
470
+ auto &base = stmt.info->Cast<CreateInfo>();
471
+ auto catalog = BindCatalog(base.catalog);
472
+ properties.modified_databases.insert(catalog);
460
473
  result.plan = make_uniq<LogicalCreate>(LogicalOperatorType::LOGICAL_CREATE_SCHEMA, std::move(stmt.info));
461
474
  break;
475
+ }
462
476
  case CatalogType::VIEW_ENTRY: {
463
477
  auto &base = stmt.info->Cast<CreateViewInfo>();
464
478
  // bind the schema
@@ -43,19 +43,30 @@ shared_ptr<BoundParameterData> BoundParameterMap::CreateOrGetData(const string &
43
43
  }
44
44
 
45
45
  unique_ptr<BoundParameterExpression> BoundParameterMap::BindParameterExpression(ParameterExpression &expr) {
46
- auto &identifier = expr.identifier;
47
- auto return_type = GetReturnType(identifier);
48
46
 
47
+ auto &identifier = expr.identifier;
49
48
  D_ASSERT(!parameter_data.count(identifier));
50
49
 
51
50
  // No value has been supplied yet,
52
- // We return a shared pointer to an object that will get populated wtih a Value later
53
- // When the BoundParameterExpression get executed, this will be used to get the corresponding value
51
+ // We return a shared pointer to an object that will get populated with a Value later
52
+ // When the BoundParameterExpression gets executed, this will be used to get the corresponding value
54
53
  auto param_data = CreateOrGetData(identifier);
55
54
  auto bound_expr = make_uniq<BoundParameterExpression>(identifier);
55
+
56
56
  bound_expr->parameter_data = param_data;
57
- bound_expr->return_type = return_type;
58
57
  bound_expr->alias = expr.alias;
58
+
59
+ auto param_type = param_data->return_type;
60
+ auto identifier_type = GetReturnType(identifier);
61
+
62
+ // we found a type for this bound parameter, but now we found another occurrence with the same identifier,
63
+ // a CAST around this consecutive occurrence might swallow the unknown type of this consecutive occurrence,
64
+ // then, if we do not rebind, we potentially have unknown data types during execution
65
+ if (identifier_type == LogicalType::UNKNOWN && param_type != LogicalType::UNKNOWN) {
66
+ rebind = true;
67
+ }
68
+
69
+ bound_expr->return_type = identifier_type;
59
70
  return bound_expr;
60
71
  }
61
72
 
@@ -98,11 +98,8 @@ BindResult BaseSelectBinder::BindColumnRef(unique_ptr<ParsedExpression> &expr_pt
98
98
  " This is not yet supported.",
99
99
  colref.column_names[0]);
100
100
  }
101
- auto result = BindResult(node.select_list[index]->Copy());
102
- if (result.expression->type == ExpressionType::BOUND_COLUMN_REF) {
103
- auto &result_expr = result.expression->Cast<BoundColumnRefExpression>();
104
- result_expr.depth = depth;
105
- }
101
+ auto copied_expression = node.original_expressions[index]->Copy();
102
+ result = BindExpression(copied_expression, depth, false);
106
103
  return result;
107
104
  }
108
105
  }
@@ -76,7 +76,7 @@ void Planner::CreatePlan(SQLStatement &statement) {
76
76
  }
77
77
  this->properties = binder->properties;
78
78
  this->properties.parameter_count = parameter_count;
79
- properties.bound_all_parameters = parameters_resolved;
79
+ properties.bound_all_parameters = !bound_parameters.rebind && parameters_resolved;
80
80
 
81
81
  Planner::VerifyPlan(context, plan, bound_parameters.GetParametersPtr());
82
82
 
@@ -252,6 +252,7 @@ void DuckTransactionManager::RollbackTransaction(Transaction *transaction_p) {
252
252
  }
253
253
 
254
254
  void DuckTransactionManager::RemoveTransaction(DuckTransaction &transaction) noexcept {
255
+ bool changes_made = transaction.ChangesMade();
255
256
  // remove the transaction from the list of active transactions
256
257
  idx_t t_index = active_transactions.size();
257
258
  // check for the lowest and highest start time in the list of transactions
@@ -275,15 +276,18 @@ void DuckTransactionManager::RemoveTransaction(DuckTransaction &transaction) noe
275
276
  D_ASSERT(t_index != active_transactions.size());
276
277
  auto current_transaction = std::move(active_transactions[t_index]);
277
278
  auto current_query = DatabaseManager::Get(db).ActiveQueryNumber();
278
- if (transaction.commit_id != 0) {
279
- // the transaction was committed, add it to the list of recently
280
- // committed transactions
281
- recently_committed_transactions.push_back(std::move(current_transaction));
282
- } else {
283
- // the transaction was aborted, but we might still need its information
284
- // add it to the set of transactions awaiting GC
285
- current_transaction->highest_active_query = current_query;
286
- old_transactions.push_back(std::move(current_transaction));
279
+ if (changes_made) {
280
+ // if the transaction made any changes we need to keep it around
281
+ if (transaction.commit_id != 0) {
282
+ // the transaction was committed, add it to the list of recently
283
+ // committed transactions
284
+ recently_committed_transactions.push_back(std::move(current_transaction));
285
+ } else {
286
+ // the transaction was aborted, but we might still need its information
287
+ // add it to the set of transactions awaiting GC
288
+ current_transaction->highest_active_query = current_query;
289
+ old_transactions.push_back(std::move(current_transaction));
290
+ }
287
291
  }
288
292
  // remove the transaction from the set of currently active transactions
289
293
  active_transactions.erase(active_transactions.begin() + t_index);
@@ -92,7 +92,8 @@ struct Encoding {
92
92
  DELTA_BINARY_PACKED = 5,
93
93
  DELTA_LENGTH_BYTE_ARRAY = 6,
94
94
  DELTA_BYTE_ARRAY = 7,
95
- RLE_DICTIONARY = 8
95
+ RLE_DICTIONARY = 8,
96
+ BYTE_STREAM_SPLIT = 9,
96
97
  };
97
98
  };
98
99
 
@@ -348,17 +348,17 @@
348
348
 
349
349
  #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp"
350
350
 
351
- #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
351
+ #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
352
352
 
353
- #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
353
+ #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
354
354
 
355
- #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
355
+ #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
356
356
 
357
- #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
357
+ #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
358
358
 
359
359
  #include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
360
360
 
361
361
  #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
362
362
 
363
- #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
363
+ #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
364
364
 
@@ -2,6 +2,8 @@
2
2
 
3
3
  #include "src/optimizer/pushdown/pushdown_cross_product.cpp"
4
4
 
5
+ #include "src/optimizer/pushdown/pushdown_distinct.cpp"
6
+
5
7
  #include "src/optimizer/pushdown/pushdown_filter.cpp"
6
8
 
7
9
  #include "src/optimizer/pushdown/pushdown_get.cpp"
@@ -12,15 +12,36 @@ NodeDuckDB::NodeDuckDB(Napi::Env env, Napi::Object exports) {
12
12
  statement_constructor = node_duckdb::Statement::Init(env, exports);
13
13
  query_result_constructor = node_duckdb::QueryResult::Init(env, exports);
14
14
 
15
- exports.DefineProperties({
16
- DEFINE_CONSTANT_INTEGER(exports, node_duckdb::Database::DUCKDB_NODEJS_ERROR, ERROR) DEFINE_CONSTANT_INTEGER(
15
+ auto token_type_enum = Napi::Object::New(env);
16
+
17
+ token_type_enum.Set("IDENTIFIER", 0);
18
+ token_type_enum.Set("NUMERIC_CONSTANT", 1);
19
+ token_type_enum.Set("STRING_CONSTANT", 2);
20
+ token_type_enum.Set("OPERATOR", 3);
21
+ token_type_enum.Set("KEYWORD", 4);
22
+ token_type_enum.Set("COMMENT", 5);
23
+
24
+ // TypeScript enums expose an inverse mapping.
25
+ token_type_enum.Set((uint32_t)0, "IDENTIFIER");
26
+ token_type_enum.Set((uint32_t)1, "NUMERIC_CONSTANT");
27
+ token_type_enum.Set((uint32_t)2, "STRING_CONSTANT");
28
+ token_type_enum.Set((uint32_t)3, "OPERATOR");
29
+ token_type_enum.Set((uint32_t)4, "KEYWORD");
30
+ token_type_enum.Set((uint32_t)5, "COMMENT");
31
+
32
+ token_type_enum_ref = Napi::ObjectReference::New(token_type_enum);
33
+
34
+ exports.DefineProperties(
35
+ {DEFINE_CONSTANT_INTEGER(exports, node_duckdb::Database::DUCKDB_NODEJS_ERROR, ERROR) DEFINE_CONSTANT_INTEGER(
17
36
  exports, node_duckdb::Database::DUCKDB_NODEJS_READONLY, OPEN_READONLY) // same as SQLite
18
- DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_READWRITE) // ignored
19
- DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_CREATE) // ignored
20
- DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_FULLMUTEX) // ignored
21
- DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_SHAREDCACHE) // ignored
22
- DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_PRIVATECACHE) // ignored
23
- });
37
+ DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_READWRITE) // ignored
38
+ DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_CREATE) // ignored
39
+ DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_FULLMUTEX) // ignored
40
+ DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_SHAREDCACHE) // ignored
41
+ DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_PRIVATECACHE) // ignored
42
+
43
+ Napi::PropertyDescriptor::Value("TokenType", token_type_enum,
44
+ static_cast<napi_property_attributes>(napi_enumerable | napi_configurable))});
24
45
  }
25
46
 
26
47
  NODE_API_ADDON(NodeDuckDB);
@@ -23,6 +23,7 @@ public:
23
23
  Napi::FunctionReference connection_constructor;
24
24
  Napi::FunctionReference statement_constructor;
25
25
  Napi::FunctionReference query_result_constructor;
26
+ Napi::ObjectReference token_type_enum_ref;
26
27
  };
27
28
 
28
29
  namespace node_duckdb {
@@ -109,6 +110,7 @@ public:
109
110
  Napi::Value Interrupt(const Napi::CallbackInfo &info);
110
111
  Napi::Value Close(const Napi::CallbackInfo &info);
111
112
  Napi::Value RegisterReplacementScan(const Napi::CallbackInfo &info);
113
+ Napi::Value Tokenize(const Napi::CallbackInfo &info);
112
114
 
113
115
  public:
114
116
  constexpr static int DUCKDB_NODEJS_ERROR = -1;
package/src/statement.cpp CHANGED
@@ -187,6 +187,15 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
187
187
  const auto scale = duckdb::Interval::SECS_PER_DAY * duckdb::Interval::MSECS_PER_SEC;
188
188
  value = Napi::Date::New(env, double(dval.GetValue<int32_t>() * scale));
189
189
  } break;
190
+ case duckdb::LogicalTypeId::TIMESTAMP_NS: {
191
+ value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / (duckdb::Interval::MICROS_PER_MSEC * 1000)));
192
+ } break;
193
+ case duckdb::LogicalTypeId::TIMESTAMP_MS: {
194
+ value = Napi::Date::New(env, double(dval.GetValue<int64_t>()));
195
+ } break;
196
+ case duckdb::LogicalTypeId::TIMESTAMP_SEC: {
197
+ value = Napi::Date::New(env, double(dval.GetValue<int64_t>() * duckdb::Interval::MSECS_PER_SEC));
198
+ } break;
190
199
  case duckdb::LogicalTypeId::TIMESTAMP:
191
200
  case duckdb::LogicalTypeId::TIMESTAMP_TZ: {
192
201
  value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / duckdb::Interval::MICROS_PER_MSEC));
@@ -377,10 +386,10 @@ struct RunPreparedTask : public Task {
377
386
  // query results, the string data is owned by the QueryResult
378
387
  auto result_ref_ptr = new std::shared_ptr<duckdb::QueryResult>(result_ptr);
379
388
 
380
- auto array_buffer =
381
- Napi::ArrayBuffer::New(env, (void *)blob.GetData(), blob.GetSize(), deleter, result_ref_ptr);
389
+ auto array_buffer = Napi::Buffer<char>::NewOrCopy(env, (char *)blob.GetData(), blob.GetSize(),
390
+ deleter, result_ref_ptr);
382
391
 
383
- auto typed_array = Napi::Uint8Array::New(env, blob.GetSize(), array_buffer, 0);
392
+ auto typed_array = Napi::TypedArrayOf<char>(env, array_buffer);
384
393
 
385
394
  // TODO we should handle this in duckdb probably
386
395
  if (is_header) {
@@ -708,7 +717,7 @@ struct GetNextArrowIpcTask : public Task {
708
717
  delete static_cast<unique_ptr<duckdb::DataChunk> *>(hint);
709
718
  };
710
719
  auto array_buffer =
711
- Napi::ArrayBuffer::New(env, (void *)blob.GetData(), blob.GetSize(), deleter, data_chunk_ptr);
720
+ Napi::Buffer<char>::NewOrCopy(env, (char *)blob.GetData(), blob.GetSize(), deleter, data_chunk_ptr);
712
721
 
713
722
  deferred.Resolve(array_buffer);
714
723
  }
@@ -9,7 +9,9 @@ describe('arrow IPC API fails neatly when extension not loaded', function() {
9
9
  let conn;
10
10
  before((done) => {
11
11
  db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}, () => {
12
- done();
12
+ db.all('SET autoload_known_extensions=false;', () => {
13
+ done();
14
+ });
13
15
  });
14
16
  });
15
17
 
@@ -8,7 +8,7 @@ describe('can query parquet', function() {
8
8
  });
9
9
 
10
10
  it('should be able to read parquet files', function(done) {
11
- db.run("select * from parquet_scan('../../data/parquet-testing/userdata1.parquet')", done);
11
+ db.run("select * from parquet_scan('test/userdata1.parquet')", done);
12
12
  });
13
13
 
14
14
  });
@@ -22,10 +22,12 @@ function timedelta(obj: { days: number; micros: number; months: number }) {
22
22
  const replacement_values: Record<string, string> = {
23
23
  timestamp:
24
24
  "'1990-01-01 00:00:00'::TIMESTAMP, '9999-12-31 23:59:59'::TIMESTAMP, NULL::TIMESTAMP",
25
- // TODO: fix these, they are currently being returned as strings
26
- // timestamp_s: "'1990-01-01 00:00:00'::TIMESTAMP_S",
27
- // timestamp_ns: "'1990-01-01 00:00:00'::TIMESTAMP_NS",
28
- // timestamp_ms: "'1990-01-01 00:00:00'::TIMESTAMP_MS",
25
+ timestamp_s:
26
+ "'1990-01-01 00:00:00'::TIMESTAMP_S, '9999-12-31 23:59:59'::TIMESTAMP_S, NULL::TIMESTAMP_S",
27
+ // note: timestamp_ns does not support extreme values
28
+ timestamp_ns: "'1990-01-01 00:00:00'::TIMESTAMP_NS, NULL::TIMESTAMP_NS",
29
+ timestamp_ms:
30
+ "'1990-01-01 00:00:00'::TIMESTAMP_MS, '9999-12-31 23:59:59'::TIMESTAMP_MS, NULL::TIMESTAMP_MS",
29
31
  timestamp_tz:
30
32
  "'1990-01-01 00:00:00Z'::TIMESTAMPTZ, '9999-12-31 23:59:59.999999Z'::TIMESTAMPTZ, NULL::TIMESTAMPTZ",
31
33
  date: "'1990-01-01'::DATE, '9999-12-31'::DATE, NULL::DATE",
@@ -157,7 +159,7 @@ const correct_answer_map: Record<string, any[]> = {
157
159
  null,
158
160
  ],
159
161
  map: ["{}", "{key1=🦆🦆🦆🦆🦆🦆, key2=goose}", null],
160
- union: ['Frank', '5', null],
162
+ union: ["Frank", "5", null],
161
163
 
162
164
  time_tz: ["00:00:00-1559", "23:59:59.999999+1559", null],
163
165
  interval: [
@@ -176,16 +178,15 @@ const correct_answer_map: Record<string, any[]> = {
176
178
  null,
177
179
  ],
178
180
  date: [new Date("1990-01-01"), new Date("9999-12-31"), null],
179
- timestamp_s: ["290309-12-22 (BC) 00:00:00", "294247-01-10 04:00:54", null],
180
-
181
- timestamp_ns: [
182
- "1677-09-21 00:12:43.145225",
183
- "2262-04-11 23:47:16.854775",
181
+ timestamp_s: [
182
+ new Date(Date.UTC(1990, 0, 1)),
183
+ new Date("9999-12-31T23:59:59.000Z"),
184
184
  null,
185
185
  ],
186
+ timestamp_ns: [new Date(Date.UTC(1990, 0, 1)), null],
186
187
  timestamp_ms: [
187
- "290309-12-22 (BC) 00:00:00",
188
- "294247-01-10 04:00:54.775",
188
+ new Date(Date.UTC(1990, 0, 1)),
189
+ new Date("9999-12-31T23:59:59.000Z"),
189
190
  null,
190
191
  ],
191
192
  timestamp_tz: [
@@ -0,0 +1,74 @@
1
+ import * as assert from 'assert';
2
+ import * as duckdb from '..';
3
+
4
+ describe('tokenize', function () {
5
+ it('should return correct tokens for a single statement', function () {
6
+ const db = new duckdb.Database(':memory:');
7
+ const output = db.tokenize('select 1;');
8
+ assert.deepStrictEqual(output, {
9
+ offsets: [0, 7, 8],
10
+ types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR]
11
+ });
12
+ });
13
+ it('should return correct tokens for a multiple statements', function () {
14
+ const db = new duckdb.Database(':memory:');
15
+ const output = db.tokenize('select 1; select 2;');
16
+ assert.deepStrictEqual(output, {
17
+ offsets: [0, 7, 8, 10, 17, 18],
18
+ types: [
19
+ duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR,
20
+ duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR
21
+ ]
22
+ });
23
+ });
24
+ it('should return no tokens for an empty string', function () {
25
+ const db = new duckdb.Database(':memory:');
26
+ const output = db.tokenize('');
27
+ assert.deepStrictEqual(output, {
28
+ offsets: [],
29
+ types: []
30
+ });
31
+ });
32
+ it('should handle quoted semicolons in string constants', function () {
33
+ const db = new duckdb.Database(':memory:');
34
+ const output = db.tokenize(`select ';';`);
35
+ assert.deepStrictEqual(output, {
36
+ offsets: [0, 7, 10],
37
+ types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.STRING_CONSTANT, duckdb.TokenType.OPERATOR]
38
+ });
39
+ });
40
+ it('should handle quoted semicolons in identifiers', function () {
41
+ const db = new duckdb.Database(':memory:');
42
+ const output = db.tokenize(`from ";";`);
43
+ assert.deepStrictEqual(output, {
44
+ offsets: [0, 5, 8],
45
+ types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.IDENTIFIER, duckdb.TokenType.OPERATOR]
46
+ });
47
+ });
48
+ it('should handle comments', function () {
49
+ const db = new duckdb.Database(':memory:');
50
+ const output = db.tokenize(`select /* comment */ 1`);
51
+ // Note that the tokenizer doesn't return tokens for comments.
52
+ assert.deepStrictEqual(output, {
53
+ offsets: [0, 21],
54
+ types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT]
55
+ });
56
+ });
57
+ it('should handle invalid syntax', function () {
58
+ const db = new duckdb.Database(':memory:');
59
+ const output = db.tokenize(`selec 1`);
60
+ // The misspelled keyword is scanned as an identifier.
61
+ assert.deepStrictEqual(output, {
62
+ offsets: [0, 6],
63
+ types: [duckdb.TokenType.IDENTIFIER, duckdb.TokenType.NUMERIC_CONSTANT]
64
+ });
65
+ });
66
+ it('should support inverse TokenType mapping', function () {
67
+ assert.equal(duckdb.TokenType[duckdb.TokenType.IDENTIFIER], "IDENTIFIER");
68
+ assert.equal(duckdb.TokenType[duckdb.TokenType.NUMERIC_CONSTANT], "NUMERIC_CONSTANT");
69
+ assert.equal(duckdb.TokenType[duckdb.TokenType.STRING_CONSTANT], "STRING_CONSTANT");
70
+ assert.equal(duckdb.TokenType[duckdb.TokenType.OPERATOR], "OPERATOR");
71
+ assert.equal(duckdb.TokenType[duckdb.TokenType.KEYWORD], "KEYWORD");
72
+ assert.equal(duckdb.TokenType[duckdb.TokenType.COMMENT], "COMMENT");
73
+ });
74
+ });
Binary file
@@ -5,4 +5,4 @@ set -x
5
5
 
6
6
  cd `dirname $0`
7
7
 
8
- python3 configure.py
8
+ python3 vendor.py $@