duckdb 0.5.1-dev271.0 → 0.5.1-dev282.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.1-dev271.0",
4
+ "version": "0.5.1-dev282.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -18941,7 +18941,8 @@ namespace duckdb {
18941
18941
 
18942
18942
  static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
18943
18943
  unordered_map<string, column_t> &column_map,
18944
- bool filename_col, bool hive_partition_cols) {
18944
+ duckdb_re2::RE2 &compiled_regex, bool filename_col,
18945
+ bool hive_partition_cols) {
18945
18946
  unordered_map<column_t, string> result;
18946
18947
 
18947
18948
  if (filename_col) {
@@ -18952,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
18952
18953
  }
18953
18954
 
18954
18955
  if (hive_partition_cols) {
18955
- auto partitions = HivePartitioning::Parse(filename);
18956
+ auto partitions = HivePartitioning::Parse(filename, compiled_regex);
18956
18957
  for (auto &partition : partitions) {
18957
18958
  auto lookup_column_id = column_map.find(partition.first);
18958
18959
  if (lookup_column_id != column_map.end()) {
@@ -18990,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
18990
18991
  // - s3://bucket/var1=value1/bla/bla/var2=value2
18991
18992
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
18992
18993
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
18993
- std::map<string, string> HivePartitioning::Parse(string &filename) {
18994
- std::map<string, string> result;
18994
+ const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
18995
18995
 
18996
- string regex = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
18996
+ std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
18997
+ std::map<string, string> result;
18997
18998
  duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it
18998
18999
 
18999
19000
  string var;
@@ -19004,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
19004
19005
  return result;
19005
19006
  }
19006
19007
 
19008
+ std::map<string, string> HivePartitioning::Parse(string &filename) {
19009
+ duckdb_re2::RE2 regex(REGEX_STRING);
19010
+ return Parse(filename, regex);
19011
+ }
19012
+
19007
19013
  // TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
19008
19014
  // currently, only expressions that cannot be evaluated during pushdown are removed.
19009
19015
  void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
@@ -19011,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
19011
19017
  bool hive_enabled, bool filename_enabled) {
19012
19018
  vector<string> pruned_files;
19013
19019
  vector<unique_ptr<Expression>> pruned_filters;
19020
+ duckdb_re2::RE2 regex(REGEX_STRING);
19014
19021
 
19015
19022
  if ((!filename_enabled && !hive_enabled) || filters.empty()) {
19016
19023
  return;
@@ -19019,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
19019
19026
  for (idx_t i = 0; i < files.size(); i++) {
19020
19027
  auto &file = files[i];
19021
19028
  bool should_prune_file = false;
19022
- auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
19029
+ auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);
19023
19030
 
19024
19031
  FilterCombiner combiner;
19025
19032
  for (auto &filter : filters) {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "705fc18d5"
15
- #define DUCKDB_VERSION "v0.5.1-dev271"
14
+ #define DUCKDB_SOURCE_ID "3d23491fa"
15
+ #define DUCKDB_VERSION "v0.5.1-dev282"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -26998,12 +26998,16 @@ class HivePartitioning {
26998
26998
  public:
26999
26999
  //! Parse a filename that follows the hive partitioning scheme
27000
27000
  DUCKDB_API static std::map<string, string> Parse(string &filename);
27001
+ DUCKDB_API static std::map<string, string> Parse(string &filename, duckdb_re2::RE2 &regex);
27001
27002
  //! Prunes a list of filenames based on a set of filters, can be used by TableFunctions in the
27002
27003
  //! pushdown_complex_filter function to skip files with filename-based filters. Also removes the filters that always
27003
27004
  //! evaluate to true.
27004
27005
  DUCKDB_API static void ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
27005
27006
  unordered_map<string, column_t> &column_map, idx_t table_index,
27006
27007
  bool hive_enabled, bool filename_enabled);
27008
+
27009
+ //! The (uncompiled) regex pattern string used to match hive partitions; callers compile it into a duckdb_re2::RE2
27010
+ DUCKDB_API static const string REGEX_STRING;
27007
27011
  };
27008
27012
 
27009
27013
  } // namespace duckdb