npm - duckdb - Versions diffs - 0.5.1-dev271.0 → 0.5.1-dev282.0 - Mend

duckdb 0.5.1-dev271.0 → 0.5.1-dev282.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/duckdb.cpp +13 -6
package/src/duckdb.hpp +6 -2
package/src/parquet-amalgamation.cpp +20142 -20140

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.5.1-dev271.0",
+  "version": "0.5.1-dev282.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED Viewed

@@ -18941,7 +18941,8 @@ namespace duckdb {
 static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
                                                             unordered_map<string, column_t> &column_map,
-                                                            bool filename_col, bool hive_partition_cols) {
+                                                            duckdb_re2::RE2 &compiled_regex, bool filename_col,
+                                                            bool hive_partition_cols) {
 	unordered_map<column_t, string> result;
 	if (filename_col) {
@@ -18952,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
 	}
 	if (hive_partition_cols) {
-		auto partitions = HivePartitioning::Parse(filename);
+		auto partitions = HivePartitioning::Parse(filename, compiled_regex);
 		for (auto &partition : partitions) {
 			auto lookup_column_id = column_map.find(partition.first);
 			if (lookup_column_id != column_map.end()) {
@@ -18990,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
 // 	- s3://bucket/var1=value1/bla/bla/var2=value2
 //  - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
 //  - folder/folder/folder/../var1=value1/etc/.//var2=value2
-std::map<string, string> HivePartitioning::Parse(string &filename) {
-	std::map<string, string> result;
+const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
-	string regex = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
+	std::map<string, string> result;
 	duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it
 	string var;
@@ -19004,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
 	return result;
 }
+std::map<string, string> HivePartitioning::Parse(string &filename) {
+	duckdb_re2::RE2 regex(REGEX_STRING);
+	return Parse(filename, regex);
+}
 // TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
 //		 currently, only expressions that cannot be evaluated during pushdown are removed.
 void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
@@ -19011,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
                                               bool hive_enabled, bool filename_enabled) {
 	vector<string> pruned_files;
 	vector<unique_ptr<Expression>> pruned_filters;
+	duckdb_re2::RE2 regex(REGEX_STRING);
 	if ((!filename_enabled && !hive_enabled) || filters.empty()) {
 		return;
@@ -19019,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
 	for (idx_t i = 0; i < files.size(); i++) {
 		auto &file = files[i];
 		bool should_prune_file = false;
-		auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
+		auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);
 		FilterCombiner combiner;
 		for (auto &filter : filters) {

package/src/duckdb.hpp CHANGED Viewed

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "705fc18d5"
-#define DUCKDB_VERSION "v0.5.1-dev271"
+#define DUCKDB_SOURCE_ID "3d23491fa"
+#define DUCKDB_VERSION "v0.5.1-dev282"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -26998,12 +26998,16 @@ class HivePartitioning {
 public:
 	//! Parse a filename that follows the hive partitioning scheme
 	DUCKDB_API static std::map<string, string> Parse(string &filename);
+	DUCKDB_API static std::map<string, string> Parse(string &filename, duckdb_re2::RE2 &regex);
 	//! Prunes a list of filenames based on a set of filters, can be used by TableFunctions in the
 	//! pushdown_complex_filter function to skip files with filename-based filters. Also removes the filters that always
 	//! evaluate to true.
 	DUCKDB_API static void ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
 	                                              unordered_map<string, column_t> &column_map, idx_t table_index,
 	                                              bool hive_enabled, bool filename_enabled);
+	//! The (uncompiled) regex pattern string used to match hive partitions; callers compile it into a duckdb_re2::RE2
+	DUCKDB_API static const string REGEX_STRING;
 };
 } // namespace duckdb