duckdb 0.7.2-dev1146.0 → 0.7.2-dev1188.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -832,6 +832,46 @@ static bool HasGlob(const string &str) {
832
832
  }
833
833
  return false;
834
834
  }
835
// Returns true when the given path segment is the recursive "crawl" wildcard.
// Only a segment that is exactly two asterisks qualifies; segments that merely
// contain "**" (for example "a**" or "***") are not treated as a crawl.
static bool IsCrawl(const string &glob) {
	return glob.compare("**") == 0;
}
839
// Returns true when more than one of the path segments in `splits` is exactly "**".
// Segments that only contain "**" as a substring are not counted.
static bool HasMultipleCrawl(const vector<string> &splits) {
	size_t crawl_segments = 0;
	for (const auto &segment : splits) {
		if (segment == "**") {
			crawl_segments++;
		}
	}
	return crawl_segments > 1;
}
842
// Reports whether `path` refers to a symbolic link.
// Returns false when the path cannot be inspected (lstat fails, or the
// attribute query yields INVALID_FILE_ATTRIBUTES on Windows).
static bool IsSymbolicLink(const string &path) {
#ifndef _WIN32
	// lstat inspects the link itself rather than the file it points to
	struct stat link_info;
	if (lstat(path.c_str(), &link_info) == -1) {
		return false;
	}
	return S_ISLNK(link_info.st_mode);
#else
	// NOTE(review): the reparse-point attribute is also set for non-symlink reparse
	// points (e.g. directory junctions) - confirm that treating all of them as links is intended
	const auto file_attributes = WindowsGetFileAttributes(path);
	return file_attributes != INVALID_FILE_ATTRIBUTES && (file_attributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0;
#endif
}
853
+
854
+ static void RecursiveGlobDirectories(FileSystem &fs, const string &path, vector<string> &result, bool match_directory,
855
+ bool join_path) {
856
+
857
+ fs.ListFiles(path, [&](const string &fname, bool is_directory) {
858
+ string concat;
859
+ if (join_path) {
860
+ concat = fs.JoinPath(path, fname);
861
+ } else {
862
+ concat = fname;
863
+ }
864
+ if (IsSymbolicLink(concat)) {
865
+ return;
866
+ }
867
+ if (is_directory == match_directory) {
868
+ result.push_back(concat);
869
+ }
870
+ if (is_directory) {
871
+ RecursiveGlobDirectories(fs, concat, result, match_directory, true);
872
+ }
873
+ });
874
+ }
835
875
 
836
876
  static void GlobFilesInternal(FileSystem &fs, const string &path, const string &glob, bool match_directory,
837
877
  vector<string> &result, bool join_path) {
@@ -933,6 +973,10 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
933
973
  }
934
974
  }
935
975
 
976
+ if (HasMultipleCrawl(splits)) {
977
+ throw IOException("Cannot use multiple \'**\' in one path");
978
+ }
979
+
936
980
  for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) {
937
981
  bool is_last_chunk = i + 1 == splits.size();
938
982
  bool has_glob = HasGlob(splits[i]);
@@ -949,14 +993,27 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
949
993
  }
950
994
  }
951
995
  } else {
952
- if (previous_directories.empty()) {
953
- // no previous directories: list in the current path
954
- GlobFilesInternal(*this, ".", splits[i], !is_last_chunk, result, false);
996
+ if (IsCrawl(splits[i])) {
997
+ if (!is_last_chunk) {
998
+ result = previous_directories;
999
+ }
1000
+ if (previous_directories.empty()) {
1001
+ RecursiveGlobDirectories(*this, ".", result, !is_last_chunk, false);
1002
+ } else {
1003
+ for (auto &prev_dir : previous_directories) {
1004
+ RecursiveGlobDirectories(*this, prev_dir, result, !is_last_chunk, true);
1005
+ }
1006
+ }
955
1007
  } else {
956
- // previous directories
957
- // we iterate over each of the previous directories, and apply the glob of the current directory
958
- for (auto &prev_directory : previous_directories) {
959
- GlobFilesInternal(*this, prev_directory, splits[i], !is_last_chunk, result, true);
1008
+ if (previous_directories.empty()) {
1009
+ // no previous directories: list in the current path
1010
+ GlobFilesInternal(*this, ".", splits[i], !is_last_chunk, result, false);
1011
+ } else {
1012
+ // previous directories
1013
+ // we iterate over each of the previous directories, and apply the glob of the current directory
1014
+ for (auto &prev_directory : previous_directories) {
1015
+ GlobFilesInternal(*this, prev_directory, splits[i], !is_last_chunk, result, true);
1016
+ }
960
1017
  }
961
1018
  }
962
1019
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev1146"
2
+ #define DUCKDB_VERSION "0.7.2-dev1188"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "b8cf6a98e2"
5
+ #define DUCKDB_SOURCE_ID "d1518bdfe8"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"