abstract-utilities 0.2.2.493__py3-none-any.whl → 0.2.2.496__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. abstract_utilities/__init__.py +0 -1
  2. abstract_utilities/file_utils/__init__.py +1 -2
  3. abstract_utilities/file_utils/imports/constants.py +6 -0
  4. abstract_utilities/file_utils/imports/imports.py +1 -1
  5. abstract_utilities/file_utils/imports/module_imports.py +1 -2
  6. abstract_utilities/file_utils/module_imports.py +12 -0
  7. abstract_utilities/file_utils/src/__init__.py +10 -0
  8. abstract_utilities/file_utils/src/file_filters.py +110 -0
  9. abstract_utilities/file_utils/src/file_reader.py +607 -0
  10. abstract_utilities/file_utils/src/file_utils.py +279 -0
  11. abstract_utilities/file_utils/src/filter_params.py +155 -0
  12. abstract_utilities/file_utils/src/find_collect.py +154 -0
  13. abstract_utilities/file_utils/src/initFunctionsGen.py +286 -0
  14. abstract_utilities/file_utils/src/map_utils.py +29 -0
  15. abstract_utilities/file_utils/src/pdf_utils.py +300 -0
  16. abstract_utilities/file_utils/src/type_checks.py +92 -0
  17. abstract_utilities/import_utils/__init__.py +2 -0
  18. abstract_utilities/import_utils/imports/__init__.py +4 -0
  19. abstract_utilities/import_utils/imports/constants.py +2 -0
  20. abstract_utilities/import_utils/imports/imports.py +4 -0
  21. abstract_utilities/import_utils/imports/module_imports.py +6 -0
  22. abstract_utilities/import_utils/imports/utils.py +30 -0
  23. abstract_utilities/import_utils/src/__init__.py +7 -0
  24. abstract_utilities/import_utils/src/clean_imports.py +122 -0
  25. abstract_utilities/import_utils/src/dot_utils.py +60 -0
  26. abstract_utilities/import_utils/src/extract_utils.py +42 -0
  27. abstract_utilities/import_utils/src/import_functions.py +46 -0
  28. abstract_utilities/import_utils/src/import_utils.py +299 -0
  29. abstract_utilities/import_utils/src/package_utils/__init__.py +139 -0
  30. abstract_utilities/import_utils/src/package_utils/context_utils.py +27 -0
  31. abstract_utilities/import_utils/src/package_utils/import_collectors.py +53 -0
  32. abstract_utilities/import_utils/src/package_utils/path_utils.py +28 -0
  33. abstract_utilities/import_utils/src/package_utils/safe_import.py +27 -0
  34. abstract_utilities/import_utils/src/package_utils.py +140 -0
  35. abstract_utilities/import_utils/src/sysroot_utils.py +57 -0
  36. abstract_utilities/path_utils.py +1 -12
  37. abstract_utilities/read_write_utils.py +31 -14
  38. {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/METADATA +1 -1
  39. {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/RECORD +42 -11
  40. {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/top_level.txt +1 -0
  41. imports/__init__.py +36 -0
  42. {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/WHEEL +0 -0
@@ -0,0 +1,279 @@
1
+
2
+ from .filter_params import *
3
+ from ..imports import *
4
+ ##from abstract_utilities import make_list,get_media_exts, is_media_type
5
def get_allowed_predicate(allowed=None):
    """Resolve ``allowed`` into a callable path predicate.

    Args:
        allowed: Controls which predicate is returned.
            * ``False`` (or any value equal to it, e.g. ``0``): no filtering;
              the returned predicate accepts every argument.
            * ``True``, ``None`` or any other falsy value: a predicate built
              by ``make_allowed_predicate(define_defaults())``.
            * any other truthy value (normally a callable): returned as-is.

    Returns:
        A callable taking a path and returning ``bool``.
    """
    # ``==`` rather than ``is`` is deliberate: it preserves the historical
    # behaviour of treating ``0`` exactly like ``False``.
    if allowed == False:  # noqa: E712
        def allow_everything(*args):
            return True
        return allow_everything

    if allowed == True or not allowed:  # noqa: E712
        # make_allowed_predicate requires a ScanConfig; the previous
        # zero-argument call raised TypeError on this path.
        return make_allowed_predicate(define_defaults())
    return allowed
15
def get_globs(items, recursive: bool = True, allowed=None):
    """Glob ``<item>/**/*`` for every truthy entry of ``items``.

    Args:
        items: A single value or a collection; normalised with ``make_list``.
        recursive: Forwarded to ``glob.glob``.
        allowed: Optional predicate; when truthy, only truthy matches for
            which ``allowed(match)`` is truthy are kept.

    Returns:
        A flat list of the matches of all items, in iteration order.
    """
    collected = []
    for base in make_list(items):
        if not base:
            continue
        matches = glob.glob(os.path.join(base, "**/*"), recursive=recursive)
        if allowed:
            matches = [match for match in matches if match and allowed(match)]
        collected.extend(matches)
    return collected
25
def get_allowed_files(items, allowed=True):
    """Return the entries of ``items`` that are existing files and pass the
    predicate resolved from ``allowed`` via ``get_allowed_predicate``."""
    predicate = get_allowed_predicate(allowed=allowed)
    kept = []
    for item in items:
        if item and os.path.isfile(item) and predicate(item):
            kept.append(item)
    return kept
28
def get_allowed_dirs(items, allowed=False):
    """Return the entries of ``items`` that are existing directories and pass
    the predicate resolved from ``allowed`` via ``get_allowed_predicate``."""
    predicate = get_allowed_predicate(allowed=allowed)
    kept = []
    for item in items:
        if item and os.path.isdir(item) and predicate(item):
            kept.append(item)
    return kept
31
+
32
def get_filtered_files(items, allowed=None, files=None):
    """Glob under ``items`` and return the new, allowed files.

    Args:
        items: Root(s) handed to ``get_globs``.
        allowed: Predicate selector, resolved by ``get_allowed_predicate``.
        files: Paths that are already known and must not be returned again.
            Defaults to "none known" (previously a shared mutable ``[]``).

    Returns:
        Globbed paths that are files, are not in ``files`` and pass the
        predicate, in glob order.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    # Build the lookup once: O(1) membership instead of scanning a list for
    # every globbed path.
    known = set(files) if files else set()
    return [
        path
        for path in get_globs(items)
        if path and os.path.isfile(path) and path not in known and predicate(path)
    ]
36
def get_filtered_dirs(items, allowed=None, dirs=None):
    """Glob under ``items`` and return the new, allowed directories.

    Args:
        items: Root(s) handed to ``get_globs``.
        allowed: Predicate selector, resolved by ``get_allowed_predicate``.
        dirs: Directories that are already known and must not be returned
            again. Defaults to "none known" (previously a shared mutable
            ``[]``).

    Returns:
        Globbed paths that are directories, are not in ``dirs`` and pass the
        predicate, in glob order.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    # Build the lookup once: O(1) membership instead of scanning a list for
    # every globbed path.
    known = set(dirs) if dirs else set()
    return [
        path
        for path in get_globs(items)
        if path and os.path.isdir(path) and path not in known and predicate(path)
    ]
40
+
41
def get_all_allowed_files(items, allowed=None):
    """Collect files listed directly in ``items`` plus the files found under
    the directories returned by ``get_all_allowed_dirs(items)``.

    NOTE(review): ``allowed`` is only forwarded to ``get_filtered_files``;
    the directory discovery and the direct-file pass use their own default
    predicates - confirm this is intended.
    """
    discovered_dirs = get_all_allowed_dirs(items)
    collected = get_allowed_files(items)
    for folder in discovered_dirs:
        # Pass the running list so already-collected paths are not repeated.
        collected += get_filtered_files(folder, allowed=allowed, files=collected)
    return collected
48
def get_all_allowed_dirs(items, allowed=None):
    """Return the allowed directories globbed beneath every directory in
    ``items``.

    The entries of ``items`` that are directories are taken with the default
    behaviour of ``get_allowed_dirs``; ``allowed`` (resolved through
    ``get_allowed_predicate``) filters what is found underneath them.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    found = []
    for root_dir in get_allowed_dirs(items):
        # ``found`` doubles as the "already seen" list for the next call.
        found += get_filtered_dirs(root_dir, allowed=predicate, dirs=found)
    return found
55
def get_files_and_dirs(
    directory: str,
    cfg: Optional["ScanConfig"] = None,
    allowed_exts: Optional[Set[str]] = False,
    unallowed_exts: Optional[Set[str]] = False,
    allowed_types: Optional[Set[str]] = False,
    exclude_types: Optional[Set[str]] = False,
    allowed_dirs: Optional[List[str]] = False,
    exclude_dirs: Optional[List[str]] = False,
    allowed_patterns: Optional[List[str]] = False,
    exclude_patterns: Optional[List[str]] = False,
    add = False,
    recursive: bool = True,
    include_files: bool = True,
    **kwargs
    ):
    """List the allowed directories and files under ``directory``.

    Args:
        directory: Root path, or several roots (normalised by ``make_list``).
        cfg: Ready-made scan configuration; when falsy one is built with
            ``define_defaults`` from the filter arguments below.
        allowed_exts, unallowed_exts, allowed_types, exclude_types,
        allowed_dirs, exclude_dirs, allowed_patterns, exclude_patterns, add:
            Forwarded unchanged to ``define_defaults``.
        recursive: When true, candidates come from ``get_globs``; otherwise
            only the direct ``os.listdir`` entries of each root are used.
        include_files: When false, the returned file list is empty.
        **kwargs: Accepted but not used by this function.

    Returns:
        ``(dirs, files)`` - two lists of paths.
    """
    if not cfg:
        cfg = define_defaults(
            allowed_exts=allowed_exts,
            unallowed_exts=unallowed_exts,
            allowed_types=allowed_types,
            exclude_types=exclude_types,
            allowed_dirs=allowed_dirs,
            exclude_dirs=exclude_dirs,
            allowed_patterns=allowed_patterns,
            exclude_patterns=exclude_patterns,
            add=add,
        )
    predicate = make_allowed_predicate(cfg)

    if recursive:
        candidates = get_globs(directory, recursive=recursive, allowed=predicate)
    else:
        candidates = []
        for base in make_list(directory):
            for entry in os.listdir(base):
                candidates.append(os.path.join(base, entry))

    found_dirs = get_allowed_dirs(candidates, allowed=predicate)
    found_files = []
    if include_files:
        found_files = get_allowed_files(candidates, allowed=predicate)
    return found_dirs, found_files
95
def make_allowed_predicate(cfg: Optional["ScanConfig"] = None) -> Callable[[str], bool]:
    """Build a predicate telling whether a path is allowed under ``cfg``.

    Args:
        cfg: Object exposing ``allowed_exts``, ``unallowed_exts``,
            ``allowed_types``, ``exclude_types``, ``allowed_dirs``,
            ``exclude_dirs``, ``allowed_patterns`` and ``exclude_patterns``.
            When ``None``, ``define_defaults()`` supplies it.

    Returns:
        ``allowed(path) -> bool``. All comparisons are made against the
        lower-cased path / file name. ``cfg`` is read on every call.
    """
    if cfg is None:
        cfg = define_defaults()

    def _only_wildcard(values) -> bool:
        # Container-agnostic: the defaults mix lists and sets, so comparing
        # against a literal ``["*"]`` / ``{"*"}`` silently failed whenever
        # the container type differed (a ``["*"]`` type list used to reject
        # every path).
        return set(values) == {"*"}

    def allowed(path: str) -> bool:
        p = Path(path)
        name = p.name.lower()
        path_str = str(p).lower()

        # --------------------
        # A) directory filters
        # --------------------
        for dpat in cfg.exclude_dirs or ():
            dpat_l = dpat.lower()
            # NOTE(review): this is a substring test on the whole path, so a
            # pattern such as "old" also rejects ".../folder/x.py". Kept
            # as-is to preserve behaviour - confirm whether per-component
            # matching was intended.
            if dpat_l in path_str:
                return False
            if fnmatch.fnmatch(name, dpat_l) and p.is_dir():
                return False

        if cfg.allowed_dirs and not _only_wildcard(cfg.allowed_dirs):
            # must match at least one allowed dir pattern
            if not any(
                fnmatch.fnmatch(path_str, f"*{dpat.lower()}*")
                for dpat in cfg.allowed_dirs
            ):
                return False

        # --------------------
        # B) pattern filters (matched against the file name only)
        # --------------------
        if cfg.allowed_patterns and not _only_wildcard(cfg.allowed_patterns):
            if not any(fnmatch.fnmatch(name, pat.lower()) for pat in cfg.allowed_patterns):
                return False

        for pat in cfg.exclude_patterns or ():
            if fnmatch.fnmatch(name, pat.lower()):
                return False

        # --------------------
        # C) extension filters (only applied to existing files)
        # --------------------
        if p.is_file():
            ext = p.suffix.lower()
            if cfg.allowed_exts and ext not in cfg.allowed_exts:
                return False
            if cfg.unallowed_exts and ext in cfg.unallowed_exts:
                return False

        # --------------------
        # D) type filters (substring test against the path)
        # --------------------
        if cfg.allowed_types and not _only_wildcard(cfg.allowed_types):
            if not any(t in path_str for t in cfg.allowed_types):
                return False
        if cfg.exclude_types and not _only_wildcard(cfg.exclude_types):
            if any(t in path_str for t in cfg.exclude_types):
                return False

        return True

    return allowed
157
+ def correct_kwargs(**kwargs):
158
+ for key,values in kwargs.items():
159
+ if key.startswith('excluded'):
160
+ post_fix = key.split('_')[-1]
161
+ correct_key = f'exclude_{post_fix}'
162
+ correct_vals = kwargs.get(correct_key)
163
+ kwargs[correct_key]=combine_params(correct_vals,values)
164
+ del kwargs[key]
165
+
166
def collect_filepaths(
    directory: List[str],
    cfg: Optional["ScanConfig"] = None,
    allowed_exts: Optional[Set[str]] = False,
    unallowed_exts: Optional[Set[str]] = False,
    allowed_types: Optional[Set[str]] = False,
    exclude_types: Optional[Set[str]] = False,
    allowed_dirs: Optional[List[str]] = False,
    exclude_dirs: Optional[List[str]] = False,
    allowed_patterns: Optional[List[str]] = False,
    exclude_patterns: Optional[List[str]] = False,
    add=False,
    allowed: Optional[Callable[[str], bool]] = None,
    **kwargs
    ) -> List[str]:
    """Collect the allowed files beneath one or more root directories.

    Args:
        directory: Root path or list of root paths (normalised by
            ``make_list``); entries that are not directories are ignored.
        cfg: Ready-made scan configuration. When falsy, one is built with
            ``define_defaults`` from the filter arguments.
        allowed_exts, unallowed_exts, allowed_types, exclude_types,
        allowed_dirs, exclude_dirs, allowed_patterns, exclude_patterns, add:
            Filter settings forwarded to ``define_defaults``.
        allowed: Explicit predicate; when given it replaces the one derived
            from ``cfg``.
        **kwargs: May carry ``excluded_*`` aliases, which ``correct_kwargs``
            folds into the matching ``exclude_*`` setting; other extra keys
            are ignored.

    Returns:
        De-duplicated file paths in discovery order.
    """
    filters = {
        "allowed_exts": allowed_exts,
        "unallowed_exts": unallowed_exts,
        "allowed_types": allowed_types,
        "exclude_types": exclude_types,
        "allowed_dirs": allowed_dirs,
        "exclude_dirs": exclude_dirs,
        "allowed_patterns": allowed_patterns,
        "exclude_patterns": exclude_patterns,
    }
    # The corrected values used to be thrown away, so ``excluded_*`` aliases
    # never reached the configuration. Only the known filter keys are taken
    # back; unknown extras must not be passed to define_defaults.
    corrected = correct_kwargs(**filters, **kwargs)
    if corrected:
        filters = {key: corrected.get(key, value) for key, value in filters.items()}

    if not cfg:
        cfg = define_defaults(add=add, **filters)
    allowed = allowed or make_allowed_predicate(cfg)

    roots = [root for root in make_list(directory) if root]
    root_dirs = get_allowed_dirs(roots, allowed=allowed)

    # get_globs already globs each root with "**/*" (recursive), so every
    # file in every sub-directory is covered here. The former extra pass that
    # re-globbed each sub-directory could only rediscover these same paths.
    files = get_allowed_files(get_globs(root_dirs), allowed=allowed)

    # de-dupe while preserving order (overlapping roots yield repeats)
    return list(dict.fromkeys(files))
220
+
221
+
222
+ def _fast_walk(
223
+ root: Path,
224
+ exts: Iterable[str],
225
+ skip_dirs: Iterable[str] = (),
226
+ skip_patterns: Iterable[str] = (),
227
+ ) -> List[Path]:
228
+ exts = tuple(exts)
229
+ skip_dirs = set(sd.lower() for sd in skip_dirs or ())
230
+ skip_patterns = tuple(sp.lower() for sp in (skip_patterns or ()))
231
+
232
+ out = []
233
+ for p in root.rglob("*"):
234
+ # skip directories by name hit
235
+ if p.is_dir():
236
+ name = p.name.lower()
237
+ if name in skip_dirs:
238
+ # rglob doesn't let us prune mid-iteration cleanly; we just won't collect under it
239
+ continue
240
+ # nothing to collect for dirs
241
+ continue
242
+
243
+ # file filters
244
+ name = p.name.lower()
245
+ if any(fnmatch.fnmatch(name, pat) for pat in skip_patterns):
246
+ continue
247
+ if p.suffix.lower() in exts:
248
+ out.append(p)
249
+
250
+ # de-dup and normalize
251
+ return sorted({pp.resolve() for pp in out})
252
+
253
+
254
def enumerate_source_files(
    src_root: Path,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Iterable[str]] = None,
    fast_skip_dirs: Optional[Iterable[str]] = None,
    fast_skip_patterns: Optional[Iterable[str]] = None,
) -> List[Path]:
    """Enumerate source files under ``src_root``.

    Two modes:
      * ``cfg`` given - delegate to ``collect_filepaths`` with that config
        and return the resolved, de-duplicated, sorted paths.
      * ``cfg`` is ``None`` - fast mode through ``_fast_walk`` using ``exts``
        (falling back to ``EXTS``) and the optional ``fast_skip_*`` filters.
    """
    src_root = Path(src_root)

    if cfg is None:
        # Fast mode
        return _fast_walk(
            src_root,
            exts or EXTS,
            skip_dirs=fast_skip_dirs or (),
            skip_patterns=fast_skip_patterns or (),
        )

    resolved = set()
    for found in collect_filepaths([str(src_root)], cfg=cfg):
        resolved.add(Path(found).resolve())
    return sorted(resolved)
@@ -0,0 +1,155 @@
1
+
2
+ from ..imports import *
3
+ import re
4
def combine_params(*values, typ=None):
    """Merge several list-like / set-like parameters into one container.

    Args:
        *values: Collections to merge. ``None`` and bool sentinels are
            skipped; a bare string counts as a single element.
        typ: Container type of the result. Defaults to the type of the first
            usable value.

    Returns:
        The merged container (set-likes are unioned, everything else is
        concatenated in argument order), or ``None`` when no usable value
        was supplied.
    """
    merged = None
    for value in values:
        if value is None or isinstance(value, bool):
            continue
        if isinstance(value, str):
            # typ("abc") would explode the string into characters
            value = [value]
        if merged is None:
            typ = typ or type(value)
            merged = typ()
        # The original referenced an undefined ``val_type`` here and never
        # merged the first value.
        if isinstance(merged, (set, frozenset)):
            merged = merged | typ(value)
        else:
            merged = merged + typ(value)
    return merged
17
+ # -------------------------
18
+ # Default sets
19
+ # -------------------------
20
+
21
# Suffixes treated as scannable text/source files by default.
DEFAULT_ALLOWED_EXTS: Set[str] = {
    # python
    ".py", ".pyw",
    # javascript / typescript
    ".js", ".jsx", ".ts", ".tsx", ".mjs",
    # markup and styles
    ".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less",
    # configuration / data
    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg",
    # docs, shell and plain text
    ".md", ".markdown", ".rst", ".sh", ".bash", ".env", ".txt",
}

# Media categories excluded by default.
DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image",
    "video",
    "audio",
    "presentation",
    "spreadsheet",
    "archive",
    "executable",
}

# Extensions of the excluded media categories plus a few extra suffixes.
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    ".bak", ".shp", ".cpg", ".dbf", ".shx", ".geojson",
    ".pyc", ".prj", ".sbn", ".sbx",
}
# An explicitly allowed extension always wins over the unallowed set.
DEFAULT_UNALLOWED_EXTS = _unallowed - DEFAULT_ALLOWED_EXTS

DEFAULT_EXCLUDE_DIRS: List[str] = [
    "node_modules",
    "__pycache__",
    "backups",
    "backup",
    "trash",
    "deprecated",
    "old",
    "__init__",
]

DEFAULT_EXCLUDE_PATTERNS: List[str] = [
    "__init__*",
    "*.tmp",
    "*.log",
    "*.lock",
    "*.zip",
    "*~",
]

# "*" means "no restriction".
DEFAULT_ALLOWED_PATTERNS: List[str] = ["*"]
DEFAULT_ALLOWED_DIRS: List[str] = ["*"]
# NOTE(review): this one is a list, while derive_file_defaults handles
# allowed_types with typ=set - confirm which container is intended.
DEFAULT_ALLOWED_TYPES: List[str] = ["*"]

# Pattern of the form "<user>@<host>:/<absolute path>".
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
AllowedPredicate = Optional[Callable[[str], bool]]
54
+
55
+ # -------------------------
56
+ # Config dataclass
57
+ # -------------------------
58
+
59
@dataclass
class ScanConfig:
    """Filter settings for a file scan.

    The four set fields are required; the four list fields default to an
    empty list.
    """

    # --- required: extension / type sets ---
    allowed_exts: Set[str]
    unallowed_exts: Set[str]
    allowed_types: Set[str]
    exclude_types: Set[str]

    # --- optional: directory filters ---
    allowed_dirs: List[str] = field(default_factory=list)
    exclude_dirs: List[str] = field(default_factory=list)

    # --- optional: file-name pattern filters ---
    allowed_patterns: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
69
+
70
+ # -------------------------
71
+ # Utility functions
72
+ # -------------------------
73
+
74
+ def _normalize_listlike(value, typ=list, sep=','):
75
+ """Normalize comma-separated or iterable values into the desired type."""
76
+ if value in [True, None, False]:
77
+ return value
78
+ if isinstance(value, str):
79
+ value = [v.strip() for v in value.split(sep) if v.strip()]
80
+ return typ(value)
81
+
82
def _ensure_exts(exts):
    """Normalise extensions into a set of lower-case, dot-prefixed suffixes.

    ``True``/``None``/``False`` sentinels are returned unchanged. Other
    values go through ``_normalize_listlike``; each entry is lower-cased
    (the scan predicate compares against ``suffix.lower()``, so upper-case
    entries could never match), given a leading dot when missing, and empty
    entries are dropped.
    """
    if exts in (True, None, False):
        return exts
    normalized = set()
    for ext in _normalize_listlike(exts, list):
        ext = ext.strip().lower()
        if not ext:
            continue
        if not ext.startswith('.'):
            ext = f".{ext}"
        normalized.add(ext)
    return normalized
91
+
92
def _ensure_patterns(patterns):
    """Turn ``patterns`` into a list of glob patterns.

    ``True``/``None``/``False`` sentinels are returned unchanged. Entries
    without ``*`` or ``?`` get a wildcard added: in front when the entry
    starts with ``.`` or ``~`` (suffix match), otherwise at the end (prefix
    match). Empty entries are dropped.
    """
    if patterns in (True, None, False):
        return patterns

    globs = []
    for raw in _normalize_listlike(patterns, list):
        if not raw:
            continue
        if '*' in raw or '?' in raw:
            # already a glob - keep verbatim
            globs.append(raw)
        elif raw.startswith(('.', '~')):
            globs.append(f"*{raw}")
        else:
            globs.append(f"{raw}*")
    return globs
109
+
110
+
111
+ def _get_default_modular(value, default, add=False, typ=set):
112
+ """Merge user and default values intelligently."""
113
+ if value in [True, None]:
114
+ return default
115
+ if value is False:
116
+ return typ()
117
+ if add:
118
+ return combine_params(value,default,typ=None)
119
+
120
+ return typ(value)
121
+
122
+ # -------------------------
123
+ # Default derivation logic
124
+ # -------------------------
125
+
126
def derive_file_defaults(
    allowed_exts=False, unallowed_exts=False,
    allowed_types=False, exclude_types=False,
    allowed_dirs=False, exclude_dirs=False,
    allowed_patterns=False, exclude_patterns=False,
    add=False
    ):
    """Resolve every filter argument against its module-level default.

    Each argument is first normalised (``_ensure_exts`` for extensions,
    ``_normalize_listlike`` for types and dirs, ``_ensure_patterns`` for
    patterns) and then passed to ``_get_default_modular`` together with the
    matching ``DEFAULT_*`` constant, ``add`` and the target container type.

    Returns:
        A dict with the eight resolved settings, keyed by argument name.
    """
    return {
        # extensions -> sets
        "allowed_exts": _get_default_modular(
            _ensure_exts(allowed_exts), DEFAULT_ALLOWED_EXTS, add, set),
        "unallowed_exts": _get_default_modular(
            _ensure_exts(unallowed_exts), DEFAULT_UNALLOWED_EXTS, add, set),
        # types -> sets
        "allowed_types": _get_default_modular(
            _normalize_listlike(allowed_types, set), DEFAULT_ALLOWED_TYPES, add, set),
        "exclude_types": _get_default_modular(
            _normalize_listlike(exclude_types, set), DEFAULT_EXCLUDE_TYPES, add, set),
        # directories -> lists
        "allowed_dirs": _get_default_modular(
            _normalize_listlike(allowed_dirs, list), DEFAULT_ALLOWED_DIRS, add, list),
        "exclude_dirs": _get_default_modular(
            _normalize_listlike(exclude_dirs, list), DEFAULT_EXCLUDE_DIRS, add, list),
        # name patterns -> lists
        "allowed_patterns": _get_default_modular(
            _ensure_patterns(allowed_patterns), DEFAULT_ALLOWED_PATTERNS, add, list),
        "exclude_patterns": _get_default_modular(
            _ensure_patterns(exclude_patterns), DEFAULT_EXCLUDE_PATTERNS, add, list),
    }
152
+
153
def define_defaults(**kwargs):
    """Build a ``ScanConfig`` from the settings resolved by
    ``derive_file_defaults(**kwargs)``."""
    return ScanConfig(**derive_file_defaults(**kwargs))
@@ -0,0 +1,154 @@
1
+ from ..imports import *
2
+ from .filter_params import *
3
+ from .file_filters import enumerate_source_files
4
+
5
+
6
def check_path_type(
    path: str,
    user: Optional[str] = None,
    host: Optional[str] = None,
    user_as_host: Optional[str] = None,
    use_shell: bool = False
) -> Literal["file", "directory", "missing", "unknown"]:
    """
    Determine whether a given path is a file, directory, or missing.
    Works locally or remotely (via SSH).

    Args:
        path: The path to check.
        user, host, user_as_host: SSH parameters if remote. The remote check
            is used when ``user_as_host`` is set or both ``user`` and
            ``host`` are set; it runs ``ssh`` with a 5 second timeout.
        use_shell: Force shell test instead of Python os.path (local only).
    Returns:
        One of: 'file', 'directory', 'missing', or 'unknown'. 'unknown' is
        returned when the command fails or prints something unexpected, or
        when a local path exists but is neither a file nor a directory.
    """
    import shlex

    is_remote = bool(user_as_host or (user and host))

    # --- local check through os.path ---
    if not is_remote and not use_shell:
        if os.path.isfile(path):
            return "file"
        if os.path.isdir(path):
            return "directory"
        if not os.path.exists(path):
            return "missing"
        return "unknown"

    # The path ends up inside a shell script, so it must be escaped; plain
    # '...' quoting broke (and allowed command injection) as soon as the
    # path itself contained a single quote.
    quoted = shlex.quote(str(path))
    script = (
        f"if [ -f {quoted} ]; then echo file; "
        f"elif [ -d {quoted} ]; then echo directory; "
        f"else echo missing; fi"
    )

    if is_remote:
        remote_target = user_as_host or f"{user}@{host}"
        argv = ["ssh", remote_target, script]
        run_kwargs = {"timeout": 5}
    else:
        # fallback using shell tests (useful for sandboxed contexts)
        argv = ["sh", "-c", script]
        run_kwargs = {}

    try:
        output = subprocess.check_output(
            argv, stderr=subprocess.DEVNULL, text=True, **run_kwargs
        ).strip()
    except Exception:
        # deliberate best effort: any failure to run the probe is "unknown"
        return "unknown"
    return output if output in ("file", "directory", "missing") else "unknown"
59
+
60
+
61
+
62
+
63
def get_find_cmd(
    directory: str,
    *,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,  # 'f' or 'd'
    name: Optional[str] = None,
    size: Optional[str] = None,
    mtime: Optional[str] = None,
    perm: Optional[str] = None,
    user: Optional[str] = None,
    **kwargs
) -> str:
    """Constructs a Unix `find` command string from keyword args.

    Args:
        directory: Start directory.
        mindepth, maxdepth: Emitted as ``-mindepth`` / ``-maxdepth`` unless
            ``depth`` is given.
        depth: Exact depth; sets both ``-mindepth`` and ``-maxdepth``.
        file_type: Only ``'f'`` or ``'d'`` are emitted as ``-type``.
        name, size, mtime, perm, user: Emitted as the matching ``find``
            tests when truthy.
        **kwargs: Accepted and ignored.

    Returns:
        The command as a single string. Every value is escaped with
        ``shlex.quote`` so paths with spaces or quotes, and glob patterns,
        reach ``find`` intact instead of being interpreted by the shell.
    """
    import shlex

    def quote(value) -> str:
        return shlex.quote(str(value))

    parts = ["find", quote(directory)]

    if depth is not None:
        parts += ["-mindepth", quote(depth), "-maxdepth", quote(depth)]
    else:
        if mindepth is not None:
            parts += ["-mindepth", quote(mindepth)]
        if maxdepth is not None:
            parts += ["-maxdepth", quote(maxdepth)]

    if file_type in ("f", "d"):
        parts += ["-type", file_type]

    optional_tests = (
        ("-name", name),
        ("-size", size),
        ("-mtime", mtime),
        ("-perm", perm),
        ("-user", user),
    )
    for flag, value in optional_tests:
        if value:
            parts += [flag, quote(value)]

    return " ".join(parts)
102
+
103
+
104
def collect_globs(
    directory: str,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Set[str]] = None,
    patterns: Optional[List[str]] = None,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,
    user_at_host: Optional[str] = None,
    add: bool = False,
    **kwargs
) -> List[str]:
    """
    Collect file or directory paths, either remotely or locally.

    - With ``user_at_host``: build a ``find`` command via ``get_find_cmd``
      (depth options, ``file_type`` and the truthy extra ``kwargs``) and
      return whatever ``run_cmd`` returns for it.
    - Otherwise: walk ``directory`` with ``Path.rglob("*")`` and keep entries
      matching ``file_type`` ('f' / 'd'), ``exts`` and ``patterns``; the
      depth options are not applied on this path. Returns the sorted,
      resolved paths as strings.

    NOTE(review): ``cfg`` is resolved but not consulted afterwards, and
    ``ensure_exts`` / ``ensure_patterns`` / ``run_cmd`` are expected to be
    provided by the package imports - confirm.
    """
    cfg = cfg or define_defaults(add=add)
    directory = str(directory)
    exts = ensure_exts(exts)
    patterns = ensure_patterns(patterns)

    # Remote path via SSH
    if user_at_host:
        extra = {key: val for key, val in kwargs.items() if val}
        remote_cmd = get_find_cmd(
            directory,
            mindepth=mindepth,
            maxdepth=maxdepth,
            depth=depth,
            file_type=file_type,
            **extra,
        )
        return run_cmd(remote_cmd, user_at_host=user_at_host)

    # Local path (Python-native walk)
    def _keep(entry) -> bool:
        if file_type == "f" and not entry.is_file():
            return False
        if file_type == "d" and not entry.is_dir():
            return False
        if exts and entry.suffix.lower() not in exts:
            return False
        if patterns and not any(entry.match(pat) for pat in patterns):
            return False
        return True

    matches = [str(entry.resolve()) for entry in Path(directory).rglob("*") if _keep(entry)]
    matches.sort()
    return matches