abstract-utilities 0.2.2.493__py3-none-any.whl → 0.2.2.496__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_utilities/__init__.py +0 -1
- abstract_utilities/file_utils/__init__.py +1 -2
- abstract_utilities/file_utils/imports/constants.py +6 -0
- abstract_utilities/file_utils/imports/imports.py +1 -1
- abstract_utilities/file_utils/imports/module_imports.py +1 -2
- abstract_utilities/file_utils/module_imports.py +12 -0
- abstract_utilities/file_utils/src/__init__.py +10 -0
- abstract_utilities/file_utils/src/file_filters.py +110 -0
- abstract_utilities/file_utils/src/file_reader.py +607 -0
- abstract_utilities/file_utils/src/file_utils.py +279 -0
- abstract_utilities/file_utils/src/filter_params.py +155 -0
- abstract_utilities/file_utils/src/find_collect.py +154 -0
- abstract_utilities/file_utils/src/initFunctionsGen.py +286 -0
- abstract_utilities/file_utils/src/map_utils.py +29 -0
- abstract_utilities/file_utils/src/pdf_utils.py +300 -0
- abstract_utilities/file_utils/src/type_checks.py +92 -0
- abstract_utilities/import_utils/__init__.py +2 -0
- abstract_utilities/import_utils/imports/__init__.py +4 -0
- abstract_utilities/import_utils/imports/constants.py +2 -0
- abstract_utilities/import_utils/imports/imports.py +4 -0
- abstract_utilities/import_utils/imports/module_imports.py +6 -0
- abstract_utilities/import_utils/imports/utils.py +30 -0
- abstract_utilities/import_utils/src/__init__.py +7 -0
- abstract_utilities/import_utils/src/clean_imports.py +122 -0
- abstract_utilities/import_utils/src/dot_utils.py +60 -0
- abstract_utilities/import_utils/src/extract_utils.py +42 -0
- abstract_utilities/import_utils/src/import_functions.py +46 -0
- abstract_utilities/import_utils/src/import_utils.py +299 -0
- abstract_utilities/import_utils/src/package_utils/__init__.py +139 -0
- abstract_utilities/import_utils/src/package_utils/context_utils.py +27 -0
- abstract_utilities/import_utils/src/package_utils/import_collectors.py +53 -0
- abstract_utilities/import_utils/src/package_utils/path_utils.py +28 -0
- abstract_utilities/import_utils/src/package_utils/safe_import.py +27 -0
- abstract_utilities/import_utils/src/package_utils.py +140 -0
- abstract_utilities/import_utils/src/sysroot_utils.py +57 -0
- abstract_utilities/path_utils.py +1 -12
- abstract_utilities/read_write_utils.py +31 -14
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/METADATA +1 -1
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/RECORD +42 -11
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/top_level.txt +1 -0
- imports/__init__.py +36 -0
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.496.dist-info}/WHEEL +0 -0
@@ -0,0 +1,279 @@
+
+from .filter_params import *
+from ..imports import *
+##from abstract_utilities import make_list,get_media_exts, is_media_type
+def get_allowed_predicate(allowed=None):
+    if allowed != False:
+        if allowed == True:
+            allowed = None
+        allowed = allowed or make_allowed_predicate()
+    else:
+        def allowed(*args):
+            return True
+        allowed = allowed
+    return allowed
+def get_globs(items,recursive: bool = True,allowed=None):
+    glob_paths = []
+    items = [item for item in make_list(items) if item]
+    for item in items:
+        pattern = os.path.join(item, "**/*")  # include all files recursively
+        nuItems = glob.glob(pattern, recursive=recursive)
+        if allowed:
+            nuItems = [nuItem for nuItem in nuItems if nuItem and allowed(nuItem)]
+        glob_paths += nuItems
+    return glob_paths
+def get_allowed_files(items,allowed=True):
+    allowed = get_allowed_predicate(allowed=allowed)
+    return [item for item in items if item and os.path.isfile(item) and allowed(item)]
+def get_allowed_dirs(items,allowed=False):
+    allowed = get_allowed_predicate(allowed=allowed)
+    return [item for item in items if item and os.path.isdir(item) and allowed(item)]
+
+def get_filtered_files(items,allowed=None,files = []):
+    allowed = get_allowed_predicate(allowed=allowed)
+    glob_paths = get_globs(items)
+    return [glob_path for glob_path in glob_paths if glob_path and os.path.isfile(glob_path) and glob_path not in files and allowed(glob_path)]
+def get_filtered_dirs(items,allowed=None,dirs = []):
+    allowed = get_allowed_predicate(allowed=allowed)
+    glob_paths = get_globs(items)
+    return [glob_path for glob_path in glob_paths if glob_path and os.path.isdir(glob_path) and glob_path not in dirs and allowed(glob_path)]
+
+def get_all_allowed_files(items,allowed=None):
+    dirs = get_all_allowed_dirs(items)
+    files = get_allowed_files(items)
+    nu_files = []
+    for directory in dirs:
+        files += get_filtered_files(directory,allowed=allowed,files=files)
+    return files
+def get_all_allowed_dirs(items,allowed=None):
+    allowed = get_allowed_predicate(allowed=allowed)
+    dirs = get_allowed_dirs(items)
+    nu_dirs=[]
+    for directory in dirs:
+        nu_dirs += get_filtered_dirs(directory,allowed=allowed,dirs=nu_dirs)
+    return nu_dirs
+def get_files_and_dirs(
+    directory: str,
+    cfg: Optional["ScanConfig"] = None,
+    allowed_exts: Optional[Set[str]] = False,
+    unallowed_exts: Optional[Set[str]] = False,
+    allowed_types: Optional[Set[str]] = False,
+    exclude_types: Optional[Set[str]] = False,
+    allowed_dirs: Optional[List[str]] = False,
+    exclude_dirs: Optional[List[str]] = False,
+    allowed_patterns: Optional[List[str]] = False,
+    exclude_patterns: Optional[List[str]] = False,
+    add = False,
+    recursive: bool = True,
+    include_files: bool = True,
+    **kwargs
+):
+    cfg = cfg or define_defaults(
+        allowed_exts = allowed_exts,
+        unallowed_exts = unallowed_exts,
+        allowed_types = allowed_types,
+        exclude_types = exclude_types,
+        allowed_dirs = allowed_dirs,
+        exclude_dirs = exclude_dirs,
+        allowed_patterns = allowed_patterns,
+        exclude_patterns = exclude_patterns,
+        add=add
+    )
+    allowed = make_allowed_predicate(cfg)
+    items=[]
+    files =[]
+    if recursive:
+        items = get_globs(directory,recursive=recursive,allowed=allowed)
+    else:
+        directories = make_list(directory)
+        for directory in directories:
+            items +=[os.path.join(directory,item) for item in os.listdir(directory)]
+    dirs = get_allowed_dirs(items,allowed=allowed)
+    if include_files:
+        files = get_allowed_files(items,allowed=allowed)
+    return dirs,files
+def make_allowed_predicate(cfg: ScanConfig) -> Callable[[str], bool]:
+    """
+    Build a predicate that returns True if a given path is considered allowed
+    under the given ScanConfig. Applies allowed_* and exclude_* logic symmetrically.
+    """
+    def allowed(path: str) -> bool:
+        p = Path(path)
+        name = p.name.lower()
+        path_str = str(p).lower()
+
+        # --------------------
+        # A) directory filters
+        # --------------------
+        if cfg.exclude_dirs:
+            for dpat in cfg.exclude_dirs:
+                dpat_l = dpat.lower()
+                if dpat_l in path_str or fnmatch.fnmatch(name, dpat_l):
+                    if p.is_dir() or dpat_l in path_str:
+                        return False
+
+        if cfg.allowed_dirs and cfg.allowed_dirs != ["*"]:
+            # must be in at least one allowed dir
+            if not any(
+                fnmatch.fnmatch(path_str, f"*{dpat.lower()}*") for dpat in cfg.allowed_dirs
+            ):
+                return False
+
+        # --------------------
+        # B) pattern filters
+        # --------------------
+        if cfg.allowed_patterns and cfg.allowed_patterns != ["*"]:
+            if not any(fnmatch.fnmatch(name, pat.lower()) for pat in cfg.allowed_patterns):
+                return False
+
+        if cfg.exclude_patterns:
+            for pat in cfg.exclude_patterns:
+                if fnmatch.fnmatch(name, pat.lower()):
+                    return False
+
+        # --------------------
+        # C) extension filters
+        # --------------------
+        if p.is_file():
+            ext = p.suffix.lower()
+            if cfg.allowed_exts and ext not in cfg.allowed_exts:
+                return False
+            if cfg.unallowed_exts and ext in cfg.unallowed_exts:
+                return False
+
+        # --------------------
+        # D) type filters (optional)
+        # --------------------
+        if cfg.allowed_types and cfg.allowed_types != {"*"}:
+            if not any(t in path_str for t in cfg.allowed_types):
+                return False
+        if cfg.exclude_types and cfg.exclude_types != {"*"}:
+            if any(t in path_str for t in cfg.exclude_types):
+                return False
+
+        return True
+
+    return allowed
+def correct_kwargs(**kwargs):
+    for key,values in kwargs.items():
+        if key.startswith('excluded'):
+            post_fix = key.split('_')[-1]
+            correct_key = f'exclude_{post_fix}'
+            correct_vals = kwargs.get(correct_key)
+            kwargs[correct_key]=combine_params(correct_vals,values)
+            del kwargs[key]
+
+def collect_filepaths(
+    directory: List[str],
+    cfg: ScanConfig=None,
+    allowed_exts: Optional[Set[str]] = False,
+    unallowed_exts: Optional[Set[str]] = False,
+    allowed_types: Optional[Set[str]] = False,
+    exclude_types: Optional[Set[str]] = False,
+    allowed_dirs: Optional[List[str]] = False,
+    exclude_dirs: Optional[List[str]] = False,
+    allowed_patterns: Optional[List[str]] = False,
+    exclude_patterns: Optional[List[str]] = False,
+    add=False,
+    allowed: Optional[Callable[[str], bool]] = None,
+    **kwargs
+) -> List[str]:
+    kwargs = correct_kwargs(allowed_exts = allowed_exts,
+                            unallowed_exts = unallowed_exts,
+                            allowed_types = allowed_types,
+                            exclude_types = exclude_types,
+                            allowed_dirs = allowed_dirs,
+                            exclude_dirs = exclude_dirs,
+                            allowed_patterns = allowed_patterns,
+                            exclude_patterns = exclude_patterns,
+                            **kwargs)
+    cfg = cfg or define_defaults(
+        allowed_exts = allowed_exts,
+        unallowed_exts = unallowed_exts,
+        allowed_types = allowed_types,
+        exclude_types = exclude_types,
+        allowed_dirs = allowed_dirs,
+        exclude_dirs = exclude_dirs,
+        allowed_patterns = allowed_patterns,
+        exclude_patterns = exclude_patterns,
+        add = add
+    )
+    allowed = allowed or make_allowed_predicate(cfg)
+    directories = make_list(directory)
+    roots = [r for r in directories if r]
+
+    # your existing helpers (get_dirs, get_globs, etc.) stay the same
+    original_dirs = get_allowed_dirs(roots, allowed=allowed)
+    original_globs = get_globs(original_dirs)
+    files = get_allowed_files(original_globs, allowed=allowed)
+
+    for d in get_filtered_dirs(original_dirs, allowed=allowed):
+        files += get_filtered_files(d, allowed=allowed, files=files)
+
+    # de-dupe while preserving order
+    seen, out = set(), []
+    for f in files:
+        if f not in seen:
+            seen.add(f)
+            out.append(f)
+    return out
+
+
+def _fast_walk(
+    root: Path,
+    exts: Iterable[str],
+    skip_dirs: Iterable[str] = (),
+    skip_patterns: Iterable[str] = (),
+) -> List[Path]:
+    exts = tuple(exts)
+    skip_dirs = set(sd.lower() for sd in skip_dirs or ())
+    skip_patterns = tuple(sp.lower() for sp in (skip_patterns or ()))
+
+    out = []
+    for p in root.rglob("*"):
+        # skip directories by name hit
+        if p.is_dir():
+            name = p.name.lower()
+            if name in skip_dirs:
+                # rglob doesn't let us prune mid-iteration cleanly; we just won't collect under it
+                continue
+            # nothing to collect for dirs
+            continue
+
+        # file filters
+        name = p.name.lower()
+        if any(fnmatch.fnmatch(name, pat) for pat in skip_patterns):
+            continue
+        if p.suffix.lower() in exts:
+            out.append(p)
+
+    # de-dup and normalize
+    return sorted({pp.resolve() for pp in out})
+
+
+def enumerate_source_files(
+    src_root: Path,
+    cfg: Optional["ScanConfig"] = None,
+    *,
+    exts: Optional[Iterable[str]] = None,
+    fast_skip_dirs: Optional[Iterable[str]] = None,
+    fast_skip_patterns: Optional[Iterable[str]] = None,
+) -> List[Path]:
+    """
+    Unified enumerator:
+      - If `cfg` is provided: use collect_filepaths(...) with full rules.
+      - Else: fast walk using rglob over `exts` (defaults to EXTS) with optional light excludes.
+    """
+    src_root = Path(src_root)
+
+    if cfg is not None:
+        files = collect_filepaths([str(src_root)], cfg=cfg)
+        return sorted({Path(f).resolve() for f in files})
+
+    # Fast mode
+    return _fast_walk(
+        src_root,
+        exts or EXTS,
+        skip_dirs=fast_skip_dirs or (),
+        skip_patterns=fast_skip_patterns or (),
+    )
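
The hunk above wires the ScanConfig-driven scanners together. A usage sketch follows for orientation; it assumes these helpers are re-exported through abstract_utilities.file_utils (the import path is inferred from the file listing above and is not itself part of this diff):

    # Hypothetical usage of collect_filepaths / enumerate_source_files (sketch only).
    from abstract_utilities.file_utils import collect_filepaths, enumerate_source_files

    # Explicit keyword filters replace the module defaults unless add=True is passed.
    paths = collect_filepaths(
        "./src",
        allowed_exts={".py", ".toml"},
        exclude_dirs=["node_modules", "dist"],
    )
    for path in paths:
        print(path)

    # With no cfg, enumerate_source_files falls back to the _fast_walk/rglob path.
    sources = enumerate_source_files("./src", exts={".py"})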
@@ -0,0 +1,155 @@
+
+from ..imports import *
+import re
+def combine_params(*values,typ=None):
+    nu_values = None
+    for value in values:
+        if value is not None:
+            if nu_values is None:
+                typ = typ or type(value)
+                nu_values = typ()
+            else:
+                if val_type is set:
+                    nu_values = nu_values | typ(value)
+                if val_type is list:
+                    nu_values += typ(value)
+    return nu_values
+# -------------------------
+# Default sets
+# -------------------------
+
+DEFAULT_ALLOWED_EXTS: Set[str] = {
+    ".py", ".pyw", ".js", ".jsx", ".ts", ".tsx", ".mjs",
+    ".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less",
+    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg",
+    ".md", ".markdown", ".rst", ".sh", ".bash", ".env", ".txt"
+}
+
+DEFAULT_EXCLUDE_TYPES: Set[str] = {
+    "image", "video", "audio", "presentation",
+    "spreadsheet", "archive", "executable"
+}
+
+_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
+    ".bak", ".shp", ".cpg", ".dbf", ".shx", ".geojson",
+    ".pyc", ".prj", ".sbn", ".sbx"
+}
+DEFAULT_UNALLOWED_EXTS = {e for e in _unallowed if e not in DEFAULT_ALLOWED_EXTS}
+
+DEFAULT_EXCLUDE_DIRS: List[str] = [
+    "node_modules", "__pycache__", "backups", "backup",
+    "trash", "deprecated", "old", "__init__"
+]
+
+DEFAULT_EXCLUDE_PATTERNS: List[str] = [
+    "__init__*", "*.tmp", "*.log", "*.lock", "*.zip", "*~"
+]
+
+DEFAULT_ALLOWED_PATTERNS: List[str] = ["*"]
+DEFAULT_ALLOWED_DIRS: List[str] = ["*"]
+DEFAULT_ALLOWED_TYPES: List[str] = ["*"]
+
+REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
+AllowedPredicate = Optional[Callable[[str], bool]]
+
+# -------------------------
+# Config dataclass
+# -------------------------
+
+@dataclass
+class ScanConfig:
+    allowed_exts: Set[str]
+    unallowed_exts: Set[str]
+    allowed_types: Set[str]
+    exclude_types: Set[str]
+    allowed_dirs: List[str] = field(default_factory=list)
+    exclude_dirs: List[str] = field(default_factory=list)
+    allowed_patterns: List[str] = field(default_factory=list)
+    exclude_patterns: List[str] = field(default_factory=list)
+
+# -------------------------
+# Utility functions
+# -------------------------
+
+def _normalize_listlike(value, typ=list, sep=','):
+    """Normalize comma-separated or iterable values into the desired type."""
+    if value in [True, None, False]:
+        return value
+    if isinstance(value, str):
+        value = [v.strip() for v in value.split(sep) if v.strip()]
+    return typ(value)
+
+def _ensure_exts(exts):
+    if exts in [True, None, False]:
+        return exts
+    out = []
+    for ext in _normalize_listlike(exts, list):
+        if not ext.startswith('.'):
+            ext = f".{ext}"
+        out.append(ext)
+    return set(out)
+
+def _ensure_patterns(patterns):
+    """Normalize pattern list and ensure they are valid globs."""
+    if patterns in [True, None, False]:
+        return patterns
+    patterns = _normalize_listlike(patterns, list)
+    out = []
+    for pattern in patterns:
+        if not pattern:
+            continue
+        if '*' not in pattern and '?' not in pattern:
+            # Implicitly make it a prefix match
+            if pattern.startswith('.') or pattern.startswith('~'):
+                pattern = f"*{pattern}"
+            else:
+                pattern = f"{pattern}*"
+        out.append(pattern)
+    return out
+
+
+def _get_default_modular(value, default, add=False, typ=set):
+    """Merge user and default values intelligently."""
+    if value in [True, None]:
+        return default
+    if value is False:
+        return typ()
+    if add:
+        return combine_params(value,default,typ=None)
+
+    return typ(value)
+
+# -------------------------
+# Default derivation logic
+# -------------------------
+
+def derive_file_defaults(
+    allowed_exts=False, unallowed_exts=False,
+    allowed_types=False, exclude_types=False,
+    allowed_dirs=False, exclude_dirs=False,
+    allowed_patterns=False, exclude_patterns=False,
+    add=False
+):
+    allowed_exts = _get_default_modular(_ensure_exts(allowed_exts), DEFAULT_ALLOWED_EXTS, add, set)
+    unallowed_exts = _get_default_modular(_ensure_exts(unallowed_exts), DEFAULT_UNALLOWED_EXTS, add, set)
+    allowed_types = _get_default_modular(_normalize_listlike(allowed_types, set), DEFAULT_ALLOWED_TYPES, add, set)
+    exclude_types = _get_default_modular(_normalize_listlike(exclude_types, set), DEFAULT_EXCLUDE_TYPES, add, set)
+    allowed_dirs = _get_default_modular(_normalize_listlike(allowed_dirs, list), DEFAULT_ALLOWED_DIRS, add, list)
+    exclude_dirs = _get_default_modular(_normalize_listlike(exclude_dirs, list), DEFAULT_EXCLUDE_DIRS, add, list)
+    allowed_patterns = _get_default_modular(_ensure_patterns(allowed_patterns), DEFAULT_ALLOWED_PATTERNS, add, list)
+    exclude_patterns = _get_default_modular(_ensure_patterns(exclude_patterns), DEFAULT_EXCLUDE_PATTERNS, add, list)
+
+    return {
+        "allowed_exts": allowed_exts,
+        "unallowed_exts": unallowed_exts,
+        "allowed_types": allowed_types,
+        "exclude_types": exclude_types,
+        "allowed_dirs": allowed_dirs,
+        "exclude_dirs": exclude_dirs,
+        "allowed_patterns": allowed_patterns,
+        "exclude_patterns": exclude_patterns,
+    }
+
+def define_defaults(**kwargs):
+    defaults = derive_file_defaults(**kwargs)
+    return ScanConfig(**defaults)
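
The hunk above supplies the default allow/deny sets and the ScanConfig dataclass consumed by make_allowed_predicate. A small sketch of that flow, assuming the modules are importable as abstract_utilities.file_utils.src.filter_params and abstract_utilities.file_utils.src.file_filters (paths taken from the file listing):

    # Hypothetical sketch: build a ScanConfig and test paths against it.
    from abstract_utilities.file_utils.src.filter_params import define_defaults
    from abstract_utilities.file_utils.src.file_filters import make_allowed_predicate

    cfg = define_defaults(
        allowed_exts="py,toml",   # comma-separated strings are split and dotted: {".py", ".toml"}
        exclude_dirs=True,        # True/None keeps DEFAULT_EXCLUDE_DIRS
    )
    allowed = make_allowed_predicate(cfg)

    allowed("pkg/module.py")            # True: no exclude rule matches (extension checks only run on existing files)
    allowed("pkg/__pycache__/mod.pyc")  # False: "__pycache__" hits the default exclude_dirs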
@@ -0,0 +1,154 @@
+from ..imports import *
+from .filter_params import *
+from .file_filters import enumerate_source_files
+
+
+def check_path_type(
+    path: str,
+    user: Optional[str] = None,
+    host: Optional[str] = None,
+    user_as_host: Optional[str] = None,
+    use_shell: bool = False
+) -> Literal["file", "directory", "missing", "unknown"]:
+    """
+    Determine whether a given path is a file, directory, or missing.
+    Works locally or remotely (via SSH).
+
+    Args:
+        path: The path to check.
+        user, host, user_as_host: SSH parameters if remote.
+        use_shell: Force shell test instead of Python os.path.
+    Returns:
+        One of: 'file', 'directory', 'missing', or 'unknown'
+    """
+
+    # --- remote check if user/host is given ---
+    if user_as_host or (user and host):
+        remote_target = user_as_host or f"{user}@{host}"
+        cmd = f"if [ -f '{path}' ]; then echo file; elif [ -d '{path}' ]; then echo directory; else echo missing; fi"
+        try:
+            result = subprocess.check_output(
+                ["ssh", remote_target, cmd],
+                stderr=subprocess.DEVNULL,
+                text=True,
+                timeout=5
+            ).strip()
+            return result if result in ("file", "directory", "missing") else "unknown"
+        except Exception:
+            return "unknown"
+
+    # --- local check ---
+    if not use_shell:
+        if os.path.isfile(path):
+            return "file"
+        elif os.path.isdir(path):
+            return "directory"
+        elif not os.path.exists(path):
+            return "missing"
+        return "unknown"
+    else:
+        # fallback using shell tests (useful for sandboxed contexts)
+        cmd = f"if [ -f '{path}' ]; then echo file; elif [ -d '{path}' ]; then echo directory; else echo missing; fi"
+        try:
+            output = subprocess.check_output(
+                cmd, shell=True, stderr=subprocess.DEVNULL, text=True
+            ).strip()
+            return output if output in ("file", "directory", "missing") else "unknown"
+        except Exception:
+            return "unknown"
+
+
+
+
+def get_find_cmd(
+    directory: str,
+    *,
+    mindepth: Optional[int] = None,
+    maxdepth: Optional[int] = None,
+    depth: Optional[int] = None,
+    file_type: Optional[str] = None,  # 'f' or 'd'
+    name: Optional[str] = None,
+    size: Optional[str] = None,
+    mtime: Optional[str] = None,
+    perm: Optional[str] = None,
+    user: Optional[str] = None,
+    **kwargs
+) -> str:
+    """Constructs a Unix `find` command string from keyword args."""
+    cmd = [f"find {directory}"]
+
+    if depth is not None:
+        cmd += [f"-mindepth {depth}", f"-maxdepth {depth}"]
+    else:
+        if mindepth is not None:
+            cmd.append(f"-mindepth {mindepth}")
+        if maxdepth is not None:
+            cmd.append(f"-maxdepth {maxdepth}")
+
+    if file_type in ("f", "d"):
+        cmd.append(f"-type {file_type}")
+    if name:
+        cmd.append(f"-name '{name}'")
+    if size:
+        cmd.append(f"-size {size}")
+    if mtime:
+        cmd.append(f"-mtime {mtime}")
+    if perm:
+        cmd.append(f"-perm {perm}")
+    if user:
+        cmd.append(f"-user {user}")
+
+    return " ".join(cmd)
+
+
+def collect_globs(
+    directory: str,
+    cfg: Optional["ScanConfig"] = None,
+    *,
+    exts: Optional[Set[str]] = None,
+    patterns: Optional[List[str]] = None,
+    mindepth: Optional[int] = None,
+    maxdepth: Optional[int] = None,
+    depth: Optional[int] = None,
+    file_type: Optional[str] = None,
+    user_at_host: Optional[str] = None,
+    add: bool = False,
+    **kwargs
+) -> List[str]:
+    """
+    Collect file or directory paths using either:
+      - local recursive logic (rglob)
+      - or remote shell call (find via run_cmd)
+    """
+    cfg = cfg or define_defaults(add=add)
+    directory = str(directory)
+    exts = ensure_exts(exts)
+    patterns = ensure_patterns(patterns)
+
+    # Remote path via SSH
+    if user_at_host:
+        find_cmd = get_find_cmd(
+            directory,
+            mindepth=mindepth,
+            maxdepth=maxdepth,
+            depth=depth,
+            file_type=file_type,
+            **{k: v for k, v in kwargs.items() if v},
+        )
+        return run_cmd(find_cmd, user_at_host=user_at_host)
+
+    # Local path (Python-native walk)
+    root = Path(directory)
+    results = []
+    for p in root.rglob("*"):
+        if file_type == "f" and not p.is_file():
+            continue
+        if file_type == "d" and not p.is_dir():
+            continue
+        if exts and p.suffix.lower() not in exts:
+            continue
+        if patterns and not any(p.match(pat) for pat in patterns):
+            continue
+        results.append(str(p.resolve()))
+
+    return sorted(results)
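
The hunk above adds local/remote path probing and a find-command builder. A short sketch, assuming the module is importable as abstract_utilities.file_utils.src.find_collect (path from the listing); the deploy@example.com values are placeholders:

    # Hypothetical sketch of check_path_type and get_find_cmd.
    from abstract_utilities.file_utils.src.find_collect import check_path_type, get_find_cmd

    check_path_type("/etc/hosts")                                    # -> 'file', 'directory', 'missing', or 'unknown'
    check_path_type("/srv/app", user="deploy", host="example.com")   # same test, run over ssh (placeholder credentials)

    # get_find_cmd only builds the command string; running it is left to the caller (e.g. run_cmd).
    cmd = get_find_cmd("/var/log", maxdepth=2, file_type="f", name="*.log", mtime="-7")
    # -> "find /var/log -maxdepth 2 -type f -name '*.log' -mtime -7"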