abstract-utilities 0.2.2.540__py3-none-any.whl → 0.2.2.593__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of abstract-utilities might be problematic. Click here for more details.
- abstract_utilities/class_utils/caller_utils.py +18 -0
- abstract_utilities/class_utils/global_utils.py +3 -2
- abstract_utilities/class_utils/imports/imports.py +1 -1
- abstract_utilities/directory_utils/src/directory_utils.py +17 -1
- abstract_utilities/file_utils/imports/imports.py +0 -4
- abstract_utilities/file_utils/imports/module_imports.py +1 -1
- abstract_utilities/file_utils/src/__init__.py +2 -3
- abstract_utilities/file_utils/src/file_filters/__init__.py +4 -0
- abstract_utilities/file_utils/src/file_filters/ensure_utils.py +118 -0
- abstract_utilities/file_utils/src/file_filters/filter_params.py +86 -0
- abstract_utilities/file_utils/src/file_filters/filter_utils.py +78 -0
- abstract_utilities/file_utils/src/file_filters/predicate_utils.py +114 -0
- abstract_utilities/file_utils/src/file_reader.py +0 -1
- abstract_utilities/file_utils/src/find_collect.py +10 -86
- abstract_utilities/file_utils/src/find_content.py +210 -0
- abstract_utilities/file_utils/src/reader_utils/__init__.py +4 -0
- abstract_utilities/file_utils/src/reader_utils/directory_reader.py +53 -0
- abstract_utilities/file_utils/src/reader_utils/file_reader.py +543 -0
- abstract_utilities/file_utils/src/reader_utils/file_readers.py +376 -0
- abstract_utilities/file_utils/src/reader_utils/imports.py +18 -0
- abstract_utilities/file_utils/src/reader_utils/pdf_utils.py +300 -0
- abstract_utilities/import_utils/circular_import_finder.py +222 -0
- abstract_utilities/import_utils/circular_import_finder2.py +118 -0
- abstract_utilities/import_utils/imports/module_imports.py +3 -1
- abstract_utilities/import_utils/src/clean_imports.py +156 -25
- abstract_utilities/import_utils/src/dot_utils.py +11 -0
- abstract_utilities/import_utils/src/extract_utils.py +4 -0
- abstract_utilities/import_utils/src/import_functions.py +46 -2
- abstract_utilities/import_utils/src/pkg_utils.py +58 -4
- abstract_utilities/import_utils/src/sysroot_utils.py +56 -1
- abstract_utilities/log_utils/log_file.py +3 -2
- abstract_utilities/path_utils/path_utils.py +25 -23
- abstract_utilities/safe_utils/safe_utils.py +30 -0
- {abstract_utilities-0.2.2.540.dist-info → abstract_utilities-0.2.2.593.dist-info}/METADATA +1 -1
- {abstract_utilities-0.2.2.540.dist-info → abstract_utilities-0.2.2.593.dist-info}/RECORD +37 -23
- {abstract_utilities-0.2.2.540.dist-info → abstract_utilities-0.2.2.593.dist-info}/WHEEL +0 -0
- {abstract_utilities-0.2.2.540.dist-info → abstract_utilities-0.2.2.593.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,22 @@
|
|
|
1
1
|
from .imports import *
|
|
2
|
+
|
|
3
|
+
def get_initial_caller() -> Optional[str]:
    """
    Return the resolved path of the entrypoint script that launched the program.

    The ``__main__`` module is looked up in ``sys.modules`` and its
    ``__file__`` is resolved with ``os.path.realpath``.

    Returns:
        The real path of the entrypoint script, or ``None`` when there is no
        ``__main__`` module or it carries no ``__file__`` (for example in an
        interactive session).
    """
    main_mod = sys.modules.get('__main__')
    if not main_mod:
        return None

    # getattr covers both a missing attribute and ``__file__ = None``; the
    # latter would otherwise make os.path.realpath raise TypeError.
    main_file = getattr(main_mod, '__file__', None)
    if main_file is None:
        return None

    return os.path.realpath(main_file)
|
|
14
|
+
def get_initial_caller_dir() -> Optional[str]:
    """
    Return the directory containing the entrypoint script.

    Returns:
        ``os.path.dirname`` of the path reported by ``get_initial_caller()``,
        or ``None`` when that function reports no entrypoint.
    """
    caller = get_initial_caller()
    if not caller:
        return None
    return os.path.dirname(caller)
|
|
2
20
|
def get_caller(i: Optional[int] = None) -> str:
|
|
3
21
|
"""
|
|
4
22
|
Return the filename of the calling frame.
|
|
@@ -48,9 +48,10 @@ def change_glob(var: str, val: any, glob: dict = return_globals()) -> any:
|
|
|
48
48
|
def get_globes(string:str='',glob:dict=return_globals()):
    """
    Look up `string` in the mapping `glob`.

    `glob` defaults to whatever `return_globals()` produced when this
    function was defined.

    Returns:
        The value stored under `string`, or None when the key is absent.
    """
    return glob[string] if string in glob else None
|
|
51
|
-
def if_none_default(string:str, default:any,glob:dict=return_globals()):
|
|
51
|
+
def if_none_default(string:str, default:any,glob:dict=return_globals(),typ=None):
|
|
52
|
+
|
|
52
53
|
piece = get_globes(string=string,glob=glob)
|
|
53
|
-
if piece is None:
|
|
54
|
+
if piece is None or (typ and not isinstance(piece,typ)):
|
|
54
55
|
piece = default
|
|
55
56
|
return change_glob(var=string,val=piece,glob=glob)
|
|
56
57
|
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
from ...imports import inspect,os,json,functools,inspect,glob
|
|
1
|
+
from ...imports import inspect,os,json,functools,inspect,glob,sys
|
|
2
2
|
from typing import *
|
|
@@ -1,5 +1,21 @@
|
|
|
1
|
-
|
|
1
|
+
from pathlib import Path
|
|
2
2
|
from .utils import *
|
|
3
|
+
|
|
4
|
+
def get_shortest_path(*args):
    """
    Return the argument whose string form has the fewest '/'-separated parts.

    Falsy arguments are ignored. When several candidates have the same
    number of parts, the first one given wins.

    Returns:
        The winning argument exactly as it was passed in (not its string
        form), or None when no non-empty argument was supplied.
    """
    candidates = [arg for arg in args if arg]
    if not candidates:
        # Previously this fell through to ``None[0]`` and raised TypeError.
        return None
    # min() returns the first minimal element, matching the original
    # strict "<" comparison.
    return min(candidates, key=lambda arg: len(str(arg).split('/')))
|
|
11
|
+
def get_common_root(paths):
    """
    Compute the deepest directory shared by every path in `paths`.

    Each entry is passed through ``Path(...).resolve()`` before the
    comparison, so relative paths and symlinks are normalised first.

    Args:
        paths: Iterable of path strings or path-like objects.

    Returns:
        The common parent as returned by ``os.path.commonpath``, or None
        when `paths` is empty.

    Raises:
        ValueError: propagated from ``os.path.commonpath`` when the resolved
            paths cannot be compared (e.g. they live on different drives).
    """
    resolved = [Path(p).resolve() for p in paths]
    if not resolved:
        # os.path.commonpath raises ValueError on an empty sequence.
        return None
    return os.path.commonpath(resolved)
|
|
18
|
+
|
|
3
19
|
def get_dirs(path):
|
|
4
20
|
"""
|
|
5
21
|
Get List of Immediate Subdirectories in a Path
|
|
@@ -4,5 +4,5 @@ from ...ssh_utils import *
|
|
|
4
4
|
from ...env_utils import *
|
|
5
5
|
from ...read_write_utils import read_from_file,write_to_file
|
|
6
6
|
from ...log_utils import get_logFile
|
|
7
|
-
from ...class_utils import get_caller, get_caller_path, get_caller_dir,SingletonMeta,run_pruned_func
|
|
7
|
+
from ...class_utils import if_none_default,if_none_change,get_caller,get_initial_caller_dir, get_caller_path, get_caller_dir,SingletonMeta,run_pruned_func
|
|
8
8
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from .file_filters import *
|
|
2
|
-
from .filter_params import *
|
|
3
2
|
from .map_utils import *
|
|
4
|
-
from .
|
|
5
|
-
from .file_reader import *
|
|
3
|
+
from .reader_utils import *
|
|
6
4
|
from .find_collect import *
|
|
5
|
+
from .find_content import *
|
|
7
6
|
from .initFunctionsGen import call_for_all_tabs,get_for_all_tabs
|
|
8
7
|
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from .filter_utils import *
|
|
2
|
+
def normalize_listlike(value, typ=list, sep=','):
    """
    Coerce a separated string or an iterable into `typ`.

    Values equal to True, None or False are returned unchanged. A string is
    split on `sep`, each piece is stripped and empty pieces are dropped; any
    other value is handed to `typ` as-is.
    """
    passthrough = (True, None, False)
    if value in passthrough:
        return value
    if not isinstance(value, str):
        return typ(value)
    stripped = (piece.strip() for piece in value.split(sep))
    return typ([piece for piece in stripped if piece])
|
|
9
|
+
|
|
10
|
+
def ensure_exts(exts):
    """
    Normalise file extensions into a set of lower-case, dot-prefixed strings.

    Values equal to True, None or False are returned unchanged. Anything
    else is first expanded with ``normalize_listlike(exts, list)``.

    Extensions are lower-cased because the predicate built by
    ``make_allowed_predicate`` compares them against ``Path.suffix.lower()``;
    an entry such as ``"PY"`` could otherwise never match.

    Returns:
        A set such as ``{".py", ".txt"}``, or the sentinel that was passed in.
    """
    if exts in [True, None, False]:
        return exts
    normalized = set()
    for ext in normalize_listlike(exts, list):
        ext = ext.lower()
        normalized.add(ext if ext.startswith('.') else f".{ext}")
    return normalized
|
|
19
|
+
|
|
20
|
+
def ensure_patterns(patterns):
    """
    Turn a pattern specification into a list of glob patterns.

    Values equal to True, None or False are returned unchanged. Other input
    is expanded with ``normalize_listlike(patterns, list)``. Empty entries
    are skipped and entries already containing ``*`` or ``?`` are kept
    verbatim. A bare entry starting with ``.`` or ``~`` gets ``*`` prepended;
    any other bare entry gets ``*`` appended.
    """
    if patterns in [True, None, False]:
        return patterns
    globs = []
    for raw in normalize_listlike(patterns, list):
        if not raw:
            continue
        if '*' in raw or '?' in raw:
            # Already a glob: keep as written.
            globs.append(raw)
        elif raw.startswith(('.', '~')):
            globs.append(f"*{raw}")
        else:
            globs.append(f"{raw}*")
    return globs
|
|
37
|
+
def ensure_directories(*args,**kwargs):
    """
    Collect the directories to operate on from positional and keyword input.

    Every positional argument is converted to ``str``. If
    ``run_pruned_func(is_dir, ...)`` is truthy for it, it is kept as-is;
    otherwise, if ``run_pruned_func(is_file, ...)`` is truthy, its parent
    directory is kept. The ``directories`` entry produced by
    ``get_dir_filter_kwargs(**kwargs)`` is then added, after passing through
    ``if_none_change`` together with ``get_initial_caller_dir()``
    (presumably a fallback to the entrypoint's directory - confirm against
    if_none_change).

    Returns:
        A de-duplicated list of the non-empty entries; order is unspecified
        because the values pass through a set.
    """
    found = []
    for arg in args:
        candidate = str(arg)
        if run_pruned_func(is_dir,candidate,**kwargs):
            found.append(candidate)
            continue
        if run_pruned_func(is_file,candidate,**kwargs):
            found.append(os.path.dirname(candidate))
    dir_kwargs = get_dir_filter_kwargs(**kwargs)
    extra_dirs = dir_kwargs.get('directories') or None
    extra_dirs = if_none_change(extra_dirs,get_initial_caller_dir())
    found.extend(make_list(extra_dirs))
    return list({entry for entry in found if entry})
|
|
52
|
+
def get_proper_type_str(string):
    """
    Map a free-form type name onto the one-letter code "d" or "f".

    The lower-cased input is first matched exactly against the known
    directory and file spellings; failing that, its first character is used
    if it is itself one of the codes.

    Returns:
        "d", "f", or None when the input is empty or unrecognised.
    """
    if not string:
        return None
    lowered = string.lower()
    aliases = {
        "d": ("dir","dirs","directory","directories","d","dirname"),
        "f": ("file","filepath","file_path","files","filepaths","file_paths","f"),
    }
    for code, names in aliases.items():
        if lowered in names:
            return code
    # Fall back on the first letter ("folder" -> "f", "Docs" -> "d").
    first = lowered[0]
    return first if first in aliases else None
|
|
66
|
+
def _path_type_shell_test(path) -> str:
    """Return a POSIX shell snippet that echoes 'file', 'directory' or 'missing' for `path`."""
    import shlex  # local import: only this helper needs it

    # shlex.quote keeps paths containing quotes, spaces or shell
    # metacharacters from breaking out of the test expression.
    quoted = shlex.quote(str(path))
    return (
        f"if [ -f {quoted} ]; then echo file; "
        f"elif [ -d {quoted} ]; then echo directory; "
        "else echo missing; fi"
    )


def check_path_type(
    path: str,
    user: Optional[str] = None,
    host: Optional[str] = None,
    user_as_host: Optional[str] = None,
    use_shell: bool = False
) -> Literal["file", "directory", "missing", "unknown"]:
    """
    Determine whether a given path is a file, a directory, or missing.

    The check runs over SSH when `user_as_host` is given or both `user` and
    `host` are given; otherwise it runs locally.

    Args:
        path: The path to check.
        user, host: Combined into ``user@host`` as the SSH target.
        user_as_host: A ready-made SSH target; takes precedence over
            `user`/`host`.
        use_shell: For local checks, run the shell test instead of using
            ``os.path``.

    Returns:
        One of 'file', 'directory', 'missing' or 'unknown'. 'unknown' is
        returned when a subprocess fails or times out (SSH calls are limited
        to 5 seconds), when it prints anything unexpected, or when a local
        path exists but is neither a regular file nor a directory.
    """
    known = ("file", "directory", "missing")

    # --- remote check if user/host is given ---
    if user_as_host or (user and host):
        remote_target = user_as_host or f"{user}@{host}"
        try:
            result = subprocess.check_output(
                ["ssh", remote_target, _path_type_shell_test(path)],
                stderr=subprocess.DEVNULL,
                text=True,
                timeout=5
            ).strip()
        except Exception:
            # Best effort by design: any failure is reported as "unknown".
            return "unknown"
        return result if result in known else "unknown"

    # --- local check via the shell ---
    if use_shell:
        try:
            output = subprocess.check_output(
                _path_type_shell_test(path),
                shell=True,
                stderr=subprocess.DEVNULL,
                text=True
            ).strip()
        except Exception:
            return "unknown"
        return output if output in known else "unknown"

    # --- local check via os.path ---
    if os.path.isfile(path):
        return "file"
    if os.path.isdir(path):
        return "directory"
    if not os.path.exists(path):
        return "missing"
    return "unknown"
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from .predicate_utils import *
|
|
2
|
+
def _get_default_modular(value, default, add=False, typ=set):
|
|
3
|
+
"""Merge user and default values intelligently."""
|
|
4
|
+
if value == None:
|
|
5
|
+
value = add
|
|
6
|
+
if value in [True]:
|
|
7
|
+
return default
|
|
8
|
+
if value is False:
|
|
9
|
+
return value
|
|
10
|
+
if add:
|
|
11
|
+
return combine_params(value,default,typ=None)
|
|
12
|
+
|
|
13
|
+
return typ(value)
|
|
14
|
+
|
|
15
|
+
# -------------------------
|
|
16
|
+
# Default derivation logic
|
|
17
|
+
# -------------------------
|
|
18
|
+
def _get_default_modular(value, default, add=None, typ=set):
|
|
19
|
+
"""Merge user and default values intelligently."""
|
|
20
|
+
add = add or False
|
|
21
|
+
if value == None:
|
|
22
|
+
value = add
|
|
23
|
+
if value in [True]:
|
|
24
|
+
return default
|
|
25
|
+
if value is False:
|
|
26
|
+
return value
|
|
27
|
+
if add:
|
|
28
|
+
return combine_params(value,default,typ=None)
|
|
29
|
+
return typ(value)
|
|
30
|
+
def derive_all_defaults(**kwargs):
    """
    Resolve every key of DEFAULT_CANONICAL_MAP from the given keyword arguments.

    The keyword arguments are first canonicalised with
    ``get_safe_canonical_kwargs``. Each map entry is read for its
    ``"default"`` and ``"type"`` fields.

    * Keys also present in DEFAULT_ALLOWED_EXCLUDE_MAP are normalised
      (``ensure_exts`` for ``*exts`` keys, ``ensure_patterns`` for
      ``*patterns`` keys, ``normalize_listlike`` otherwise) and then resolved
      with ``_get_default_modular``.
    * All other keys fall back to the default when unset and are coerced
      with ``make_list`` / ``bool`` when the declared type is list / bool.

    Returns:
        A dict mapping each canonical key to its resolved value.
    """
    kwargs = get_safe_canonical_kwargs(**kwargs)
    add = kwargs.get("add",False)
    nu_defaults = {}
    for key,values in DEFAULT_CANONICAL_MAP.items():
        default = values.get("default")
        typ = values.get("type")
        key_value = kwargs.get(key)
        if key in DEFAULT_ALLOWED_EXCLUDE_MAP:
            # The three normalisers are mutually exclusive. Previously the
            # `else` belonged only to the `patterns` test, so the result of
            # ensure_exts was immediately overwritten for `*exts` keys.
            if key.endswith('exts'):
                input_value = ensure_exts(key_value)
            elif key.endswith('patterns'):
                input_value = ensure_patterns(key_value)
            else:
                input_value = normalize_listlike(key_value, typ)
            nu_defaults[key] = _get_default_modular(input_value, default, add, typ)
        else:
            value = default if key_value is None else key_value
            if typ == list:
                value = make_list(value)
            elif typ == bool:
                value = bool(value)
            nu_defaults[key] = value

    return nu_defaults
|
|
56
|
+
# -------------------------
|
|
57
|
+
# Default derivation logic
|
|
58
|
+
# -------------------------
|
|
59
|
+
def derive_file_defaults(**kwargs):
    """
    Resolve the allowed_*/exclude_* filter settings from keyword arguments.

    The keyword arguments are first run through ``derive_all_defaults``;
    each key of DEFAULT_ALLOWED_EXCLUDE_MAP is then normalised
    (``ensure_exts`` for ``*exts`` keys, ``ensure_patterns`` for
    ``*patterns`` keys, ``normalize_listlike`` otherwise) and resolved with
    ``_get_default_modular``. ``add`` is read from the derived dict and
    defaults to True here.

    Returns:
        A dict mapping each key of DEFAULT_ALLOWED_EXCLUDE_MAP to its
        resolved value.
    """
    kwargs = derive_all_defaults(**kwargs)
    add = kwargs.get("add",True)
    nu_defaults = {}
    for key,values in DEFAULT_ALLOWED_EXCLUDE_MAP.items():
        default = values.get("default")
        typ = values.get("type")
        key_value = kwargs.get(key)
        # The three normalisers are mutually exclusive. Previously the `else`
        # belonged only to the `patterns` test, so the result of ensure_exts
        # was immediately overwritten for `*exts` keys.
        if key.endswith('exts'):
            input_value = ensure_exts(key_value)
        elif key.endswith('patterns'):
            input_value = ensure_patterns(key_value)
        else:
            input_value = normalize_listlike(key_value, typ)
        nu_defaults[key] = _get_default_modular(input_value, default, add, typ)
    return nu_defaults
|
|
75
|
+
|
|
76
|
+
def define_defaults(**kwargs):
    """Build a ScanConfig from the settings resolved by derive_file_defaults(**kwargs)."""
    return ScanConfig(**derive_file_defaults(**kwargs))
|
|
79
|
+
|
|
80
|
+
def get_file_filters(*args,**kwargs):
    """
    Assemble everything a file scan needs from loose arguments.

    Returns:
        A 5-tuple ``(directories, cfg, allowed, include_files, recursive)``:
        the directories from ``ensure_directories``, the ScanConfig from
        ``define_defaults``, the caller's ``allowed`` predicate (or one built
        from the config when none was given), and the ``include_files`` /
        ``recursive`` flags, both defaulting to True.
    """
    directories = ensure_directories(*args,**kwargs)
    cfg = define_defaults(**kwargs)
    allowed = kwargs.get("allowed")
    if not allowed:
        allowed = make_allowed_predicate(cfg)
    include_files = kwargs.get('include_files',True)
    recursive = kwargs.get('recursive',True)
    return directories,cfg,allowed,include_files,recursive
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from ...imports import *
|
|
2
|
+
import re
|
|
3
|
+
def combine_params(*values,typ=None):
    """
    Merge several collections into one container.

    None entries are skipped. The container type is `typ` when given,
    otherwise the type of the first non-None value.

    * ``set`` / ``frozenset``: values are unioned.
    * ``list`` / ``tuple``: values are concatenated in order.
    * any other type: an empty ``typ()`` is returned, as before.

    Returns:
        The merged container, or None when every value is None.
    """
    merged = None
    for value in values:
        if value is None:
            continue
        typ = typ or type(value)
        if merged is None:
            merged = typ()
        if typ in (set, frozenset):
            merged = merged | typ(value)
        elif typ in (list, tuple):
            # tuple previously fell through both branches and was dropped.
            merged = merged + typ(value)
    return merged
|
|
16
|
+
def get_safe_kwargs(canonical_map, **kwargs):
    """
    Reduce arbitrary keyword arguments to the keys of `canonical_map`.

    Args:
        canonical_map: Mapping of canonical key -> iterable of aliases (or a
            falsy value for "no aliases"). Falls back to CANONICAL_MAP when
            falsy.
        **kwargs: Arbitrary keyword arguments; keys are compared
            case-insensitively and None values are ignored.

    Returns:
        A dict containing every canonical key. Values given under the
        canonical name or one of its aliases are kept; when several spellings
        supply list/set values these are unioned into a list, otherwise the
        later one wins. Keys that were not supplied map to None.
    """
    canonical_map = canonical_map or CANONICAL_MAP
    # Lowercase all keys for safety
    norm_kwargs = {k.lower(): v for k, v in kwargs.items() if v is not None}

    # Inverse lookup: alias -> canonical key. Incoming keys were lower-cased
    # above, so aliases are lower-cased too; otherwise a mixed-case alias
    # such as "excludePattern" could never match.
    alias_lookup = {
        (alias.lower() if isinstance(alias, str) else alias): canon
        for canon, aliases in canonical_map.items()
        if aliases
        for alias in aliases
    }

    # Preserve correctly named keys
    safe_kwargs = {k: v for k, v in norm_kwargs.items() if k in canonical_map}

    for k, v in norm_kwargs.items():
        if k not in alias_lookup:
            continue
        canonical_key = alias_lookup[k]
        prev = safe_kwargs.get(canonical_key)
        if prev is None:
            safe_kwargs[canonical_key] = v
        elif isinstance(prev, (set, list)) and isinstance(v, (set, list)):
            # merge when both spellings supplied collections
            safe_kwargs[canonical_key] = list(set(prev) | set(v))
        else:
            safe_kwargs[canonical_key] = v  # overwrite for non-iterables

    # fill defaults if missing
    for canon in canonical_map:
        safe_kwargs.setdefault(canon, None)

    return safe_kwargs
|
|
50
|
+
|
|
51
|
+
def create_canonical_map(*args,canonical_map=None):
    """
    Select a subset of a canonical alias map.

    Args:
        *args: Canonical keys to keep; falsy entries are ignored.
        canonical_map: Source map; CANONICAL_MAP is used when falsy.

    Returns:
        A new dict holding only the requested keys (a key missing from the
        source maps to None), or the whole source map when no keys were
        requested.
    """
    # Resolve the source map first so a caller-supplied map is honoured even
    # when no keys are requested (it used to be ignored in that case).
    canonical_map = canonical_map or CANONICAL_MAP
    keys = [arg for arg in args if arg]
    if not keys:
        return canonical_map

    return {key:canonical_map.get(key) for key in keys}
|
|
58
|
+
def get_safe_canonical_kwargs(*args,canonical_map=None,**kwargs):
    """
    Canonicalise `kwargs` against an alias map.

    When `canonical_map` is falsy, the map is built with
    ``create_canonical_map(*args)``; the positional arguments are the
    canonical keys to keep. The actual filtering is done by
    ``get_safe_kwargs``.
    """
    if not canonical_map:
        canonical_map = create_canonical_map(*args)
    return get_safe_kwargs(canonical_map=canonical_map,**kwargs)
|
|
62
|
+
def get_dir_filter_kwargs(**kwargs):
    """Canonicalise `kwargs` down to the single ``directories`` key via get_safe_kwargs."""
    directories_only = create_canonical_map("directories")
    return get_safe_kwargs(canonical_map=directories_only,**kwargs)
|
|
65
|
+
def get_file_filter_kwargs(**kwargs):
    """
    Normalize arbitrary keyword arguments for file scanning configuration.

    Only the eight ``allowed_*`` / ``exclude_*`` keys for exts, types, dirs
    and patterns are kept. Alias resolution is delegated to
    ``get_safe_canonical_kwargs`` and depends on the alias table it uses; the
    original documentation cites, for example, 'excluded_ext' ->
    'exclude_exts' and 'include_dirs' -> 'allowed_dirs'.
    """
    # Canonical keys, grouped by what they filter on
    canonical_keys = (
        "allowed_exts","exclude_exts",
        "allowed_types","exclude_types",
        "allowed_dirs","exclude_dirs",
        "allowed_patterns","exclude_patterns",
    )
    return get_safe_canonical_kwargs(*canonical_keys,**kwargs)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from .ensure_utils import *
|
|
2
|
+
def get_allowed_predicate(allowed=None):
    """
    Resolve the `allowed` argument into a callable path predicate.

    Args:
        allowed: False for "accept everything", a callable to use as-is, or
            None/True (or any other falsy value) for the default predicate.

    Returns:
        * a predicate that returns True for any arguments when `allowed`
          equals False;
        * `allowed` itself when it is truthy and not equal to True;
        * otherwise the predicate built by ``make_allowed_predicate`` from
          the default configuration.
    """
    # Equality (not identity) is kept so 0 / 1 behave like False / True.
    if allowed == False:
        def accept_all(*args):
            return True
        return accept_all

    if allowed and not (allowed == True):
        return allowed

    # make_allowed_predicate requires a config; calling it bare raised
    # TypeError. Build the default one the same way get_file_filters does.
    # Imported lazily because filter_params itself imports this module.
    from .filter_params import define_defaults
    return make_allowed_predicate(define_defaults())
|
|
12
|
+
def get_globs(items,recursive: bool = True,allowed=None):
    """
    Glob ``<item>/**/*`` for every non-empty item and return all matches.

    Args:
        items: One root or several; normalised with ``make_list``.
        recursive: Forwarded to ``glob.glob``.
        allowed: Optional predicate; when given, only non-empty matches for
            which it is truthy are kept.

    Returns:
        A flat list of matched paths, in the order the roots were given.
    """
    matches = []
    for root in make_list(items):
        if not root:
            continue
        found = glob.glob(os.path.join(root, "**/*"), recursive=recursive)
        if allowed:
            found = [path for path in found if path and allowed(path)]
        matches.extend(found)
    return matches
|
|
22
|
+
def get_allowed_files(items,allowed=True):
    """
    Keep the entries of `items` that are existing files accepted by the predicate.

    `allowed` is resolved with ``get_allowed_predicate``; empty entries are
    skipped.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    kept = []
    for item in items:
        if not item:
            continue
        if os.path.isfile(item) and predicate(item):
            kept.append(item)
    return kept
|
|
25
|
+
def get_allowed_dirs(items,allowed=False):
    """
    Keep the entries of `items` that are existing directories accepted by the predicate.

    `allowed` is resolved with ``get_allowed_predicate``; empty entries are
    skipped.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    kept = []
    for item in items:
        if not item:
            continue
        if os.path.isdir(item) and predicate(item):
            kept.append(item)
    return kept
|
|
28
|
+
|
|
29
|
+
def get_filtered_files(items,allowed=None,files=None):
    """
    Glob beneath `items` and return the files that pass the predicate.

    Args:
        items: Root path(s) handed to ``get_globs``.
        allowed: Resolved with ``get_allowed_predicate``.
        files: Paths already collected; matches found in it are skipped.
            Defaults to "none collected" (None replaces the former shared
            mutable ``[]`` default).

    Returns:
        A list of matched paths that are existing files, are not in `files`,
        and are accepted by the predicate.
    """
    allowed = get_allowed_predicate(allowed=allowed)
    # Snapshot into a set: one O(1) lookup per match instead of a list scan.
    already_seen = set(files or ())
    return [
        glob_path
        for glob_path in get_globs(items)
        if glob_path
        and os.path.isfile(glob_path)
        and glob_path not in already_seen
        and allowed(glob_path)
    ]
|
|
33
|
+
def get_filtered_dirs(items,allowed=None,dirs=None):
    """
    Glob beneath `items` and return the directories that pass the predicate.

    Args:
        items: Root path(s) handed to ``get_globs``.
        allowed: Resolved with ``get_allowed_predicate``.
        dirs: Paths already collected; matches found in it are skipped.
            Defaults to "none collected" (None replaces the former shared
            mutable ``[]`` default).

    Returns:
        A list of matched paths that are existing directories, are not in
        `dirs`, and are accepted by the predicate.
    """
    allowed = get_allowed_predicate(allowed=allowed)
    # Snapshot into a set: one O(1) lookup per match instead of a list scan.
    already_seen = set(dirs or ())
    return [
        glob_path
        for glob_path in get_globs(items)
        if glob_path
        and os.path.isdir(glob_path)
        and glob_path not in already_seen
        and allowed(glob_path)
    ]
|
|
37
|
+
|
|
38
|
+
def get_all_allowed_files(items,allowed=None):
    """
    Collect files from `items` and from the directories found beneath them.

    Starts with the result of ``get_allowed_files(items)`` and, for every
    directory returned by ``get_all_allowed_dirs(items)``, appends the files
    reported by ``get_filtered_files`` that are not already collected.

    NOTE(review): `allowed` is forwarded only to ``get_filtered_files``; the
    two initial lookups use their own defaults, as before - confirm this is
    intended.
    """
    dirs = get_all_allowed_dirs(items)
    files = get_allowed_files(items)
    for directory in dirs:
        files.extend(get_filtered_files(directory,allowed=allowed,files=files))
    return files
|
|
45
|
+
def get_all_allowed_dirs(items,allowed=None):
    """
    Collect the directories found beneath the directories in `items`.

    The starting directories come from ``get_allowed_dirs(items)``; for each
    of them ``get_filtered_dirs`` contributes the matching directories that
    have not been collected yet. Only those discovered directories are
    returned.
    """
    predicate = get_allowed_predicate(allowed=allowed)
    collected = []
    for root in get_allowed_dirs(items):
        collected.extend(get_filtered_dirs(root,allowed=predicate,dirs=collected))
    return collected
|
|
52
|
+
|
|
53
|
+
def make_allowed_predicate(cfg: "ScanConfig") -> Callable[[str], bool]:
    """
    Build a predicate that returns True if a given path is considered allowed
    under the given config.

    The predicate reads these attributes of `cfg`: ``exclude_dirs``,
    ``allowed_dirs``, ``allowed_patterns``, ``exclude_patterns``,
    ``allowed_exts``, ``exclude_exts``, ``allowed_types`` and
    ``exclude_types``. A falsy attribute disables that filter. All matching
    is done on the lower-cased path / file name.

    Returns:
        ``allowed(path=None, p=None)``: pass either a path string or a
        ready-made ``Path`` as `p`.
    """
    def allowed(path: str=None,p=None) -> bool:
        p = p or Path(path)
        name = p.name.lower()
        path_str = str(p).lower()

        # --------------------
        # A) directory filters
        # --------------------
        if cfg.exclude_dirs:
            for dpat in cfg.exclude_dirs:
                dpat_l = dpat.lower()
                # Reject when the token occurs anywhere in the path, or when
                # the entry is a directory whose name matches the glob.
                if dpat_l in path_str or (fnmatch.fnmatch(name, dpat_l) and p.is_dir()):
                    return False

        if cfg.allowed_dirs and cfg.allowed_dirs != ["*"]:
            # must be in at least one allowed dir
            if not any(
                fnmatch.fnmatch(path_str, f"*{dpat.lower()}*") for dpat in cfg.allowed_dirs
            ):
                return False

        # --------------------
        # B) pattern filters
        # --------------------
        if cfg.allowed_patterns and cfg.allowed_patterns != ["*"]:
            if not any(fnmatch.fnmatch(name, pat.lower()) for pat in cfg.allowed_patterns):
                return False

        if cfg.exclude_patterns:
            for pat in cfg.exclude_patterns:
                if fnmatch.fnmatch(name, pat.lower()):
                    return False

        # --------------------
        # C) extension filters (only meaningful for existing files)
        # --------------------
        if p.is_file():
            ext = p.suffix.lower()
            if cfg.allowed_exts and ext not in cfg.allowed_exts:
                return False
            if cfg.exclude_exts and ext in cfg.exclude_exts:
                return False

        # --------------------
        # D) type filters (optional)
        # --------------------
        # Tokens are lower-cased like every other filter above; path_str is
        # lower-case, so a mixed-case token could otherwise never match.
        if cfg.allowed_types and cfg.allowed_types != {"*"}:
            if not any(t.lower() in path_str for t in cfg.allowed_types):
                return False
        if cfg.exclude_types and cfg.exclude_types != {"*"}:
            if any(t.lower() in path_str for t in cfg.exclude_types):
                return False

        return True

    return allowed
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# file_reader.py
|
|
2
2
|
from ..imports import *
|
|
3
3
|
# -------- Public API drop-ins that mirror your originals --------
|
|
4
|
-
from .filter_params import *
|
|
5
4
|
from .file_filters import *
|
|
6
5
|
from .pdf_utils import *
|
|
7
6
|
# ---------------------------------------------------------------------------
|
|
@@ -1,76 +1,8 @@
|
|
|
1
1
|
from ..imports import *
|
|
2
2
|
from .file_filters import *
|
|
3
|
-
from .filter_params import *
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from typing import Optional, List, Set
|
|
6
5
|
|
|
7
|
-
def get_proper_type_str(string):
|
|
8
|
-
if not string:
|
|
9
|
-
return None
|
|
10
|
-
string_lower = string.lower()
|
|
11
|
-
items = {
|
|
12
|
-
"d":["dir","dirs","directory","directories","d","dirname"],
|
|
13
|
-
"f":["file","filepath","file_path","files","filepaths","file_paths","f"]
|
|
14
|
-
}
|
|
15
|
-
for key,values in items.items():
|
|
16
|
-
if string_lower in values:
|
|
17
|
-
return key
|
|
18
|
-
init = string_lower[0] if len(string_lower)>0 else None
|
|
19
|
-
if init in items:
|
|
20
|
-
return init
|
|
21
|
-
def check_path_type(
|
|
22
|
-
path: str,
|
|
23
|
-
user: Optional[str] = None,
|
|
24
|
-
host: Optional[str] = None,
|
|
25
|
-
user_as_host: Optional[str] = None,
|
|
26
|
-
use_shell: bool = False
|
|
27
|
-
) -> Literal["file", "directory", "missing", "unknown"]:
|
|
28
|
-
"""
|
|
29
|
-
Determine whether a given path is a file, directory, or missing.
|
|
30
|
-
Works locally or remotely (via SSH).
|
|
31
|
-
|
|
32
|
-
Args:
|
|
33
|
-
path: The path to check.
|
|
34
|
-
user, host, user_as_host: SSH parameters if remote.
|
|
35
|
-
use_shell: Force shell test instead of Python os.path.
|
|
36
|
-
Returns:
|
|
37
|
-
One of: 'file', 'directory', 'missing', or 'unknown'
|
|
38
|
-
"""
|
|
39
|
-
|
|
40
|
-
# --- remote check if user/host is given ---
|
|
41
|
-
if user_as_host or (user and host):
|
|
42
|
-
remote_target = user_as_host or f"{user}@{host}"
|
|
43
|
-
cmd = f"if [ -f '{path}' ]; then echo file; elif [ -d '{path}' ]; then echo directory; else echo missing; fi"
|
|
44
|
-
try:
|
|
45
|
-
result = subprocess.check_output(
|
|
46
|
-
["ssh", remote_target, cmd],
|
|
47
|
-
stderr=subprocess.DEVNULL,
|
|
48
|
-
text=True,
|
|
49
|
-
timeout=5
|
|
50
|
-
).strip()
|
|
51
|
-
return result if result in ("file", "directory", "missing") else "unknown"
|
|
52
|
-
except Exception:
|
|
53
|
-
return "unknown"
|
|
54
|
-
|
|
55
|
-
# --- local check ---
|
|
56
|
-
if not use_shell:
|
|
57
|
-
if os.path.isfile(path):
|
|
58
|
-
return "file"
|
|
59
|
-
elif os.path.isdir(path):
|
|
60
|
-
return "directory"
|
|
61
|
-
elif not os.path.exists(path):
|
|
62
|
-
return "missing"
|
|
63
|
-
return "unknown"
|
|
64
|
-
else:
|
|
65
|
-
# fallback using shell tests (useful for sandboxed contexts)
|
|
66
|
-
cmd = f"if [ -f '{path}' ]; then echo file; elif [ -d '{path}' ]; then echo directory; else echo missing; fi"
|
|
67
|
-
try:
|
|
68
|
-
output = subprocess.check_output(
|
|
69
|
-
cmd, shell=True, stderr=subprocess.DEVNULL, text=True
|
|
70
|
-
).strip()
|
|
71
|
-
return output if output in ("file", "directory", "missing") else "unknown"
|
|
72
|
-
except Exception:
|
|
73
|
-
return "unknown"
|
|
74
6
|
|
|
75
7
|
|
|
76
8
|
|
|
@@ -95,15 +27,15 @@ def get_find_cmd(
|
|
|
95
27
|
"""
|
|
96
28
|
# Normalize inputs into canonical form
|
|
97
29
|
kwargs = get_safe_canonical_kwargs(*args, **kwargs)
|
|
98
|
-
cfg = define_defaults(**kwargs)
|
|
30
|
+
cfg = kwargs.get('cfg') or define_defaults(**kwargs)
|
|
99
31
|
|
|
100
32
|
# Get directory list (may come from args or kwargs)
|
|
101
|
-
directories =
|
|
102
|
-
if not directories:
|
|
33
|
+
kwargs["directories"] = ensure_directories(*args, **kwargs)
|
|
34
|
+
if not kwargs["directories"]:
|
|
103
35
|
return []
|
|
104
36
|
|
|
105
37
|
# Build base command for all directories
|
|
106
|
-
dir_expr = " ".join(shlex.quote(d) for d in directories)
|
|
38
|
+
dir_expr = " ".join(shlex.quote(d) for d in kwargs["directories"])
|
|
107
39
|
cmd = [f"find {dir_expr}"]
|
|
108
40
|
|
|
109
41
|
# --- depth filters ---
|
|
@@ -183,18 +115,7 @@ def get_find_cmd(
|
|
|
183
115
|
|
|
184
116
|
return " ".join(cmd)
|
|
185
117
|
|
|
186
|
-
|
|
187
|
-
directories = []
|
|
188
|
-
for arg in args:
|
|
189
|
-
arg_str = str(arg)
|
|
190
|
-
if is_dir(arg_str,**kwargs):
|
|
191
|
-
directories.append(arg_str)
|
|
192
|
-
elif is_file(arg_str,**kwargs):
|
|
193
|
-
dirname = os.path.dirname(arg_str)
|
|
194
|
-
directories.append(dirname)
|
|
195
|
-
safe_directories = get_dir_filter_kwargs(**kwargs)
|
|
196
|
-
directories+= make_list(safe_directories.get('directories',[]))
|
|
197
|
-
return list(set([r for r in directories if r]))
|
|
118
|
+
|
|
198
119
|
|
|
199
120
|
def collect_globs(
|
|
200
121
|
*args,
|
|
@@ -212,9 +133,10 @@ def collect_globs(
|
|
|
212
133
|
- If file_type is "f" or "d" → returns a list of that type
|
|
213
134
|
- Supports SSH mode via `user_at_host`
|
|
214
135
|
"""
|
|
136
|
+
user_pass_host_key = get_user_pass_host_key(**kwargs)
|
|
215
137
|
kwargs["directories"] = ensure_directories(*args, **kwargs)
|
|
216
138
|
kwargs= get_safe_canonical_kwargs(**kwargs)
|
|
217
|
-
kwargs["cfg"] = define_defaults(**kwargs)
|
|
139
|
+
kwargs["cfg"] = kwargs.get('cfg') or define_defaults(**kwargs)
|
|
218
140
|
|
|
219
141
|
type_strs = {"f":"files","d":"dirs"}
|
|
220
142
|
file_type = get_proper_type_str(file_type)
|
|
@@ -227,11 +149,13 @@ def collect_globs(
|
|
|
227
149
|
type_str = type_strs.get(file_type)
|
|
228
150
|
# Remote path (SSH)
|
|
229
151
|
find_cmd = get_find_cmd(
|
|
152
|
+
directories=kwargs.get("directories"),
|
|
153
|
+
cfg=kwargs.get('cfg'),
|
|
230
154
|
mindepth=mindepth,
|
|
231
155
|
maxdepth=maxdepth,
|
|
232
156
|
depth=depth,
|
|
233
157
|
file_type=file_type,
|
|
234
|
-
**
|
|
158
|
+
**user_pass_host_key,
|
|
235
159
|
)
|
|
236
160
|
result = run_pruned_func(run_cmd,find_cmd,
|
|
237
161
|
**kwargs
|