abstract-utilities 0.2.2.387__py3-none-any.whl → 0.2.2.480__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of abstract-utilities might be problematic. Click here for more details.
- abstract_utilities/__init__.py +14 -43
- abstract_utilities/abstract_classes.py +49 -0
- abstract_utilities/class_utils.py +38 -3
- abstract_utilities/cmd_utils/imports/__init__.py +1 -0
- abstract_utilities/cmd_utils/imports/imports.py +10 -0
- abstract_utilities/cmd_utils/pexpect_utils.py +310 -0
- abstract_utilities/cmd_utils/user_utils.py +1 -1
- abstract_utilities/dynimport.py +7 -15
- abstract_utilities/env_utils/__init__.py +3 -0
- abstract_utilities/env_utils/abstractEnv.py +129 -0
- abstract_utilities/env_utils/envy_it.py +33 -0
- abstract_utilities/env_utils/imports/__init__.py +2 -0
- abstract_utilities/env_utils/imports/imports.py +8 -0
- abstract_utilities/env_utils/imports/utils.py +122 -0
- abstract_utilities/file_utils/__init__.py +3 -0
- abstract_utilities/file_utils/file_utils/__init__.py +8 -0
- abstract_utilities/file_utils/file_utils/file_filters.py +104 -0
- abstract_utilities/{robust_reader → file_utils/file_utils}/file_reader.py +5 -19
- abstract_utilities/{robust_readers/file_filters.py → file_utils/file_utils/file_utils.py} +5 -4
- abstract_utilities/{robust_readers → file_utils/file_utils}/filter_params.py +1 -38
- abstract_utilities/file_utils/file_utils/find_collect.py +154 -0
- abstract_utilities/file_utils/file_utils/imports/__init__.py +3 -0
- abstract_utilities/file_utils/file_utils/imports/constants.py +39 -0
- abstract_utilities/file_utils/file_utils/imports/file_functions.py +10 -0
- abstract_utilities/file_utils/file_utils/imports/imports.py +39 -0
- abstract_utilities/file_utils/file_utils/imports/module_imports.py +14 -0
- abstract_utilities/file_utils/file_utils/imports.py +10 -0
- abstract_utilities/file_utils/file_utils/map_utils.py +29 -0
- abstract_utilities/{robust_reader → file_utils/file_utils}/pdf_utils.py +1 -9
- abstract_utilities/file_utils/file_utils/type_checks.py +91 -0
- abstract_utilities/file_utils/imports/__init__.py +4 -0
- abstract_utilities/file_utils/imports/classes.py +381 -0
- abstract_utilities/file_utils/imports/clean_imps.py +158 -0
- abstract_utilities/file_utils/imports/constants.py +39 -0
- abstract_utilities/file_utils/imports/file_functions.py +10 -0
- abstract_utilities/file_utils/imports/imports.py +65 -0
- abstract_utilities/file_utils/imports/module_imports.py +13 -0
- abstract_utilities/file_utils/req.py +329 -0
- abstract_utilities/log_utils.py +1 -1
- abstract_utilities/path_utils.py +90 -6
- abstract_utilities/read_write_utils.py +250 -157
- abstract_utilities/robust_reader/__init__.py +1 -1
- abstract_utilities/robust_reader/imports/__init__.py +1 -0
- abstract_utilities/robust_reader/imports/imports.py +3 -0
- abstract_utilities/robust_readers/__init__.py +0 -1
- abstract_utilities/robust_readers/import_utils/__init__.py +1 -0
- abstract_utilities/robust_readers/import_utils/clean_imports.py +175 -0
- abstract_utilities/robust_readers/imports.py +8 -0
- abstract_utilities/robust_readers/initFuncGen.py +92 -76
- abstract_utilities/safe_utils.py +133 -0
- abstract_utilities/ssh_utils/__init__.py +3 -0
- abstract_utilities/ssh_utils/classes.py +127 -0
- abstract_utilities/ssh_utils/imports.py +10 -0
- abstract_utilities/ssh_utils/pexpect_utils.py +315 -0
- abstract_utilities/ssh_utils/utils.py +188 -0
- abstract_utilities/string_clean.py +40 -1
- abstract_utilities/string_utils.py +51 -0
- abstract_utilities/type_utils.py +25 -2
- {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/METADATA +1 -1
- abstract_utilities-0.2.2.480.dist-info/RECORD +92 -0
- abstract_utilities-0.2.2.387.dist-info/RECORD +0 -52
- {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/WHEEL +0 -0
- {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .abstractEnv import abstractEnv
|
|
3
|
+
def get_env_value(key:str=None,path:str=None,file_name:str=None,deep_scan=False):
    """
    Retrieve the value of an environment variable through ``abstractEnv``.

    Args:
        key (str, optional): The key to look up. Defaults to None.
        path (str, optional): The path handed to ``abstractEnv``. Defaults to None.
        file_name (str, optional): The name of the .env file. Defaults to None.
        deep_scan (bool, optional): Forwarded unchanged to ``abstractEnv``. Defaults to False.

    Returns:
        The ``env_value`` attribute of the constructed ``abstractEnv``
        (presumably the value if found, otherwise None -- confirm against abstractEnv).
    """
    # The docstring must be the first statement of the function, otherwise it is
    # just a discarded string expression and help()/__doc__ stay empty.
    abstract_env = abstractEnv(key=key, file_name=file_name, path=path, deep_scan=deep_scan)
    return abstract_env.env_value
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_env_path(key:str=None,path:str=None,file_name:str=None,deep_scan=False):
    """
    Return the ``env_path`` resolved by ``abstractEnv`` for the given lookup.

    Args:
        key (str, optional): The key to look up. Defaults to None.
        path (str, optional): The path handed to ``abstractEnv``. Defaults to None.
        file_name (str, optional): The name of the .env file. Defaults to None.
        deep_scan (bool, optional): Forwarded unchanged to ``abstractEnv``. Defaults to False.

    Returns:
        The ``env_path`` attribute of the constructed ``abstractEnv``
        (presumably the path of the .env file that was used -- confirm against abstractEnv).
    """
    # The docstring must be the first statement of the function, otherwise it is
    # just a discarded string expression and help()/__doc__ stay empty.
    abstract_env = abstractEnv(key=key, file_name=file_name, path=path, deep_scan=deep_scan)
    return abstract_env.env_path
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dotenv import load_dotenv
|
|
3
|
+
from ...string_clean import eatAll,eatInner,eatOuter,safe_split
|
|
4
|
+
from ...compare_utils import line_contains
|
|
5
|
+
from ...type_utils import is_list,is_bool
|
|
6
|
+
from ...path_utils import get_slash,path_join,if_not_last_child_join,get_home_folder,simple_path_join,is_file
|
|
7
|
+
# File name used as the default for ``file_name`` parameters in this package.
DEFAULT_FILE_NAME = '.env'
# Key used as the default for ``key`` parameters in this package.
DEFAULT_KEY = 'MY_PASSWORD'
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
def find_and_read_env_file(key:str=DEFAULT_KEY,file_name:str=DEFAULT_FILE_NAME, start_path:str=None):
    """
    Search a fixed list of directories for an environment file and read a key from it.

    The directories are tried in this order: ``start_path`` (only when it is
    given and differs from the current working directory), the current working
    directory, the home folder, ``<home>/.envy_all`` and ``<home>/envy_all``.
    The first file that yields a value for ``key`` wins.

    Args:
        key (str): Key to be retrieved from the .env file. Defaults to 'MY_PASSWORD'.
        file_name (str): Name of the .env file to be searched. Defaults to '.env'.
        start_path (str): Directory to try first. If None, the search starts
            from the current directory.

    Returns:
        str: The value corresponding to the key if found, otherwise None.
    """
    cwd = os.getcwd()
    home = get_home_folder()
    directories = [
        cwd,
        home,
        simple_path_join(home, '.envy_all'),
        simple_path_join(home, 'envy_all'),
    ]
    # Only prepend start_path when it adds a new location to the search.
    if start_path not in (None, cwd):
        directories.insert(0, start_path)

    for directory in directories:
        env_path = check_env_file(path=directory, file_name=file_name)
        # check_env_file returns False (a bool) when the file does not exist.
        if is_bool(env_path):
            continue
        value = search_for_env_key(path=env_path, key=key)
        if value is not None:
            return value
    return None
|
|
26
|
+
|
|
27
|
+
def search_for_env_key(key:str,path:str):
    """
    Scan a .env file line by line and return the value stored for ``key``.

    Args:
        key (str): The key to search for in the .env file.
        path (str): The path to the .env file.

    Returns:
        str: The value of the first matching line, otherwise None.
    """
    with open(path, "r") as env_file:
        for raw_line in env_file:
            # Portion of the line before '=' (as produced by safe_split).
            key_part = safe_split(raw_line, ['=', 0])
            if not line_contains(string=key_part, compare=key):
                continue
            # Drop the key portion, then strip separators and whitespace.
            remainder = raw_line[len(key_part):]
            return eatAll(remainder, [' ', '', '=']).strip()
|
|
44
|
+
|
|
45
|
+
def check_env_file(path:str,file_name:str=DEFAULT_FILE_NAME):
    """
    Check if the environment file exists in a specified path.

    Args:
        path (str): The path to check for the .env file.
        file_name (str): The name of the .env file. Defaults to '.env'.
            A falsy value falls back to the default name.

    Returns:
        str: The path of the .env file if it exists, otherwise False.
    """
    # Honour the caller's file_name; the parameter used to be ignored in
    # favour of DEFAULT_FILE_NAME.
    candidate = if_not_last_child_join(path=path, child=file_name or DEFAULT_FILE_NAME)
    return candidate if is_file(candidate) else False
|
|
61
|
+
|
|
62
|
+
def safe_env_load(path:str=None):
    """
    Safely load the .env file if it exists at a specified path.

    The file is only loaded when ``path`` points to an existing file whose
    last path component starts with a dot.

    Args:
        path (str): The path to load the .env file from. If None, no operation is performed.

    Returns:
        bool: True if the .env file is successfully loaded, otherwise False.
    """
    if path is None or not is_file(path):
        return False
    # Last path component, as produced by safe_split on the path separator.
    last_component = safe_split(path, [get_slash(), -1])
    if str(last_component)[0] != '.':
        return False
    load_dotenv(path)
    return True
|
|
79
|
+
|
|
80
|
+
def get_env_value(key:str=DEFAULT_KEY,path:str=None,file_name:str=DEFAULT_FILE_NAME):
    """
    Look up ``key`` either in the process environment or in a discovered .env file.

    When ``safe_env_load(path)`` succeeds the value is read with ``os.getenv``;
    otherwise the lookup falls back to ``find_and_read_env_file`` starting at
    the current working directory.

    Args:
        key (str): The key to search for. Defaults to 'MY_PASSWORD'.
        path (str): The path to the environment file. Defaults to None.
        file_name (str): The name of the environment file. Defaults to '.env'.

    Returns:
        str: The value of the environment variable if found, otherwise None.
    """
    if not safe_env_load(path):
        # NOTE(review): `path` is not forwarded as start_path here -- confirm this is intended.
        return find_and_read_env_file(file_name=file_name, key=key, start_path=os.getcwd())
    return os.getenv(key)
|
|
95
|
+
def split_eq(line):
    """
    Split a string at its first '=' and clean up both halves.

    Args:
        line (str): The string to be split.

    Returns:
        list: ``[key, value]`` after cleaning with eatOuter / eatAll.
        If '=' is not found, returns ``[line, None]``.
    """
    if '=' not in line:
        return [line, None]
    # partition splits on the first '=' only, so later '=' stay in the value.
    key_side, _, value_side = line.partition('=')
    cleaned_key = eatOuter(key_side, [' ', '', '\t'])
    cleaned_value = eatAll(value_side, [' ', '', '\t', '\n'])
    return [cleaned_key, cleaned_value]
|
|
110
|
+
def dotenv_load(path:str=None):
    """
    Safely load the .env file if it exists at a specified path.

    The file is only loaded when ``path`` is an existing file whose base name
    starts with a dot.

    Args:
        path (str): The path to load the .env file from. If None, no operation is performed.

    Returns:
        bool: True if the .env file is successfully loaded, otherwise False.
    """
    if not path or not os.path.isfile(path):
        return False
    if not os.path.basename(path).startswith('.'):
        return False
    load_dotenv(path)
    return True
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .filter_params import *
|
|
3
|
+
from .file_utils import *
|
|
4
|
+
##from abstract_utilities import make_list,get_media_exts, is_media_type
|
|
5
|
+
|
|
6
|
+
def collect_filepaths(
    directory: List[str],
    cfg: ScanConfig=None,
    allowed_exts: Optional[Set[str]] = False,
    unallowed_exts: Optional[Set[str]] = False,
    exclude_types: Optional[Set[str]] = False,
    exclude_dirs: Optional[List[str]] = False,
    exclude_patterns: Optional[List[str]] = False,
    add=False,
    allowed: Optional[Callable[[str], bool]] = None,
    **kwargs
) -> List[str]:
    """
    Collect file paths below one or more root directories.

    Args:
        directory: A root directory or list of roots (normalised with make_list;
            falsy entries are dropped).
        cfg: Scan configuration. When falsy, one is built with define_defaults
            from the filter keyword arguments below.
        allowed_exts, unallowed_exts, exclude_types, exclude_dirs,
        exclude_patterns, add: Forwarded to define_defaults when ``cfg`` is not given.
        allowed: Predicate deciding whether a path is kept. When falsy, it is
            built from ``cfg`` with make_allowed_predicate.
        **kwargs: Accepted and ignored.

    Returns:
        The collected paths, de-duplicated with first-seen order preserved.
    """
    if not cfg:
        cfg = define_defaults(
            allowed_exts=allowed_exts,
            unallowed_exts=unallowed_exts,
            exclude_types=exclude_types,
            exclude_dirs=exclude_dirs,
            exclude_patterns=exclude_patterns,
            add=add,
        )
    if not allowed:
        allowed = make_allowed_predicate(cfg)

    roots = [root for root in make_list(directory) if root]

    # Top-level pass: allowed directories, their globbed contents, allowed files.
    original_dirs = get_allowed_dirs(roots, allowed=allowed)
    files = get_allowed_files(get_globs(original_dirs), allowed=allowed)

    # Second pass over the filtered directories.
    for filtered_dir in get_filtered_dirs(original_dirs, allowed=allowed):
        files += get_filtered_files(filtered_dir, allowed=allowed, files=files)

    # dict keys keep insertion order, which de-duplicates while preserving order.
    return list(dict.fromkeys(files))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _fast_walk(
    root: Path,
    exts: Iterable[str],
    skip_dirs: Iterable[str] = (),
    skip_patterns: Iterable[str] = (),
) -> List[Path]:
    """
    Recursively collect files under ``root`` whose suffix is in ``exts``.

    Args:
        root: Directory to walk (uses ``Path.rglob("*")``).
        exts: Accepted suffixes, compared against the lower-cased file suffix.
        skip_dirs: Directory names (case-insensitive). Any file located below a
            directory with one of these names is ignored.
        skip_patterns: fnmatch patterns (lower-cased) tested against the
            lower-cased file name; matching files are ignored.

    Returns:
        Sorted list of unique, resolved paths.
    """
    exts = tuple(exts)
    skip_dirs = {sd.lower() for sd in (skip_dirs or ())}
    skip_patterns = tuple(sp.lower() for sp in (skip_patterns or ()))

    out = set()
    for p in root.rglob("*"):
        # nothing to collect for dirs
        if p.is_dir():
            continue

        # rglob cannot prune a subtree, so reject files whose parent chain
        # (relative to root) contains a skipped directory name. Previously the
        # check only skipped the directory entry itself, so its files were
        # still collected.
        if skip_dirs:
            parent_parts = p.relative_to(root).parts[:-1]
            if any(part.lower() in skip_dirs for part in parent_parts):
                continue

        # file filters
        name = p.name.lower()
        if any(fnmatch.fnmatch(name, pat) for pat in skip_patterns):
            continue
        if p.suffix.lower() in exts:
            out.add(p.resolve())

    # de-dup (set) and normalize (resolve) happen above; sort for stable output
    return sorted(out)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def enumerate_source_files(
    src_root: Path,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Iterable[str]] = None,
    fast_skip_dirs: Optional[Iterable[str]] = None,
    fast_skip_patterns: Optional[Iterable[str]] = None,
) -> List[Path]:
    """
    Enumerate source files below ``src_root`` in one of two modes.

    - With ``cfg``: delegate to collect_filepaths(...) and apply its full rules.
    - Without ``cfg``: fast walk via _fast_walk over ``exts`` (falls back to
      EXTS) with the optional light excludes.

    Returns:
        Sorted list of resolved paths.
    """
    src_root = Path(src_root)

    if cfg is None:
        # Fast mode
        return _fast_walk(
            src_root,
            exts or EXTS,
            skip_dirs=fast_skip_dirs or (),
            skip_patterns=fast_skip_patterns or (),
        )

    collected = collect_filepaths([str(src_root)], cfg=cfg)
    resolved = {Path(item).resolve() for item in collected}
    return sorted(resolved)
|
|
@@ -1,29 +1,15 @@
|
|
|
1
1
|
# file_reader.py
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from werkzeug.utils import secure_filename
|
|
9
|
-
from werkzeug.datastructures import FileStorage
|
|
10
|
-
from datetime import datetime
|
|
11
|
-
from typing import Dict, Union, List
|
|
2
|
+
from .imports import *
|
|
3
|
+
# -------- Public API drop-ins that mirror your originals --------
|
|
4
|
+
from .filter_params import *
|
|
5
|
+
from .file_filters import *
|
|
6
|
+
from .file_utils import *
|
|
12
7
|
from .pdf_utils import *
|
|
13
|
-
import pdfplumber
|
|
14
|
-
from pdf2image import convert_from_path # only used for OCR fallback
|
|
15
|
-
import pytesseract
|
|
16
|
-
from pathlib import Path
|
|
17
8
|
# ---------------------------------------------------------------------------
|
|
18
9
|
# NOTE: The following helper functions must be provided elsewhere:
|
|
19
10
|
# - convert_date_string(s: str) -> datetime
|
|
20
11
|
# - read_from_file(path: str) -> pd.DataFrame
|
|
21
12
|
# ---------------------------------------------------------------------------
|
|
22
|
-
DEFAULT_EXCLUDE_DIRS = {"node_modules", "__pycache__","backups","backup"}
|
|
23
|
-
DEFAULT_EXCLUDE_FILE_PATTERNS = {"__init__*", "*.tmp", "*.log"}
|
|
24
|
-
DEFAULT_EXCLUDE_TYPES = {"image","video","audio","presentation"}
|
|
25
|
-
def get_ext(item):
|
|
26
|
-
return item.split('.')[-1]
|
|
27
13
|
def _should_skip_dir(dir_name: str, exclude_dirs: set[str]) -> bool:
|
|
28
14
|
"""
|
|
29
15
|
Return True if dir_name matches one of the excluded directory names exactly.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
from typing import *
|
|
3
|
-
import fnmatch, os, glob
|
|
1
|
+
|
|
4
2
|
from .filter_params import *
|
|
3
|
+
from .imports import *
|
|
4
|
+
|
|
5
5
|
##from abstract_utilities import make_list,get_media_exts, is_media_type
|
|
6
6
|
def get_allowed_predicate(allowed=None):
|
|
7
7
|
if allowed != False:
|
|
@@ -15,7 +15,8 @@ def get_allowed_predicate(allowed=None):
|
|
|
15
15
|
return allowed
|
|
16
16
|
def get_globs(items,recursive: bool = True,allowed=None):
|
|
17
17
|
glob_paths = []
|
|
18
|
-
for item in make_list(items)
|
|
18
|
+
items = [item for item in make_list(items) if item]
|
|
19
|
+
for item in items:
|
|
19
20
|
pattern = os.path.join(item, "**/*") # include all files recursively\n
|
|
20
21
|
nuItems = glob.glob(pattern, recursive=recursive)
|
|
21
22
|
if allowed:
|
|
@@ -1,41 +1,4 @@
|
|
|
1
|
-
from
|
|
2
|
-
from ..type_utils import make_list,get_media_exts, is_media_type
|
|
3
|
-
from ..string_clean import eatAll
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
@dataclass
|
|
6
|
-
class ScanConfig:
|
|
7
|
-
allowed_exts: Set[str]
|
|
8
|
-
unallowed_exts: Set[str]
|
|
9
|
-
exclude_types: Set[str]
|
|
10
|
-
exclude_dirs: List[str] = field(default_factory=list)
|
|
11
|
-
exclude_patterns: List[str] = field(default_factory=list)
|
|
12
|
-
DEFAULT_ALLOWED_EXTS: Set[str] = {
|
|
13
|
-
".py", ".pyw", # python
|
|
14
|
-
".js", ".jsx", ".ts", ".tsx", ".mjs", # JS/TS
|
|
15
|
-
".html", ".htm", ".xml", # markup
|
|
16
|
-
".css", ".scss", ".sass", ".less", # styles
|
|
17
|
-
".json", ".yaml", ".yml", ".toml", ".ini", # configs
|
|
18
|
-
".cfg", ".md", ".markdown", ".rst", # docs
|
|
19
|
-
".sh", ".bash", ".env", # scripts/env
|
|
20
|
-
".txt" # plain text
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
DEFAULT_EXCLUDE_TYPES: Set[str] = {
|
|
24
|
-
"image", "video", "audio", "presentation",
|
|
25
|
-
"spreadsheet", "archive", "executable"
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
# never want these—even if they sneak into ALLOWED
|
|
29
|
-
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {'.bak', '.shp', '.cpg', '.dbf', '.shx','.geojson',".pyc",'.shx','.geojson','.prj','.sbn','.sbx'}
|
|
30
|
-
DEFAULT_UNALLOWED_EXTS = {e for e in _unallowed if e not in DEFAULT_ALLOWED_EXTS}
|
|
31
|
-
|
|
32
|
-
DEFAULT_EXCLUDE_DIRS: Set[str] = {
|
|
33
|
-
"node_modules", "__pycache__", "backups", "backup", "backs", "trash", "depriciated", "old", "__init__"
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
|
|
37
|
-
"__init__*", "*.tmp", "*.log", "*.lock", "*.zip","*~"
|
|
38
|
-
}
|
|
1
|
+
from .imports import *
|
|
39
2
|
def get_default_modular(obj,default=None,add=False,typ=set):
|
|
40
3
|
if obj in [False,True,None]:
|
|
41
4
|
if obj in [True,None]:
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .filter_params import *
|
|
3
|
+
from .file_filters import enumerate_source_files
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def check_path_type(
    path: str,
    user: Optional[str] = None,
    host: Optional[str] = None,
    user_as_host: Optional[str] = None,
    use_shell: bool = False
) -> Literal["file", "directory", "missing", "unknown"]:
    """
    Determine whether a given path is a file, directory, or missing.
    Works locally or remotely (via SSH).

    Args:
        path: The path to check.
        user, host, user_as_host: SSH parameters if remote. The check runs
            remotely when ``user_as_host`` is set or both ``user`` and ``host``
            are set; ``user_as_host`` takes precedence as the ssh target.
        use_shell: Force shell test instead of Python os.path.
    Returns:
        One of: 'file', 'directory', 'missing', or 'unknown'
        ('unknown' also covers any failure of the ssh/shell command).
    """
    import shlex  # local import: only needed to build the shell test

    known = ("file", "directory", "missing")
    # Quote the path so spaces, quotes and shell metacharacters cannot break
    # out of the test expression.
    quoted = shlex.quote(str(path))
    shell_test = (
        f"if [ -f {quoted} ]; then echo file; "
        f"elif [ -d {quoted} ]; then echo directory; "
        f"else echo missing; fi"
    )

    # --- remote check if user/host is given ---
    if user_as_host or (user and host):
        remote_target = user_as_host or f"{user}@{host}"
        try:
            result = subprocess.check_output(
                ["ssh", remote_target, shell_test],
                stderr=subprocess.DEVNULL,
                text=True,
                timeout=5
            ).strip()
        except Exception:
            # Best effort by design: connection/timeout errors map to 'unknown'.
            return "unknown"
        return result if result in known else "unknown"

    # --- local check ---
    if not use_shell:
        if os.path.isfile(path):
            return "file"
        if os.path.isdir(path):
            return "directory"
        if not os.path.exists(path):
            return "missing"
        # Exists but is neither a regular file nor a directory.
        return "unknown"

    # fallback using shell tests (useful for sandboxed contexts)
    try:
        output = subprocess.check_output(
            shell_test, shell=True, stderr=subprocess.DEVNULL, text=True
        ).strip()
    except Exception:
        return "unknown"
    return output if output in known else "unknown"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_find_cmd(
    directory: str,
    *,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,  # 'f' or 'd'
    name: Optional[str] = None,
    size: Optional[str] = None,
    mtime: Optional[str] = None,
    perm: Optional[str] = None,
    user: Optional[str] = None,
    **kwargs
) -> str:
    """
    Construct a Unix `find` command string from keyword args.

    Args:
        directory: Starting directory for `find`.
        mindepth, maxdepth: Emitted as -mindepth / -maxdepth when not None.
        depth: When not None, sets both -mindepth and -maxdepth to this value
            and overrides ``mindepth`` / ``maxdepth``.
        file_type: Only 'f' or 'd' produce a -type test; other values are ignored.
        name, size, mtime, perm, user: Emitted as the matching `find` test when truthy.
        **kwargs: Accepted and ignored.

    Returns:
        The command as a single string. Values are shell-quoted, so paths with
        spaces or metacharacters stay a single argument; simple values are
        emitted exactly as before.
    """
    import shlex  # local import: only needed for quoting

    cmd = [f"find {shlex.quote(str(directory))}"]

    if depth is not None:
        cmd += [f"-mindepth {depth}", f"-maxdepth {depth}"]
    else:
        if mindepth is not None:
            cmd.append(f"-mindepth {mindepth}")
        if maxdepth is not None:
            cmd.append(f"-maxdepth {maxdepth}")

    if file_type in ("f", "d"):
        cmd.append(f"-type {file_type}")
    if name:
        # Always single-quoted (as before) so glob characters reach `find`
        # untouched; embedded single quotes are escaped.
        escaped_name = str(name).replace("'", "'\"'\"'")
        cmd.append(f"-name '{escaped_name}'")
    if size:
        cmd.append(f"-size {shlex.quote(str(size))}")
    if mtime:
        cmd.append(f"-mtime {shlex.quote(str(mtime))}")
    if perm:
        cmd.append(f"-perm {shlex.quote(str(perm))}")
    if user:
        cmd.append(f"-user {shlex.quote(str(user))}")

    return " ".join(cmd)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def collect_globs(
    directory: str,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Set[str]] = None,
    patterns: Optional[List[str]] = None,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,
    user_at_host: Optional[str] = None,
    add: bool = False,
    **kwargs
) -> List[str]:
    """
    Collect file or directory paths below ``directory``.

    - With ``user_at_host``: build a `find` command (get_find_cmd) and return
      whatever run_cmd yields for it on the remote host.
    - Otherwise: walk locally with ``Path.rglob("*")`` and filter by
      ``file_type`` ('f' / 'd'), ``exts`` and ``patterns``. The depth arguments
      are not applied in this local branch.

    Returns:
        Local mode: sorted list of resolved path strings.
    """
    cfg = cfg or define_defaults(add=add)
    directory = str(directory)
    exts = ensure_exts(exts)
    patterns = ensure_patterns(patterns)

    # Remote path via SSH
    if user_at_host:
        # Only truthy extra options are forwarded to the find builder.
        extra_opts = {opt: val for opt, val in kwargs.items() if val}
        find_cmd = get_find_cmd(
            directory,
            mindepth=mindepth,
            maxdepth=maxdepth,
            depth=depth,
            file_type=file_type,
            **extra_opts,
        )
        return run_cmd(find_cmd, user_at_host=user_at_host)

    # Local path (Python-native walk)
    def _keep(candidate):
        # Same checks, same order, as a chain of early exits.
        if file_type == "f" and not candidate.is_file():
            return False
        if file_type == "d" and not candidate.is_dir():
            return False
        if exts and candidate.suffix.lower() not in exts:
            return False
        if patterns and not any(candidate.match(pat) for pat in patterns):
            return False
        return True

    return sorted(
        str(candidate.resolve())
        for candidate in Path(directory).rglob("*")
        if _keep(candidate)
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .module_imports import *
|
|
3
|
+
@dataclass
class ScanConfig:
    """Bundle of filter settings for a file scan.

    The three set fields are required; the two list fields default to a fresh
    empty list per instance.
    """
    # presumably the file extensions a scan accepts -- confirm against the predicate builder
    allowed_exts: Set[str]
    # presumably the file extensions a scan always rejects -- confirm
    unallowed_exts: Set[str]
    # presumably media-type categories to exclude (e.g. "image") -- confirm
    exclude_types: Set[str]
    # directory names to exclude; default_factory avoids a shared mutable default
    exclude_dirs: List[str] = field(default_factory=list)
    # file-name patterns to exclude; default_factory avoids a shared mutable default
    exclude_patterns: List[str] = field(default_factory=list)
|
|
10
|
+
# Extensions accepted by default.
DEFAULT_ALLOWED_EXTS: Set[str] = {
    # python
    ".py", ".pyw",
    # JS/TS
    ".js", ".jsx", ".ts", ".tsx", ".mjs",
    # markup
    ".html", ".htm", ".xml",
    # styles
    ".css", ".scss", ".sass", ".less",
    # configs
    ".json", ".yaml", ".yml", ".toml", ".ini",
    # docs
    ".cfg", ".md", ".markdown", ".rst",
    # scripts/env
    ".sh", ".bash", ".env",
    # plain text
    ".txt",
}

# Media-type categories excluded by default.
DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image",
    "video",
    "audio",
    "presentation",
    "spreadsheet",
    "archive",
    "executable",
}

# never want these—even if they sneak into ALLOWED
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    '.bak', '.shp', '.cpg', '.dbf', '.shx', '.geojson',
    ".pyc", '.prj', '.sbn', '.sbx',
}
# Anything explicitly allowed above is removed from the unallowed set.
DEFAULT_UNALLOWED_EXTS = _unallowed - DEFAULT_ALLOWED_EXTS

# Directory names skipped by default.
# NOTE(review): "depriciated" looks like a misspelling of "deprecated"; it is
# kept as-is because it is matched against real directory names.
DEFAULT_EXCLUDE_DIRS: Set[str] = {
    "node_modules",
    "old",
    "__pycache__",
    "backups",
    "backup",
    "backs",
    "trash",
    "depriciated",
    "__init__",
}

# File-name patterns skipped by default.
DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
    "__init__*",
    "*.tmp",
    "*.log",
    "*.lock",
    "*.zip",
    "*~",
}

# Matches "<user>@<host>:/absolute/path" and captures "host" (user@host) and "path".
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
# Type alias for an optional path predicate.
AllowedPredicate = Optional[Callable[[str], bool]]
# Alias of DEFAULT_EXCLUDE_PATTERNS under a second name.
DEFAULT_EXCLUDE_FILE_PATTERNS = DEFAULT_EXCLUDE_PATTERNS
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
def get_caller_path(i=None):
    """
    Return the absolute path of the file that called this function.

    Args:
        i (int, optional): Index into ``inspect.stack()``; 1 (the default, also
            used for any falsy value) is the direct caller.

    Returns:
        str: Absolute path of the selected frame's source file.
    """
    # `i` used to be read without being a parameter, which raised
    # UnboundLocalError on every call; it now mirrors get_caller_dir(i=None).
    i = i or 1
    frame = inspect.stack()[i]
    return os.path.abspath(frame.filename)
|
|
6
|
+
def get_caller_dir(i=None):
    """
    Return the absolute directory of the file that called this function.

    Args:
        i (int, optional): Index into ``inspect.stack()``; 1 (the default, also
            used for any falsy value) is the direct caller.

    Returns:
        str: Directory containing the selected frame's source file.
    """
    depth = i if i else 1
    caller_file = inspect.stack()[depth].filename
    return os.path.dirname(os.path.abspath(caller_file))
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# ============================================================
|
|
2
|
+
# abstract_utilities/imports/imports.py
|
|
3
|
+
# Global imports hub — everything imported here will be
|
|
4
|
+
# automatically available to any module that does:
|
|
5
|
+
# from ..imports import *
|
|
6
|
+
# ============================================================
|
|
7
|
+
# ---- Core standard library modules -------------------------
|
|
8
|
+
import os, sys, re, shlex, glob, platform, textwrap, subprocess, inspect, json, time
|
|
9
|
+
import tempfile, shutil, logging, pathlib, fnmatch, importlib, importlib.util, types
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from types import ModuleType
|
|
13
|
+
|
|
14
|
+
# ---- Dataclasses and typing --------------------------------
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import (
|
|
17
|
+
Any, Optional, List, Dict, Set, Tuple,
|
|
18
|
+
Iterable, Callable, Literal, Union, TypeVar
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# ---- Common 3rd-party dependencies --------------------------
|
|
22
|
+
import pandas as pd
|
|
23
|
+
import geopandas as gpd
|
|
24
|
+
import pytesseract
|
|
25
|
+
import pdfplumber
|
|
26
|
+
import PyPDF2
|
|
27
|
+
import ezodf
|
|
28
|
+
from pdf2image import convert_from_path
|
|
29
|
+
from werkzeug.utils import secure_filename
|
|
30
|
+
from werkzeug.datastructures import FileStorage
|
|
31
|
+
|
|
32
|
+
# ---- Helpers ------------------------------------------------
|
|
33
|
+
import textwrap as tw
|
|
34
|
+
from pprint import pprint
|
|
35
|
+
|
|
36
|
+
# ============================================================
|
|
37
|
+
# AUTO-EXPORT ALL NON-PRIVATE NAMES
|
|
38
|
+
# ============================================================
|
|
39
|
+
__all__ = [name for name in globals() if not name.startswith("_")]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from ....string_clean import eatAll
|
|
3
|
+
from ....list_utils import make_list
|
|
4
|
+
from ....type_utils import get_media_exts, is_media_type, MIME_TYPES, is_str
|
|
5
|
+
from ....ssh_utils import *
|
|
6
|
+
from ....env_utils import *
|
|
7
|
+
from ....read_write_utils import *
|
|
8
|
+
from ....abstract_classes import SingletonMeta
|
|
9
|
+
from ....string_utils import get_from_kwargs
|
|
10
|
+
from ....abstract_classes import run_pruned_func
|
|
11
|
+
from ....class_utils import get_caller, get_caller_path, get_caller_dir
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
__all__ = [name for name in globals() if not name.startswith("_")]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from ..imports import *
|
|
2
|
+
from typing import *
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
@dataclass
class ScanConfig:
    """Bundle of filter settings for a file scan.

    The three set fields are required; the two list fields default to a fresh
    empty list per instance.
    """
    # presumably the file extensions a scan accepts -- confirm against the predicate builder
    allowed_exts: Set[str]
    # presumably the file extensions a scan always rejects -- confirm
    unallowed_exts: Set[str]
    # presumably media-type categories to exclude (e.g. "image") -- confirm
    exclude_types: Set[str]
    # directory names to exclude; default_factory avoids a shared mutable default
    exclude_dirs: List[str] = field(default_factory=list)
    # file-name patterns to exclude; default_factory avoids a shared mutable default
    exclude_patterns: List[str] = field(default_factory=list)
|