abstract-utilities 0.2.2.387__py3-none-any.whl → 0.2.2.480__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of abstract-utilities might be problematic. Click here for more details.

Files changed (63) hide show
  1. abstract_utilities/__init__.py +14 -43
  2. abstract_utilities/abstract_classes.py +49 -0
  3. abstract_utilities/class_utils.py +38 -3
  4. abstract_utilities/cmd_utils/imports/__init__.py +1 -0
  5. abstract_utilities/cmd_utils/imports/imports.py +10 -0
  6. abstract_utilities/cmd_utils/pexpect_utils.py +310 -0
  7. abstract_utilities/cmd_utils/user_utils.py +1 -1
  8. abstract_utilities/dynimport.py +7 -15
  9. abstract_utilities/env_utils/__init__.py +3 -0
  10. abstract_utilities/env_utils/abstractEnv.py +129 -0
  11. abstract_utilities/env_utils/envy_it.py +33 -0
  12. abstract_utilities/env_utils/imports/__init__.py +2 -0
  13. abstract_utilities/env_utils/imports/imports.py +8 -0
  14. abstract_utilities/env_utils/imports/utils.py +122 -0
  15. abstract_utilities/file_utils/__init__.py +3 -0
  16. abstract_utilities/file_utils/file_utils/__init__.py +8 -0
  17. abstract_utilities/file_utils/file_utils/file_filters.py +104 -0
  18. abstract_utilities/{robust_reader → file_utils/file_utils}/file_reader.py +5 -19
  19. abstract_utilities/{robust_readers/file_filters.py → file_utils/file_utils/file_utils.py} +5 -4
  20. abstract_utilities/{robust_readers → file_utils/file_utils}/filter_params.py +1 -38
  21. abstract_utilities/file_utils/file_utils/find_collect.py +154 -0
  22. abstract_utilities/file_utils/file_utils/imports/__init__.py +3 -0
  23. abstract_utilities/file_utils/file_utils/imports/constants.py +39 -0
  24. abstract_utilities/file_utils/file_utils/imports/file_functions.py +10 -0
  25. abstract_utilities/file_utils/file_utils/imports/imports.py +39 -0
  26. abstract_utilities/file_utils/file_utils/imports/module_imports.py +14 -0
  27. abstract_utilities/file_utils/file_utils/imports.py +10 -0
  28. abstract_utilities/file_utils/file_utils/map_utils.py +29 -0
  29. abstract_utilities/{robust_reader → file_utils/file_utils}/pdf_utils.py +1 -9
  30. abstract_utilities/file_utils/file_utils/type_checks.py +91 -0
  31. abstract_utilities/file_utils/imports/__init__.py +4 -0
  32. abstract_utilities/file_utils/imports/classes.py +381 -0
  33. abstract_utilities/file_utils/imports/clean_imps.py +158 -0
  34. abstract_utilities/file_utils/imports/constants.py +39 -0
  35. abstract_utilities/file_utils/imports/file_functions.py +10 -0
  36. abstract_utilities/file_utils/imports/imports.py +65 -0
  37. abstract_utilities/file_utils/imports/module_imports.py +13 -0
  38. abstract_utilities/file_utils/req.py +329 -0
  39. abstract_utilities/log_utils.py +1 -1
  40. abstract_utilities/path_utils.py +90 -6
  41. abstract_utilities/read_write_utils.py +250 -157
  42. abstract_utilities/robust_reader/__init__.py +1 -1
  43. abstract_utilities/robust_reader/imports/__init__.py +1 -0
  44. abstract_utilities/robust_reader/imports/imports.py +3 -0
  45. abstract_utilities/robust_readers/__init__.py +0 -1
  46. abstract_utilities/robust_readers/import_utils/__init__.py +1 -0
  47. abstract_utilities/robust_readers/import_utils/clean_imports.py +175 -0
  48. abstract_utilities/robust_readers/imports.py +8 -0
  49. abstract_utilities/robust_readers/initFuncGen.py +92 -76
  50. abstract_utilities/safe_utils.py +133 -0
  51. abstract_utilities/ssh_utils/__init__.py +3 -0
  52. abstract_utilities/ssh_utils/classes.py +127 -0
  53. abstract_utilities/ssh_utils/imports.py +10 -0
  54. abstract_utilities/ssh_utils/pexpect_utils.py +315 -0
  55. abstract_utilities/ssh_utils/utils.py +188 -0
  56. abstract_utilities/string_clean.py +40 -1
  57. abstract_utilities/string_utils.py +51 -0
  58. abstract_utilities/type_utils.py +25 -2
  59. {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/METADATA +1 -1
  60. abstract_utilities-0.2.2.480.dist-info/RECORD +92 -0
  61. abstract_utilities-0.2.2.387.dist-info/RECORD +0 -52
  62. {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/WHEEL +0 -0
  63. {abstract_utilities-0.2.2.387.dist-info → abstract_utilities-0.2.2.480.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,33 @@
1
+ from .imports import *
2
+ from .abstractEnv import abstractEnv
3
def get_env_value(key:str=None,path:str=None,file_name:str=None,deep_scan=False):
    """
    Look up an environment value through ``abstractEnv``.

    Args:
        key (str, optional): The key to look up. Defaults to None.
        path (str, optional): Passed to ``abstractEnv`` as ``path``. Defaults to None.
        file_name (str, optional): Passed to ``abstractEnv`` as ``file_name``. Defaults to None.
        deep_scan (bool, optional): Passed to ``abstractEnv`` unchanged. Defaults to False.

    Returns:
        The ``env_value`` attribute of the ``abstractEnv`` instance built from
        the arguments (presumably the value string, or None when not found --
        confirm against ``abstractEnv``).
    """
    # The docstring must be the first statement of the function to be picked
    # up by help()/inspect; it previously sat after the constructor call.
    abstract_env = abstractEnv(key=key, file_name=file_name, path=path, deep_scan=deep_scan)
    return abstract_env.env_value
18
+
19
+
20
def get_env_path(key:str=None,path:str=None,file_name:str=None,deep_scan=False):
    """
    Look up the env-file path resolved by ``abstractEnv``.

    Args:
        key (str, optional): The key to look up. Defaults to None.
        path (str, optional): Passed to ``abstractEnv`` as ``path``. Defaults to None.
        file_name (str, optional): Passed to ``abstractEnv`` as ``file_name``. Defaults to None.
        deep_scan (bool, optional): Passed to ``abstractEnv`` unchanged. Defaults to False.

    Returns:
        The ``env_path`` attribute of the ``abstractEnv`` instance built from
        the arguments (presumably the path of the env file that was used --
        confirm against ``abstractEnv``).
    """
    # The docstring must be the first statement of the function; the previous
    # text was also a copy of get_env_value's and described the wrong result.
    abstract_env = abstractEnv(key=key, file_name=file_name, path=path, deep_scan=deep_scan)
    return abstract_env.env_path
@@ -0,0 +1,2 @@
1
+ from .imports import *
2
+ from .utils import *
@@ -0,0 +1,8 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from ...string_clean import eatAll,eatInner,eatOuter,safe_split
4
+ from ...compare_utils import line_contains
5
+ from ...type_utils import is_list,is_bool
6
+ from ...path_utils import get_slash,path_join,if_not_last_child_join,get_home_folder,simple_path_join,is_file
7
+ DEFAULT_FILE_NAME = '.env'
8
+ DEFAULT_KEY = 'MY_PASSWORD'
@@ -0,0 +1,122 @@
1
+ from .imports import *
2
def find_and_read_env_file(key:str=DEFAULT_KEY,file_name:str=DEFAULT_FILE_NAME, start_path:str=None):
    """
    Search a fixed list of directories for an env file and read one key from it.

    The directories are tried in this order: ``start_path`` (only when it is
    given and differs from the current working directory), the current working
    directory, the home folder, ``<home>/.envy_all`` and ``<home>/envy_all``.

    Args:
        key (str): Key to retrieve. Defaults to DEFAULT_KEY.
        file_name (str): Name of the env file. Defaults to DEFAULT_FILE_NAME.
        start_path (str): Extra directory to try first. Defaults to None.

    Returns:
        str: The first non-None value found, otherwise None.
    """
    cwd = os.getcwd()
    home = get_home_folder()
    candidates = [
        cwd,
        home,
        simple_path_join(home, '.envy_all'),
        simple_path_join(home, 'envy_all'),
    ]
    # Only prepend start_path when it adds a new location to the search.
    if start_path not in [None, cwd]:
        candidates.insert(0, start_path)

    for directory in candidates:
        env_path = check_env_file(path=directory, file_name=file_name)
        if is_bool(env_path):
            # check_env_file signals "no file here" with False.
            continue
        value = search_for_env_key(path=env_path, key=key)
        if value is not None:
            return value
    return None
26
+
27
def search_for_env_key(key:str,path:str):
    """
    Scan an env file line by line for a key and return its value.

    Args:
        key (str): The key to look for.
        path (str): Path of the file to read.

    Returns:
        str: The cleaned text following the key on the first matching line,
        otherwise None.
    """
    with open(path, "r") as env_file:
        for raw_line in env_file:
            # safe_split(..., ['=', 0]) presumably yields the text before the
            # first '=' -- confirm against string_clean.safe_split.
            key_part = safe_split(raw_line, ['=', 0])
            if not line_contains(string=key_part, compare=key):
                continue
            # Everything after the key part, with separators and padding removed.
            remainder = raw_line[len(key_part):]
            return eatAll(remainder, [' ', '', '=']).strip()
    return None
44
+
45
def check_env_file(path:str,file_name:str=DEFAULT_FILE_NAME):
    """
    Check whether the env file exists at a given location.

    Args:
        path (str): Directory (or file path) to check.
        file_name (str): Name of the env file. Defaults to DEFAULT_FILE_NAME;
            a falsy value (e.g. None) also falls back to that default.

    Returns:
        str: The path of the env file if it exists, otherwise False.
    """
    # Honour the caller's file_name; the previous code always joined
    # DEFAULT_FILE_NAME, silently ignoring this parameter.
    env_path = if_not_last_child_join(path=path, child=file_name or DEFAULT_FILE_NAME)
    if is_file(env_path):
        return env_path
    return False
61
+
62
def safe_env_load(path:str=None):
    """
    Load an env file with ``load_dotenv`` when the path points at a dot-file.

    Args:
        path (str): Path of the env file. If None, nothing is loaded.

    Returns:
        bool: True if ``load_dotenv`` was called for the file, otherwise False.
    """
    if path is None or not is_file(path):
        return False
    # Last path component, as split on the platform slash.
    last_part = str(safe_split(path, [get_slash(), -1]))
    if last_part[0] != '.':
        return False
    load_dotenv(path)
    return True
79
+
80
def get_env_value(key:str=DEFAULT_KEY,path:str=None,file_name:str=DEFAULT_FILE_NAME):
    """
    Retrieve the value of an environment variable.

    When ``path`` can be loaded by ``safe_env_load`` the value is read from the
    process environment; otherwise the env file is searched for starting at the
    current working directory.

    Args:
        key (str): The key to look up. Defaults to DEFAULT_KEY.
        path (str): Path of the env file to load. Defaults to None.
        file_name (str): Name of the env file used by the fallback search.
            Defaults to DEFAULT_FILE_NAME.

    Returns:
        str: The value if found, otherwise None.
    """
    loaded = safe_env_load(path)
    if not loaded:
        return find_and_read_env_file(file_name=file_name, key=key, start_path=os.getcwd())
    return os.getenv(key)
95
def split_eq(line):
    """
    Split a string at its first '=' into a cleaned key and value.

    Args:
        line (str): The string to split.

    Returns:
        list: ``[key, value]`` after cleaning with ``eatOuter``/``eatAll``.
        If the string holds no '=', ``[line, None]``.
    """
    key_side, separator, value_side = line.partition('=')
    if not separator:
        return [line, None]
    cleaned_key = eatOuter(key_side, [' ', '', '\t'])
    cleaned_value = eatAll(value_side, [' ', '', '\t', '\n'])
    return [cleaned_key, cleaned_value]
110
def dotenv_load(path:str=None):
    """
    Load an env file with ``load_dotenv`` when the path is an existing dot-file.

    Args:
        path (str): Path of the env file. If None or empty, nothing is loaded.

    Returns:
        bool: True if ``load_dotenv`` was called for the file, otherwise False.
    """
    if not path or not os.path.isfile(path):
        return False
    # Only files whose name starts with '.' (e.g. ".env") are loaded.
    if not os.path.basename(path).startswith('.'):
        return False
    load_dotenv(path)
    # Previously the failure paths fell off the end and returned None even
    # though the contract is a bool; they now return False explicitly.
    return True
@@ -0,0 +1,3 @@
1
+ from .imports import *
2
+ from .file_utils import *
3
+ from .req import call_for_all_tabs,get_for_all_tabs
@@ -0,0 +1,8 @@
1
+ from .file_filters import *
2
+ from .file_utils import *
3
+ from .filter_params import *
4
+ from .map_utils import *
5
+ from .pdf_utils import *
6
+ from .file_reader import *
7
+ from .find_collect import *
8
+ from .type_checks import *
@@ -0,0 +1,104 @@
1
+ from .imports import *
2
+ from .filter_params import *
3
+ from .file_utils import *
4
+ ##from abstract_utilities import make_list,get_media_exts, is_media_type
5
+
6
def collect_filepaths(
    directory: List[str],
    cfg: ScanConfig=None,
    allowed_exts: Optional[Set[str]] = False,
    unallowed_exts: Optional[Set[str]] = False,
    exclude_types: Optional[Set[str]] = False,
    exclude_dirs: Optional[List[str]] = False,
    exclude_patterns: Optional[List[str]] = False,
    add=False,
    allowed: Optional[Callable[[str], bool]] = None,
    **kwargs
) -> List[str]:
    """
    Collect file paths under one or more directories, filtered by a predicate.

    Args:
        directory: A directory or list of directories; falsy entries are dropped.
        cfg: Scan configuration. When falsy, one is built with
            ``define_defaults`` from the filter arguments below.
        allowed_exts, unallowed_exts, exclude_types, exclude_dirs,
        exclude_patterns, add: Forwarded to ``define_defaults`` when ``cfg``
            is not supplied.
        allowed: Predicate applied to paths. When falsy, it is built from
            ``cfg`` with ``make_allowed_predicate``.
        **kwargs: Accepted and ignored.

    Returns:
        The collected paths with duplicates removed, first occurrence kept.
    """
    if not cfg:
        cfg = define_defaults(
            allowed_exts=allowed_exts,
            unallowed_exts=unallowed_exts,
            exclude_types=exclude_types,
            exclude_dirs=exclude_dirs,
            exclude_patterns=exclude_patterns,
            add = add
        )
    if not allowed:
        allowed = make_allowed_predicate(cfg)
    roots = [root for root in make_list(directory) if root]

    original_dirs = get_allowed_dirs(roots, allowed=allowed)
    files = get_allowed_files(get_globs(original_dirs), allowed=allowed)

    for sub_dir in get_filtered_dirs(original_dirs, allowed=allowed):
        files += get_filtered_files(sub_dir, allowed=allowed, files=files)

    # dict keys keep insertion order, so this de-dupes while preserving order.
    return list(dict.fromkeys(files))
45
+
46
+
47
+ def _fast_walk(
48
+ root: Path,
49
+ exts: Iterable[str],
50
+ skip_dirs: Iterable[str] = (),
51
+ skip_patterns: Iterable[str] = (),
52
+ ) -> List[Path]:
53
+ exts = tuple(exts)
54
+ skip_dirs = set(sd.lower() for sd in skip_dirs or ())
55
+ skip_patterns = tuple(sp.lower() for sp in (skip_patterns or ()))
56
+
57
+ out = []
58
+ for p in root.rglob("*"):
59
+ # skip directories by name hit
60
+ if p.is_dir():
61
+ name = p.name.lower()
62
+ if name in skip_dirs:
63
+ # rglob doesn't let us prune mid-iteration cleanly; we just won't collect under it
64
+ continue
65
+ # nothing to collect for dirs
66
+ continue
67
+
68
+ # file filters
69
+ name = p.name.lower()
70
+ if any(fnmatch.fnmatch(name, pat) for pat in skip_patterns):
71
+ continue
72
+ if p.suffix.lower() in exts:
73
+ out.append(p)
74
+
75
+ # de-dup and normalize
76
+ return sorted({pp.resolve() for pp in out})
77
+
78
+
79
def enumerate_source_files(
    src_root: Path,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Iterable[str]] = None,
    fast_skip_dirs: Optional[Iterable[str]] = None,
    fast_skip_patterns: Optional[Iterable[str]] = None,
) -> List[Path]:
    """
    Enumerate source files under ``src_root`` in one of two modes.

    - With ``cfg``: delegate to ``collect_filepaths`` and return the resolved,
      de-duplicated, sorted paths.
    - Without ``cfg``: fast walk via ``_fast_walk`` over ``exts`` (falling back
      to ``EXTS``) with the optional ``fast_skip_*`` excludes.
    """
    src_root = Path(src_root)

    if cfg is None:
        # Fast mode
        return _fast_walk(
            src_root,
            exts or EXTS,
            skip_dirs=fast_skip_dirs or (),
            skip_patterns=fast_skip_patterns or (),
        )

    collected = collect_filepaths([str(src_root)], cfg=cfg)
    resolved = {Path(item).resolve() for item in collected}
    return sorted(resolved)
@@ -1,29 +1,15 @@
1
1
  # file_reader.py
2
- import os,tempfile,shutil,logging,ezodf,fnmatch
3
- from typing import Union
4
- import pandas as pd
5
- import geopandas as gpd
6
- from abstract_utilities import *
7
- from datetime import datetime
8
- from werkzeug.utils import secure_filename
9
- from werkzeug.datastructures import FileStorage
10
- from datetime import datetime
11
- from typing import Dict, Union, List
2
+ from .imports import *
3
+ # -------- Public API drop-ins that mirror your originals --------
4
+ from .filter_params import *
5
+ from .file_filters import *
6
+ from .file_utils import *
12
7
  from .pdf_utils import *
13
- import pdfplumber
14
- from pdf2image import convert_from_path # only used for OCR fallback
15
- import pytesseract
16
- from pathlib import Path
17
8
  # ---------------------------------------------------------------------------
18
9
  # NOTE: The following helper functions must be provided elsewhere:
19
10
  # - convert_date_string(s: str) -> datetime
20
11
  # - read_from_file(path: str) -> pd.DataFrame
21
12
  # ---------------------------------------------------------------------------
22
- DEFAULT_EXCLUDE_DIRS = {"node_modules", "__pycache__","backups","backup"}
23
- DEFAULT_EXCLUDE_FILE_PATTERNS = {"__init__*", "*.tmp", "*.log"}
24
- DEFAULT_EXCLUDE_TYPES = {"image","video","audio","presentation"}
25
- def get_ext(item):
26
- return item.split('.')[-1]
27
13
  def _should_skip_dir(dir_name: str, exclude_dirs: set[str]) -> bool:
28
14
  """
29
15
  Return True if dir_name match=self.exclude_types)es one of the excluded directory names exactly.
@@ -1,7 +1,7 @@
1
- from pathlib import Path
2
- from typing import *
3
- import fnmatch, os, glob
1
+
4
2
  from .filter_params import *
3
+ from .imports import *
4
+
5
5
  ##from abstract_utilities import make_list,get_media_exts, is_media_type
6
6
  def get_allowed_predicate(allowed=None):
7
7
  if allowed != False:
@@ -15,7 +15,8 @@ def get_allowed_predicate(allowed=None):
15
15
  return allowed
16
16
  def get_globs(items,recursive: bool = True,allowed=None):
17
17
  glob_paths = []
18
- for item in make_list(items):
18
+ items = [item for item in make_list(items) if item]
19
+ for item in items:
19
20
  pattern = os.path.join(item, "**/*") # include all files recursively\n
20
21
  nuItems = glob.glob(pattern, recursive=recursive)
21
22
  if allowed:
@@ -1,41 +1,4 @@
1
- from typing import *
2
- from ..type_utils import make_list,get_media_exts, is_media_type
3
- from ..string_clean import eatAll
4
- from dataclasses import dataclass, field
5
- @dataclass
6
- class ScanConfig:
7
- allowed_exts: Set[str]
8
- unallowed_exts: Set[str]
9
- exclude_types: Set[str]
10
- exclude_dirs: List[str] = field(default_factory=list)
11
- exclude_patterns: List[str] = field(default_factory=list)
12
- DEFAULT_ALLOWED_EXTS: Set[str] = {
13
- ".py", ".pyw", # python
14
- ".js", ".jsx", ".ts", ".tsx", ".mjs", # JS/TS
15
- ".html", ".htm", ".xml", # markup
16
- ".css", ".scss", ".sass", ".less", # styles
17
- ".json", ".yaml", ".yml", ".toml", ".ini", # configs
18
- ".cfg", ".md", ".markdown", ".rst", # docs
19
- ".sh", ".bash", ".env", # scripts/env
20
- ".txt" # plain text
21
- }
22
-
23
- DEFAULT_EXCLUDE_TYPES: Set[str] = {
24
- "image", "video", "audio", "presentation",
25
- "spreadsheet", "archive", "executable"
26
- }
27
-
28
- # never want these—even if they sneak into ALLOWED
29
- _unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {'.bak', '.shp', '.cpg', '.dbf', '.shx','.geojson',".pyc",'.shx','.geojson','.prj','.sbn','.sbx'}
30
- DEFAULT_UNALLOWED_EXTS = {e for e in _unallowed if e not in DEFAULT_ALLOWED_EXTS}
31
-
32
- DEFAULT_EXCLUDE_DIRS: Set[str] = {
33
- "node_modules", "__pycache__", "backups", "backup", "backs", "trash", "depriciated", "old", "__init__"
34
- }
35
-
36
- DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
37
- "__init__*", "*.tmp", "*.log", "*.lock", "*.zip","*~"
38
- }
1
+ from .imports import *
39
2
  def get_default_modular(obj,default=None,add=False,typ=set):
40
3
  if obj in [False,True,None]:
41
4
  if obj in [True,None]:
@@ -0,0 +1,154 @@
1
+ from .imports import *
2
+ from .filter_params import *
3
+ from .file_filters import enumerate_source_files
4
+
5
+
6
def check_path_type(
    path: str,
    user: Optional[str] = None,
    host: Optional[str] = None,
    user_as_host: Optional[str] = None,
    use_shell: bool = False
) -> Literal["file", "directory", "missing", "unknown"]:
    """
    Determine whether a given path is a file, directory, or missing.
    Works locally or remotely (via SSH).

    Args:
        path: The path to check.
        user, host, user_as_host: SSH parameters if remote. ``user_as_host``
            is used as the SSH target when given, otherwise ``user@host``.
        use_shell: Force shell test instead of Python os.path (local only).
    Returns:
        One of: 'file', 'directory', 'missing', or 'unknown'. 'unknown' is
        returned when the probe fails or yields unexpected output.
    """
    valid = ("file", "directory", "missing")

    def _probe_cmd() -> str:
        # Quote the path for the shell: the previous naive '...' wrapping broke
        # (and allowed command injection) for paths containing a single quote.
        quoted = shlex.quote(str(path))
        return (
            f"if [ -f {quoted} ]; then echo file; "
            f"elif [ -d {quoted} ]; then echo directory; else echo missing; fi"
        )

    # --- remote check if user/host is given ---
    if user_as_host or (user and host):
        remote_target = user_as_host or f"{user}@{host}"
        try:
            result = subprocess.check_output(
                ["ssh", remote_target, _probe_cmd()],
                stderr=subprocess.DEVNULL,
                text=True,
                timeout=5
            ).strip()
        except Exception:
            # Deliberately best-effort: any SSH failure maps to "unknown".
            return "unknown"
        return result if result in valid else "unknown"

    # --- local check ---
    if not use_shell:
        if os.path.isfile(path):
            return "file"
        if os.path.isdir(path):
            return "directory"
        if not os.path.exists(path):
            return "missing"
        # Exists but is neither a regular file nor a directory.
        return "unknown"

    # fallback using shell tests (useful for sandboxed contexts)
    try:
        output = subprocess.check_output(
            _probe_cmd(), shell=True, stderr=subprocess.DEVNULL, text=True
        ).strip()
    except Exception:
        # Deliberately best-effort: any shell failure maps to "unknown".
        return "unknown"
    return output if output in valid else "unknown"
59
+
60
+
61
+
62
+
63
def get_find_cmd(
    directory: str,
    *,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,  # 'f' or 'd'
    name: Optional[str] = None,
    size: Optional[str] = None,
    mtime: Optional[str] = None,
    perm: Optional[str] = None,
    user: Optional[str] = None,
    **kwargs
) -> str:
    """
    Construct a Unix `find` command string from keyword args.

    Args:
        directory: Search root. Shell-quoted when it contains characters that
            need it (spaces, quotes, metacharacters).
        mindepth, maxdepth: Emitted as ``-mindepth``/``-maxdepth`` when not None
            and ``depth`` is None.
        depth: When not None, sets both ``-mindepth`` and ``-maxdepth`` to it.
        file_type: ``'f'`` or ``'d'``; any other value is ignored.
        name: ``-name`` pattern; always emitted quoted.
        size, mtime, perm, user: Emitted as the matching ``find`` option when
            truthy, shell-quoted when needed.
        **kwargs: Accepted and ignored.

    Returns:
        The command as a single string.
    """
    directory = str(directory)
    # An empty directory is left as-is to keep the historical "find " output.
    cmd = [f"find {shlex.quote(directory) if directory else directory}"]

    if depth is not None:
        cmd += [f"-mindepth {depth}", f"-maxdepth {depth}"]
    else:
        if mindepth is not None:
            cmd.append(f"-mindepth {mindepth}")
        if maxdepth is not None:
            cmd.append(f"-maxdepth {maxdepth}")

    if file_type in ("f", "d"):
        cmd.append(f"-type {file_type}")
    if name:
        raw_name = str(name)
        quoted_name = shlex.quote(raw_name)
        if quoted_name == raw_name:
            # Nothing needed escaping; keep the historical single-quoted form.
            quoted_name = f"'{raw_name}'"
        cmd.append(f"-name {quoted_name}")

    # Values were previously interpolated raw, so spaces or shell
    # metacharacters broke the command or injected into it.
    for flag, value in (("-size", size), ("-mtime", mtime), ("-perm", perm), ("-user", user)):
        if value:
            cmd.append(f"{flag} {shlex.quote(str(value))}")

    return " ".join(cmd)
102
+
103
+
104
def collect_globs(
    directory: str,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Set[str]] = None,
    patterns: Optional[List[str]] = None,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,
    user_at_host: Optional[str] = None,
    add: bool = False,
    **kwargs
) -> List[str]:
    """
    Collect file or directory paths, locally or on a remote host.

    - With ``user_at_host``: build a ``find`` command with ``get_find_cmd``
      and return whatever ``run_cmd`` returns for it.
    - Otherwise: walk ``directory`` with ``rglob`` and keep entries that pass
      the ``file_type``, ``exts`` and ``patterns`` filters.

    Note that ``mindepth``/``maxdepth``/``depth`` only affect the remote
    ``find`` command; the local walk does not use them.

    Returns:
        For the local walk, a sorted list of resolved path strings.
    """
    # cfg is resolved as before but not used further in this function.
    cfg = cfg or define_defaults(add=add)
    directory = str(directory)
    exts = ensure_exts(exts)
    patterns = ensure_patterns(patterns)

    # Remote path via SSH
    if user_at_host:
        extra_options = {k: v for k, v in kwargs.items() if v}
        find_cmd = get_find_cmd(
            directory,
            mindepth=mindepth,
            maxdepth=maxdepth,
            depth=depth,
            file_type=file_type,
            **extra_options,
        )
        return run_cmd(find_cmd, user_at_host=user_at_host)

    def _wanted(candidate) -> bool:
        # Same filter order as the original loop: type, suffix, pattern.
        if file_type == "f" and not candidate.is_file():
            return False
        if file_type == "d" and not candidate.is_dir():
            return False
        if exts and candidate.suffix.lower() not in exts:
            return False
        if patterns and not any(candidate.match(pat) for pat in patterns):
            return False
        return True

    # Local path (Python-native walk)
    return sorted(
        str(candidate.resolve())
        for candidate in Path(directory).rglob("*")
        if _wanted(candidate)
    )
@@ -0,0 +1,3 @@
1
+ from .constants import *
2
+ from .imports import *
3
+ from ..imports import *
@@ -0,0 +1,39 @@
1
+ from .imports import *
2
+ from .module_imports import *
3
@dataclass
class ScanConfig:
    """Filter settings that describe which files a scan should keep or skip."""

    # Required: extensions to accept / reject, and media-type names to exclude.
    allowed_exts: Set[str]
    unallowed_exts: Set[str]
    exclude_types: Set[str]
    # Optional: each instance gets its own fresh list by default.
    exclude_dirs: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
10
# Extensions accepted by default, grouped by purpose.
DEFAULT_ALLOWED_EXTS: Set[str] = {
    # python
    ".py", ".pyw",
    # JS/TS
    ".js", ".jsx", ".ts", ".tsx", ".mjs",
    # markup
    ".html", ".htm", ".xml",
    # styles
    ".css", ".scss", ".sass", ".less",
    # configs
    ".json", ".yaml", ".yml", ".toml", ".ini",
    # docs
    ".cfg", ".md", ".markdown", ".rst",
    # scripts/env
    ".sh", ".bash", ".env",
    # plain text
    ".txt",
}

# Media-type names excluded by default.
DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image", "video", "audio", "presentation",
    "spreadsheet", "archive", "executable",
}

# never want these, even if they sneak into ALLOWED
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    '.bak', '.shp', '.cpg', '.dbf', '.shx', '.geojson',
    ".pyc", '.prj', '.sbn', '.sbx',
}
DEFAULT_UNALLOWED_EXTS = {e for e in _unallowed if e not in DEFAULT_ALLOWED_EXTS}

# Directory names skipped by default ("depriciated" is spelled as matched on disk).
DEFAULT_EXCLUDE_DIRS: Set[str] = {
    "node_modules", "old", "__pycache__", "backups", "backup",
    "backs", "trash", "depriciated", "__init__",
}

# File-name patterns skipped by default.
DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
    "__init__*", "*.tmp", "*.log", "*.lock", "*.zip", "*~",
}

# Matches "user@host:/absolute/path" remote specifications.
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
AllowedPredicate = Optional[Callable[[str], bool]]
# Alias of DEFAULT_EXCLUDE_PATTERNS under a second name.
DEFAULT_EXCLUDE_FILE_PATTERNS = DEFAULT_EXCLUDE_PATTERNS
@@ -0,0 +1,10 @@
1
+ from .imports import *
2
def get_caller_path(i=None):
    """
    Return the absolute path of the file that contains a calling frame.

    Args:
        i (int, optional): Index into ``inspect.stack()``. Defaults to 1,
            the frame that called this function.

    Returns:
        str: Absolute path of that frame's source file.
    """
    # `i` used to be read without being a parameter, so every call raised
    # UnboundLocalError; it is now an optional argument like get_caller_dir's.
    i = i or 1
    frame = inspect.stack()[i]
    return os.path.abspath(frame.filename)
6
def get_caller_dir(i=None):
    """
    Return the directory of the file that contains a calling frame.

    Args:
        i (int, optional): Index into ``inspect.stack()``. Defaults to 1,
            the frame that called this function.

    Returns:
        str: Absolute directory of that frame's source file.
    """
    depth = i or 1
    caller = inspect.stack()[depth]
    return os.path.dirname(os.path.abspath(caller.filename))
@@ -0,0 +1,39 @@
1
+ # ============================================================
2
+ # abstract_utilities/imports/imports.py
3
+ # Global imports hub — everything imported here will be
4
+ # automatically available to any module that does:
5
+ # from ..imports import *
6
+ # ============================================================
7
+ # ---- Core standard library modules -------------------------
8
+ import os, sys, re, shlex, glob, platform, textwrap, subprocess, inspect, json, time
9
+ import tempfile, shutil, logging, pathlib, fnmatch, importlib, importlib.util, types
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ from types import ModuleType
13
+
14
+ # ---- Dataclasses and typing --------------------------------
15
+ from dataclasses import dataclass, field
16
+ from typing import (
17
+ Any, Optional, List, Dict, Set, Tuple,
18
+ Iterable, Callable, Literal, Union, TypeVar
19
+ )
20
+
21
+ # ---- Common 3rd-party dependencies --------------------------
22
+ import pandas as pd
23
+ import geopandas as gpd
24
+ import pytesseract
25
+ import pdfplumber
26
+ import PyPDF2
27
+ import ezodf
28
+ from pdf2image import convert_from_path
29
+ from werkzeug.utils import secure_filename
30
+ from werkzeug.datastructures import FileStorage
31
+
32
+ # ---- Helpers ------------------------------------------------
33
+ import textwrap as tw
34
+ from pprint import pprint
35
+
36
+ # ============================================================
37
+ # AUTO-EXPORT ALL NON-PRIVATE NAMES
38
+ # ============================================================
39
+ __all__ = [name for name in globals() if not name.startswith("_")]
@@ -0,0 +1,14 @@
1
+ from .imports import *
2
+ from ....string_clean import eatAll
3
+ from ....list_utils import make_list
4
+ from ....type_utils import get_media_exts, is_media_type, MIME_TYPES, is_str
5
+ from ....ssh_utils import *
6
+ from ....env_utils import *
7
+ from ....read_write_utils import *
8
+ from ....abstract_classes import SingletonMeta
9
+ from ....string_utils import get_from_kwargs
10
+ from ....abstract_classes import run_pruned_func
11
+ from ....class_utils import get_caller, get_caller_path, get_caller_dir
12
+
13
+
14
+ __all__ = [name for name in globals() if not name.startswith("_")]
@@ -0,0 +1,10 @@
1
+ from ..imports import *
2
+ from typing import *
3
+ from dataclasses import dataclass, field
4
@dataclass
class ScanConfig:
    """Container for the extension, type, directory and pattern filters of a scan."""

    # Required sets, supplied positionally or by keyword.
    allowed_exts: Set[str]
    unallowed_exts: Set[str]
    exclude_types: Set[str]
    # Optional lists; default_factory avoids sharing one list between instances.
    exclude_dirs: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)