abstract-utilities 0.2.2.450__py3-none-any.whl → 0.2.2.451__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of abstract-utilities might be problematic. Click here for more details.

Files changed (58) hide show
  1. abstract_utilities/__init__.py +17 -43
  2. abstract_utilities/abstract_classes.py +49 -0
  3. abstract_utilities/class_utils.py +39 -3
  4. abstract_utilities/cmd_utils/imports/__init__.py +1 -0
  5. abstract_utilities/cmd_utils/imports/imports.py +10 -0
  6. abstract_utilities/cmd_utils/pexpect_utils.py +310 -0
  7. abstract_utilities/cmd_utils/user_utils.py +1 -1
  8. abstract_utilities/compare_utils/__init__.py +3 -0
  9. abstract_utilities/compare_utils/best_match.py +150 -0
  10. abstract_utilities/{compare_utils.py → compare_utils/compare_utils.py} +1 -1
  11. abstract_utilities/compare_utils/find_value.py +105 -0
  12. abstract_utilities/dynimport.py +7 -15
  13. abstract_utilities/env_utils/__init__.py +3 -0
  14. abstract_utilities/env_utils/abstractEnv.py +129 -0
  15. abstract_utilities/env_utils/envy_it.py +33 -0
  16. abstract_utilities/env_utils/imports/__init__.py +2 -0
  17. abstract_utilities/env_utils/imports/imports.py +8 -0
  18. abstract_utilities/env_utils/imports/utils.py +122 -0
  19. abstract_utilities/file_utils/__init__.py +3 -0
  20. abstract_utilities/file_utils/file_utils/__init__.py +7 -0
  21. abstract_utilities/file_utils/file_utils/file_filters.py +104 -0
  22. abstract_utilities/{robust_reader → file_utils/file_utils}/file_reader.py +5 -19
  23. abstract_utilities/{robust_readers/file_filters.py → file_utils/file_utils/file_utils.py} +2 -1
  24. abstract_utilities/{robust_readers → file_utils/file_utils}/filter_params.py +1 -38
  25. abstract_utilities/file_utils/file_utils/find_collect.py +153 -0
  26. abstract_utilities/file_utils/file_utils/imports.py +1 -0
  27. abstract_utilities/file_utils/file_utils/map_utils.py +29 -0
  28. abstract_utilities/{robust_reader → file_utils/file_utils}/pdf_utils.py +1 -9
  29. abstract_utilities/file_utils/imports/__init__.py +5 -0
  30. abstract_utilities/file_utils/imports/classes.py +381 -0
  31. abstract_utilities/file_utils/imports/constants.py +39 -0
  32. abstract_utilities/file_utils/imports/file_functions.py +10 -0
  33. abstract_utilities/file_utils/imports/imports.py +13 -0
  34. abstract_utilities/file_utils/imports/module_imports.py +10 -0
  35. abstract_utilities/file_utils/req.py +329 -0
  36. abstract_utilities/json_utils.py +35 -0
  37. abstract_utilities/log_utils.py +14 -3
  38. abstract_utilities/path_utils.py +90 -6
  39. abstract_utilities/read_write_utils.py +99 -156
  40. abstract_utilities/robust_reader/__init__.py +1 -1
  41. abstract_utilities/robust_reader/imports/__init__.py +1 -0
  42. abstract_utilities/robust_reader/imports/imports.py +12 -0
  43. abstract_utilities/robust_readers/__init__.py +0 -1
  44. abstract_utilities/robust_readers/imports.py +8 -0
  45. abstract_utilities/robust_readers/initFuncGen.py +92 -76
  46. abstract_utilities/safe_utils.py +133 -0
  47. abstract_utilities/ssh_utils/__init__.py +3 -0
  48. abstract_utilities/ssh_utils/classes.py +127 -0
  49. abstract_utilities/ssh_utils/imports.py +10 -0
  50. abstract_utilities/ssh_utils/pexpect_utils.py +315 -0
  51. abstract_utilities/ssh_utils/utils.py +188 -0
  52. abstract_utilities/string_utils.py +12 -0
  53. abstract_utilities/type_utils.py +0 -1
  54. {abstract_utilities-0.2.2.450.dist-info → abstract_utilities-0.2.2.451.dist-info}/METADATA +5 -3
  55. abstract_utilities-0.2.2.451.dist-info/RECORD +84 -0
  56. abstract_utilities-0.2.2.450.dist-info/RECORD +0 -49
  57. {abstract_utilities-0.2.2.450.dist-info → abstract_utilities-0.2.2.451.dist-info}/WHEEL +0 -0
  58. {abstract_utilities-0.2.2.450.dist-info → abstract_utilities-0.2.2.451.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,153 @@
1
+ from .imports import *
2
+ from .filter_params import define_defaults, ensure_patterns, ensure_exts
3
+ from .file_filters import enumerate_source_files
4
+
5
+
6
def check_path_type(
    path: str,
    user: Optional[str] = None,
    host: Optional[str] = None,
    user_as_host: Optional[str] = None,
    use_shell: bool = False
) -> Literal["file", "directory", "missing", "unknown"]:
    """
    Determine whether a given path is a file, directory, or missing.
    Works locally or remotely (via SSH).

    Args:
        path: The path to check.
        user, host, user_as_host: SSH parameters. The check runs remotely
            when ``user_as_host`` is given, or when both ``user`` and
            ``host`` are given (target ``user@host``).
        use_shell: For local checks, run a shell ``[ -f ]`` / ``[ -d ]``
            test instead of ``os.path``.
    Returns:
        One of: 'file', 'directory', 'missing', or 'unknown'.
        'unknown' is returned when the command fails, times out (remote
        only, 5 seconds), or prints anything unexpected.
    """
    import shlex  # function-scope import keeps this block self-contained

    # The path ends up inside shell text (local ``sh`` or the remote login
    # shell), so it must be quoted properly; wrapping it in bare '...'
    # breaks on paths that themselves contain a single quote.
    quoted = shlex.quote(str(path))
    probe = (
        f"if [ -f {quoted} ]; then echo file; "
        f"elif [ -d {quoted} ]; then echo directory; "
        "else echo missing; fi"
    )
    known = ("file", "directory", "missing")

    # --- remote check if user/host is given ---
    if user_as_host or (user and host):
        remote_target = user_as_host or f"{user}@{host}"
        try:
            result = subprocess.check_output(
                ["ssh", remote_target, probe],
                stderr=subprocess.DEVNULL,
                text=True,
                timeout=5
            ).strip()
        except Exception:
            # Best effort: any ssh failure or timeout is reported as unknown.
            return "unknown"
        return result if result in known else "unknown"

    # --- local check ---
    if not use_shell:
        if os.path.isfile(path):
            return "file"
        if os.path.isdir(path):
            return "directory"
        if not os.path.exists(path):
            return "missing"
        # Exists but is neither a regular file nor a directory.
        return "unknown"

    # fallback using shell tests (useful for sandboxed contexts)
    try:
        output = subprocess.check_output(
            probe, shell=True, stderr=subprocess.DEVNULL, text=True
        ).strip()
    except Exception:
        return "unknown"
    return output if output in known else "unknown"
59
+
60
+
61
+
62
+
63
def get_find_cmd(
    directory: str,
    *,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,  # 'f' or 'd'
    name: Optional[str] = None,
    size: Optional[str] = None,
    mtime: Optional[str] = None,
    perm: Optional[str] = None,
    user: Optional[str] = None,
) -> str:
    """Construct a Unix `find` command string from keyword args.

    Args:
        directory: Starting point handed to ``find``.
        mindepth, maxdepth: Emitted as ``-mindepth`` / ``-maxdepth`` when
            not None; ignored when ``depth`` is given.
        depth: When not None, pins both ``-mindepth`` and ``-maxdepth`` to
            this value.
        file_type: Emitted as ``-type`` only when it is 'f' or 'd'.
        name, size, mtime, perm, user: Emitted as the matching ``find``
            test when truthy.
    Returns:
        The command as a single string. Every caller-supplied value is
        shell-quoted, so spaces or shell metacharacters cannot split or
        extend the command. Plain values render exactly as before.
    """
    import shlex  # function-scope import keeps this block self-contained

    parts = ["find", shlex.quote(str(directory))]

    if depth is not None:
        pinned = shlex.quote(str(depth))
        parts += ["-mindepth", pinned, "-maxdepth", pinned]
    else:
        if mindepth is not None:
            parts += ["-mindepth", shlex.quote(str(mindepth))]
        if maxdepth is not None:
            parts += ["-maxdepth", shlex.quote(str(maxdepth))]

    if file_type in ("f", "d"):
        parts += ["-type", file_type]
    if name:
        # Always single-quote the pattern (as the command always did) so the
        # shell never expands the glob; embedded quotes are escaped.
        escaped = str(name).replace("'", "'\"'\"'")
        parts += ["-name", f"'{escaped}'"]
    for flag, value in (("-size", size), ("-mtime", mtime),
                        ("-perm", perm), ("-user", user)):
        if value:
            parts += [flag, shlex.quote(str(value))]

    return " ".join(parts)
101
+
102
+
103
def collect_globs(
    directory: str,
    cfg: Optional["ScanConfig"] = None,
    *,
    exts: Optional[Set[str]] = None,
    patterns: Optional[List[str]] = None,
    mindepth: Optional[int] = None,
    maxdepth: Optional[int] = None,
    depth: Optional[int] = None,
    file_type: Optional[str] = None,
    user_at_host: Optional[str] = None,
    add: bool = False,
    **kwargs
) -> List[str]:
    """
    Collect file or directory paths, either remotely or locally.

    - With ``user_at_host`` set, a ``find`` command is built by
      ``get_find_cmd`` and handed to ``run_cmd``; whatever ``run_cmd``
      returns is returned unchanged. Truthy extra ``kwargs`` are forwarded
      to ``get_find_cmd``.
    - Otherwise the tree under ``directory`` is walked with ``Path.rglob``
      and the resolved paths are returned sorted.

    Note: ``mindepth`` / ``maxdepth`` / ``depth`` are only used by the
    remote branch, and ``exts`` / ``patterns`` only by the local one.
    """
    cfg = cfg or define_defaults(add=add)
    directory = str(directory)
    exts = ensure_exts(exts)
    patterns = ensure_patterns(patterns)

    # Remote path via SSH
    if user_at_host:
        extra_tests = {key: value for key, value in kwargs.items() if value}
        remote_cmd = get_find_cmd(
            directory,
            mindepth=mindepth,
            maxdepth=maxdepth,
            depth=depth,
            file_type=file_type,
            **extra_tests,
        )
        return run_cmd(remote_cmd, user_at_host=user_at_host)

    # Local path (Python-native walk)
    def _keep(entry) -> bool:
        # Same filters, in the same order, as a chain of guard clauses.
        if file_type == "f" and not entry.is_file():
            return False
        if file_type == "d" and not entry.is_dir():
            return False
        if exts and entry.suffix.lower() not in exts:
            return False
        if patterns and not any(entry.match(pat) for pat in patterns):
            return False
        return True

    return sorted(
        str(entry.resolve())
        for entry in Path(directory).rglob("*")
        if _keep(entry)
    )
@@ -0,0 +1 @@
1
+ from ..imports import *
@@ -0,0 +1,29 @@
1
+ from .imports import MIME_TYPES,make_list,os
2
+ from .file_filters import get_globs
3
def get_file_type(file_path, types=None, default=None):
    """Return the MIME_TYPES category whose extension collection contains
    the extension of ``file_path``.

    Args:
        file_path: Path to inspect; only existing regular files are classified.
        types: Optional category name(s) to restrict the lookup to. Names
            missing from MIME_TYPES are skipped.
        default: Returned when the file's extension matches no category.
    Returns:
        The matching category key, ``default`` for an unmatched file, or
        None when ``file_path`` is not an existing file.
    """
    if types:
        mime_types = {typ: MIME_TYPES.get(typ) for typ in make_list(types)}
    else:
        mime_types = MIME_TYPES

    if not os.path.isfile(file_path):
        return None

    ext = os.path.splitext(os.path.basename(file_path))[1]
    for file_type, ext_values in mime_types.items():
        # ext_values is None for a requested type MIME_TYPES does not know.
        if ext_values and ext in ext_values:
            return file_type
    return default
18
def get_file_map(directory, types=None, default=None):
    """Group the paths returned by ``get_globs(directory)`` by file type.

    When ``directory`` is actually a file, its parent directory is used.
    Each path is classified with ``get_file_type``; paths with a falsy
    type are left out. Returns a dict mapping type -> list of paths.
    """
    if directory and os.path.isfile(directory):
        directory = os.path.dirname(directory)

    grouped = {}
    for candidate in get_globs(directory):
        kind = get_file_type(candidate, types=types, default=default)
        if kind:
            grouped.setdefault(kind, []).append(candidate)
    return grouped
@@ -1,12 +1,4 @@
1
- import PyPDF2
2
- from typing import *
3
- from pdf2image import convert_from_path
4
- from abstract_utilities.path_utils import (is_file, mkdirs, get_directory,
5
- get_base_name, split_text,
6
- get_ext, get_file_name)
7
- from abstract_utilities.type_utils import is_str
8
- from abstract_utilities.cmd_utils import cmd_input
9
- from abstract_utilities.read_write_utils import write_to_file
1
+ from .imports import *
10
2
  def if_none_return(obj: object, obj_2: object) -> object:
11
3
  """
12
4
  Return obj if obj_2 is None, otherwise return obj_2.
@@ -0,0 +1,5 @@
1
+ from .constants import *
2
+ from .imports import *
3
+ from .module_imports import *
4
+ from .classes import *
5
+ from .file_functions import *
@@ -0,0 +1,381 @@
1
+ from .imports import *
2
+ from .module_imports import *
3
+ from .constants import *
4
def get_item_check_cmd(path, file=True, directory=False, exists=False):
    """Build a shell one-liner that prints ``__OK__`` when ``path`` passes a
    ``test`` check, and always exits successfully (``|| true``).

    The ``test`` flag is chosen as follows:
      * ``-e`` when ``exists`` is truthy, or both ``file`` and ``directory`` are;
      * when ``file`` is a non-empty string it is read as a spec: ``-f`` if
        it contains "f", else ``-d`` if it contains "d", else ``-e``;
      * ``-f`` when ``file`` is truthy, ``-d`` when ``directory`` is truthy;
      * ``-e`` otherwise.

    The path is shell-quoted with ``shlex.quote``.
    """
    if (directory and file) or exists:
        flag = "e"
    elif isinstance(file, str) and file:
        # Must be checked before plain truthiness: any non-empty string is
        # truthy, which used to force "-f" even for file="d".
        if "f" in file:
            flag = "f"
        elif "d" in file:
            flag = "d"
        else:
            flag = "e"
    elif file:
        flag = "f"
    elif directory:
        flag = "d"
    else:
        flag = "e"
    return f"test -{flag} {shlex.quote(path)} && echo __OK__ || true"
21
+
22
+
23
def get_all_item_check_cmd(path, file=True, directory=True, exists=True):
    """Build one ``test`` command per requested check.

    Returns a dict keyed by "file", "dir" and/or "exists" (in that order,
    for each truthy flag) whose values are shell one-liners that print
    ``__OK__`` when the check passes. Returns an empty dict when no check
    is requested.
    """
    requested = [
        label
        for label, wanted in (("file", file), ("dir", directory), ("exists", exists))
        if wanted
    ]
    if not requested:
        return {}

    quoted = shlex.quote(path)
    # The first letter of each label is the matching `test` flag: f, d or e.
    return {
        label: f"test -{label[0]} {quoted} && echo __OK__ || true"
        for label in requested
    }
43
+
44
+
45
def is_file(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` is a regular file, locally or through ``run_cmd``.

    When all of ``user_at_host``, ``password``, ``key`` and ``env_path``
    are None, returns ``os.path.isfile(path)`` (a bool). Otherwise a
    ``test -f`` command is built and the result of ``run_cmd`` is returned
    as-is; that command prints ``__OK__`` on success.
    """
    # The four connection values collapse to {None} only when none was given.
    if {user_at_host, password, key, env_path} == {None}:
        return os.path.isfile(path)

    probe = get_item_check_cmd(path, file=True)
    return run_cmd(
        cmd=probe,
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
66
def is_dir(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` is a directory, locally or through ``run_cmd``.

    When all of ``user_at_host``, ``password``, ``key`` and ``env_path``
    are None, returns ``os.path.isdir(path)`` (a bool). Otherwise a
    ``test -d`` command is built and the result of ``run_cmd`` is returned
    as-is; that command prints ``__OK__`` on success.
    """
    # The four connection values collapse to {None} only when none was given.
    if {user_at_host, password, key, env_path} == {None}:
        return os.path.isdir(path)

    probe = get_item_check_cmd(path, file=False, directory=True)
    return run_cmd(
        cmd=probe,
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
87
def is_exists(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` exists, locally or through ``run_cmd``.

    When all of ``user_at_host``, ``password``, ``key`` and ``env_path``
    are None, returns ``os.path.exists(path)`` (a bool). Otherwise a
    ``test -e`` command is built and the result of ``run_cmd`` is returned
    as-is; that command prints ``__OK__`` on success.
    """
    # The four connection values collapse to {None} only when none was given.
    if {user_at_host, password, key, env_path} == {None}:
        return os.path.exists(path)

    probe = get_item_check_cmd(path, exists=True)
    return run_cmd(
        cmd=probe,
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
109
def is_any(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Report what ``path`` is, locally or through ``run_cmd``.

    When all of ``user_at_host``, ``password``, ``key`` and ``env_path``
    are None, returns ``os.path.exists(path)`` (a bool). Otherwise the
    "file", "dir" and "exists" probes are run in that order through
    ``run_cmd``; the label of the first probe whose output contains
    ``__OK__`` is returned, or None when none does.

    Note the asymmetry: the local branch yields a bool, the remote branch
    a label string or None.
    """
    # The four connection values collapse to {None} only when none was given.
    if {user_at_host, password, key, env_path} == {None}:
        return os.path.exists(path)

    probes = get_all_item_check_cmd(path, file=True, directory=True, exists=True)
    for label, probe in probes.items():
        reply = run_cmd(
            cmd=probe,
            user_at_host=user_at_host,
            password=password,
            key=key,
            env_path=env_path,
            **kwargs
        )
        if "__OK__" in (reply or ""):
            return label
    return None
136
class PathBackend(Protocol):
    """Structural interface shared by the filesystem backends in this module
    (``LocalFS`` and ``SSHFS`` both provide these methods)."""

    def join(self, *parts: str) -> str:
        """Combine path components into one path string."""
        ...

    def isfile(self, path: str) -> bool:
        """Tell whether ``path`` is a regular file on this backend."""
        ...

    def isdir(self, path: str) -> bool:
        """Tell whether ``path`` is a directory on this backend."""
        ...

    def glob_recursive(self, base: str, **opts) -> List[str]:
        """List entries found beneath ``base``; ``opts`` tune the walk."""
        ...

    def listdir(self, base: str) -> List[str]:
        """List the direct children of ``base``."""
        ...
142
+
143
class LocalFS:
    """Filesystem backend that answers every query with ``os`` / ``os.path``.

    The ``get_*`` flags select which facts ``is_included`` reports.
    """

    def __init__(self, get_type=False, get_is_dir=False, get_is_file=False, get_is_exists=False, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.get_type = get_type
        self.get_is_dir = get_is_dir
        self.get_is_file = get_is_file
        self.get_is_exists = get_is_exists

    def join(self, *parts: str) -> str:
        """Join path components with ``os.path.join``."""
        return os.path.join(*parts)

    def isfile(self, path: str) -> bool:
        """Return True when ``path`` is an existing regular file."""
        return os.path.isfile(path)

    def isdir(self, path: str) -> bool:
        """Return True when ``path`` is an existing directory."""
        return os.path.isdir(path)

    def isexists(self, path: str) -> bool:
        """Return True when ``path`` exists."""
        return os.path.exists(path)

    def istype(self, path: str) -> Optional[str]:
        """Return "file", "dir" or "exists" (first check that passes, in
        that order), or None when ``path`` does not exist."""
        checks = (("file", os.path.isfile), ("dir", os.path.isdir), ("exists", os.path.exists))
        for label, check in checks:
            if check(path):
                return label
        return None

    def is_included(self, path, **kwargs):
        """Collect the facts enabled by the ``get_*`` flags into a dict with
        keys "typ", "dir", "file" and "exists" (only the enabled ones)."""
        include_js = {}
        if self.get_type:
            include_js["typ"] = self.istype(path)
        if self.get_is_dir:
            include_js["dir"] = self.isdir(path)
        if self.get_is_file:
            include_js["file"] = self.isfile(path)
        if self.get_is_exists:
            include_js["exists"] = self.isexists(path)
        return include_js

    def glob_recursive(self, base: str, **opts) -> List[str]:
        """
        Walk ``base`` with ``os.walk`` and return the matching entry paths.

        Depths follow the ``find`` convention used by ``SSHFS.glob_recursive``:
        direct children of ``base`` are at depth 1. ``base`` itself is never
        returned.

        opts:
          - maxdepth: int | None  (deepest entry depth to return)
          - mindepth: int (default 1; None is treated as 0)
          - follow_symlinks: bool
          - include_dirs: bool
          - include_files: bool
          - exclude_hidden: bool  (skip dot-entries and do not descend into
            dot-directories)
        """
        maxdepth = opts.get("maxdepth")
        mindepth = opts.get("mindepth", 1)
        if mindepth is None:
            mindepth = 0
        follow = opts.get("follow_symlinks", False)
        want_d = opts.get("include_dirs", True)
        want_f = opts.get("include_files", True)
        hide = opts.get("exclude_hidden", False)

        results: List[str] = []
        for root, dirs, files in os.walk(base, followlinks=follow):
            # Measure depth relative to base; counting separators in the
            # normalised paths miscounts for relative bases such as ".".
            rel = os.path.relpath(root, base)
            root_depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
            entry_depth = root_depth + 1  # depth of the entries inside root

            if hide:
                # Pruning in place also stops os.walk from descending.
                dirs[:] = [d for d in dirs if not d.startswith(".")]
                files = [f for f in files if not f.startswith(".")]

            if entry_depth >= mindepth and (maxdepth is None or entry_depth <= maxdepth):
                if want_d:
                    results.extend(os.path.join(root, d) for d in dirs)
                if want_f:
                    results.extend(os.path.join(root, f) for f in files)

            if maxdepth is not None and entry_depth >= maxdepth:
                # Anything deeper would exceed maxdepth: stop descending.
                dirs[:] = []
        return results

    def listdir(self, base: str) -> List[str]:
        """Return the direct children of ``base`` joined onto ``base``;
        any error yields an empty list (best effort)."""
        try:
            return [os.path.join(base, name) for name in os.listdir(base)]
        except Exception:
            return []
220
def get_spec_kwargs(
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    kwargs=None
):
    """Fill the four connection settings into ``kwargs`` and return it.

    For each of "user_at_host", "password", "key" and "env_path", a truthy
    value already present in ``kwargs`` wins; otherwise the matching
    argument is used. A non-empty ``kwargs`` dict is updated in place; a
    missing or empty one is replaced by a fresh dict.
    """
    merged = kwargs or {}
    fallbacks = (
        ("user_at_host", user_at_host),
        ("password", password),
        ("key", key),
        ("env_path", env_path),
    )
    for setting, fallback in fallbacks:
        merged[setting] = merged.get(setting) or fallback
    return merged
233
class SSHFS:
    """Remote POSIX backend via run_remote_cmd.

    Connection settings are stored on the instance. The target is read
    from the ``user_at_host``, ``user`` or ``host`` keyword (first truthy
    one). Note that the first positional parameter is ``password``.
    """

    def __init__(self, password=None, key=None, env_path=None,
                 get_type=False, get_is_dir=False, get_is_file=False, get_is_exists=False, **kwargs):
        self.user_at_host = kwargs.get('user_at_host') or kwargs.get('user') or kwargs.get('host')
        self.password = password
        self.key = key
        self.env_path = env_path
        self.get_type = get_type
        self.get_is_dir = get_is_dir
        self.get_is_file = get_is_file
        self.get_is_exists = get_is_exists

    def cell_spec_kwargs(self, func, path, **kwargs):
        """Call ``func(path, **kwargs)`` after filling in this instance's
        connection settings for any that ``kwargs`` leaves unset."""
        kwargs = get_spec_kwargs(
            user_at_host=self.user_at_host,
            password=self.password,
            key=self.key,
            env_path=self.env_path,
            kwargs=kwargs
        )
        return func(path, **kwargs)

    @staticmethod
    def _is_ok(out) -> bool:
        """Interpret a probe result: a bool is returned as-is, anything else
        is searched for the ``__OK__`` marker."""
        # The module-level is_* helpers return a plain bool when no
        # connection setting is present; `"__OK__" in True` would raise.
        if isinstance(out, bool):
            return out
        return "__OK__" in (out or "")

    def is_included(self, path, **kwargs):
        """Collect the facts enabled by the ``get_*`` flags into a dict with
        keys "typ", "dir", "file" and "exists" (only the enabled ones)."""
        include_js = {}
        if self.get_type:
            include_js["typ"] = self.istype(path, **kwargs)
        if self.get_is_dir:
            include_js["dir"] = self.isdir(path, **kwargs)
        if self.get_is_file:
            include_js["file"] = self.isfile(path, **kwargs)
        if self.get_is_exists:
            include_js["exists"] = self.isexists(path, **kwargs)
        return include_js

    def join(self, *parts: str) -> str:
        """Join path components with ``posixpath.join``."""
        return posixpath.join(*parts)

    def isfile(self, path: str, **kwargs) -> bool:
        """Return True when the ``is_file`` probe succeeds for ``path``."""
        return self._is_ok(self.cell_spec_kwargs(is_file, path, **kwargs))

    def isdir(self, path: str, **kwargs) -> bool:
        """Return True when the ``is_dir`` probe succeeds for ``path``."""
        return self._is_ok(self.cell_spec_kwargs(is_dir, path, **kwargs))

    def isexists(self, path: str, **kwargs) -> bool:
        """Return True when the ``is_exists`` probe succeeds for ``path``."""
        return self._is_ok(self.cell_spec_kwargs(is_exists, path, **kwargs))

    def istype(self, path: str, **kwargs) -> Optional[str]:
        """Return whatever ``is_any`` reports for ``path``."""
        return self.cell_spec_kwargs(is_any, path, **kwargs)

    def _build_find_cmd(self, base: str, **opts) -> str:
        """Assemble the ``find`` command line used by ``glob_recursive``."""
        maxdepth = opts.get("maxdepth")
        mindepth = opts.get("mindepth", 1)
        follow = opts.get("follow_symlinks", False)
        want_d = opts.get("include_dirs", True)
        want_f = opts.get("include_files", True)
        hide = opts.get("exclude_hidden", False)

        parts = ["find"]
        if follow:
            # -L is an option OF find, so it goes after the command name.
            parts.append("-L")
        parts.append(shlex.quote(base))
        if mindepth is not None:
            parts += ["-mindepth", str(mindepth)]
        if maxdepth is not None:
            parts += ["-maxdepth", str(maxdepth)]

        # A -type filter is only needed when exactly one kind is wanted.
        if want_d and not want_f:
            parts += ["-type", "d"]
        elif want_f and not want_d:
            parts += ["-type", "f"]

        if hide:
            # Quote the regex so the shell neither glob-expands it nor
            # strips the backslash that makes the dot literal.
            parts += ["!", "-regex", shlex.quote(r".*/\..*")]

        parts += ["-printf", r"'%p\n'"]
        return " ".join(parts) + " 2>/dev/null"

    def glob_recursive(self, base: str, **opts) -> List[str]:
        """
        List entries beneath ``base`` by running ``find`` through
        ``run_remote_cmd`` and splitting its output into lines.

        opts:
          - maxdepth: int | None
          - mindepth: int (default 1)
          - follow_symlinks: bool
          - include_dirs: bool
          - include_files: bool
          - exclude_hidden: bool
        """
        cmd = self._build_find_cmd(base, **opts)
        out = run_remote_cmd(self.user_at_host, cmd)
        return [line.strip().strip("'") for line in (out or "").splitlines() if line.strip()]

    def listdir(self, base: str) -> List[str]:
        """List the direct children of ``base`` (``find`` at depth 1) via
        ``run_remote_cmd``."""
        cmd = f"find {shlex.quote(base)} -maxdepth 1 -mindepth 1 -printf '%p\\n' 2>/dev/null"
        out = run_remote_cmd(self.user_at_host, cmd)
        return [line.strip() for line in (out or "").splitlines() if line.strip()]
322
+
323
+
324
+
325
def try_group(pre, item, strings):
    """Match ``item`` against the compiled pattern ``pre`` and return the
    requested groups.

    Args:
        pre: Compiled regular expression (its ``match`` method is used).
        item: Text to match.
        strings: Group names (or indexes) to extract.
    Returns:
        A new list with one captured value per entry of ``strings``, or
        None when the pattern does not match or any lookup fails. The
        ``strings`` argument is left untouched.
    """
    try:
        found = pre.match(item)
        # Build a fresh list: rewriting `strings` in place would leave the
        # caller's list half-converted if a later group lookup failed.
        return [found.group(name) for name in strings]
    except Exception:
        # Best effort: no match (found is None), unknown group, bad input.
        return None
335
def normalize_items(
    paths: Iterable[str],
    user_at_host=None,
    get_type=True,
    get_is_dir=False,
    get_is_file=False,
    get_is_exists=False,
    **kwargs
) -> List[tuple[PathBackend, str, dict]]:
    """Pair every path with the backend that should handle it.

    A path is treated as remote when it matches ``REMOTE_RE``
    (``user@host:/abs/path``) or when a default host is supplied through
    ``user_at_host`` or the ``host`` / ``user`` keyword. A host embedded in
    the path takes precedence over the default, and the path is reduced to
    its part after the colon. Everything else gets a ``LocalFS``.

    Returns:
        A list of ``(backend, path, includes)`` tuples, where ``includes``
        is the result of ``backend.is_included(path)``. Falsy items in
        ``paths`` are skipped.
    """
    pairs: List[tuple[PathBackend, str, dict]] = []
    default_host = user_at_host or kwargs.get("host") or kwargs.get("user")

    for item in make_list(paths):
        if not item:
            continue

        parsed = try_group(REMOTE_RE, item, ["host", "path"])
        has_remote_spec = bool(parsed) and None not in parsed

        if has_remote_spec or default_host:
            target_host = None
            if has_remote_spec:
                target_host = parsed[0]
                item = parsed[1] or item
            target_host = target_host or default_host
            # Pass the host by keyword: the first positional parameter of
            # SSHFS is `password`, so a positional host would be stored as
            # the password and the target would be lost.
            backend = SSHFS(
                user_at_host=target_host,
                get_type=get_type,
                get_is_dir=get_is_dir,
                get_is_file=get_is_file,
                get_is_exists=get_is_exists,
                **kwargs
            )
        else:
            backend = LocalFS(
                get_type=get_type,
                get_is_dir=get_is_dir,
                get_is_file=get_is_file,
                get_is_exists=get_is_exists
            )

        includes = backend.is_included(item)
        pairs.append((backend, item, includes))
    return pairs
380
+
381
+
@@ -0,0 +1,39 @@
1
+ from .imports import *
2
+ from .module_imports import *
3
@dataclass
class ScanConfig:
    """Bundle of filter settings: extension sets, excluded type names,
    and excluded directory names / patterns."""

    # Required: every instance must supply the three sets below.
    allowed_exts: Set[str]
    unallowed_exts: Set[str]
    exclude_types: Set[str]
    # Optional: each instance gets its own fresh empty list by default.
    exclude_dirs: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
10
# Extensions accepted by default, grouped by purpose.
DEFAULT_ALLOWED_EXTS: Set[str] = {
    ".py", ".pyw",                                  # python
    ".js", ".jsx", ".ts", ".tsx", ".mjs",           # JS/TS
    ".html", ".htm", ".xml",                        # markup
    ".css", ".scss", ".sass", ".less",              # styles
    ".json", ".yaml", ".yml", ".toml", ".ini",      # configs
    ".cfg", ".md", ".markdown", ".rst",             # docs
    ".sh", ".bash", ".env",                         # scripts/env
    ".txt",                                         # plain text
}

# Media categories whose extensions are excluded by default.
DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image", "video", "audio", "presentation",
    "spreadsheet", "archive", "executable",
}

# never want these—even if they sneak into ALLOWED
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    '.bak', '.shp', '.cpg', '.dbf', '.shx', '.geojson',
    ".pyc", '.prj', '.sbn', '.sbx',
}
# An extension that is explicitly allowed is not treated as unallowed.
DEFAULT_UNALLOWED_EXTS = _unallowed - DEFAULT_ALLOWED_EXTS

# NOTE(review): "depriciated" looks like a misspelling of "deprecated";
# confirm which spelling real directories use before changing it.
DEFAULT_EXCLUDE_DIRS: Set[str] = {
    "node_modules", "old", "__pycache__", "backups", "backup",
    "backs", "trash", "depriciated", "__init__",
}

DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
    "__init__*", "*.tmp", "*.log", "*.lock", "*.zip", "*~",
}
# Matches "user@host:/absolute/path"; groups: host (user@host) and path.
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
AllowedPredicate = Optional[Callable[[str], bool]]
# Alias of DEFAULT_EXCLUDE_PATTERNS (same set object).
DEFAULT_EXCLUDE_FILE_PATTERNS = DEFAULT_EXCLUDE_PATTERNS
@@ -0,0 +1,10 @@
1
+ from .imports import *
2
def get_caller_path(i=None):
    """Return the absolute path of the source file of a frame on the call stack.

    Args:
        i: Index into ``inspect.stack()``; None or 0 falls back to 1, the
            frame that called this function.
    """
    # `i` used to be read without being a parameter, so every call raised
    # UnboundLocalError; it is now an optional argument like get_caller_dir's.
    frame = inspect.stack()[i or 1]
    return os.path.abspath(frame.filename)
6
def get_caller_dir(i=None):
    """Return the directory containing the source file of a frame on the
    call stack.

    ``i`` indexes ``inspect.stack()``; None or 0 falls back to 1, the frame
    that called this function.
    """
    caller_file = inspect.stack()[i or 1].filename
    return os.path.dirname(os.path.abspath(caller_file))
@@ -0,0 +1,13 @@
1
+ import pandas as pd
2
+ from typing import *
3
+ import geopandas as gpd
4
+ from pathlib import Path
5
+ from types import ModuleType
6
+ from datetime import datetime
7
+ from pdf2image import convert_from_path
8
+ from dataclasses import dataclass, field
9
+ from werkzeug.utils import secure_filename
10
+ from werkzeug.datastructures import FileStorage
11
+ import fnmatch, fnmatch,shlex, os, glob, platform, textwrap, pkgutil,time
12
+ import tempfile,shutil,logging,ezodf,fnmatch,pytesseract,pdfplumber,re
13
+ import textwrap, sys, types, importlib, importlib.util, inspect,PyPDF2
@@ -0,0 +1,10 @@
1
+ from ...string_clean import eatAll
2
+ from ...list_utils import make_list
3
+ from ...type_utils import get_media_exts, is_media_type,MIME_TYPES,is_str
4
+ from ...ssh_utils import *
5
+ from ...env_utils import *
6
+ from ...read_write_utils import *
7
+ from ...abstract_classes import SingletonMeta
8
+ from ...log_utils import get_logFile
9
+ from ...class_utils import get_caller,get_caller_path,get_caller_dir
10
+ from ...ssh_utils import run_cmd