abstract-utilities 0.2.2.513__py3-none-any.whl → 0.2.2.583__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_utilities/class_utils/caller_utils.py +18 -0
- abstract_utilities/class_utils/global_utils.py +3 -2
- abstract_utilities/class_utils/imports/imports.py +1 -1
- abstract_utilities/directory_utils/__init__.py +2 -4
- abstract_utilities/directory_utils/imports/__init__.py +2 -0
- abstract_utilities/directory_utils/imports/imports.py +1 -0
- abstract_utilities/directory_utils/imports/module_imports.py +2 -0
- abstract_utilities/directory_utils/src/__init__.py +4 -0
- abstract_utilities/directory_utils/src/directory_utils.py +108 -0
- abstract_utilities/directory_utils/src/name_utils.py +43 -0
- abstract_utilities/directory_utils/src/size_utils.py +57 -0
- abstract_utilities/directory_utils/src/utils.py +116 -0
- abstract_utilities/file_utils/imports/constants.py +81 -7
- abstract_utilities/file_utils/imports/imports.py +0 -4
- abstract_utilities/file_utils/imports/module_imports.py +1 -1
- abstract_utilities/file_utils/src/__init__.py +2 -4
- abstract_utilities/file_utils/src/file_filters/__init__.py +4 -0
- abstract_utilities/file_utils/src/file_filters/ensure_utils.py +116 -0
- abstract_utilities/file_utils/src/file_filters/filter_params.py +86 -0
- abstract_utilities/file_utils/src/file_filters/filter_utils.py +78 -0
- abstract_utilities/file_utils/src/file_filters/predicate_utils.py +114 -0
- abstract_utilities/file_utils/src/file_filters.py +114 -47
- abstract_utilities/file_utils/src/file_reader.py +0 -64
- abstract_utilities/file_utils/src/file_utils.py +7 -130
- abstract_utilities/file_utils/src/filter_params.py +128 -86
- abstract_utilities/file_utils/src/find_collect.py +85 -165
- abstract_utilities/file_utils/src/find_content.py +210 -0
- abstract_utilities/file_utils/src/initFunctionsGen.py +3 -9
- abstract_utilities/file_utils/src/reader_utils/__init__.py +4 -0
- abstract_utilities/file_utils/src/reader_utils/directory_reader.py +53 -0
- abstract_utilities/file_utils/src/reader_utils/file_reader.py +543 -0
- abstract_utilities/file_utils/src/reader_utils/file_readers.py +376 -0
- abstract_utilities/file_utils/src/reader_utils/imports.py +18 -0
- abstract_utilities/file_utils/src/reader_utils/pdf_utils.py +300 -0
- abstract_utilities/file_utils (2)/__init__.py +2 -0
- abstract_utilities/file_utils (2)/imports/__init__.py +2 -0
- abstract_utilities/file_utils (2)/imports/constants.py +118 -0
- abstract_utilities/file_utils (2)/imports/imports/__init__.py +3 -0
- abstract_utilities/file_utils (2)/imports/imports/constants.py +119 -0
- abstract_utilities/file_utils (2)/imports/imports/imports.py +46 -0
- abstract_utilities/file_utils (2)/imports/imports/module_imports.py +8 -0
- abstract_utilities/file_utils (2)/imports/utils/__init__.py +3 -0
- abstract_utilities/file_utils (2)/imports/utils/classes.py +379 -0
- abstract_utilities/file_utils (2)/imports/utils/clean_imps.py +155 -0
- abstract_utilities/file_utils (2)/imports/utils/filter_utils.py +341 -0
- abstract_utilities/file_utils (2)/src/__init__.py +8 -0
- abstract_utilities/file_utils (2)/src/file_filters.py +155 -0
- abstract_utilities/file_utils (2)/src/file_reader.py +604 -0
- abstract_utilities/file_utils (2)/src/find_collect.py +258 -0
- abstract_utilities/file_utils (2)/src/initFunctionsGen.py +286 -0
- abstract_utilities/file_utils (2)/src/map_utils.py +28 -0
- abstract_utilities/file_utils (2)/src/pdf_utils.py +300 -0
- abstract_utilities/import_utils/circular_import_finder.py +222 -0
- abstract_utilities/import_utils/circular_import_finder2.py +118 -0
- abstract_utilities/import_utils/imports/module_imports.py +3 -1
- abstract_utilities/import_utils/src/clean_imports.py +156 -25
- abstract_utilities/import_utils/src/dot_utils.py +11 -0
- abstract_utilities/import_utils/src/extract_utils.py +4 -0
- abstract_utilities/import_utils/src/import_functions.py +47 -2
- abstract_utilities/import_utils/src/pkg_utils.py +58 -4
- abstract_utilities/import_utils/src/sysroot_utils.py +56 -1
- abstract_utilities/log_utils/log_file.py +3 -2
- abstract_utilities/path_utils/path_utils.py +25 -23
- abstract_utilities/safe_utils/safe_utils.py +30 -0
- {abstract_utilities-0.2.2.513.dist-info → abstract_utilities-0.2.2.583.dist-info}/METADATA +1 -1
- {abstract_utilities-0.2.2.513.dist-info → abstract_utilities-0.2.2.583.dist-info}/RECORD +68 -28
- {abstract_utilities-0.2.2.513.dist-info → abstract_utilities-0.2.2.583.dist-info}/WHEEL +0 -0
- {abstract_utilities-0.2.2.513.dist-info → abstract_utilities-0.2.2.583.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .module_imports import *
|
|
3
|
+
# -------------------------
|
|
4
|
+
# Config dataclass
|
|
5
|
+
# -------------------------
|
|
6
|
+
|
|
7
|
+
@dataclass
class ScanConfig:
    """Filter settings: extension, type, directory and pattern allow/deny lists.

    The four extension/type sets have no default and must be supplied, in
    this order, when the config is constructed. The directory and pattern
    lists each default to a fresh empty list per instance.
    """

    # --- required filters (no defaults) ---
    allowed_exts: Set[str]
    exclude_exts: Set[str]
    allowed_types: Set[str]
    exclude_types: Set[str]

    # --- optional filters (each instance gets its own empty list) ---
    allowed_dirs: List[str] = field(default_factory=list)
    exclude_dirs: List[str] = field(default_factory=list)
    allowed_patterns: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
|
|
17
|
+
|
|
18
|
+
@dataclass
class SearchParams(ScanConfig):
    """``ScanConfig`` extended with search roots and string-search switches.

    Every field added here has a default, so only the fields inherited from
    ``ScanConfig`` without defaults remain mandatory.
    """

    # --- where to search ---
    directories: List[str] = field(default_factory=list)
    add: bool = False
    recursive: bool = True

    # --- what to search for and how results are reported ---
    # NOTE(review): the exact meaning of each switch is defined by the
    # consumers of this dataclass, which are outside this file.
    strings: List[str] = field(default_factory=list)
    total_strings: bool = False
    parse_lines: bool = False
    spec_line: Union[bool, int] = False  # a bool or an int is accepted
    get_lines: bool = False
|
|
28
|
+
|
|
29
|
+
@dataclass
class AllParams(SearchParams):
    """``SearchParams`` plus an optional predicate and a file-inclusion flag."""

    # No annotation: ``@dataclass`` does not turn this into a field, so it is
    # a plain class attribute and not an ``__init__`` parameter.
    cfg = None

    # Optional callable taking a string and returning a bool.
    allowed: Optional[Callable[[str], bool]] = None
    include_files: bool = True

    # Redeclared with the same default as the inherited ``recursive`` field;
    # a redeclared dataclass field keeps its original position.
    recursive: bool = True
|
|
35
|
+
# -------------------------
|
|
36
|
+
# Default sets
|
|
37
|
+
# -------------------------
|
|
38
|
+
DEFAULT_ALLOWED_EXTS: Set[str] = {
    # python
    ".py", ".pyw",
    # JS/TS
    ".js", ".jsx", ".ts", ".tsx", ".mjs",
    # markup
    ".html", ".htm", ".xml",
    # styles
    ".css", ".scss", ".sass", ".less",
    # configs
    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg",
    # docs
    ".md", ".markdown", ".rst",
    # scripts/env
    ".sh", ".bash", ".env",
    # plain text
    ".txt",
}

DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image",
    "video",
    "audio",
    "presentation",
    "spreadsheet",
    "archive",
    "executable",
}

# Extensions to reject: everything get_media_exts() reports for the excluded
# types, plus a hand-picked list of backup / GIS / bytecode extensions.
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    ".bak", ".shp", ".cpg", ".dbf", ".shx", ".geojson",
    ".pyc", ".prj", ".sbn", ".sbx",
}
# Anything also present in DEFAULT_ALLOWED_EXTS is dropped from the exclusion
# set, i.e. the allow-list wins. Entries keep only the text after the last
# dot, so they carry no leading "." (unlike DEFAULT_ALLOWED_EXTS).
# NOTE(review): confirm consumers expect dot-less exclude extensions.
DEFAULT_EXCLUDE_EXTS = {
    e.rsplit('.', 1)[-1] for e in _unallowed - DEFAULT_ALLOWED_EXTS
}

DEFAULT_EXCLUDE_DIRS: Set[str] = {
    "node_modules",
    "old",
    "__pycache__",
    "backups",
    "backup",
    "backs",
    "trash",
    "depriciated",
    "__init__",
}

DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
    "__init__*",
    "*.tmp",
    "*.log",
    "*.lock",
    "*.zip",
    "*~",
}

# Matches "<user>@<host>:<absolute path>" and exposes the named groups
# ``host`` (including the "user@" part) and ``path``.
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
AllowedPredicate = Optional[Callable[[str], bool]]

# Alias: the very same set object as DEFAULT_EXCLUDE_PATTERNS.
DEFAULT_EXCLUDE_FILE_PATTERNS = DEFAULT_EXCLUDE_PATTERNS
DEFAULT_ALLOWED_PATTERNS: List[str] = ["*"]
DEFAULT_ALLOWED_DIRS: List[str] = ["*"]
DEFAULT_ALLOWED_TYPES: List[str] = ["*"]

# Canonical parameter name -> accepted alias spellings.
# "paths" / "path" are listed under both "directories" and "files".
CANONICAL_MAP = {
    "directories": [
        "directory", "directories", "dir", "dirs", "directory", "directories",
        "d", "dirname", "paths", "path", "roots", "root",
    ],
    "files": [
        "file", "filepath", "file_path", "files", "filepaths", "file_paths",
        "paths", "path", "f",
    ],
    "allowed_exts": [
        "allow_ext", "allowed_ext", "include_ext", "include_exts", "exts_allowed",
    ],
    "exclude_exts": [
        "exclude_ext", "excluded_ext", "excluded_exts", "unallowed_ext", "unallowed_exts",
    ],
    "allowed_types": [
        "allow_type", "allowed_type", "include_type", "include_types", "types_allowed",
    ],
    "exclude_types": [
        "exclude_type", "excluded_type", "excluded_types", "unallowed_type", "unallowed_types",
    ],
    "allowed_dirs": [
        "allow_dir", "allowed_dir", "include_dir", "include_dirs", "dirs_allowed",
    ],
    "exclude_dirs": [
        "exclude_dir", "excluded_dir", "excluded_dirs", "unallowed_dir", "unallowed_dirs",
    ],
    "allowed_patterns": [
        "allow_pattern", "allowed_pattern", "include_pattern", "include_patterns",
        "patterns_allowed",
    ],
    "exclude_patterns": [
        "exclude_pattern", "excluded_pattern", "excluded_patterns", "unallowed_pattern",
        "unallowed_patterns",
    ],
    # These options have a single spelling: the canonical name itself.
    **{
        flag: [flag]
        for flag in (
            "add", "recursive", "strings", "total_strings",
            "parse_lines", "spec_line", "get_lines",
        )
    },
}

# One descriptor per allow/exclude filter: its default value, the Python type
# of that default, and its alias list from CANONICAL_MAP.
DEFAULT_ALLOWED_EXCLUDE_MAP = {
    name: {
        "default": default,
        "type": type(default),
        "canonical": CANONICAL_MAP.get(name),
    }
    for name, default in (
        ("allowed_exts", DEFAULT_ALLOWED_EXTS),
        ("exclude_exts", DEFAULT_EXCLUDE_EXTS),
        ("allowed_types", DEFAULT_ALLOWED_TYPES),
        ("exclude_types", DEFAULT_EXCLUDE_TYPES),
        ("allowed_dirs", DEFAULT_ALLOWED_DIRS),
        ("exclude_dirs", DEFAULT_EXCLUDE_DIRS),
        ("allowed_patterns", DEFAULT_ALLOWED_PATTERNS),
        ("exclude_patterns", DEFAULT_EXCLUDE_PATTERNS),
    )
}

# Descriptor table for every recognised option, in lookup order.
DEFAULT_CANONICAL_MAP = {
    "directories": {
        "default": [], "type": list, "canonical": CANONICAL_MAP.get("directories"),
    },
    "files": {
        "default": [], "type": list, "canonical": CANONICAL_MAP.get("files"),
    },
    **DEFAULT_ALLOWED_EXCLUDE_MAP,
    # CANONICAL_MAP has no "allowed" key, so "canonical" is None here.
    # NOTE(review): "type" is bool although the default is None — confirm intent.
    "allowed": {
        "default": None, "type": bool, "canonical": CANONICAL_MAP.get("allowed"),
    },
    "add": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("add"),
    },
    "recursive": {
        "default": True, "type": bool, "canonical": CANONICAL_MAP.get("recursive"),
    },
    "strings": {
        "default": None, "type": list, "canonical": CANONICAL_MAP.get("strings"),
    },
    "total_strings": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("total_strings"),
    },
    "parse_lines": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("parse_lines"),
    },
    "spec_line": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("spec_line"),
    },
    "get_lines": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("get_lines"),
    },
}
|
|
117
|
+
|
|
118
|
+
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from .imports import *
|
|
2
|
+
from .module_imports import *
|
|
3
|
+
# -------------------------
|
|
4
|
+
# Config dataclass
|
|
5
|
+
# -------------------------
|
|
6
|
+
|
|
7
|
+
@dataclass
class ScanConfig:
    """Filter settings: extension, type, directory and pattern allow/deny lists.

    The four extension/type sets have no default and must be supplied, in
    this order, when the config is constructed. The directory and pattern
    lists each default to a fresh empty list per instance.
    """

    # --- required filters (no defaults) ---
    allowed_exts: Set[str]
    exclude_exts: Set[str]
    allowed_types: Set[str]
    exclude_types: Set[str]

    # --- optional filters (each instance gets its own empty list) ---
    allowed_dirs: List[str] = field(default_factory=list)
    exclude_dirs: List[str] = field(default_factory=list)
    allowed_patterns: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
|
|
17
|
+
|
|
18
|
+
@dataclass
class SearchParams(ScanConfig):
    """``ScanConfig`` extended with search roots and string-search switches.

    Every field added here has a default, so only the fields inherited from
    ``ScanConfig`` without defaults remain mandatory.
    """

    # --- where to search ---
    directories: List[str] = field(default_factory=list)
    add: bool = False
    recursive: bool = True

    # --- what to search for and how results are reported ---
    # NOTE(review): the exact meaning of each switch is defined by the
    # consumers of this dataclass, which are outside this file.
    strings: List[str] = field(default_factory=list)
    total_strings: bool = False
    parse_lines: bool = False
    spec_line: Union[bool, int] = False  # a bool or an int is accepted
    get_lines: bool = False
|
|
28
|
+
|
|
29
|
+
@dataclass
class AllParams(SearchParams):
    """``SearchParams`` plus explicit files, an optional predicate and a file flag."""

    # No annotation: ``@dataclass`` does not turn this into a field, so it is
    # a plain class attribute and not an ``__init__`` parameter.
    cfg = None

    # Each instance gets its own empty list.
    files: List[str] = field(default_factory=list)

    # Optional callable taking a string and returning a bool.
    allowed: Optional[Callable[[str], bool]] = None
    include_files: bool = True

    # Redeclared with the same default as the inherited ``recursive`` field;
    # a redeclared dataclass field keeps its original position.
    recursive: bool = True
|
|
36
|
+
# -------------------------
|
|
37
|
+
# Default sets
|
|
38
|
+
# -------------------------
|
|
39
|
+
DEFAULT_ALLOWED_EXTS: Set[str] = {
    # python
    ".py", ".pyw",
    # JS/TS
    ".js", ".jsx", ".ts", ".tsx", ".mjs",
    # markup
    ".html", ".htm", ".xml",
    # styles
    ".css", ".scss", ".sass", ".less",
    # configs
    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg",
    # docs
    ".md", ".markdown", ".rst",
    # scripts/env
    ".sh", ".bash", ".env",
    # plain text
    ".txt",
}

DEFAULT_EXCLUDE_TYPES: Set[str] = {
    "image",
    "video",
    "audio",
    "presentation",
    "spreadsheet",
    "archive",
    "executable",
}

# Extensions to reject: everything get_media_exts() reports for the excluded
# types, plus a hand-picked list of backup / GIS / bytecode extensions.
_unallowed = set(get_media_exts(DEFAULT_EXCLUDE_TYPES)) | {
    ".bak", ".shp", ".cpg", ".dbf", ".shx", ".geojson",
    ".pyc", ".prj", ".sbn", ".sbx",
}
# Anything also present in DEFAULT_ALLOWED_EXTS is dropped from the exclusion
# set, i.e. the allow-list wins. Entries keep only the text after the last
# dot, so they carry no leading "." (unlike DEFAULT_ALLOWED_EXTS).
# NOTE(review): confirm consumers expect dot-less exclude extensions.
DEFAULT_EXCLUDE_EXTS = {
    e.rsplit('.', 1)[-1] for e in _unallowed - DEFAULT_ALLOWED_EXTS
}

DEFAULT_EXCLUDE_DIRS: Set[str] = {
    "node_modules",
    "old",
    "__pycache__",
    "backups",
    "backup",
    "backs",
    "trash",
    "depriciated",
    "__init__",
}

DEFAULT_EXCLUDE_PATTERNS: Set[str] = {
    "__init__*",
    "*.tmp",
    "*.log",
    "*.lock",
    "*.zip",
    "*~",
}

# Matches "<user>@<host>:<absolute path>" and exposes the named groups
# ``host`` (including the "user@" part) and ``path``.
REMOTE_RE = re.compile(r"^(?P<host>[^:\s]+@[^:\s]+):(?P<path>/.*)$")
AllowedPredicate = Optional[Callable[[str], bool]]

# Alias: the very same set object as DEFAULT_EXCLUDE_PATTERNS.
DEFAULT_EXCLUDE_FILE_PATTERNS = DEFAULT_EXCLUDE_PATTERNS
DEFAULT_ALLOWED_PATTERNS: List[str] = ["*"]
DEFAULT_ALLOWED_DIRS: List[str] = ["*"]
DEFAULT_ALLOWED_TYPES: List[str] = ["*"]

# Canonical parameter name -> accepted alias spellings.
# "paths" / "path" are listed under both "directories" and "files".
CANONICAL_MAP = {
    "directories": [
        "directory", "directories", "dir", "dirs", "directory", "directories",
        "d", "dirname", "paths", "path", "roots", "root",
    ],
    "files": [
        "file", "filepath", "file_path", "files", "filepaths", "file_paths",
        "paths", "path", "f",
    ],
    "allowed_exts": [
        "allow_ext", "allowed_ext", "include_ext", "include_exts", "exts_allowed",
    ],
    "exclude_exts": [
        "exclude_ext", "excluded_ext", "excluded_exts", "unallowed_ext", "unallowed_exts",
    ],
    "allowed_types": [
        "allow_type", "allowed_type", "include_type", "include_types", "types_allowed",
    ],
    "exclude_types": [
        "exclude_type", "excluded_type", "excluded_types", "unallowed_type", "unallowed_types",
    ],
    "allowed_dirs": [
        "allow_dir", "allowed_dir", "include_dir", "include_dirs", "dirs_allowed",
    ],
    "exclude_dirs": [
        "exclude_dir", "excluded_dir", "excluded_dirs", "unallowed_dir", "unallowed_dirs",
    ],
    "allowed_patterns": [
        "allow_pattern", "allowed_pattern", "include_pattern", "include_patterns",
        "patterns_allowed",
    ],
    "exclude_patterns": [
        "exclude_pattern", "excluded_pattern", "excluded_patterns", "unallowed_pattern",
        "unallowed_patterns",
    ],
    # These options have a single spelling: the canonical name itself.
    **{
        flag: [flag]
        for flag in (
            "add", "recursive", "strings", "total_strings",
            "parse_lines", "spec_line", "get_lines",
        )
    },
}

# One descriptor per allow/exclude filter: its default value, the Python type
# of that default, and its alias list from CANONICAL_MAP.
DEFAULT_ALLOWED_EXCLUDE_MAP = {
    name: {
        "default": default,
        "type": type(default),
        "canonical": CANONICAL_MAP.get(name),
    }
    for name, default in (
        ("allowed_exts", DEFAULT_ALLOWED_EXTS),
        ("exclude_exts", DEFAULT_EXCLUDE_EXTS),
        ("allowed_types", DEFAULT_ALLOWED_TYPES),
        ("exclude_types", DEFAULT_EXCLUDE_TYPES),
        ("allowed_dirs", DEFAULT_ALLOWED_DIRS),
        ("exclude_dirs", DEFAULT_EXCLUDE_DIRS),
        ("allowed_patterns", DEFAULT_ALLOWED_PATTERNS),
        ("exclude_patterns", DEFAULT_EXCLUDE_PATTERNS),
    )
}

# Descriptor table for every recognised option, in lookup order.
DEFAULT_CANONICAL_MAP = {
    "directories": {
        "default": [], "type": list, "canonical": CANONICAL_MAP.get("directories"),
    },
    "files": {
        "default": [], "type": list, "canonical": CANONICAL_MAP.get("files"),
    },
    **DEFAULT_ALLOWED_EXCLUDE_MAP,
    # CANONICAL_MAP has no "allowed" key, so "canonical" is None here.
    # NOTE(review): "type" is bool although the default is None — confirm intent.
    "allowed": {
        "default": None, "type": bool, "canonical": CANONICAL_MAP.get("allowed"),
    },
    "add": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("add"),
    },
    "recursive": {
        "default": True, "type": bool, "canonical": CANONICAL_MAP.get("recursive"),
    },
    "strings": {
        "default": None, "type": list, "canonical": CANONICAL_MAP.get("strings"),
    },
    "total_strings": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("total_strings"),
    },
    "parse_lines": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("parse_lines"),
    },
    "spec_line": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("spec_line"),
    },
    "get_lines": {
        "default": False, "type": bool, "canonical": CANONICAL_MAP.get("get_lines"),
    },
}
|
|
118
|
+
|
|
119
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# ============================================================
|
|
2
|
+
# abstract_utilities/imports/imports.py
|
|
3
|
+
# Global imports hub — everything imported here will be
|
|
4
|
+
# automatically available to any module that does:
|
|
5
|
+
# from ..imports import *
|
|
6
|
+
# ============================================================
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from ....imports import *
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import os, sys, re, inspect
|
|
13
|
+
from typing import *
|
|
14
|
+
from types import MethodType
|
|
15
|
+
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
|
|
18
|
+
from typing import *
|
|
19
|
+
from werkzeug.utils import secure_filename
|
|
20
|
+
from werkzeug.datastructures import FileStorage
|
|
21
|
+
from pdf2image import convert_from_path # only used for OCR fallback
|
|
22
|
+
# ---- Core standard library modules -------------------------
|
|
23
|
+
|
|
24
|
+
from datetime import datetime
|
|
25
|
+
from types import ModuleType
|
|
26
|
+
|
|
27
|
+
# ---- Dataclasses and typing --------------------------------
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from typing import (
|
|
30
|
+
Any, Optional, List, Dict, Set, Tuple,
|
|
31
|
+
Iterable, Callable, Literal, Union, TypeVar
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# ---- Common 3rd-party dependencies --------------------------
|
|
35
|
+
from pdf2image import convert_from_path
|
|
36
|
+
from werkzeug.utils import secure_filename
|
|
37
|
+
from werkzeug.datastructures import FileStorage
|
|
38
|
+
|
|
39
|
+
# ---- Helpers ------------------------------------------------
|
|
40
|
+
from pprint import pprint
|
|
41
|
+
|
|
42
|
+
# ============================================================
|
|
43
|
+
# AUTO-EXPORT ALL NON-PRIVATE NAMES
|
|
44
|
+
# ============================================================
|
|
45
|
+
__all__ = [name for name in globals() if not name.startswith("_")]
|
|
46
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from ....list_utils import make_list
|
|
2
|
+
from ....type_utils import get_media_exts, is_media_type, MIME_TYPES, is_str
|
|
3
|
+
from ....ssh_utils import *
|
|
4
|
+
from ....env_utils import *
|
|
5
|
+
from ....read_write_utils import read_from_file,write_to_file
|
|
6
|
+
from ....log_utils import get_logFile
|
|
7
|
+
from ....class_utils import get_caller, get_caller_path, get_caller_dir,SingletonMeta,run_pruned_func
|
|
8
|
+
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
from ..imports import *
|
|
2
|
+
def get_item_check_cmd(path, file=True, directory=False, exists=False):
    """Build a shell snippet that tests ``path`` and echoes ``__OK__`` on success.

    The snippet has the form ``test -<flag> <quoted path> && echo __OK__ || true``.

    Args:
        path: Path to test; it is shell-quoted with ``shlex.quote``.
        file: Truthy to request a regular-file test (``-f``). A non-empty
            string is treated as a selector: containing ``"f"`` picks
            ``-f``, otherwise containing ``"d"`` picks ``-d``, otherwise
            ``-e`` is used.
        directory: Truthy to request a directory test (``-d``).
        exists: Truthy to request a plain existence test (``-e``).

    Returns:
        The command string. ``-e`` is used when ``exists`` is truthy, when
        both ``file`` and ``directory`` are truthy, or when nothing is
        selected.
    """
    if (directory and file) or exists:
        flag = "e"
    elif isinstance(file, str) and file:
        # Must run before the plain truthiness test below: previously any
        # non-empty string fell into ``elif file`` and always produced "-f",
        # which made this selector logic unreachable.
        if "f" in file:
            flag = "f"
        elif "d" in file:
            flag = "d"
        else:
            flag = "e"
    elif file:
        flag = "f"
    elif directory:
        flag = "d"
    else:
        flag = "e"
    return f"test -{flag} {shlex.quote(path)} && echo __OK__ || true"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_all_item_check_cmd(path, file=True, directory=True, exists=True):
    """Return one ``test`` command per requested check, keyed by check name.

    Keys are ``"file"``, ``"dir"`` and ``"exists"`` (in that order, only for
    the flags that are truthy). Each value is
    ``test -<f|d|e> <quoted path> && echo __OK__ || true``. When no flag is
    truthy an empty dict is returned and ``path`` is not quoted at all.
    """
    requested = [
        label
        for label, wanted in (("file", file), ("dir", directory), ("exists", exists))
        if wanted
    ]
    if not requested:
        return {}

    quoted = shlex.quote(path)
    # The first letter of each label is exactly the ``test`` flag it needs.
    return {
        label: f"test -{label[0]} {quoted} && echo __OK__ || true"
        for label in requested
    }
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def is_file(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` is a regular file, locally or through ``run_cmd``.

    When ``user_at_host``, ``password``, ``key`` and ``env_path`` are all
    ``None`` the result is ``os.path.isfile(path)`` (a bool). Otherwise a
    ``test -f`` command from ``get_item_check_cmd`` is handed to ``run_cmd``
    with the connection arguments and ``kwargs``, and whatever ``run_cmd``
    returns is returned unchanged; the command echoes ``__OK__`` on success.
    """
    connection = {user_at_host, password, key, env_path}
    # The set collapses to {None} only when no connection detail was given.
    if len(connection) <= 1 and None in connection:
        return os.path.isfile(path)

    check = get_item_check_cmd(path, file=True)
    return run_cmd(
        cmd=check,
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
|
|
64
|
+
def is_dir(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` is a directory, locally or through ``run_cmd``.

    When ``user_at_host``, ``password``, ``key`` and ``env_path`` are all
    ``None`` the result is ``os.path.isdir(path)`` (a bool). Otherwise a
    ``test -d`` command from ``get_item_check_cmd`` is handed to ``run_cmd``
    with the connection arguments and ``kwargs``, and whatever ``run_cmd``
    returns is returned unchanged; the command echoes ``__OK__`` on success.
    """
    connection = {user_at_host, password, key, env_path}
    no_connection_info = len(connection) <= 1 and None in connection
    if no_connection_info:
        return os.path.isdir(path)

    check = get_item_check_cmd(path, file=False, directory=True)
    return run_cmd(
        cmd=check,
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
|
|
85
|
+
def is_exists(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Check whether ``path`` exists, locally or through ``run_cmd``.

    When ``user_at_host``, ``password``, ``key`` and ``env_path`` are all
    ``None`` the result is ``os.path.exists(path)`` (a bool). Otherwise a
    ``test -e`` command from ``get_item_check_cmd`` is handed to ``run_cmd``
    with the connection arguments and ``kwargs``, and whatever ``run_cmd``
    returns is returned unchanged; the command echoes ``__OK__`` on success.
    """
    connection = {user_at_host, password, key, env_path}
    use_remote = len(connection) > 1 or None not in connection
    if not use_remote:
        return os.path.exists(path)

    return run_cmd(
        cmd=get_item_check_cmd(path, exists=True),
        user_at_host=user_at_host,
        password=password,
        key=key,
        env_path=env_path,
        **kwargs
    )
|
|
107
|
+
def is_any(
    path,
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    **kwargs
):
    """Classify ``path`` as file, directory or merely existing.

    Local branch: when ``user_at_host``, ``password``, ``key`` and
    ``env_path`` are all ``None``, returns ``os.path.exists(path)`` (a bool).

    Otherwise the commands from ``get_all_item_check_cmd`` are sent through
    ``run_cmd`` in the order ``"file"``, ``"dir"``, ``"exists"``; the label
    of the first one whose output contains ``__OK__`` is returned, or
    ``None`` when none does. Note the two branches return different kinds of
    value (bool versus label string / ``None``).
    """
    connection = {user_at_host, password, key, env_path}
    if len(connection) <= 1 and None in connection:
        return os.path.exists(path)

    checks = get_all_item_check_cmd(path, file=True, directory=True, exists=True)
    for label in checks:
        output = run_cmd(
            cmd=checks[label],
            user_at_host=user_at_host,
            password=password,
            key=key,
            env_path=env_path,
            **kwargs
        )
        if "__OK__" in (output or ""):
            return label
    return None
|
|
134
|
+
class PathBackend(Protocol):
    """Structural interface for the filesystem backends defined in this module."""

    def join(self, *parts: str) -> str:
        """Combine path components into one path string."""
        ...

    def isfile(self, path: str) -> bool:
        """Tell whether ``path`` is a regular file."""
        ...

    def isdir(self, path: str) -> bool:
        """Tell whether ``path`` is a directory."""
        ...

    def glob_recursive(self, base: str, **opts) -> List[str]:
        """List paths found below ``base``; ``opts`` tune the traversal."""
        ...

    def listdir(self, base: str) -> List[str]:
        """List the paths of the direct children of ``base``."""
        ...
|
|
140
|
+
|
|
141
|
+
class LocalFS:
|
|
142
|
+
def __init__(self, get_type=False, get_is_dir=False, get_is_file=False, get_is_exists=False, **kwargs):
|
|
143
|
+
self.get_type = get_type
|
|
144
|
+
self.get_is_dir = get_is_dir
|
|
145
|
+
self.get_is_file = get_is_file
|
|
146
|
+
self.get_is_exists = get_is_exists
|
|
147
|
+
|
|
148
|
+
def join(self, *parts: str) -> str:
|
|
149
|
+
return os.path.join(*parts)
|
|
150
|
+
|
|
151
|
+
def isfile(self, path: str) -> bool:
|
|
152
|
+
return os.path.isfile(path)
|
|
153
|
+
|
|
154
|
+
def isdir(self, path: str) -> bool:
|
|
155
|
+
return os.path.isdir(path)
|
|
156
|
+
|
|
157
|
+
def isexists(self, path: str) -> bool:
|
|
158
|
+
return os.path.exists(path)
|
|
159
|
+
|
|
160
|
+
def istype(self, path: str) -> str | None:
|
|
161
|
+
funcs_js = {"file": os.path.isfile, "dir": os.path.isdir, "exists": os.path.exists}
|
|
162
|
+
for key, func in funcs_js.items():
|
|
163
|
+
if func(path):
|
|
164
|
+
return key
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
def is_included(self, path, **kwargs):
|
|
168
|
+
include_js = {}
|
|
169
|
+
if self.get_type:
|
|
170
|
+
include_js["typ"] = self.istype(path)
|
|
171
|
+
if self.get_is_dir:
|
|
172
|
+
include_js["dir"] = self.isdir(path)
|
|
173
|
+
if self.get_is_file:
|
|
174
|
+
include_js["file"] = self.isfile(path)
|
|
175
|
+
if self.get_is_exists:
|
|
176
|
+
include_js["exists"] = self.isexists(path)
|
|
177
|
+
return include_js
|
|
178
|
+
def glob_recursive(self, base: str, **opts) -> List[str]:
|
|
179
|
+
"""
|
|
180
|
+
opts:
|
|
181
|
+
- maxdepth: int | None
|
|
182
|
+
- mindepth: int (default 1)
|
|
183
|
+
- follow_symlinks: bool
|
|
184
|
+
- include_dirs: bool
|
|
185
|
+
- include_files: bool
|
|
186
|
+
- exclude_hidden: bool
|
|
187
|
+
"""
|
|
188
|
+
maxdepth = opts.get("maxdepth")
|
|
189
|
+
mindepth = opts.get("mindepth", 1)
|
|
190
|
+
follow = opts.get("follow_symlinks", False)
|
|
191
|
+
want_d = opts.get("include_dirs", True)
|
|
192
|
+
want_f = opts.get("include_files", True)
|
|
193
|
+
hide = opts.get("exclude_hidden", False)
|
|
194
|
+
|
|
195
|
+
results: List[str] = []
|
|
196
|
+
base_depth = os.path.normpath(base).count(os.sep)
|
|
197
|
+
|
|
198
|
+
for root, dirs, files in os.walk(base, followlinks=follow):
|
|
199
|
+
depth = os.path.normpath(root).count(os.sep) - base_depth
|
|
200
|
+
if maxdepth is not None and depth > maxdepth:
|
|
201
|
+
dirs[:] = []
|
|
202
|
+
continue
|
|
203
|
+
if want_d and depth >= mindepth:
|
|
204
|
+
for d in dirs:
|
|
205
|
+
if hide and d.startswith("."): continue
|
|
206
|
+
results.append(os.path.join(root, d))
|
|
207
|
+
if want_f and depth >= mindepth:
|
|
208
|
+
for f in files:
|
|
209
|
+
if hide and f.startswith("."): continue
|
|
210
|
+
results.append(os.path.join(root, f))
|
|
211
|
+
return results
|
|
212
|
+
|
|
213
|
+
def listdir(self, base: str) -> List[str]:
|
|
214
|
+
try:
|
|
215
|
+
return [os.path.join(base, name) for name in os.listdir(base)]
|
|
216
|
+
except Exception:
|
|
217
|
+
return []
|
|
218
|
+
def get_spec_kwargs(
    user_at_host=None,
    password=None,
    key=None,
    env_path=None,
    kwargs=None
):
    """Fill the four connection keys of ``kwargs`` from the given fallbacks.

    For each of ``user_at_host``, ``password``, ``key`` and ``env_path`` the
    value already in ``kwargs`` is kept when it is truthy; otherwise the
    matching argument is stored. A non-empty ``kwargs`` dict is updated in
    place and returned; ``None`` or an empty dict yields a new dict.
    """
    merged = kwargs or {}
    fallbacks = (
        ("user_at_host", user_at_host),
        ("password", password),
        ("key", key),
        ("env_path", env_path),
    )
    for name, fallback in fallbacks:
        merged[name] = merged.get(name) or fallback
    return merged
|
|
231
|
+
class SSHFS:
|
|
232
|
+
"""Remote POSIX backend via run_remote_cmd."""
|
|
233
|
+
def __init__(self, password=None, key=None, env_path=None,
|
|
234
|
+
get_type=False, get_is_dir=False, get_is_file=False, get_is_exists=False, **kwargs):
|
|
235
|
+
self.user_at_host = kwargs.get('user_at_host') or kwargs.get('user') or kwargs.get('host')
|
|
236
|
+
self.password = password
|
|
237
|
+
self.key = key
|
|
238
|
+
self.env_path = env_path
|
|
239
|
+
self.get_type = get_type
|
|
240
|
+
self.get_is_dir = get_is_dir
|
|
241
|
+
self.get_is_file = get_is_file
|
|
242
|
+
self.get_is_exists = get_is_exists
|
|
243
|
+
|
|
244
|
+
def cell_spec_kwargs(self, func, path, **kwargs):
|
|
245
|
+
kwargs = get_spec_kwargs(
|
|
246
|
+
user_at_host=self.user_at_host,
|
|
247
|
+
password=self.password,
|
|
248
|
+
key=self.key,
|
|
249
|
+
env_path=self.env_path,
|
|
250
|
+
kwargs=kwargs
|
|
251
|
+
)
|
|
252
|
+
return func(path, **kwargs)
|
|
253
|
+
|
|
254
|
+
def is_included(self, path, **kwargs):
|
|
255
|
+
include_js = {}
|
|
256
|
+
if self.get_type:
|
|
257
|
+
include_js["typ"] = self.istype(path, **kwargs)
|
|
258
|
+
if self.get_is_dir:
|
|
259
|
+
include_js["dir"] = self.isdir(path, **kwargs)
|
|
260
|
+
if self.get_is_file:
|
|
261
|
+
include_js["file"] = self.isfile(path, **kwargs)
|
|
262
|
+
if self.get_is_exists:
|
|
263
|
+
include_js["exists"] = self.isexists(path, **kwargs)
|
|
264
|
+
return include_js
|
|
265
|
+
|
|
266
|
+
def join(self, *parts: str) -> str:
|
|
267
|
+
return posixpath.join(*parts)
|
|
268
|
+
|
|
269
|
+
def isfile(self, path: str, **kwargs) -> bool:
|
|
270
|
+
out = self.cell_spec_kwargs(is_file, path, **kwargs)
|
|
271
|
+
return "__OK__" in (out or "")
|
|
272
|
+
|
|
273
|
+
def isdir(self, path: str, **kwargs) -> bool:
|
|
274
|
+
out = self.cell_spec_kwargs(is_dir, path, **kwargs)
|
|
275
|
+
return "__OK__" in (out or "")
|
|
276
|
+
|
|
277
|
+
def isexists(self, path: str, **kwargs) -> bool:
|
|
278
|
+
out = self.cell_spec_kwargs(is_exists, path, **kwargs)
|
|
279
|
+
return "__OK__" in (out or "")
|
|
280
|
+
|
|
281
|
+
def istype(self, path: str, **kwargs) -> str | None:
|
|
282
|
+
out = self.cell_spec_kwargs(is_any, path, **kwargs)
|
|
283
|
+
return out
|
|
284
|
+
|
|
285
|
+
def glob_recursive(self, base: str, **opts) -> List[str]:
|
|
286
|
+
maxdepth = opts.get("maxdepth")
|
|
287
|
+
mindepth = opts.get("mindepth", 1)
|
|
288
|
+
follow = opts.get("follow_symlinks", False)
|
|
289
|
+
want_d = opts.get("include_dirs", True)
|
|
290
|
+
want_f = opts.get("include_files", True)
|
|
291
|
+
hide = opts.get("exclude_hidden", False)
|
|
292
|
+
|
|
293
|
+
parts = []
|
|
294
|
+
if follow:
|
|
295
|
+
parts.append("-L")
|
|
296
|
+
parts += ["find", shlex.quote(base)]
|
|
297
|
+
if mindepth is not None:
|
|
298
|
+
parts += ["-mindepth", str(mindepth)]
|
|
299
|
+
if maxdepth is not None:
|
|
300
|
+
parts += ["-maxdepth", str(maxdepth)]
|
|
301
|
+
|
|
302
|
+
type_filters = []
|
|
303
|
+
if want_d and not want_f:
|
|
304
|
+
type_filters = ["-type", "d"]
|
|
305
|
+
elif want_f and not want_d:
|
|
306
|
+
type_filters = ["-type", "f"]
|
|
307
|
+
|
|
308
|
+
hidden_filter = []
|
|
309
|
+
if hide:
|
|
310
|
+
hidden_filter = ["!", "-regex", r".*/\..*"]
|
|
311
|
+
|
|
312
|
+
cmd = " ".join(parts + type_filters + hidden_filter + ["-printf", r"'%p\n'"]) + " 2>/dev/null"
|
|
313
|
+
out = run_remote_cmd(self.user_at_host, cmd)
|
|
314
|
+
return [line.strip().strip("'") for line in (out or "").splitlines() if line.strip()]
|
|
315
|
+
|
|
316
|
+
def listdir(self, base: str) -> List[str]:
|
|
317
|
+
cmd = f"find {shlex.quote(base)} -maxdepth 1 -mindepth 1 -printf '%p\\n' 2>/dev/null"
|
|
318
|
+
out = run_remote_cmd(self.user_at_host, cmd)
|
|
319
|
+
return [line.strip() for line in (out or "").splitlines() if line.strip()]
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def try_group(pre, item, strings):
    """Match ``item`` against ``pre`` and return the requested groups.

    Args:
        pre: Compiled regular expression (anything with a ``match`` method).
        item: String to match from its start.
        strings: Group names (or indices) to extract.

    Returns:
        A new list with the value of each requested group, in the order
        given, or ``None`` when ``item`` does not match, a group does not
        exist, or an argument has an unusable type. ``strings`` itself is
        left untouched (it used to be overwritten in place, and could be
        left half-overwritten on failure).
    """
    try:
        match = pre.match(item)
        return [match.group(name) for name in strings]
    except (AttributeError, IndexError, TypeError):
        # AttributeError: no match (``match`` is None) or ``pre`` is not a pattern.
        # IndexError: unknown group. TypeError: non-string item / bad ``strings``.
        return None
|
|
333
|
+
def normalize_items(
    paths: Iterable[str],
    user_at_host=None,
    get_type=True,
    get_is_dir=False,
    get_is_file=False,
    get_is_exists=False,
    **kwargs
) -> List[tuple[PathBackend, str, dict]]:
    """Pair every path with a backend and the checks requested for it.

    A path of the form ``user@host:/abs/path`` (see ``REMOTE_RE``) is split
    into host and path and handled by ``SSHFS``. Any other path is handled
    by ``SSHFS`` when a host is supplied through ``user_at_host`` or the
    ``host`` / ``user`` keyword arguments, and by ``LocalFS`` otherwise.
    A host embedded in the path takes precedence over the supplied one.

    Args:
        paths: One path or an iterable of paths (normalised with
            ``make_list``); falsy entries are skipped.
        user_at_host: Default remote target for paths without their own host.
        get_type, get_is_dir, get_is_file, get_is_exists: Forwarded to the
            backend; they select what ``is_included`` reports.
        **kwargs: Forwarded to ``SSHFS`` (for example ``password``, ``key``,
            ``env_path``).

    Returns:
        A list of ``(backend, path, includes)`` tuples where ``includes`` is
        the dict returned by ``backend.is_included(path)``.
    """
    pairs: List[tuple[PathBackend, str, dict]] = []
    host = user_at_host or kwargs.get("host") or kwargs.get("user")
    paths = make_list(paths)
    for item in paths:
        if not item:
            continue

        strings = try_group(REMOTE_RE, item, ["host", "path"])
        parsed = bool(strings) and None not in strings

        if parsed or host:
            nuhost = None
            if parsed:
                nuhost = strings[0]
                item = strings[1] or item
            nuhost = nuhost or host
            # The host must be passed by keyword: the first positional
            # parameter of SSHFS is ``password``, so ``SSHFS(nuhost, ...)``
            # stored the host as the password and lost a host parsed from
            # the path whenever ``user_at_host`` was not given.
            fs_host = SSHFS(
                user_at_host=nuhost,
                get_type=get_type,
                get_is_dir=get_is_dir,
                get_is_file=get_is_file,
                get_is_exists=get_is_exists,
                **kwargs
            )
        else:
            fs_host = LocalFS(
                get_type=get_type,
                get_is_dir=get_is_dir,
                get_is_file=get_is_file,
                get_is_exists=get_is_exists
            )

        includes = fs_host.is_included(item)
        pairs.append((fs_host, item, includes))
    return pairs
|
|
378
|
+
|
|
379
|
+
|