abstract-utilities 0.2.2.493__py3-none-any.whl → 0.2.2.495__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_utilities/__init__.py +0 -1
- abstract_utilities/file_utils/__init__.py +1 -2
- abstract_utilities/file_utils/imports/constants.py +6 -0
- abstract_utilities/file_utils/imports/imports.py +1 -1
- abstract_utilities/file_utils/imports/module_imports.py +1 -2
- abstract_utilities/file_utils/module_imports.py +12 -0
- abstract_utilities/file_utils/src/__init__.py +10 -0
- abstract_utilities/file_utils/src/file_filters.py +110 -0
- abstract_utilities/file_utils/src/file_reader.py +607 -0
- abstract_utilities/file_utils/src/file_utils.py +279 -0
- abstract_utilities/file_utils/src/filter_params.py +155 -0
- abstract_utilities/file_utils/src/find_collect.py +154 -0
- abstract_utilities/file_utils/src/initFunctionsGen.py +286 -0
- abstract_utilities/file_utils/src/map_utils.py +29 -0
- abstract_utilities/file_utils/src/pdf_utils.py +300 -0
- abstract_utilities/file_utils/src/type_checks.py +92 -0
- abstract_utilities/import_utils/__init__.py +2 -0
- abstract_utilities/import_utils/imports/__init__.py +4 -0
- abstract_utilities/import_utils/imports/constants.py +2 -0
- abstract_utilities/import_utils/imports/imports.py +4 -0
- abstract_utilities/import_utils/imports/module_imports.py +6 -0
- abstract_utilities/import_utils/imports/utils.py +30 -0
- abstract_utilities/import_utils/src/__init__.py +7 -0
- abstract_utilities/import_utils/src/clean_imports.py +122 -0
- abstract_utilities/import_utils/src/dot_utils.py +60 -0
- abstract_utilities/import_utils/src/extract_utils.py +42 -0
- abstract_utilities/import_utils/src/import_functions.py +46 -0
- abstract_utilities/import_utils/src/import_utils.py +299 -0
- abstract_utilities/import_utils/src/package_utils/__init__.py +139 -0
- abstract_utilities/import_utils/src/package_utils/context_utils.py +27 -0
- abstract_utilities/import_utils/src/package_utils/import_collectors.py +53 -0
- abstract_utilities/import_utils/src/package_utils/path_utils.py +28 -0
- abstract_utilities/import_utils/src/package_utils/safe_import.py +27 -0
- abstract_utilities/import_utils/src/package_utils.py +140 -0
- abstract_utilities/import_utils/src/sysroot_utils.py +57 -0
- abstract_utilities/path_utils.py +1 -12
- abstract_utilities/read_write_utils.py +31 -14
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.495.dist-info}/METADATA +1 -1
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.495.dist-info}/RECORD +42 -11
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.495.dist-info}/top_level.txt +1 -0
- imports/__init__.py +36 -0
- {abstract_utilities-0.2.2.493.dist-info → abstract_utilities-0.2.2.495.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
# attach_functions.py — single helper you can import anywhere
|
|
2
|
+
# attach_dynamic.py
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from .file_utils import define_defaults,get_files_and_dirs
|
|
5
|
+
from ..imports import *
|
|
6
|
+
ABSPATH = os.path.abspath(__file__)
|
|
7
|
+
ABSROOT = os.path.dirname(ABSPATH)
|
|
8
|
+
def caller_path():
    """Return the absolute path of the file that called this function.

    The file name is read from the second entry of ``inspect.stack()``,
    i.e. the frame directly above this one.
    """
    calling_frame = inspect.stack()[1]
    source_file = calling_frame.filename
    return os.path.abspath(source_file)
|
|
11
|
+
def _is_defined_here(mod: types.ModuleType, obj: object) -> bool:
    """Tell whether ``inspect.getmodule(obj)`` is exactly ``mod``.

    Any exception raised while resolving the owning module counts as "no".
    """
    try:
        owner = inspect.getmodule(obj)
    except Exception:
        return False
    return owner is mod
|
|
16
|
+
|
|
17
|
+
def _collect_callables(mod: types.ModuleType) -> Dict[str, Callable]:
    """Map exported names of ``mod`` to their callable objects.

    When the module declares ``__all__`` (even an empty one), only the names
    listed there are considered and every callable among them is returned.
    Without ``__all__`` the module is scanned with ``dir()``: names starting
    with an underscore are skipped and only callables that
    ``_is_defined_here`` attributes to ``mod`` are kept.

    Args:
        mod: Module to inspect.

    Returns:
        Dict of attribute name to callable.
    """
    out: Dict[str, Callable] = {}
    declared = getattr(mod, "__all__", None)
    if declared is not None:
        # Trust the author's export list. An explicit empty list means
        # "export nothing" and must not fall through to discovery.
        for name in declared:
            candidate = getattr(mod, name, None)
            if callable(candidate):
                out[name] = candidate
        return out

    # No export list: discover public top-level callables defined in the module.
    for name in dir(mod):
        if name.startswith("_"):
            continue
        candidate = getattr(mod, name, None)
        if callable(candidate) and _is_defined_here(mod, candidate):
            out[name] = candidate
    return out
|
|
35
|
+
|
|
36
|
+
def _import_module_by_name(name: str) -> Optional[types.ModuleType]:
    """Import ``name`` via ``importlib.import_module``; return ``None`` on any error."""
    try:
        module = importlib.import_module(name)
    except Exception:
        module = None
    return module
|
|
41
|
+
|
|
42
|
+
def _import_module_by_path(pkg_name: str, base_dir: str, filename: str) -> Optional[types.ModuleType]:
    """Load ``base_dir/filename`` as a module named ``pkg_name + ".functions"``.

    Args:
        pkg_name: Prefix used to build the module name registered in
            ``sys.modules``.
        base_dir: Directory that contains the source file.
        filename: File name inside ``base_dir``.

    Returns:
        The executed module, or ``None`` when no spec/loader could be built.

    Raises:
        Whatever executing the module source raises. In that case the
        ``sys.modules`` entry is rolled back to its previous state.
    """
    mod_name = f"{pkg_name}.functions"
    path = os.path.join(base_dir, filename)
    spec = importlib.util.spec_from_file_location(mod_name, path)
    if not spec or not spec.loader:
        return None
    mod = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(mod_name)
    # Register before executing so the module can be found during its own import.
    sys.modules[mod_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        if previous is None:
            sys.modules.pop(mod_name, None)
        else:
            sys.modules[mod_name] = previous
        raise
    return mod
|
|
52
|
+
|
|
53
|
+
def _walk_functions_package(pkg_name: str, pkg_mod: types.ModuleType) -> List[types.ModuleType]:
    """Import all immediate submodules in the functions/ package.

    Args:
        pkg_name: Unused; kept so existing callers keep working.
        pkg_mod: The already imported package (or module).

    Returns:
        ``[pkg_mod]`` followed by every direct child that imported
        successfully. Children that fail to import are skipped.
    """
    mods: List[types.ModuleType] = [pkg_mod]

    # Prefer the package's own search path. Falling back to
    # dirname(__file__) alone breaks when __file__ is missing or None:
    # an empty directory string makes pkgutil scan the current directory.
    search_path = getattr(pkg_mod, "__path__", None)
    if search_path is None:
        pkg_file = getattr(pkg_mod, "__file__", None)
        if not pkg_file:
            return mods
        search_path = [os.path.dirname(pkg_file)]

    for info in pkgutil.iter_modules(list(search_path)):
        # Only direct children are imported; there is no recursion here.
        child = _import_module_by_name(f"{pkg_mod.__name__}.{info.name}")
        if child:
            mods.append(child)
    return mods
|
|
64
|
+
|
|
65
|
+
def _discover_functions(base_pkg: str, *, hot_reload: bool) -> List[Tuple[str, Callable, str]]:
    """Collect ``(export_name, callable, module_basename)`` triples.

    The module ``base_pkg + ".functions"`` is imported by name first. If
    that fails, a ``functions.py`` file next to the imported ``base_pkg``
    is loaded by path instead. When the result is a package, its direct
    children are included as well.

    Args:
        base_pkg: Dotted name of the package that owns the functions.
        hot_reload: When true, ``importlib.reload`` is attempted on the
            functions module; reload errors are ignored.

    Returns:
        One triple per callable reported by ``_collect_callables``; an empty
        list when nothing could be located.
    """
    target = _import_module_by_name(f"{base_pkg}.functions")

    if target is None:
        # Fallback: a sibling functions.py, even without being a package.
        parent = _import_module_by_name(base_pkg)
        if not parent or not getattr(parent, "__file__", None):
            return []
        parent_dir = os.path.dirname(parent.__file__)
        if not os.path.isfile(os.path.join(parent_dir, "functions.py")):
            return []
        target = _import_module_by_path(base_pkg, parent_dir, "functions.py")

    if hot_reload:
        try:
            target = importlib.reload(target)  # type: ignore[arg-type]
        except Exception:
            pass

    sources: List[types.ModuleType]
    if hasattr(target, "__path__"):
        # A package: its children are imported too.
        sources = _walk_functions_package(base_pkg, target)
    else:
        sources = [target]

    results: List[Tuple[str, Callable, str]] = []
    for source in sources:
        exported = _collect_callables(source)
        basename = source.__name__.split(".")[-1]
        results.extend((name, fn, basename) for name, fn in exported.items())
    return results
|
|
105
|
+
|
|
106
|
+
def attach_functions(
    obj_or_cls,
    base_pkg: str | None = None,
    hot_reload: bool = True,
    prefix_with_module: bool = False,
    include_private: bool = True,
    only_defined_here: bool = True,  # don't attach stuff imported from elsewhere
) -> list[str]:
    """Attach free functions from a sibling ``functions`` module to a class.

    The functions module name is ``base_pkg + ".functions"``. When
    ``base_pkg`` is not given, the class's ``__module__`` with its last
    dotted component removed is used instead. If that module is a package,
    its direct submodules are processed as well.

    Args:
        obj_or_cls: A class, or an instance whose class receives the functions.
        base_pkg: Dotted package name that owns ``functions``.
        hot_reload: Reload modules already present in ``sys.modules``.
        prefix_with_module: Name attributes ``module__function`` instead of
            ``function``.
        include_private: Also attach names starting with an underscore
            (dunder names are always skipped).
        only_defined_here: Skip callables whose ``__module__`` differs from
            the module being scanned.

    Returns:
        The attribute names set on the class, also stored as a tuple in
        ``cls._attached_functions``.
    """
    target_cls = obj_or_cls if inspect.isclass(obj_or_cls) else obj_or_cls.__class__
    owner_module = target_cls.__module__
    root_name = (base_pkg or owner_module.rsplit(".", 1)[0]).rstrip(".")
    functions_name = f"{root_name}.functions"

    def _load(dotted: str) -> types.ModuleType | None:
        # Import (or reload) a module; any failure yields None.
        try:
            if hot_reload and dotted in sys.modules:
                return importlib.reload(sys.modules[dotted])
            return importlib.import_module(dotted)
        except Exception:
            return None

    def _wanted(source: types.ModuleType, name: str, member: object) -> bool:
        # Only callables (classes excluded) that pass the configured filters.
        if not callable(member) or isinstance(member, type):
            return False
        if only_defined_here and getattr(member, "__module__", None) != source.__name__:
            return False
        if not include_private and name.startswith("_"):
            return False
        return not (name.startswith("__") and name.endswith("__"))

    functions_mod = _load(functions_name)
    if functions_mod is None:
        # Nothing to attach (no functions.py or functions/ next to the class).
        setattr(target_cls, "_attached_functions", tuple())
        return []

    sources: list[types.ModuleType] = [functions_mod]
    if hasattr(functions_mod, "__path__"):
        # Attach from every direct submodule under functions/.
        for entry in pkgutil.iter_modules(functions_mod.__path__):
            child = _load(f"{functions_name}.{entry.name}")
            if child is not None:
                sources.append(child)

    attached: list[str] = []
    for source in sources:
        for name, member in vars(source).items():
            if not _wanted(source, name, member):
                continue
            if prefix_with_module:
                attr = f"{source.__name__.rsplit('.', 1)[-1]}__{name}"
            else:
                attr = name
            try:
                # Set on the class so instances see it as a bound method.
                setattr(target_cls, attr, member)
            except Exception:
                # One failing name must not stop the rest.
                continue
            attached.append(attr)

    # Handy for debugging.
    try:
        setattr(target_cls, "_attached_functions", tuple(attached))
    except Exception:
        pass
    return attached
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def isTab(item):
    """Return True when ``item`` ends with 'console' or 'tab', ignoring case."""
    return item.lower().endswith(('console', 'tab'))
|
|
185
|
+
def get_dir(root,item):
    """Join ``root`` and ``item`` into one path.

    Args:
        root: Base directory; ``None`` short-circuits to ``None``.
        item: Entry name below ``root``; ``None`` means "just the root".

    Returns:
        ``None`` when ``root`` is ``None``, ``root`` itself when ``item`` is
        ``None``, otherwise ``os.path.join(root, item)``.
    """
    # Identity tests instead of `None in [root]` / `!= None`, which go
    # through __eq__ and can misfire on objects with custom equality.
    if root is None:
        return None
    if item is None:
        return root
    return os.path.join(root, item)
|
|
192
|
+
def isDir(root,item=None):
    """Check whether ``get_dir(root, item)`` is an existing directory.

    Returns:
        ``None`` when no usable path could be built (``get_dir`` returned a
        falsy value), otherwise the result of ``os.path.isdir``.
    """
    candidate = get_dir(root, item)
    if not candidate:
        return None
    return os.path.isdir(candidate)
|
|
196
|
+
def check_dir_item(root,item=None):
    """Evaluate ``item and isTab(item) and isDir(root, item)`` step by step.

    Returns the first falsy operand, or the ``isDir`` result when ``item``
    is truthy and passes ``isTab``.
    """
    if not item:
        return item
    tab_like = isTab(item)
    if not tab_like:
        return tab_like
    return isDir(root, item)
|
|
198
|
+
def get_dirs(root = None):
    """List the entries of ``root`` that ``check_dir_item`` accepts.

    Args:
        root: Directory to scan; a falsy value selects ``ABSROOT``.

    Returns:
        A list of paths built with ``get_dir`` for each accepted entry.
    """
    base = root or ABSROOT
    found = []
    for entry in os.listdir(base):
        if check_dir_item(base, entry):
            found.append(get_dir(base, entry))
    return found
|
|
202
|
+
def ifFunctionsInFile(root):
    """Locate the functions target inside ``root``.

    Returns:
        ``root/functions`` if it exists, else ``root/functions.py`` if it
        exists, else ``None``.
    """
    for leaf in ("functions", "functions.py"):
        candidate = os.path.join(root, leaf)
        if os.path.exists(candidate):
            return candidate
    return None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_for_all_tabs(root = None):
    """Run ``apply_inits`` for every tab/console directory below ``root``.

    Each directory returned by ``get_dirs(root)`` is processed directly when
    it contains a functions target (see ``ifFunctionsInFile``). Otherwise
    its own tab/console subdirectories are processed, one level deep.

    Args:
        root: Directory to scan, or a file whose directory is scanned. A
            falsy value selects the file of the code calling this function.
    """
    if not root:
        # Resolve the caller from this frame. caller_path() looks exactly
        # one frame above itself, so calling it here would report this
        # module's own file rather than the file of our caller.
        root = os.path.abspath(inspect.stack()[1].filename)
    if os.path.isfile(root):
        root = os.path.dirname(root)

    for tab_dir in get_dirs(root = root):
        if ifFunctionsInFile(tab_dir) is not None:
            apply_inits(tab_dir)
            continue
        for nested_dir in get_dirs(root = tab_dir):
            # Directories without a functions target have nothing to
            # generate, so they are skipped.
            if ifFunctionsInFile(nested_dir) is not None:
                apply_inits(nested_dir)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def apply_inits(ROOT):
    """Generate ``initFuncs.py`` (and ``functions/__init__.py``) for ``ROOT``.

    Source files are scanned for top-level ``def name(self`` definitions.
    When the functions target is a directory, ``functions/__init__.py`` is
    rewritten to re-export them. ``ROOT/initFuncs.py`` is then written with
    an ``initFuncs(self)`` helper that sets each function on ``self``.

    Args:
        ROOT: Directory expected to contain ``functions/`` or ``functions.py``.

    Returns:
        None. Nothing is written when ``ROOT`` has no functions target, and
        ``initFuncs.py`` is not written when no functions were found.
    """
    FUNCS_DIR = ifFunctionsInFile(ROOT)
    if_fun_dir = isDir(FUNCS_DIR)
    if if_fun_dir is None:
        # No functions/ and no functions.py. Previously this fell through
        # and failed with NameError on the undefined `filepaths`.
        return

    if if_fun_dir:
        CFG = define_defaults(allowed_exts='.py',
                              unallowed_exts = True,
                              exclude_types = True,
                              exclude_dirs = True,
                              exclude_patterns = True)
        _,filepaths = get_files_and_dirs(FUNCS_DIR,cfg=CFG)
    else:
        filepaths = [FUNCS_DIR]

    # Top-level defs whose first parameter starts with `self`.
    self_def = re.compile(r"^def\s+([A-Za-z_]\w*)\s*\(self")

    def extract_funcs(path: str):
        matches = (self_def.match(line) for line in read_from_file(path).splitlines())
        return [m.group(1) for m in matches if m]

    # Build functions/__init__.py that re-exports all discovered functions.
    import_lines = []
    all_funcs = []
    for fp in filepaths:
        module = os.path.splitext(os.path.basename(fp))[0]
        funcs = extract_funcs(fp)
        if funcs:
            import_lines.append(f"from .{module} import ({', '.join(funcs)})")
            all_funcs.extend(funcs)
    if if_fun_dir:
        functions_init = "\n".join(import_lines) + ("\n" if import_lines else "")
        write_to_file(contents=functions_init, file_path=os.path.join(FUNCS_DIR, "__init__.py"))

    # Prepare the tuple literal of function names for the import and the loop.
    uniq_funcs = sorted(set(all_funcs))
    if not uniq_funcs:
        # `from .functions import ()` is a SyntaxError, so do not emit a
        # broken initFuncs.py when there is nothing to attach.
        return
    func_tuple = ", ".join(uniq_funcs) + ("," if len(uniq_funcs) == 1 else "")

    # NOTE(review): the generated module calls `logger` but the template
    # contains no import for it -- confirm how `logger` is meant to be
    # provided to the generated file.
    init_funcs_src = textwrap.dedent(f"""\


        from .functions import ({func_tuple})

        def initFuncs(self):
            try:
                for f in ({func_tuple}):
                    setattr(self, f.__name__, f)
            except Exception as e:
                logger.info(f"{{e}}")
            return self
    """)

    write_to_file(contents=init_funcs_src, file_path=os.path.join(ROOT, "initFuncs.py"))
|
|
283
|
+
|
|
284
|
+
def call_for_all_tabs():
    """Run ``get_for_all_tabs`` on whatever ``get_caller_dir(2)`` returns."""
    get_for_all_tabs(get_caller_dir(2))
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from ..imports import MIME_TYPES,make_list,os
|
|
2
|
+
from .file_filters import get_globs
|
|
3
|
+
def get_file_type(file_path,types=None,default=None):
    """Classify ``file_path`` by its extension using ``MIME_TYPES``.

    Args:
        file_path: Path to classify; it must be an existing file to match.
        types: Optional type name(s) restricting which ``MIME_TYPES``
            entries are consulted (normalised with ``make_list``).
        default: Value returned when nothing matches.

    Returns:
        The first type key whose extension collection contains the file's
        extension, otherwise ``default``.
    """
    if types:
        # Unknown type names map to an empty collection instead of None, so
        # the membership test below cannot raise TypeError.
        mime_types = {typ: MIME_TYPES.get(typ) or () for typ in make_list(types)}
    else:
        mime_types = MIME_TYPES

    if os.path.isfile(file_path):
        ext = os.path.splitext(os.path.basename(file_path))[1]
        for file_type, ext_values in mime_types.items():
            if ext in ext_values:
                return file_type
    # Previously `default` was accepted but never returned.
    return default
|
|
18
|
+
def get_file_map(directory,types=None,default=None):
    """Group the paths returned by ``get_globs(directory)`` by file type.

    Args:
        directory: Directory to scan; an existing file is replaced by its
            parent directory.
        types: Forwarded to ``get_file_type``.
        default: Forwarded to ``get_file_type``.

    Returns:
        Dict mapping each truthy type returned by ``get_file_type`` to the
        list of matching paths.
    """
    if directory and os.path.isfile(directory):
        directory = os.path.dirname(directory)
    grouped = {}
    for path in get_globs(directory):
        kind = get_file_type(path,types=types,default=default)
        if kind:
            grouped.setdefault(kind, []).append(path)
    return grouped
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from ..imports import *
|
|
2
|
+
def if_none_return(obj: object, obj_2: object) -> object:
    """Pick ``obj_2`` unless it is ``None``, in which case pick ``obj``.

    Args:
        obj: Fallback value.
        obj_2: Preferred value.

    Returns:
        ``obj_2`` when it is not ``None``, otherwise ``obj``.
    """
    if obj_2 is None:
        return obj
    return obj_2
|
|
14
|
+
|
|
15
|
+
def write_pdf() -> PyPDF2.PdfWriter:
    """Create and return an empty ``PyPDF2.PdfWriter``."""
    writer = PyPDF2.PdfWriter()
    return writer
|
|
23
|
+
def read_pdf(file: str):
    """Build a ``PyPDF2.PdfReader`` for ``file``.

    NOTE: a second ``read_pdf`` definition later in this module rebinds the
    name when the module is loaded.

    Args:
        file: Path to the PDF file.

    Returns:
        The ``PyPDF2.PdfReader`` instance.
    """
    reader = PyPDF2.PdfReader(file)
    return reader
|
|
34
|
+
def is_pdf_path(file: str):
    """Check whether ``file`` is an existing file with a PDF extension.

    Args:
        file: Path to test.

    Returns:
        bool: True if ``is_file(file)`` holds and the extension is '.pdf'
        in any letter case, False otherwise.
    """
    # Compare case-insensitively so 'REPORT.PDF' is accepted, matching the
    # second is_pdf_path definition further down this module.
    return bool(is_file(file)) and get_ext(file).lower() == '.pdf'
|
|
48
|
+
|
|
49
|
+
def read_pdf(file: str):
    """Open ``file`` with ``PyPDF2.PdfReader`` and return the reader."""
    pdf_reader = PyPDF2.PdfReader(file)
    return pdf_reader
|
|
52
|
+
def get_pdf_obj(pdf_obj: Union[str, object]) -> object:
    """Turn a PDF path into a reader object; pass anything else through.

    Args:
        pdf_obj: A path to a PDF file, or an already loaded PDF object.

    Returns:
        The result of ``read_pdf`` when ``pdf_obj`` is a string accepted by
        ``is_pdf_path``; otherwise ``pdf_obj`` unchanged.
    """
    if is_str(pdf_obj) and is_pdf_path(pdf_obj):
        return read_pdf(pdf_obj)
    return pdf_obj
|
|
67
|
+
def get_separate_pages(pdf_reader, start_page:int=1, end_page:int=None):
    """Copy a range of pages into a new PDF writer.

    Page numbers are 1-based and the range is inclusive on both ends.

    Args:
        pdf_reader (object): The PDF reader object.
        start_page (int, optional): First page number to copy. Defaults to 1;
            values below 1 are treated as 1.
        end_page (int, optional): Last page number to copy. Defaults to the
            last page; larger values are clamped to the last page.

    Returns:
        object: A new PDF writer holding the selected pages, or False when
        ``start_page`` lies beyond the last page (including when the page
        count could not be determined).
    """
    num_pages = get_pdf_pages(pdf_reader)

    if end_page is None or num_pages < end_page:
        end_page = num_pages
    # Checked independently: as an `elif` this was skipped whenever
    # end_page had just been defaulted or clamped.
    if num_pages < start_page:
        return False

    pdf_writer = write_pdf()
    first_index = max(start_page, 1) - 1
    # Page numbers are 1-based while reader.pages is 0-based. Comparing the
    # raw index with the page numbers dropped the first page.
    for page_index in range(first_index, end_page):
        pdf_writer.add_page(pdf_reader.pages[page_index])
    return pdf_writer
|
|
93
|
+
def is_pdf_path(file):
    """Tell whether ``file`` is an existing file named like a PDF.

    Args:
        file (str): File path.

    Returns:
        bool: True when ``is_file(file)`` holds and the lower-cased
        extension equals '.pdf'; False otherwise.
    """
    if not is_file(file):
        return False
    return get_ext(file).lower() == '.pdf'
|
|
106
|
+
|
|
107
|
+
def get_pdf_pages(pdf_file):
    """Get the total number of pages in the PDF.

    Args:
        pdf_file (object/str): PDF reader object or path to a PDF file.

    Returns:
        int: Number of pages, or False when the page count cannot be read
        from the object returned by ``get_pdf_obj``.
    """
    pdf_file = get_pdf_obj(pdf_file)
    try:
        return len(pdf_file.pages)
    except Exception:
        # Still best-effort, but a bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        return False
|
|
122
|
+
def save_pdf(output_file_path, pdf_writer):
    """Write ``pdf_writer`` to ``output_file_path`` in binary mode.

    Args:
        output_file_path (str): Destination path; the file is opened with 'wb'.
        pdf_writer (object): Object whose ``write(file)`` method does the
            serialisation.
    """
    with open(output_file_path, 'wb') as sink:
        pdf_writer.write(sink)
|
|
132
|
+
def split_pdf(input_path: str, output_folder: Optional[str] = None, file_name: Optional[str] = None) -> List[str]:
    """Split a PDF file into one single-page PDF per page.

    Args:
        input_path (str): Path to the input PDF file.
        output_folder (str, optional): Directory for the page files.
            Defaults to ``get_directory(input_path)``.
        file_name (str, optional): Base name for the page files. Defaults to
            ``get_file_name(input_path)``.

    Returns:
        list: Paths of the written files, named
        ``{file_name}_page_{n}.pdf`` with ``n`` starting at 1.
    """
    pdf_pages = []
    file_name = get_file_name(input_path) if file_name is None else file_name
    output_folder = if_none_return(get_directory(input_path), output_folder)

    print(f"Splitting PDF: {input_path}")
    print(f"Output Folder: {output_folder}")
    print(f"Using Filename: {file_name}")

    with open(input_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)

        print(f"Number of pages in PDF: {num_pages}")

        # The source stays open while the pages are copied out of the reader.
        for page_num in range(num_pages):
            pdf_writer = PyPDF2.PdfWriter()
            pdf_writer.add_page(pdf_reader.pages[page_num])

            output_file_path = os.path.join(output_folder, f'{file_name}_page_{page_num + 1}.pdf')
            print(f"Writing to: {output_file_path}")
            pdf_pages.append(output_file_path)
            save_pdf(output_file_path,pdf_writer)

    return pdf_pages
|
|
169
|
+
def pdf_to_img_list(pdf_list: List[str], output_folder: Optional[str] = None, file_name: Optional[str] = None,
                    paginate: bool = False, extension: str = "png") -> List[str]:
    """Convert a list of PDF files to images.

    Args:
        pdf_list (List[str]): Paths to PDF files.
        output_folder (str, optional): Directory for the images. Defaults to
            each PDF's own directory.
        file_name (str, optional): Base name for the images. Defaults to
            each PDF's own name.
        paginate (bool): Append ``_Page_{index}`` (the PDF's index in
            ``pdf_list``) to the base name. Defaults to False.
        extension (str): Image file extension. Defaults to "png".

    Returns:
        List[str]: One path per saved image. PDFs that fail to convert are
        reported on stdout and skipped.
    """
    image_list = []
    for pdf_index, pdf_path in enumerate(pdf_list):
        try:
            images = convert_from_path(pdf_path)
        except Exception as e:
            print("An error occurred while converting the PDF:", e)
            # Without this, `images` was undefined (first PDF) or still
            # held the previous PDF's pages.
            continue

        # Resolve defaults per PDF. Reassigning the arguments made the
        # first PDF's folder stick and the page suffix pile up.
        target_folder = get_directory(pdf_path) if output_folder is None else output_folder
        base_name = get_file_name(pdf_path) if file_name is None else file_name
        if paginate:
            base_name = f"{base_name}_Page_{pdf_index}"

        image_output_path = os.path.join(target_folder, f"{base_name}.{extension}")
        # NOTE(review): every image of a multi-page PDF goes to this same
        # path, so later pages overwrite earlier ones -- confirm that
        # callers only pass single-page PDFs (e.g. split_pdf output).
        for image in images:
            image_list.append(image_output_path)
            save_image(image=image, image_path=image_output_path, format=extension.upper())
    return image_list
|
|
204
|
+
def img_to_txt_list(img_list: List[str], output_folder: Optional[str] = None, file_name: Optional[str] = None,
                    paginate: bool = False, extension: str = "txt") -> List[str]:
    """Convert a list of image files to text files.

    Args:
        img_list (List[str]): Paths to image files.
        output_folder (str, optional): Directory for the text files.
            Defaults to each image's own directory.
        file_name (str, optional): Base name for the text files. Defaults to
            each image's own name.
        paginate (bool): Append ``_Page_{index}`` (the image's index in
            ``img_list``) to the base name. Defaults to False.
        extension (str): Text file extension. Defaults to "txt".

    Returns:
        List[str]: Paths of the written text files.
    """
    text_list = []
    for img_index, img_path in enumerate(img_list):
        # Resolve defaults per image. Reassigning the arguments made the
        # first image's folder stick and the page suffix pile up.
        target_folder = get_directory(img_path) if output_folder is None else output_folder
        base_name = get_file_name(img_path) if file_name is None else file_name
        if paginate:
            base_name = f"{base_name}_Page_{img_index}"

        text_output = image_to_text(img_path)
        # Use the resolved base name. It was computed but then ignored in
        # favour of the image's own name, so file_name/paginate had no effect.
        text_output_path = os.path.join(target_folder, f"{base_name}.{extension}")
        text_list.append(text_output_path)
        write_to_file(filepath=text_output_path, contents=text_output)
    return text_list
|
|
234
|
+
def open_pdf_file(pdf_file_path: str) -> None:
    """Open a PDF file by running the ``open`` command on it.

    Args:
        pdf_file_path (str): Path to the PDF file to open.

    Errors are reported on stdout instead of being raised.
    """
    import shlex

    try:
        # Quote the path so spaces or shell metacharacters in it cannot
        # split or alter the command. Plain paths are left unchanged.
        cmd_input("open " + shlex.quote(pdf_file_path))
    except FileNotFoundError:
        print("Error: The specified file does not exist.")
    except Exception as e:
        print("Error:", e)
|
|
248
|
+
# use it before writing to a file
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def get_pdfs_in_directory(directory: str) -> List[str]:
    """Get a list of PDF filenames in a given directory.

    Args:
        directory (str): Path to the directory.

    Returns:
        list: Bare file names (not full paths) of the entries in
        ``directory`` that ``is_pdf_path`` accepts.
    """
    # Test the joined path. The bare file name would be resolved against
    # the current working directory rather than `directory`.
    return [
        filename
        for filename in os.listdir(directory)
        if is_pdf_path(os.path.join(directory, filename))
    ]
|
|
266
|
+
|
|
267
|
+
def get_all_pdf_in_directory(file_directory: Optional[str] = None) -> List[str]:
    """Get a list of complete paths to PDF files in a given directory.

    Args:
        file_directory (str, optional): Path to the directory. Defaults to
            the current working directory.

    Returns:
        list: Paths accepted by ``is_pdf_path``, in sorted file-name order.
    """
    if file_directory is None:
        # os.listdir(None) lists the working directory, but
        # os.path.join(None, ...) raises TypeError, so resolve it up front.
        file_directory = os.getcwd()

    pdfs = []
    for filename in sorted(os.listdir(file_directory)):
        # Test the joined path. The bare file name would be resolved
        # against the current working directory.
        pdf_path = os.path.join(file_directory, filename)
        if is_pdf_path(pdf_path):
            pdfs.append(pdf_path)
    return pdfs
|
|
284
|
+
|
|
285
|
+
def collate_pdfs(pdf_list: List[str], output_pdf_path: str) -> None:
    """Merge multiple PDF files into a single PDF.

    Args:
        pdf_list (list): Paths to the PDF files, merged in the given order.
        output_pdf_path (str): Path to save the merged PDF.
    """
    from contextlib import ExitStack

    pdf_writer = PyPDF2.PdfWriter()
    with ExitStack() as open_inputs:
        for file_path in pdf_list:
            # Keep every input open until the merged file is written --
            # NOTE(review): presumably the writer may still read page data
            # from the source streams at write time; confirm for the
            # PyPDF2 version in use.
            pdf_file = open_inputs.enter_context(open(file_path, 'rb'))
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page_num in range(len(pdf_reader.pages)):
                pdf_writer.add_page(pdf_reader.pages[page_num])
        # Was `output_file_path`, an undefined name (NameError).
        save_pdf(output_pdf_path, pdf_writer)
|
|
300
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from ..imports import *
|
|
2
|
+
|
|
3
|
+
def get_user_pass_host_key(**kwargs):
    """Extract the connection-related keyword arguments.

    ``get_from_kwargs`` is asked for 'password', 'user_at_host', 'host',
    'key' and 'user'. ``del_kwarg`` defaults to False when the caller did
    not supply it.

    Returns:
        The first element of the pair returned by ``get_from_kwargs``.
    """
    wanted = ('password','user_at_host','host','key','user')
    kwargs.setdefault('del_kwarg', False)
    values, _remaining = get_from_kwargs(*wanted, **kwargs)
    return values
|
|
8
|
+
|
|
9
|
+
# --- Base remote checker -----------------------------------------------------
|
|
10
|
+
def _remote_test(path: str, test_flag: str, timeout: int = 5,*args, **kwargs) -> bool:
    """Run a shell ``[ flag path ]`` test through ``run_cmd``.

    Args:
        path: Path to test; it is shell-quoted before being embedded.
        test_flag: Test operator such as ``-f`` or ``-d``.
        timeout: Forwarded as the ``timeout`` keyword.
        **kwargs: Extra keywords forwarded via ``run_pruned_func``.

    Returns:
        True when the stripped command output equals "1"; False otherwise,
        including when anything raises.
    """
    try:
        kwargs.update(
            cmd=f"[ {test_flag} {shlex.quote(path)} ] && echo 1 || echo 0",
            text=True,
            timeout=timeout,
            stderr=subprocess.DEVNULL,
        )
        output = run_pruned_func(run_cmd,**kwargs)
        return output.strip() == "1"
    except Exception:
        return False
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# --- Individual path checks --------------------------------------------------
|
|
27
|
+
def is_remote_file(path: str,*args, **kwargs) -> bool:
    """Return the result of ``_remote_test`` for ``path`` with the ``-f`` flag."""
    file_flag = "-f"
    return _remote_test(path, file_flag, **kwargs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_remote_dir(path: str,*args, **kwargs) -> bool:
    """Return the result of ``_remote_test`` for ``path`` with the ``-d`` flag."""
    dir_flag = "-d"
    return _remote_test(path, dir_flag, **kwargs)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def is_local_file(path: str) -> bool:
    """Return ``os.path.isfile(path)``."""
    exists_as_file = os.path.isfile(path)
    return exists_as_file
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def is_local_dir(path: str) -> bool:
    """Return ``os.path.isdir(path)``."""
    exists_as_dir = os.path.isdir(path)
    return exists_as_dir
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# --- Unified interface -------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
def is_file(path: str,*args,**kwargs) -> bool:
    """Determine if ``path`` is a file, locally or remotely.

    The remote check is used when ``get_user_pass_host_key(**kwargs)``
    returns a truthy value; otherwise the local file system is consulted.
    """
    remote_target = get_user_pass_host_key(**kwargs)
    if not remote_target:
        return is_local_file(path)
    return is_remote_file(path, **kwargs)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def is_dir(path: str, *args,**kwargs) -> bool:
    """Determine if ``path`` is a directory, locally or remotely.

    The remote check is used when ``get_user_pass_host_key(**kwargs)``
    returns a truthy value; otherwise the local file system is consulted.
    """
    remote_target = get_user_pass_host_key(**kwargs)
    if not remote_target:
        return is_local_dir(path)
    return is_remote_dir(path, **kwargs)
|
|
61
|
+
|
|
62
|
+
def is_exists(path: str, *args,**kwargs) -> bool:
    """Return True when ``path`` passes ``is_file`` or ``is_dir``.

    ``is_dir`` is only consulted when ``is_file`` reports a falsy result.
    """
    return bool(is_file(path,**kwargs) or is_dir(path,**kwargs))
|
|
68
|
+
# --- Optional: keep your original all-in-one wrapper ------------------------
|
|
69
|
+
def check_path_type(
    path: str,
    *args,
    **kwargs
) -> str:
    """Classify ``path`` as 'file', 'directory', 'missing' or 'unknown'.

    When ``get_user_pass_host_key(**kwargs)`` is truthy the remote checks
    decide between 'file', 'directory' and 'missing'. Otherwise the local
    file system is consulted, and 'unknown' is returned for a path that
    exists but is neither a file nor a directory.
    """
    if get_user_pass_host_key(**kwargs):
        if is_remote_file(path,**kwargs):
            return "file"
        if is_remote_dir(path,**kwargs):
            return "directory"
        return "missing"

    if os.path.isfile(path):
        return "file"
    if os.path.isdir(path):
        return "directory"
    if not os.path.exists(path):
        return "missing"
    return "unknown"
|