FlowerPower 0.31.0__tar.gz → 0.31.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowerpower-0.31.0/src/FlowerPower.egg-info → flowerpower-0.31.1}/PKG-INFO +1 -1
- {flowerpower-0.31.0 → flowerpower-0.31.1}/pyproject.toml +1 -1
- {flowerpower-0.31.0 → flowerpower-0.31.1/src/FlowerPower.egg-info}/PKG-INFO +1 -1
- flowerpower-0.31.1/src/flowerpower/utils/misc.py +421 -0
- flowerpower-0.31.0/src/flowerpower/utils/misc.py +0 -420
- {flowerpower-0.31.0 → flowerpower-0.31.1}/LICENSE +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/README.md +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/setup.cfg +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/entry_points.txt +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/requires.txt +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/top_level.txt +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/base.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/exceptions.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/builder.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/builder_adapter.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/builder_executor.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/run.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/project/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cfg/project/adapter.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cli/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cli/cfg.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cli/pipeline.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/cli/utils.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/flowerpower.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/base.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/config_manager.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/executor.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/io.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/lifecycle_manager.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/manager.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/pipeline.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/registry.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/visualizer.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/plugins/io/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/_backend.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/executor.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/general.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/hamilton.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/logging.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/settings/retry.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/__init__.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/adapter.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/callback.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/config.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/executor.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/filesystem.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/logging.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/monkey.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/open_telemetry.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/security.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/src/flowerpower/utils/templates.py +0 -0
- {flowerpower-0.31.0 → flowerpower-0.31.1}/tests/test_flowerpower_project.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.31.
|
3
|
+
Version: 0.31.1
|
4
4
|
Summary: A simple workflow framework for building and managing data processing pipelines
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -4,7 +4,7 @@ description = "A simple workflow framework for building and managing data proces
|
|
4
4
|
authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">= 3.11"
|
7
|
-
version = "0.31.
|
7
|
+
version = "0.31.1"
|
8
8
|
keywords = ["hamilton", "workflow", "pipeline", "scheduler", "dask", "ray"]
|
9
9
|
|
10
10
|
dependencies = [
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.31.
|
3
|
+
Version: 0.31.1
|
4
4
|
Summary: A simple workflow framework for building and managing data processing pipelines
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -0,0 +1,421 @@
|
|
1
|
+
import importlib
|
2
|
+
import os
|
3
|
+
import subprocess
|
4
|
+
import tempfile
|
5
|
+
import time
|
6
|
+
# from collections.abc import Iterable
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
import msgspec
|
10
|
+
from fsspec_utils import AbstractFileSystem, filesystem
|
11
|
+
from .security import validate_file_path
|
12
|
+
from fsspec_utils.utils import run_parallel
|
13
|
+
|
14
|
+
# if importlib.util.find_spec("joblib"):
|
15
|
+
# from joblib import Parallel, delayed
|
16
|
+
# from rich.progress import (BarColumn, Progress, TextColumn,
|
17
|
+
# TimeElapsedColumn)
|
18
|
+
|
19
|
+
# def _prepare_parallel_args(
|
20
|
+
# args: tuple, kwargs: dict
|
21
|
+
# ) -> tuple[list, list, dict, dict, int]:
|
22
|
+
# """Prepare and validate arguments for parallel execution.
|
23
|
+
|
24
|
+
# Args:
|
25
|
+
# args: Positional arguments
|
26
|
+
# kwargs: Keyword arguments
|
27
|
+
|
28
|
+
# Returns:
|
29
|
+
# tuple: (iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len)
|
30
|
+
|
31
|
+
# Raises:
|
32
|
+
# ValueError: If no iterable arguments or length mismatch
|
33
|
+
# """
|
34
|
+
# iterables = []
|
35
|
+
# fixed_args = []
|
36
|
+
# iterable_kwargs = {}
|
37
|
+
# fixed_kwargs = {}
|
38
|
+
# first_iterable_len = None
|
39
|
+
|
40
|
+
# # Process positional arguments
|
41
|
+
# for arg in args:
|
42
|
+
# if isinstance(arg, (list, tuple)) and not isinstance(arg[0], (list, tuple)):
|
43
|
+
# iterables.append(arg)
|
44
|
+
# if first_iterable_len is None:
|
45
|
+
# first_iterable_len = len(arg)
|
46
|
+
# elif len(arg) != first_iterable_len:
|
47
|
+
# raise ValueError(
|
48
|
+
# f"Iterable length mismatch: argument has length {len(arg)}, expected {first_iterable_len}"
|
49
|
+
# )
|
50
|
+
# else:
|
51
|
+
# fixed_args.append(arg)
|
52
|
+
|
53
|
+
# # Process keyword arguments
|
54
|
+
# for key, value in kwargs.items():
|
55
|
+
# if isinstance(value, (list, tuple)) and not isinstance(
|
56
|
+
# value[0], (list, tuple)
|
57
|
+
# ):
|
58
|
+
# if first_iterable_len is None:
|
59
|
+
# first_iterable_len = len(value)
|
60
|
+
# elif len(value) != first_iterable_len:
|
61
|
+
# raise ValueError(
|
62
|
+
# f"Iterable length mismatch: {key} has length {len(value)}, expected {first_iterable_len}"
|
63
|
+
# )
|
64
|
+
# iterable_kwargs[key] = value
|
65
|
+
# else:
|
66
|
+
# fixed_kwargs[key] = value
|
67
|
+
|
68
|
+
# if first_iterable_len is None:
|
69
|
+
# raise ValueError("At least one iterable argument is required")
|
70
|
+
|
71
|
+
# return iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len
|
72
|
+
|
73
|
+
# def _execute_parallel_with_progress(
|
74
|
+
# func: callable,
|
75
|
+
# iterables: list,
|
76
|
+
# fixed_args: list,
|
77
|
+
# iterable_kwargs: dict,
|
78
|
+
# fixed_kwargs: dict,
|
79
|
+
# param_combinations: list,
|
80
|
+
# parallel_kwargs: dict,
|
81
|
+
# ) -> list:
|
82
|
+
# """Execute parallel tasks with progress tracking.
|
83
|
+
|
84
|
+
# Args:
|
85
|
+
# func: Function to execute
|
86
|
+
# iterables: List of iterable arguments
|
87
|
+
# fixed_args: List of fixed arguments
|
88
|
+
# iterable_kwargs: Dictionary of iterable keyword arguments
|
89
|
+
# fixed_kwargs: Dictionary of fixed keyword arguments
|
90
|
+
# param_combinations: List of parameter combinations
|
91
|
+
# parallel_kwargs: Parallel execution configuration
|
92
|
+
|
93
|
+
# Returns:
|
94
|
+
# list: Results from parallel execution
|
95
|
+
# """
|
96
|
+
# results = [None] * len(param_combinations)
|
97
|
+
# with Progress(
|
98
|
+
# TextColumn("[progress.description]{task.description}"),
|
99
|
+
# BarColumn(),
|
100
|
+
# "[progress.percentage]{task.percentage:>3.0f}%",
|
101
|
+
# TimeElapsedColumn(),
|
102
|
+
# transient=True,
|
103
|
+
# ) as progress:
|
104
|
+
# task = progress.add_task(
|
105
|
+
# "Running in parallel...", total=len(param_combinations)
|
106
|
+
# )
|
107
|
+
|
108
|
+
# def wrapper(idx, param_tuple):
|
109
|
+
# res = func(
|
110
|
+
# *(list(param_tuple[: len(iterables)]) + fixed_args),
|
111
|
+
# **{
|
112
|
+
# k: v
|
113
|
+
# for k, v in zip(
|
114
|
+
# iterable_kwargs.keys(), param_tuple[len(iterables) :]
|
115
|
+
# )
|
116
|
+
# },
|
117
|
+
# **fixed_kwargs,
|
118
|
+
# )
|
119
|
+
# progress.update(task, advance=1)
|
120
|
+
# return idx, res
|
121
|
+
#
|
122
|
+
# for idx, result in Parallel(**parallel_kwargs)(
|
123
|
+
# delayed(wrapper)(i, param_tuple)
|
124
|
+
# for i, param_tuple in enumerate(param_combinations)
|
125
|
+
# ):
|
126
|
+
# results[idx] = result
|
127
|
+
# return results
|
128
|
+
|
129
|
+
# def _execute_parallel_without_progress(
|
130
|
+
# func: callable,
|
131
|
+
# iterables: list,
|
132
|
+
# fixed_args: list,
|
133
|
+
# iterable_kwargs: dict,
|
134
|
+
# fixed_kwargs: dict,
|
135
|
+
# param_combinations: list,
|
136
|
+
# parallel_kwargs: dict,
|
137
|
+
# ) -> list:
|
138
|
+
# """Execute parallel tasks without progress tracking.
|
139
|
+
|
140
|
+
# Args:
|
141
|
+
# func: Function to execute
|
142
|
+
# iterables: List of iterable arguments
|
143
|
+
# fixed_args: List of fixed arguments
|
144
|
+
# iterable_kwargs: Dictionary of iterable keyword arguments
|
145
|
+
# fixed_kwargs: Dictionary of fixed keyword arguments
|
146
|
+
# param_combinations: List of parameter combinations
|
147
|
+
# parallel_kwargs: Parallel execution configuration
|
148
|
+
|
149
|
+
# Returns:
|
150
|
+
# list: Results from parallel execution
|
151
|
+
# """
|
152
|
+
# return Parallel(**parallel_kwargs)(
|
153
|
+
# delayed(func)(
|
154
|
+
# *(list(param_tuple[: len(iterables)]) + fixed_args),
|
155
|
+
# **{
|
156
|
+
# k: v
|
157
|
+
# for k, v in zip(
|
158
|
+
# iterable_kwargs.keys(), param_tuple[len(iterables) :]
|
159
|
+
# )
|
160
|
+
# },
|
161
|
+
# **fixed_kwargs,
|
162
|
+
# )
|
163
|
+
# for param_tuple in param_combinations
|
164
|
+
# )
|
165
|
+
|
166
|
+
# def run_parallel(
|
167
|
+
# func: callable,
|
168
|
+
# *args,
|
169
|
+
# n_jobs: int = -1,
|
170
|
+
# backend: str = "threading",
|
171
|
+
# verbose: bool = True,
|
172
|
+
# **kwargs,
|
173
|
+
# ) -> list[any]:
|
174
|
+
# """Runs a function for a list of parameters in parallel.
|
175
|
+
|
176
|
+
# Args:
|
177
|
+
# func (Callable): function to run in parallel
|
178
|
+
# *args: Positional arguments. Can be single values or iterables
|
179
|
+
# n_jobs (int, optional): Number of joblib workers. Defaults to -1
|
180
|
+
# backend (str, optional): joblib backend. Valid options are
|
181
|
+
# `loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
|
182
|
+
# verbose (bool, optional): Show progress bar. Defaults to True
|
183
|
+
# **kwargs: Keyword arguments. Can be single values or iterables
|
184
|
+
|
185
|
+
# Returns:
|
186
|
+
# list[any]: Function output
|
187
|
+
|
188
|
+
# Examples:
|
189
|
+
# >>> # Single iterable argument
|
190
|
+
# >>> run_parallel(func, [1,2,3], fixed_arg=42)
|
191
|
+
|
192
|
+
# >>> # Multiple iterables in args and kwargs
|
193
|
+
# >>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
|
194
|
+
|
195
|
+
# >>> # Only kwargs iterables
|
196
|
+
# >>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
|
197
|
+
# """
|
198
|
+
# parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
|
199
|
+
|
200
|
+
# # Prepare and validate arguments
|
201
|
+
# iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len = _prepare_parallel_args(
|
202
|
+
# args, kwargs
|
203
|
+
# )
|
204
|
+
|
205
|
+
# # Create parameter combinations
|
206
|
+
# all_iterables = iterables + list(iterable_kwargs.values())
|
207
|
+
# param_combinations = list(zip(*all_iterables))
|
208
|
+
|
209
|
+
# # Execute with or without progress tracking
|
210
|
+
# if not verbose:
|
211
|
+
# return _execute_parallel_without_progress(
|
212
|
+
# func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
|
213
|
+
# param_combinations, parallel_kwargs
|
214
|
+
# )
|
215
|
+
# else:
|
216
|
+
# return _execute_parallel_with_progress(
|
217
|
+
# func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
|
218
|
+
# param_combinations, parallel_kwargs
|
219
|
+
# )
|
220
|
+
|
221
|
+
# else:
|
222
|
+
|
223
|
+
# def run_parallel(*args, **kwargs):
|
224
|
+
# raise ImportError("joblib not installed")
|
225
|
+
|
226
|
+
|
227
|
+
def get_partitions_from_path(
|
228
|
+
path: str, partitioning: str | list[str] | None = None
|
229
|
+
) -> list[tuple]:
|
230
|
+
"""Get the dataset partitions from the file path.
|
231
|
+
|
232
|
+
Args:
|
233
|
+
path (str): File path.
|
234
|
+
partitioning (str | list[str] | None, optional): Partitioning type. Defaults to None.
|
235
|
+
|
236
|
+
Returns:
|
237
|
+
list[tuple]: Partitions.
|
238
|
+
"""
|
239
|
+
if "." in path:
|
240
|
+
path = os.path.dirname(path)
|
241
|
+
|
242
|
+
parts = path.split("/")
|
243
|
+
|
244
|
+
if isinstance(partitioning, str):
|
245
|
+
if partitioning == "hive":
|
246
|
+
return [tuple(p.split("=")) for p in parts if "=" in p]
|
247
|
+
|
248
|
+
else:
|
249
|
+
return [
|
250
|
+
(partitioning, parts[0]),
|
251
|
+
]
|
252
|
+
else:
|
253
|
+
return list(zip(partitioning, parts[-len(partitioning) :]))
|
254
|
+
|
255
|
+
|
256
|
+
def _validate_image_format(format: str) -> str:
|
257
|
+
"""Validate image format to prevent injection attacks.
|
258
|
+
|
259
|
+
Args:
|
260
|
+
format: Image format to validate
|
261
|
+
|
262
|
+
Returns:
|
263
|
+
str: Validated format
|
264
|
+
|
265
|
+
Raises:
|
266
|
+
ValueError: If format is not supported
|
267
|
+
"""
|
268
|
+
allowed_formats = {"svg", "png", "jpg", "jpeg", "gif", "pdf", "html"}
|
269
|
+
if format not in allowed_formats:
|
270
|
+
raise ValueError(f"Unsupported format: {format}. Allowed: {allowed_formats}")
|
271
|
+
return format
|
272
|
+
|
273
|
+
def _create_temp_image_file(data: str | bytes, format: str) -> str:
|
274
|
+
"""Create a temporary file with image data.
|
275
|
+
|
276
|
+
Args:
|
277
|
+
data: Image data as string or bytes
|
278
|
+
format: Validated image format
|
279
|
+
|
280
|
+
Returns:
|
281
|
+
str: Path to temporary file
|
282
|
+
|
283
|
+
Raises:
|
284
|
+
OSError: If file creation fails
|
285
|
+
"""
|
286
|
+
with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
|
287
|
+
if isinstance(data, str):
|
288
|
+
tmp.write(data.encode('utf-8'))
|
289
|
+
else:
|
290
|
+
tmp.write(data)
|
291
|
+
tmp_path = tmp.name
|
292
|
+
|
293
|
+
# Validate the temporary file path for security
|
294
|
+
validate_file_path(tmp_path, allow_relative=False)
|
295
|
+
return tmp_path
|
296
|
+
|
297
|
+
def _open_image_viewer(tmp_path: str) -> None:
|
298
|
+
"""Open image viewer with the given file path.
|
299
|
+
|
300
|
+
Args:
|
301
|
+
tmp_path: Path to temporary image file
|
302
|
+
|
303
|
+
Raises:
|
304
|
+
OSError: If platform is not supported
|
305
|
+
subprocess.CalledProcessError: If subprocess fails
|
306
|
+
subprocess.TimeoutExpired: If subprocess times out
|
307
|
+
"""
|
308
|
+
import platform
|
309
|
+
platform_system = platform.system()
|
310
|
+
|
311
|
+
if platform_system == "Darwin": # macOS
|
312
|
+
subprocess.run(["open", tmp_path], check=True, timeout=10)
|
313
|
+
elif platform_system == "Linux":
|
314
|
+
subprocess.run(["xdg-open", tmp_path], check=True, timeout=10)
|
315
|
+
elif platform_system == "Windows":
|
316
|
+
subprocess.run(["start", "", tmp_path], shell=True, check=True, timeout=10)
|
317
|
+
else:
|
318
|
+
raise OSError(f"Unsupported platform: {platform_system}")
|
319
|
+
|
320
|
+
def _cleanup_temp_file(tmp_path: str) -> None:
|
321
|
+
"""Clean up temporary file.
|
322
|
+
|
323
|
+
Args:
|
324
|
+
tmp_path: Path to temporary file to remove
|
325
|
+
"""
|
326
|
+
try:
|
327
|
+
os.unlink(tmp_path)
|
328
|
+
except OSError:
|
329
|
+
pass # File might already be deleted or in use
|
330
|
+
|
331
|
+
def view_img(data: str | bytes, format: str = "svg"):
|
332
|
+
"""View image data using the system's default image viewer.
|
333
|
+
|
334
|
+
Args:
|
335
|
+
data: Image data as string or bytes
|
336
|
+
format: Image format (svg, png, jpg, jpeg, gif, pdf, html)
|
337
|
+
|
338
|
+
Raises:
|
339
|
+
ValueError: If format is not supported
|
340
|
+
RuntimeError: If file opening fails
|
341
|
+
OSError: If platform is not supported
|
342
|
+
"""
|
343
|
+
# Validate format to prevent injection attacks
|
344
|
+
validated_format = _validate_image_format(format)
|
345
|
+
|
346
|
+
# Create a temporary file with validated extension
|
347
|
+
tmp_path = _create_temp_image_file(data, validated_format)
|
348
|
+
|
349
|
+
try:
|
350
|
+
# Open image viewer with secure subprocess call
|
351
|
+
_open_image_viewer(tmp_path)
|
352
|
+
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
|
353
|
+
# Clean up temp file on error
|
354
|
+
_cleanup_temp_file(tmp_path)
|
355
|
+
raise RuntimeError(f"Failed to open file: {e}")
|
356
|
+
|
357
|
+
# Optional: Remove the temp file after a delay
|
358
|
+
time.sleep(2) # Wait for viewer to open
|
359
|
+
_cleanup_temp_file(tmp_path)
|
360
|
+
|
361
|
+
|
362
|
+
def update_config_from_dict(
|
363
|
+
struct: msgspec.Struct, data: dict[str, Any]
|
364
|
+
) -> msgspec.Struct:
|
365
|
+
"""
|
366
|
+
Updates a msgspec.Struct instance with values from a dictionary.
|
367
|
+
Handles nested msgspec.Struct objects and nested dictionaries.
|
368
|
+
|
369
|
+
Args:
|
370
|
+
obj: The msgspec.Struct object to update
|
371
|
+
update_dict: Dictionary containing update values
|
372
|
+
|
373
|
+
Returns:
|
374
|
+
Updated msgspec.Struct instance
|
375
|
+
"""
|
376
|
+
# Convert the struct to a dictionary for easier manipulation
|
377
|
+
obj_dict = msgspec.to_builtins(struct)
|
378
|
+
|
379
|
+
# Update the dictionary recursively
|
380
|
+
for key, value in data.items():
|
381
|
+
if key in obj_dict:
|
382
|
+
if isinstance(value, dict) and isinstance(obj_dict[key], dict):
|
383
|
+
# Handle nested dictionaries
|
384
|
+
obj_dict[key] = update_nested_dict(obj_dict[key], value)
|
385
|
+
else:
|
386
|
+
# Direct update for non-nested values
|
387
|
+
obj_dict[key] = value
|
388
|
+
|
389
|
+
# Convert back to the original struct type
|
390
|
+
return msgspec.convert(obj_dict, type(struct))
|
391
|
+
|
392
|
+
|
393
|
+
def update_nested_dict(
|
394
|
+
original: dict[str, Any], updates: dict[str, Any]
|
395
|
+
) -> dict[str, Any]:
|
396
|
+
"""Helper function to update nested dictionaries"""
|
397
|
+
result = original.copy()
|
398
|
+
for key, value in updates.items():
|
399
|
+
if key in result and isinstance(value, dict) and isinstance(result[key], dict):
|
400
|
+
# Recursively update nested dictionaries
|
401
|
+
result[key] = update_nested_dict(result[key], value)
|
402
|
+
else:
|
403
|
+
# Direct update
|
404
|
+
result[key] = value
|
405
|
+
return result
|
406
|
+
|
407
|
+
|
408
|
+
def get_filesystem(fs: AbstractFileSystem | None = None, fs_type: str = "file") -> AbstractFileSystem:
|
409
|
+
"""
|
410
|
+
Helper function to get a filesystem instance.
|
411
|
+
|
412
|
+
Args:
|
413
|
+
fs: An optional filesystem instance to use. If provided, this will be returned directly.
|
414
|
+
fs_type: The type of filesystem to create if fs is None. Defaults to "file".
|
415
|
+
|
416
|
+
Returns:
|
417
|
+
An AbstractFileSystem instance.
|
418
|
+
"""
|
419
|
+
if fs is None:
|
420
|
+
fs = filesystem(fs_type)
|
421
|
+
return fs
|
@@ -1,420 +0,0 @@
|
|
1
|
-
import importlib
|
2
|
-
import os
|
3
|
-
import subprocess
|
4
|
-
import tempfile
|
5
|
-
import time
|
6
|
-
# from collections.abc import Iterable
|
7
|
-
from typing import Any
|
8
|
-
|
9
|
-
import msgspec
|
10
|
-
from fsspec_utils import AbstractFileSystem, filesystem
|
11
|
-
from .security import validate_file_path
|
12
|
-
|
13
|
-
if importlib.util.find_spec("joblib"):
|
14
|
-
from joblib import Parallel, delayed
|
15
|
-
from rich.progress import (BarColumn, Progress, TextColumn,
|
16
|
-
TimeElapsedColumn)
|
17
|
-
|
18
|
-
def _prepare_parallel_args(
|
19
|
-
args: tuple, kwargs: dict
|
20
|
-
) -> tuple[list, list, dict, dict, int]:
|
21
|
-
"""Prepare and validate arguments for parallel execution.
|
22
|
-
|
23
|
-
Args:
|
24
|
-
args: Positional arguments
|
25
|
-
kwargs: Keyword arguments
|
26
|
-
|
27
|
-
Returns:
|
28
|
-
tuple: (iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len)
|
29
|
-
|
30
|
-
Raises:
|
31
|
-
ValueError: If no iterable arguments or length mismatch
|
32
|
-
"""
|
33
|
-
iterables = []
|
34
|
-
fixed_args = []
|
35
|
-
iterable_kwargs = {}
|
36
|
-
fixed_kwargs = {}
|
37
|
-
first_iterable_len = None
|
38
|
-
|
39
|
-
# Process positional arguments
|
40
|
-
for arg in args:
|
41
|
-
if isinstance(arg, (list, tuple)) and not isinstance(arg[0], (list, tuple)):
|
42
|
-
iterables.append(arg)
|
43
|
-
if first_iterable_len is None:
|
44
|
-
first_iterable_len = len(arg)
|
45
|
-
elif len(arg) != first_iterable_len:
|
46
|
-
raise ValueError(
|
47
|
-
f"Iterable length mismatch: argument has length {len(arg)}, expected {first_iterable_len}"
|
48
|
-
)
|
49
|
-
else:
|
50
|
-
fixed_args.append(arg)
|
51
|
-
|
52
|
-
# Process keyword arguments
|
53
|
-
for key, value in kwargs.items():
|
54
|
-
if isinstance(value, (list, tuple)) and not isinstance(
|
55
|
-
value[0], (list, tuple)
|
56
|
-
):
|
57
|
-
if first_iterable_len is None:
|
58
|
-
first_iterable_len = len(value)
|
59
|
-
elif len(value) != first_iterable_len:
|
60
|
-
raise ValueError(
|
61
|
-
f"Iterable length mismatch: {key} has length {len(value)}, expected {first_iterable_len}"
|
62
|
-
)
|
63
|
-
iterable_kwargs[key] = value
|
64
|
-
else:
|
65
|
-
fixed_kwargs[key] = value
|
66
|
-
|
67
|
-
if first_iterable_len is None:
|
68
|
-
raise ValueError("At least one iterable argument is required")
|
69
|
-
|
70
|
-
return iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len
|
71
|
-
|
72
|
-
def _execute_parallel_with_progress(
|
73
|
-
func: callable,
|
74
|
-
iterables: list,
|
75
|
-
fixed_args: list,
|
76
|
-
iterable_kwargs: dict,
|
77
|
-
fixed_kwargs: dict,
|
78
|
-
param_combinations: list,
|
79
|
-
parallel_kwargs: dict,
|
80
|
-
) -> list:
|
81
|
-
"""Execute parallel tasks with progress tracking.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
func: Function to execute
|
85
|
-
iterables: List of iterable arguments
|
86
|
-
fixed_args: List of fixed arguments
|
87
|
-
iterable_kwargs: Dictionary of iterable keyword arguments
|
88
|
-
fixed_kwargs: Dictionary of fixed keyword arguments
|
89
|
-
param_combinations: List of parameter combinations
|
90
|
-
parallel_kwargs: Parallel execution configuration
|
91
|
-
|
92
|
-
Returns:
|
93
|
-
list: Results from parallel execution
|
94
|
-
"""
|
95
|
-
results = [None] * len(param_combinations)
|
96
|
-
with Progress(
|
97
|
-
TextColumn("[progress.description]{task.description}"),
|
98
|
-
BarColumn(),
|
99
|
-
"[progress.percentage]{task.percentage:>3.0f}%",
|
100
|
-
TimeElapsedColumn(),
|
101
|
-
transient=True,
|
102
|
-
) as progress:
|
103
|
-
task = progress.add_task(
|
104
|
-
"Running in parallel...", total=len(param_combinations)
|
105
|
-
)
|
106
|
-
|
107
|
-
def wrapper(idx, param_tuple):
|
108
|
-
res = func(
|
109
|
-
*(list(param_tuple[: len(iterables)]) + fixed_args),
|
110
|
-
**{
|
111
|
-
k: v
|
112
|
-
for k, v in zip(
|
113
|
-
iterable_kwargs.keys(), param_tuple[len(iterables) :]
|
114
|
-
)
|
115
|
-
},
|
116
|
-
**fixed_kwargs,
|
117
|
-
)
|
118
|
-
progress.update(task, advance=1)
|
119
|
-
return idx, res
|
120
|
-
|
121
|
-
for idx, result in Parallel(**parallel_kwargs)(
|
122
|
-
delayed(wrapper)(i, param_tuple)
|
123
|
-
for i, param_tuple in enumerate(param_combinations)
|
124
|
-
):
|
125
|
-
results[idx] = result
|
126
|
-
return results
|
127
|
-
|
128
|
-
def _execute_parallel_without_progress(
|
129
|
-
func: callable,
|
130
|
-
iterables: list,
|
131
|
-
fixed_args: list,
|
132
|
-
iterable_kwargs: dict,
|
133
|
-
fixed_kwargs: dict,
|
134
|
-
param_combinations: list,
|
135
|
-
parallel_kwargs: dict,
|
136
|
-
) -> list:
|
137
|
-
"""Execute parallel tasks without progress tracking.
|
138
|
-
|
139
|
-
Args:
|
140
|
-
func: Function to execute
|
141
|
-
iterables: List of iterable arguments
|
142
|
-
fixed_args: List of fixed arguments
|
143
|
-
iterable_kwargs: Dictionary of iterable keyword arguments
|
144
|
-
fixed_kwargs: Dictionary of fixed keyword arguments
|
145
|
-
param_combinations: List of parameter combinations
|
146
|
-
parallel_kwargs: Parallel execution configuration
|
147
|
-
|
148
|
-
Returns:
|
149
|
-
list: Results from parallel execution
|
150
|
-
"""
|
151
|
-
return Parallel(**parallel_kwargs)(
|
152
|
-
delayed(func)(
|
153
|
-
*(list(param_tuple[: len(iterables)]) + fixed_args),
|
154
|
-
**{
|
155
|
-
k: v
|
156
|
-
for k, v in zip(
|
157
|
-
iterable_kwargs.keys(), param_tuple[len(iterables) :]
|
158
|
-
)
|
159
|
-
},
|
160
|
-
**fixed_kwargs,
|
161
|
-
)
|
162
|
-
for param_tuple in param_combinations
|
163
|
-
)
|
164
|
-
|
165
|
-
def run_parallel(
|
166
|
-
func: callable,
|
167
|
-
*args,
|
168
|
-
n_jobs: int = -1,
|
169
|
-
backend: str = "threading",
|
170
|
-
verbose: bool = True,
|
171
|
-
**kwargs,
|
172
|
-
) -> list[any]:
|
173
|
-
"""Runs a function for a list of parameters in parallel.
|
174
|
-
|
175
|
-
Args:
|
176
|
-
func (Callable): function to run in parallel
|
177
|
-
*args: Positional arguments. Can be single values or iterables
|
178
|
-
n_jobs (int, optional): Number of joblib workers. Defaults to -1
|
179
|
-
backend (str, optional): joblib backend. Valid options are
|
180
|
-
`loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
|
181
|
-
verbose (bool, optional): Show progress bar. Defaults to True
|
182
|
-
**kwargs: Keyword arguments. Can be single values or iterables
|
183
|
-
|
184
|
-
Returns:
|
185
|
-
list[any]: Function output
|
186
|
-
|
187
|
-
Examples:
|
188
|
-
>>> # Single iterable argument
|
189
|
-
>>> run_parallel(func, [1,2,3], fixed_arg=42)
|
190
|
-
|
191
|
-
>>> # Multiple iterables in args and kwargs
|
192
|
-
>>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
|
193
|
-
|
194
|
-
>>> # Only kwargs iterables
|
195
|
-
>>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
|
196
|
-
"""
|
197
|
-
parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
|
198
|
-
|
199
|
-
# Prepare and validate arguments
|
200
|
-
iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len = _prepare_parallel_args(
|
201
|
-
args, kwargs
|
202
|
-
)
|
203
|
-
|
204
|
-
# Create parameter combinations
|
205
|
-
all_iterables = iterables + list(iterable_kwargs.values())
|
206
|
-
param_combinations = list(zip(*all_iterables))
|
207
|
-
|
208
|
-
# Execute with or without progress tracking
|
209
|
-
if not verbose:
|
210
|
-
return _execute_parallel_without_progress(
|
211
|
-
func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
|
212
|
-
param_combinations, parallel_kwargs
|
213
|
-
)
|
214
|
-
else:
|
215
|
-
return _execute_parallel_with_progress(
|
216
|
-
func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
|
217
|
-
param_combinations, parallel_kwargs
|
218
|
-
)
|
219
|
-
|
220
|
-
else:
|
221
|
-
|
222
|
-
def run_parallel(*args, **kwargs):
|
223
|
-
raise ImportError("joblib not installed")
|
224
|
-
|
225
|
-
|
226
|
-
def get_partitions_from_path(
|
227
|
-
path: str, partitioning: str | list[str] | None = None
|
228
|
-
) -> list[tuple]:
|
229
|
-
"""Get the dataset partitions from the file path.
|
230
|
-
|
231
|
-
Args:
|
232
|
-
path (str): File path.
|
233
|
-
partitioning (str | list[str] | None, optional): Partitioning type. Defaults to None.
|
234
|
-
|
235
|
-
Returns:
|
236
|
-
list[tuple]: Partitions.
|
237
|
-
"""
|
238
|
-
if "." in path:
|
239
|
-
path = os.path.dirname(path)
|
240
|
-
|
241
|
-
parts = path.split("/")
|
242
|
-
|
243
|
-
if isinstance(partitioning, str):
|
244
|
-
if partitioning == "hive":
|
245
|
-
return [tuple(p.split("=")) for p in parts if "=" in p]
|
246
|
-
|
247
|
-
else:
|
248
|
-
return [
|
249
|
-
(partitioning, parts[0]),
|
250
|
-
]
|
251
|
-
else:
|
252
|
-
return list(zip(partitioning, parts[-len(partitioning) :]))
|
253
|
-
|
254
|
-
|
255
|
-
def _validate_image_format(format: str) -> str:
|
256
|
-
"""Validate image format to prevent injection attacks.
|
257
|
-
|
258
|
-
Args:
|
259
|
-
format: Image format to validate
|
260
|
-
|
261
|
-
Returns:
|
262
|
-
str: Validated format
|
263
|
-
|
264
|
-
Raises:
|
265
|
-
ValueError: If format is not supported
|
266
|
-
"""
|
267
|
-
allowed_formats = {"svg", "png", "jpg", "jpeg", "gif", "pdf", "html"}
|
268
|
-
if format not in allowed_formats:
|
269
|
-
raise ValueError(f"Unsupported format: {format}. Allowed: {allowed_formats}")
|
270
|
-
return format
|
271
|
-
|
272
|
-
def _create_temp_image_file(data: str | bytes, format: str) -> str:
|
273
|
-
"""Create a temporary file with image data.
|
274
|
-
|
275
|
-
Args:
|
276
|
-
data: Image data as string or bytes
|
277
|
-
format: Validated image format
|
278
|
-
|
279
|
-
Returns:
|
280
|
-
str: Path to temporary file
|
281
|
-
|
282
|
-
Raises:
|
283
|
-
OSError: If file creation fails
|
284
|
-
"""
|
285
|
-
with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
|
286
|
-
if isinstance(data, str):
|
287
|
-
tmp.write(data.encode('utf-8'))
|
288
|
-
else:
|
289
|
-
tmp.write(data)
|
290
|
-
tmp_path = tmp.name
|
291
|
-
|
292
|
-
# Validate the temporary file path for security
|
293
|
-
validate_file_path(tmp_path, allow_relative=False)
|
294
|
-
return tmp_path
|
295
|
-
|
296
|
-
def _open_image_viewer(tmp_path: str) -> None:
    """Open the platform's default viewer for the given image file.

    Args:
        tmp_path: Path to the temporary image file.

    Raises:
        OSError: If the platform is not supported, or (Windows) the file
            cannot be handed to the shell's file-association handler.
        subprocess.CalledProcessError: If the viewer subprocess fails.
        subprocess.TimeoutExpired: If the viewer subprocess times out.
    """
    import platform

    platform_system = platform.system()

    if platform_system == "Darwin":  # macOS
        subprocess.run(["open", tmp_path], check=True, timeout=10)
    elif platform_system == "Linux":
        subprocess.run(["xdg-open", tmp_path], check=True, timeout=10)
    elif platform_system == "Windows":
        # os.startfile uses the file-association machinery directly, avoiding
        # the previous `start` + shell=True invocation, which spawned a shell
        # and exposed the path to shell metacharacter interpretation.
        os.startfile(tmp_path)
    else:
        raise OSError(f"Unsupported platform: {platform_system}")
def _cleanup_temp_file(tmp_path: str) -> None:
|
320
|
-
"""Clean up temporary file.
|
321
|
-
|
322
|
-
Args:
|
323
|
-
tmp_path: Path to temporary file to remove
|
324
|
-
"""
|
325
|
-
try:
|
326
|
-
os.unlink(tmp_path)
|
327
|
-
except OSError:
|
328
|
-
pass # File might already be deleted or in use
|
329
|
-
|
330
|
-
def view_img(data: str | bytes, format: str = "svg"):
    """View image data using the system's default image viewer.

    Args:
        data: Image data as string or bytes.
        format: Image format (svg, png, jpg, jpeg, gif, pdf, html).

    Raises:
        ValueError: If format is not supported.
        RuntimeError: If opening the file in a viewer fails.
        OSError: If the platform is not supported.
    """
    # Validate format to prevent injection attacks via the file suffix.
    validated_format = _validate_image_format(format)

    # Create a temporary file with validated extension.
    tmp_path = _create_temp_image_file(data, validated_format)

    try:
        # Open image viewer with secure subprocess call.
        _open_image_viewer(tmp_path)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
        # Clean up temp file on error, then surface a uniform error type.
        # `from e` preserves the causal chain for debugging (previously lost).
        _cleanup_temp_file(tmp_path)
        raise RuntimeError(f"Failed to open file: {e}") from e

    # Give the viewer a moment to open the file before removing it.
    time.sleep(2)
    _cleanup_temp_file(tmp_path)
def update_config_from_dict(
    struct: msgspec.Struct, data: dict[str, Any]
) -> msgspec.Struct:
    """Return a copy of *struct* updated with values from *data*.

    Nested dictionaries are merged recursively; keys in *data* that do not
    correspond to a field on the struct are silently ignored.

    Args:
        struct: The msgspec.Struct instance to update.
        data: Dictionary containing the update values.

    Returns:
        A new msgspec.Struct instance of the same type with updates applied.
    """
    # Work on a plain-dict representation of the struct for easy mutation.
    current = msgspec.to_builtins(struct)

    for field, new_value in data.items():
        if field not in current:
            # Unknown keys are skipped rather than raising.
            continue
        old_value = current[field]
        if isinstance(new_value, dict) and isinstance(old_value, dict):
            # Merge nested mappings recursively.
            current[field] = update_nested_dict(old_value, new_value)
        else:
            # Direct replacement for non-nested values.
            current[field] = new_value

    # Rebuild the original struct type from the updated dict.
    return msgspec.convert(current, type(struct))
def update_nested_dict(
    original: dict[str, Any], updates: dict[str, Any]
) -> dict[str, Any]:
    """Return a copy of *original* with *updates* merged in recursively.

    When a key maps to a dict in both inputs, the dicts are merged depth-first;
    otherwise the value from *updates* wins. *original* is never mutated.
    """
    merged = dict(original)
    for key, new_value in updates.items():
        existing = merged.get(key)
        if isinstance(new_value, dict) and isinstance(existing, dict):
            # Both sides are mappings: descend and merge.
            merged[key] = update_nested_dict(existing, new_value)
        else:
            # Scalar, missing, or type-mismatched entry: overwrite.
            merged[key] = new_value
    return merged
def get_filesystem(fs: AbstractFileSystem | None = None, fs_type: str = "file") -> AbstractFileSystem:
    """
    Resolve a filesystem instance, creating one only when none was given.

    Args:
        fs: An optional filesystem instance to use. If provided, this will be returned directly.
        fs_type: The type of filesystem to create if fs is None. Defaults to "file".

    Returns:
        An AbstractFileSystem instance.
    """
    # Instantiate the requested protocol lazily; a caller-supplied fs wins.
    return fs if fs is not None else filesystem(fs_type)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|