FlowerPower 0.31.0__tar.gz → 0.31.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {flowerpower-0.31.0/src/FlowerPower.egg-info → flowerpower-0.31.2}/PKG-INFO +1 -1
  2. {flowerpower-0.31.0 → flowerpower-0.31.2}/pyproject.toml +2 -1
  3. {flowerpower-0.31.0 → flowerpower-0.31.2/src/FlowerPower.egg-info}/PKG-INFO +1 -1
  4. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/manager.py +34 -5
  5. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/visualizer.py +25 -11
  6. flowerpower-0.31.2/src/flowerpower/utils/misc.py +421 -0
  7. flowerpower-0.31.0/src/flowerpower/utils/misc.py +0 -420
  8. {flowerpower-0.31.0 → flowerpower-0.31.2}/LICENSE +0 -0
  9. {flowerpower-0.31.0 → flowerpower-0.31.2}/README.md +0 -0
  10. {flowerpower-0.31.0 → flowerpower-0.31.2}/setup.cfg +0 -0
  11. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
  12. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
  13. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/FlowerPower.egg-info/entry_points.txt +0 -0
  14. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/FlowerPower.egg-info/requires.txt +0 -0
  15. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/FlowerPower.egg-info/top_level.txt +0 -0
  16. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/__init__.py +0 -0
  17. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/__init__.py +0 -0
  18. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/base.py +0 -0
  19. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/exceptions.py +0 -0
  20. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
  21. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
  22. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/builder.py +0 -0
  23. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/builder_adapter.py +0 -0
  24. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/builder_executor.py +0 -0
  25. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/pipeline/run.py +0 -0
  26. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/project/__init__.py +0 -0
  27. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cfg/project/adapter.py +0 -0
  28. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cli/__init__.py +0 -0
  29. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cli/cfg.py +0 -0
  30. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cli/pipeline.py +0 -0
  31. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/cli/utils.py +0 -0
  32. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/flowerpower.py +0 -0
  33. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/__init__.py +0 -0
  34. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/base.py +0 -0
  35. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/config_manager.py +0 -0
  36. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/executor.py +0 -0
  37. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/io.py +0 -0
  38. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/lifecycle_manager.py +0 -0
  39. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/pipeline.py +0 -0
  40. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/pipeline/registry.py +0 -0
  41. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/plugins/io/__init__.py +0 -0
  42. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/__init__.py +0 -0
  43. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/_backend.py +0 -0
  44. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/executor.py +0 -0
  45. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/general.py +0 -0
  46. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/hamilton.py +0 -0
  47. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/logging.py +0 -0
  48. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/settings/retry.py +0 -0
  49. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/__init__.py +0 -0
  50. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/adapter.py +0 -0
  51. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/callback.py +0 -0
  52. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/config.py +0 -0
  53. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/executor.py +0 -0
  54. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/filesystem.py +0 -0
  55. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/logging.py +0 -0
  56. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/monkey.py +0 -0
  57. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/open_telemetry.py +0 -0
  58. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/security.py +0 -0
  59. {flowerpower-0.31.0 → flowerpower-0.31.2}/src/flowerpower/utils/templates.py +0 -0
  60. {flowerpower-0.31.0 → flowerpower-0.31.2}/tests/test_flowerpower_project.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.31.0
3
+ Version: 0.31.2
4
4
  Summary: A simple workflow framework for building and managing data processing pipelines
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -4,7 +4,7 @@ description = "A simple workflow framework for building and managing data proces
4
4
  authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.11"
7
- version = "0.31.0"
7
+ version = "0.31.2"
8
8
  keywords = ["hamilton", "workflow", "pipeline", "scheduler", "dask", "ray"]
9
9
 
10
10
  dependencies = [
@@ -76,6 +76,7 @@ dev-dependencies = [
76
76
  "numpy>=2.3.3",
77
77
  "matplotlib>=3.10.6",
78
78
  "seaborn>=0.13.2",
79
+ "pyyaml>=6.0.3",
79
80
  ]
80
81
  package = true
81
82
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.31.0
3
+ Version: 0.31.2
4
4
  Summary: A simple workflow framework for building and managing data processing pipelines
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -580,11 +580,32 @@ class PipelineManager:
580
580
  """
581
581
  return self._lifecycle_manager.list_pipelines()
582
582
 
583
- def show_pipelines(self) -> None:
584
- """Display all available pipelines in a formatted table.
583
+ def show_pipelines(self, format: str = "table") -> None:
584
+ """Display all available pipelines in a selected format.
585
585
 
586
- Uses rich formatting for terminal display.
586
+ Args:
587
+ format: One of "table", "json", or "yaml". Defaults to "table".
587
588
  """
589
+ fmt = (format or "table").lower()
590
+ if fmt == "table":
591
+ return self.registry.show_pipelines()
592
+ names = self._lifecycle_manager.list_pipelines()
593
+ try:
594
+ if fmt == "json":
595
+ import json
596
+ print(json.dumps(names))
597
+ return None
598
+ if fmt == "yaml":
599
+ import yaml # type: ignore
600
+ print(yaml.safe_dump(names, sort_keys=False))
601
+ return None
602
+ except Exception as e:
603
+ warnings.warn(
604
+ f"Failed to format pipelines as {fmt}: {e}. Falling back to table.",
605
+ RuntimeWarning,
606
+ )
607
+ return self.registry.show_pipelines()
608
+ warnings.warn(f"Unknown format '{format}', using table.", RuntimeWarning)
588
609
  return self.registry.show_pipelines()
589
610
 
590
611
  @property
@@ -985,7 +1006,13 @@ class PipelineManager:
985
1006
  )
986
1007
 
987
1008
  # Visualizer Delegations
988
- def save_dag(self, name: str, format: str = "png", reload: bool = False) -> None:
1009
+ def save_dag(
1010
+ self,
1011
+ name: str,
1012
+ format: str = "png",
1013
+ reload: bool = False,
1014
+ output_path: str | None = None,
1015
+ ) -> str:
989
1016
  """Save pipeline DAG visualization to a file.
990
1017
 
991
1018
  Creates a visual representation of the pipeline's directed acyclic graph (DAG)
@@ -1020,7 +1047,9 @@ class PipelineManager:
1020
1047
  ... reload=True
1021
1048
  ... )
1022
1049
  """
1023
- self.visualizer.save_dag(name=name, format=format, reload=reload)
1050
+ return self.visualizer.save_dag(
1051
+ name=name, format=format, reload=reload, output_path=output_path
1052
+ )
1024
1053
 
1025
1054
  def show_dag(
1026
1055
  self, name: str, format: str = "png", reload: bool = False, raw: bool = False
@@ -67,7 +67,8 @@ class PipelineVisualizer:
67
67
  name: str,
68
68
  format: str = "png",
69
69
  reload: bool = False,
70
- ):
70
+ output_path: str | None = None,
71
+ ) -> str:
71
72
  """
72
73
  Save an image of the graph of functions for a given pipeline name.
73
74
 
@@ -86,25 +87,38 @@ class PipelineVisualizer:
86
87
  """
87
88
  dag = self._get_dag_object(name=name, reload=reload)
88
89
 
89
- # Use project_cfg attributes for path and filesystem access
90
- graph_dir = posixpath.join(self.project_cfg.base_dir, "graphs")
91
- self._fs.makedirs(graph_dir, exist_ok=True)
92
-
93
- output_path = posixpath.join(
94
- graph_dir, name
95
- ) # Output filename is just the pipeline name
96
- output_path_with_ext = f"{output_path}.{format}"
90
+ # Determine final output path
91
+ if output_path is None:
92
+ graph_dir = posixpath.join(self.project_cfg.base_dir, "graphs")
93
+ self._fs.makedirs(graph_dir, exist_ok=True)
94
+ base = posixpath.join(graph_dir, name)
95
+ final_path = f"{base}.{format}"
96
+ render_path = base
97
+ else:
98
+ # If output_path already has an extension, use as-is; otherwise append format
99
+ if "." in posixpath.basename(output_path):
100
+ final_path = output_path
101
+ # Remove extension for graphviz render base path
102
+ render_path = final_path.rsplit(".", 1)[0]
103
+ fmt = final_path.rsplit(".", 1)[1]
104
+ if fmt != format:
105
+ # Honor explicit extension if it differs from format argument
106
+ format = fmt
107
+ else:
108
+ final_path = f"{output_path}.{format}"
109
+ render_path = output_path
97
110
 
98
111
  # Render the DAG using the graphviz object returned by display_all_functions
99
112
  dag.render(
100
- output_path, # graphviz appends the format automatically
113
+ render_path, # graphviz appends the format automatically
101
114
  format=format,
102
115
  cleanup=True,
103
116
  view=False,
104
117
  )
105
118
  print(
106
- f"📊 Saved graph for [bold blue]{self.project_cfg.name}.{name}[/bold blue] to [green]{output_path_with_ext}[/green]"
119
+ f"📊 Saved graph for [bold blue]{self.project_cfg.name}.{name}[/bold blue] to [green]{final_path}[/green]"
107
120
  )
121
+ return final_path
108
122
 
109
123
  def show_dag(
110
124
  self,
@@ -0,0 +1,421 @@
1
+ import importlib
2
+ import os
3
+ import subprocess
4
+ import tempfile
5
+ import time
6
+ # from collections.abc import Iterable
7
+ from typing import Any
8
+
9
+ import msgspec
10
+ from fsspec_utils import AbstractFileSystem, filesystem
11
+ from .security import validate_file_path
12
+ from fsspec_utils.utils import run_parallel
13
+
14
+ # if importlib.util.find_spec("joblib"):
15
+ # from joblib import Parallel, delayed
16
+ # from rich.progress import (BarColumn, Progress, TextColumn,
17
+ # TimeElapsedColumn)
18
+
19
+ # def _prepare_parallel_args(
20
+ # args: tuple, kwargs: dict
21
+ # ) -> tuple[list, list, dict, dict, int]:
22
+ # """Prepare and validate arguments for parallel execution.
23
+
24
+ # Args:
25
+ # args: Positional arguments
26
+ # kwargs: Keyword arguments
27
+
28
+ # Returns:
29
+ # tuple: (iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len)
30
+
31
+ # Raises:
32
+ # ValueError: If no iterable arguments or length mismatch
33
+ # """
34
+ # iterables = []
35
+ # fixed_args = []
36
+ # iterable_kwargs = {}
37
+ # fixed_kwargs = {}
38
+ # first_iterable_len = None
39
+
40
+ # # Process positional arguments
41
+ # for arg in args:
42
+ # if isinstance(arg, (list, tuple)) and not isinstance(arg[0], (list, tuple)):
43
+ # iterables.append(arg)
44
+ # if first_iterable_len is None:
45
+ # first_iterable_len = len(arg)
46
+ # elif len(arg) != first_iterable_len:
47
+ # raise ValueError(
48
+ # f"Iterable length mismatch: argument has length {len(arg)}, expected {first_iterable_len}"
49
+ # )
50
+ # else:
51
+ # fixed_args.append(arg)
52
+
53
+ # # Process keyword arguments
54
+ # for key, value in kwargs.items():
55
+ # if isinstance(value, (list, tuple)) and not isinstance(
56
+ # value[0], (list, tuple)
57
+ # ):
58
+ # if first_iterable_len is None:
59
+ # first_iterable_len = len(value)
60
+ # elif len(value) != first_iterable_len:
61
+ # raise ValueError(
62
+ # f"Iterable length mismatch: {key} has length {len(value)}, expected {first_iterable_len}"
63
+ # )
64
+ # iterable_kwargs[key] = value
65
+ # else:
66
+ # fixed_kwargs[key] = value
67
+
68
+ # if first_iterable_len is None:
69
+ # raise ValueError("At least one iterable argument is required")
70
+
71
+ # return iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len
72
+
73
+ # def _execute_parallel_with_progress(
74
+ # func: callable,
75
+ # iterables: list,
76
+ # fixed_args: list,
77
+ # iterable_kwargs: dict,
78
+ # fixed_kwargs: dict,
79
+ # param_combinations: list,
80
+ # parallel_kwargs: dict,
81
+ # ) -> list:
82
+ # """Execute parallel tasks with progress tracking.
83
+
84
+ # Args:
85
+ # func: Function to execute
86
+ # iterables: List of iterable arguments
87
+ # fixed_args: List of fixed arguments
88
+ # iterable_kwargs: Dictionary of iterable keyword arguments
89
+ # fixed_kwargs: Dictionary of fixed keyword arguments
90
+ # param_combinations: List of parameter combinations
91
+ # parallel_kwargs: Parallel execution configuration
92
+
93
+ # Returns:
94
+ # list: Results from parallel execution
95
+ # """
96
+ # results = [None] * len(param_combinations)
97
+ # with Progress(
98
+ # TextColumn("[progress.description]{task.description}"),
99
+ # BarColumn(),
100
+ # "[progress.percentage]{task.percentage:>3.0f}%",
101
+ # TimeElapsedColumn(),
102
+ # transient=True,
103
+ # ) as progress:
104
+ # task = progress.add_task(
105
+ # "Running in parallel...", total=len(param_combinations)
106
+ # )
107
+
108
+ # def wrapper(idx, param_tuple):
109
+ # res = func(
110
+ # *(list(param_tuple[: len(iterables)]) + fixed_args),
111
+ # **{
112
+ # k: v
113
+ # for k, v in zip(
114
+ # iterable_kwargs.keys(), param_tuple[len(iterables) :]
115
+ # )
116
+ # },
117
+ # **fixed_kwargs,
118
+ # )
119
+ # progress.update(task, advance=1)
120
+ # return idx, res
121
+ #
122
+ # for idx, result in Parallel(**parallel_kwargs)(
123
+ # delayed(wrapper)(i, param_tuple)
124
+ # for i, param_tuple in enumerate(param_combinations)
125
+ # ):
126
+ # results[idx] = result
127
+ # return results
128
+
129
+ # def _execute_parallel_without_progress(
130
+ # func: callable,
131
+ # iterables: list,
132
+ # fixed_args: list,
133
+ # iterable_kwargs: dict,
134
+ # fixed_kwargs: dict,
135
+ # param_combinations: list,
136
+ # parallel_kwargs: dict,
137
+ # ) -> list:
138
+ # """Execute parallel tasks without progress tracking.
139
+
140
+ # Args:
141
+ # func: Function to execute
142
+ # iterables: List of iterable arguments
143
+ # fixed_args: List of fixed arguments
144
+ # iterable_kwargs: Dictionary of iterable keyword arguments
145
+ # fixed_kwargs: Dictionary of fixed keyword arguments
146
+ # param_combinations: List of parameter combinations
147
+ # parallel_kwargs: Parallel execution configuration
148
+
149
+ # Returns:
150
+ # list: Results from parallel execution
151
+ # """
152
+ # return Parallel(**parallel_kwargs)(
153
+ # delayed(func)(
154
+ # *(list(param_tuple[: len(iterables)]) + fixed_args),
155
+ # **{
156
+ # k: v
157
+ # for k, v in zip(
158
+ # iterable_kwargs.keys(), param_tuple[len(iterables) :]
159
+ # )
160
+ # },
161
+ # **fixed_kwargs,
162
+ # )
163
+ # for param_tuple in param_combinations
164
+ # )
165
+
166
+ # def run_parallel(
167
+ # func: callable,
168
+ # *args,
169
+ # n_jobs: int = -1,
170
+ # backend: str = "threading",
171
+ # verbose: bool = True,
172
+ # **kwargs,
173
+ # ) -> list[any]:
174
+ # """Runs a function for a list of parameters in parallel.
175
+
176
+ # Args:
177
+ # func (Callable): function to run in parallel
178
+ # *args: Positional arguments. Can be single values or iterables
179
+ # n_jobs (int, optional): Number of joblib workers. Defaults to -1
180
+ # backend (str, optional): joblib backend. Valid options are
181
+ # `loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
182
+ # verbose (bool, optional): Show progress bar. Defaults to True
183
+ # **kwargs: Keyword arguments. Can be single values or iterables
184
+
185
+ # Returns:
186
+ # list[any]: Function output
187
+
188
+ # Examples:
189
+ # >>> # Single iterable argument
190
+ # >>> run_parallel(func, [1,2,3], fixed_arg=42)
191
+
192
+ # >>> # Multiple iterables in args and kwargs
193
+ # >>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
194
+
195
+ # >>> # Only kwargs iterables
196
+ # >>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
197
+ # """
198
+ # parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
199
+
200
+ # # Prepare and validate arguments
201
+ # iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len = _prepare_parallel_args(
202
+ # args, kwargs
203
+ # )
204
+
205
+ # # Create parameter combinations
206
+ # all_iterables = iterables + list(iterable_kwargs.values())
207
+ # param_combinations = list(zip(*all_iterables))
208
+
209
+ # # Execute with or without progress tracking
210
+ # if not verbose:
211
+ # return _execute_parallel_without_progress(
212
+ # func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
213
+ # param_combinations, parallel_kwargs
214
+ # )
215
+ # else:
216
+ # return _execute_parallel_with_progress(
217
+ # func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
218
+ # param_combinations, parallel_kwargs
219
+ # )
220
+
221
+ # else:
222
+
223
+ # def run_parallel(*args, **kwargs):
224
+ # raise ImportError("joblib not installed")
225
+
226
+
227
+ def get_partitions_from_path(
228
+ path: str, partitioning: str | list[str] | None = None
229
+ ) -> list[tuple]:
230
+ """Get the dataset partitions from the file path.
231
+
232
+ Args:
233
+ path (str): File path.
234
+ partitioning (str | list[str] | None, optional): Partitioning type. Defaults to None.
235
+
236
+ Returns:
237
+ list[tuple]: Partitions.
238
+ """
239
+ if "." in path:
240
+ path = os.path.dirname(path)
241
+
242
+ parts = path.split("/")
243
+
244
+ if isinstance(partitioning, str):
245
+ if partitioning == "hive":
246
+ return [tuple(p.split("=")) for p in parts if "=" in p]
247
+
248
+ else:
249
+ return [
250
+ (partitioning, parts[0]),
251
+ ]
252
+ else:
253
+ return list(zip(partitioning, parts[-len(partitioning) :]))
254
+
255
+
256
+ def _validate_image_format(format: str) -> str:
257
+ """Validate image format to prevent injection attacks.
258
+
259
+ Args:
260
+ format: Image format to validate
261
+
262
+ Returns:
263
+ str: Validated format
264
+
265
+ Raises:
266
+ ValueError: If format is not supported
267
+ """
268
+ allowed_formats = {"svg", "png", "jpg", "jpeg", "gif", "pdf", "html"}
269
+ if format not in allowed_formats:
270
+ raise ValueError(f"Unsupported format: {format}. Allowed: {allowed_formats}")
271
+ return format
272
+
273
+ def _create_temp_image_file(data: str | bytes, format: str) -> str:
274
+ """Create a temporary file with image data.
275
+
276
+ Args:
277
+ data: Image data as string or bytes
278
+ format: Validated image format
279
+
280
+ Returns:
281
+ str: Path to temporary file
282
+
283
+ Raises:
284
+ OSError: If file creation fails
285
+ """
286
+ with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
287
+ if isinstance(data, str):
288
+ tmp.write(data.encode('utf-8'))
289
+ else:
290
+ tmp.write(data)
291
+ tmp_path = tmp.name
292
+
293
+ # Validate the temporary file path for security
294
+ validate_file_path(tmp_path, allow_relative=False)
295
+ return tmp_path
296
+
297
+ def _open_image_viewer(tmp_path: str) -> None:
298
+ """Open image viewer with the given file path.
299
+
300
+ Args:
301
+ tmp_path: Path to temporary image file
302
+
303
+ Raises:
304
+ OSError: If platform is not supported
305
+ subprocess.CalledProcessError: If subprocess fails
306
+ subprocess.TimeoutExpired: If subprocess times out
307
+ """
308
+ import platform
309
+ platform_system = platform.system()
310
+
311
+ if platform_system == "Darwin": # macOS
312
+ subprocess.run(["open", tmp_path], check=True, timeout=10)
313
+ elif platform_system == "Linux":
314
+ subprocess.run(["xdg-open", tmp_path], check=True, timeout=10)
315
+ elif platform_system == "Windows":
316
+ subprocess.run(["start", "", tmp_path], shell=True, check=True, timeout=10)
317
+ else:
318
+ raise OSError(f"Unsupported platform: {platform_system}")
319
+
320
+ def _cleanup_temp_file(tmp_path: str) -> None:
321
+ """Clean up temporary file.
322
+
323
+ Args:
324
+ tmp_path: Path to temporary file to remove
325
+ """
326
+ try:
327
+ os.unlink(tmp_path)
328
+ except OSError:
329
+ pass # File might already be deleted or in use
330
+
331
+ def view_img(data: str | bytes, format: str = "svg"):
332
+ """View image data using the system's default image viewer.
333
+
334
+ Args:
335
+ data: Image data as string or bytes
336
+ format: Image format (svg, png, jpg, jpeg, gif, pdf, html)
337
+
338
+ Raises:
339
+ ValueError: If format is not supported
340
+ RuntimeError: If file opening fails
341
+ OSError: If platform is not supported
342
+ """
343
+ # Validate format to prevent injection attacks
344
+ validated_format = _validate_image_format(format)
345
+
346
+ # Create a temporary file with validated extension
347
+ tmp_path = _create_temp_image_file(data, validated_format)
348
+
349
+ try:
350
+ # Open image viewer with secure subprocess call
351
+ _open_image_viewer(tmp_path)
352
+ except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
353
+ # Clean up temp file on error
354
+ _cleanup_temp_file(tmp_path)
355
+ raise RuntimeError(f"Failed to open file: {e}")
356
+
357
+ # Optional: Remove the temp file after a delay
358
+ time.sleep(2) # Wait for viewer to open
359
+ _cleanup_temp_file(tmp_path)
360
+
361
+
362
+ def update_config_from_dict(
363
+ struct: msgspec.Struct, data: dict[str, Any]
364
+ ) -> msgspec.Struct:
365
+ """
366
+ Updates a msgspec.Struct instance with values from a dictionary.
367
+ Handles nested msgspec.Struct objects and nested dictionaries.
368
+
369
+ Args:
370
+ obj: The msgspec.Struct object to update
371
+ update_dict: Dictionary containing update values
372
+
373
+ Returns:
374
+ Updated msgspec.Struct instance
375
+ """
376
+ # Convert the struct to a dictionary for easier manipulation
377
+ obj_dict = msgspec.to_builtins(struct)
378
+
379
+ # Update the dictionary recursively
380
+ for key, value in data.items():
381
+ if key in obj_dict:
382
+ if isinstance(value, dict) and isinstance(obj_dict[key], dict):
383
+ # Handle nested dictionaries
384
+ obj_dict[key] = update_nested_dict(obj_dict[key], value)
385
+ else:
386
+ # Direct update for non-nested values
387
+ obj_dict[key] = value
388
+
389
+ # Convert back to the original struct type
390
+ return msgspec.convert(obj_dict, type(struct))
391
+
392
+
393
+ def update_nested_dict(
394
+ original: dict[str, Any], updates: dict[str, Any]
395
+ ) -> dict[str, Any]:
396
+ """Helper function to update nested dictionaries"""
397
+ result = original.copy()
398
+ for key, value in updates.items():
399
+ if key in result and isinstance(value, dict) and isinstance(result[key], dict):
400
+ # Recursively update nested dictionaries
401
+ result[key] = update_nested_dict(result[key], value)
402
+ else:
403
+ # Direct update
404
+ result[key] = value
405
+ return result
406
+
407
+
408
+ def get_filesystem(fs: AbstractFileSystem | None = None, fs_type: str = "file") -> AbstractFileSystem:
409
+ """
410
+ Helper function to get a filesystem instance.
411
+
412
+ Args:
413
+ fs: An optional filesystem instance to use. If provided, this will be returned directly.
414
+ fs_type: The type of filesystem to create if fs is None. Defaults to "file".
415
+
416
+ Returns:
417
+ An AbstractFileSystem instance.
418
+ """
419
+ if fs is None:
420
+ fs = filesystem(fs_type)
421
+ return fs
@@ -1,420 +0,0 @@
1
- import importlib
2
- import os
3
- import subprocess
4
- import tempfile
5
- import time
6
- # from collections.abc import Iterable
7
- from typing import Any
8
-
9
- import msgspec
10
- from fsspec_utils import AbstractFileSystem, filesystem
11
- from .security import validate_file_path
12
-
13
- if importlib.util.find_spec("joblib"):
14
- from joblib import Parallel, delayed
15
- from rich.progress import (BarColumn, Progress, TextColumn,
16
- TimeElapsedColumn)
17
-
18
- def _prepare_parallel_args(
19
- args: tuple, kwargs: dict
20
- ) -> tuple[list, list, dict, dict, int]:
21
- """Prepare and validate arguments for parallel execution.
22
-
23
- Args:
24
- args: Positional arguments
25
- kwargs: Keyword arguments
26
-
27
- Returns:
28
- tuple: (iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len)
29
-
30
- Raises:
31
- ValueError: If no iterable arguments or length mismatch
32
- """
33
- iterables = []
34
- fixed_args = []
35
- iterable_kwargs = {}
36
- fixed_kwargs = {}
37
- first_iterable_len = None
38
-
39
- # Process positional arguments
40
- for arg in args:
41
- if isinstance(arg, (list, tuple)) and not isinstance(arg[0], (list, tuple)):
42
- iterables.append(arg)
43
- if first_iterable_len is None:
44
- first_iterable_len = len(arg)
45
- elif len(arg) != first_iterable_len:
46
- raise ValueError(
47
- f"Iterable length mismatch: argument has length {len(arg)}, expected {first_iterable_len}"
48
- )
49
- else:
50
- fixed_args.append(arg)
51
-
52
- # Process keyword arguments
53
- for key, value in kwargs.items():
54
- if isinstance(value, (list, tuple)) and not isinstance(
55
- value[0], (list, tuple)
56
- ):
57
- if first_iterable_len is None:
58
- first_iterable_len = len(value)
59
- elif len(value) != first_iterable_len:
60
- raise ValueError(
61
- f"Iterable length mismatch: {key} has length {len(value)}, expected {first_iterable_len}"
62
- )
63
- iterable_kwargs[key] = value
64
- else:
65
- fixed_kwargs[key] = value
66
-
67
- if first_iterable_len is None:
68
- raise ValueError("At least one iterable argument is required")
69
-
70
- return iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len
71
-
72
- def _execute_parallel_with_progress(
73
- func: callable,
74
- iterables: list,
75
- fixed_args: list,
76
- iterable_kwargs: dict,
77
- fixed_kwargs: dict,
78
- param_combinations: list,
79
- parallel_kwargs: dict,
80
- ) -> list:
81
- """Execute parallel tasks with progress tracking.
82
-
83
- Args:
84
- func: Function to execute
85
- iterables: List of iterable arguments
86
- fixed_args: List of fixed arguments
87
- iterable_kwargs: Dictionary of iterable keyword arguments
88
- fixed_kwargs: Dictionary of fixed keyword arguments
89
- param_combinations: List of parameter combinations
90
- parallel_kwargs: Parallel execution configuration
91
-
92
- Returns:
93
- list: Results from parallel execution
94
- """
95
- results = [None] * len(param_combinations)
96
- with Progress(
97
- TextColumn("[progress.description]{task.description}"),
98
- BarColumn(),
99
- "[progress.percentage]{task.percentage:>3.0f}%",
100
- TimeElapsedColumn(),
101
- transient=True,
102
- ) as progress:
103
- task = progress.add_task(
104
- "Running in parallel...", total=len(param_combinations)
105
- )
106
-
107
- def wrapper(idx, param_tuple):
108
- res = func(
109
- *(list(param_tuple[: len(iterables)]) + fixed_args),
110
- **{
111
- k: v
112
- for k, v in zip(
113
- iterable_kwargs.keys(), param_tuple[len(iterables) :]
114
- )
115
- },
116
- **fixed_kwargs,
117
- )
118
- progress.update(task, advance=1)
119
- return idx, res
120
-
121
- for idx, result in Parallel(**parallel_kwargs)(
122
- delayed(wrapper)(i, param_tuple)
123
- for i, param_tuple in enumerate(param_combinations)
124
- ):
125
- results[idx] = result
126
- return results
127
-
128
- def _execute_parallel_without_progress(
129
- func: callable,
130
- iterables: list,
131
- fixed_args: list,
132
- iterable_kwargs: dict,
133
- fixed_kwargs: dict,
134
- param_combinations: list,
135
- parallel_kwargs: dict,
136
- ) -> list:
137
- """Execute parallel tasks without progress tracking.
138
-
139
- Args:
140
- func: Function to execute
141
- iterables: List of iterable arguments
142
- fixed_args: List of fixed arguments
143
- iterable_kwargs: Dictionary of iterable keyword arguments
144
- fixed_kwargs: Dictionary of fixed keyword arguments
145
- param_combinations: List of parameter combinations
146
- parallel_kwargs: Parallel execution configuration
147
-
148
- Returns:
149
- list: Results from parallel execution
150
- """
151
- return Parallel(**parallel_kwargs)(
152
- delayed(func)(
153
- *(list(param_tuple[: len(iterables)]) + fixed_args),
154
- **{
155
- k: v
156
- for k, v in zip(
157
- iterable_kwargs.keys(), param_tuple[len(iterables) :]
158
- )
159
- },
160
- **fixed_kwargs,
161
- )
162
- for param_tuple in param_combinations
163
- )
164
-
165
- def run_parallel(
166
- func: callable,
167
- *args,
168
- n_jobs: int = -1,
169
- backend: str = "threading",
170
- verbose: bool = True,
171
- **kwargs,
172
- ) -> list[any]:
173
- """Runs a function for a list of parameters in parallel.
174
-
175
- Args:
176
- func (Callable): function to run in parallel
177
- *args: Positional arguments. Can be single values or iterables
178
- n_jobs (int, optional): Number of joblib workers. Defaults to -1
179
- backend (str, optional): joblib backend. Valid options are
180
- `loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
181
- verbose (bool, optional): Show progress bar. Defaults to True
182
- **kwargs: Keyword arguments. Can be single values or iterables
183
-
184
- Returns:
185
- list[any]: Function output
186
-
187
- Examples:
188
- >>> # Single iterable argument
189
- >>> run_parallel(func, [1,2,3], fixed_arg=42)
190
-
191
- >>> # Multiple iterables in args and kwargs
192
- >>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
193
-
194
- >>> # Only kwargs iterables
195
- >>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
196
- """
197
- parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
198
-
199
- # Prepare and validate arguments
200
- iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len = _prepare_parallel_args(
201
- args, kwargs
202
- )
203
-
204
- # Create parameter combinations
205
- all_iterables = iterables + list(iterable_kwargs.values())
206
- param_combinations = list(zip(*all_iterables))
207
-
208
- # Execute with or without progress tracking
209
- if not verbose:
210
- return _execute_parallel_without_progress(
211
- func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
212
- param_combinations, parallel_kwargs
213
- )
214
- else:
215
- return _execute_parallel_with_progress(
216
- func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
217
- param_combinations, parallel_kwargs
218
- )
219
-
220
- else:
221
-
222
def run_parallel(*args, **kwargs):
    """Fallback stub used when joblib is not installed; always raises."""
    raise ImportError("joblib not installed")
224
-
225
-
226
- def get_partitions_from_path(
227
- path: str, partitioning: str | list[str] | None = None
228
- ) -> list[tuple]:
229
- """Get the dataset partitions from the file path.
230
-
231
- Args:
232
- path (str): File path.
233
- partitioning (str | list[str] | None, optional): Partitioning type. Defaults to None.
234
-
235
- Returns:
236
- list[tuple]: Partitions.
237
- """
238
- if "." in path:
239
- path = os.path.dirname(path)
240
-
241
- parts = path.split("/")
242
-
243
- if isinstance(partitioning, str):
244
- if partitioning == "hive":
245
- return [tuple(p.split("=")) for p in parts if "=" in p]
246
-
247
- else:
248
- return [
249
- (partitioning, parts[0]),
250
- ]
251
- else:
252
- return list(zip(partitioning, parts[-len(partitioning) :]))
253
-
254
-
255
- def _validate_image_format(format: str) -> str:
256
- """Validate image format to prevent injection attacks.
257
-
258
- Args:
259
- format: Image format to validate
260
-
261
- Returns:
262
- str: Validated format
263
-
264
- Raises:
265
- ValueError: If format is not supported
266
- """
267
- allowed_formats = {"svg", "png", "jpg", "jpeg", "gif", "pdf", "html"}
268
- if format not in allowed_formats:
269
- raise ValueError(f"Unsupported format: {format}. Allowed: {allowed_formats}")
270
- return format
271
-
272
def _create_temp_image_file(data: str | bytes, format: str) -> str:
    """Write image data to a fresh temporary file and return its path.

    The file is created with ``delete=False`` so it outlives the context
    manager; the caller is responsible for removing it afterwards.

    Args:
        data: Image payload; ``str`` data is UTF-8 encoded before writing.
        format: Already-validated image format, used as the file suffix.

    Returns:
        str: Path to the temporary file.

    Raises:
        OSError: If file creation fails.
    """
    # Normalize to bytes up front so the write below has a single code path.
    payload = data.encode('utf-8') if isinstance(data, str) else data
    with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name

    # Defence-in-depth: reject suspicious paths before they reach a viewer.
    validate_file_path(tmp_path, allow_relative=False)
    return tmp_path
295
-
296
def _open_image_viewer(tmp_path: str) -> None:
    """Launch the platform's default viewer for the given file.

    Args:
        tmp_path: Path to the temporary image file to open.

    Raises:
        OSError: If the current platform is not macOS, Linux, or Windows.
        subprocess.CalledProcessError: If the viewer command exits non-zero.
        subprocess.TimeoutExpired: If the viewer command does not return
            within 10 seconds.
    """
    import platform

    system = platform.system()
    if system == "Darwin":  # macOS
        subprocess.run(["open", tmp_path], check=True, timeout=10)
    elif system == "Linux":
        subprocess.run(["xdg-open", tmp_path], check=True, timeout=10)
    elif system == "Windows":
        # "start" is a cmd.exe builtin, hence shell=True; the empty string
        # fills start's window-title argument so tmp_path is not mistaken
        # for the title.
        subprocess.run(["start", "", tmp_path], shell=True, check=True, timeout=10)
    else:
        raise OSError(f"Unsupported platform: {system}")
318
-
319
- def _cleanup_temp_file(tmp_path: str) -> None:
320
- """Clean up temporary file.
321
-
322
- Args:
323
- tmp_path: Path to temporary file to remove
324
- """
325
- try:
326
- os.unlink(tmp_path)
327
- except OSError:
328
- pass # File might already be deleted or in use
329
-
330
def view_img(data: str | bytes, format: str = "svg"):
    """View image data using the system's default image viewer.

    The data is written to a temporary file, opened with the platform's
    default viewer, and the file is removed again after a short grace period.

    Args:
        data: Image data as string or bytes.
        format: Image format (svg, png, jpg, jpeg, gif, pdf, html).

    Raises:
        ValueError: If format is not supported.
        RuntimeError: If opening the file in a viewer fails.
        OSError: If the platform is not supported.
    """
    # Validate format to prevent injection attacks.
    validated_format = _validate_image_format(format)

    # Create a temporary file with validated extension.
    tmp_path = _create_temp_image_file(data, validated_format)

    try:
        # Open image viewer with secure subprocess call.
        _open_image_viewer(tmp_path)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
        # Don't leave the temp file behind when the viewer failed to start.
        _cleanup_temp_file(tmp_path)
        # Bug fix: chain the original exception ("from e") so callers can
        # inspect the root cause instead of a bare RuntimeError.
        raise RuntimeError(f"Failed to open file: {e}") from e

    # Give the viewer time to open the file before removing it; most viewers
    # read the file once on startup, so deleting afterwards is safe.
    time.sleep(2)
    _cleanup_temp_file(tmp_path)
359
-
360
-
361
def update_config_from_dict(
    struct: msgspec.Struct, data: dict[str, Any]
) -> msgspec.Struct:
    """
    Update a msgspec.Struct instance with values from a dictionary.

    Handles nested dictionaries by merging them recursively via
    update_nested_dict; keys absent from the struct are ignored.

    Args:
        struct: The msgspec.Struct object to update.
        data: Dictionary containing update values.

    Returns:
        Updated msgspec.Struct instance of the same type as the input.
    """
    # Round-trip through builtins so the struct can be edited as a dict.
    current = msgspec.to_builtins(struct)

    for field, new_value in data.items():
        # Unknown keys are silently skipped — only existing fields change.
        if field not in current:
            continue
        existing = current[field]
        if isinstance(new_value, dict) and isinstance(existing, dict):
            # Merge nested dictionaries recursively.
            current[field] = update_nested_dict(existing, new_value)
        else:
            # Scalar (or type-mismatched) values replace the old one outright.
            current[field] = new_value

    # Re-validate and convert back to the original struct type.
    return msgspec.convert(current, type(struct))
390
-
391
-
392
- def update_nested_dict(
393
- original: dict[str, Any], updates: dict[str, Any]
394
- ) -> dict[str, Any]:
395
- """Helper function to update nested dictionaries"""
396
- result = original.copy()
397
- for key, value in updates.items():
398
- if key in result and isinstance(value, dict) and isinstance(result[key], dict):
399
- # Recursively update nested dictionaries
400
- result[key] = update_nested_dict(result[key], value)
401
- else:
402
- # Direct update
403
- result[key] = value
404
- return result
405
-
406
-
407
def get_filesystem(fs: AbstractFileSystem | None = None, fs_type: str = "file") -> AbstractFileSystem:
    """
    Return a usable filesystem instance.

    Args:
        fs: An optional filesystem instance to use. If provided, it is
            returned unchanged.
        fs_type: The fsspec protocol used to create a new filesystem when
            fs is None. Defaults to "file" (local filesystem).

    Returns:
        An AbstractFileSystem instance.
    """
    # Reuse the caller's instance when given; otherwise build a fresh one.
    return fs if fs is not None else filesystem(fs_type)
File without changes
File without changes
File without changes