flowyml 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/parallel.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
"""Parallel and distributed execution utilities."""
|
|
2
|
+
|
|
3
|
+
import concurrent.futures
|
|
4
|
+
from typing import Any
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
import multiprocessing
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class ParallelConfig:
|
|
12
|
+
"""Configuration for parallel execution."""
|
|
13
|
+
|
|
14
|
+
max_workers: int = None # None = CPU count
|
|
15
|
+
backend: str = "thread" # "thread" or "process"
|
|
16
|
+
timeout: float | None = None
|
|
17
|
+
chunk_size: int = 1
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ParallelExecutor:
|
|
21
|
+
"""Execute steps in parallel.
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
```python
|
|
25
|
+
from flowyml.core.parallel import ParallelExecutor
|
|
26
|
+
|
|
27
|
+
executor = ParallelExecutor(max_workers=4)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@step(parallel=True)
|
|
31
|
+
def process_shard(shard):
|
|
32
|
+
return expensive_processing(shard)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
results = executor.map(process_shard, shards)
|
|
36
|
+
```
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
max_workers: int | None = None,
|
|
42
|
+
backend: str = "thread",
|
|
43
|
+
timeout: float | None = None,
|
|
44
|
+
):
|
|
45
|
+
"""Initialize parallel executor.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
max_workers: Maximum worker threads/processes (None = CPU count)
|
|
49
|
+
backend: "thread" for threading or "process" for multiprocessing
|
|
50
|
+
timeout: Timeout for each task in seconds
|
|
51
|
+
"""
|
|
52
|
+
self.config = ParallelConfig(
|
|
53
|
+
max_workers=max_workers or multiprocessing.cpu_count(),
|
|
54
|
+
backend=backend,
|
|
55
|
+
timeout=timeout,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
def map_items(
|
|
59
|
+
self,
|
|
60
|
+
func: Callable,
|
|
61
|
+
items: list[Any],
|
|
62
|
+
**kwargs,
|
|
63
|
+
) -> list[Any]:
|
|
64
|
+
"""Execute function in parallel over items.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
func: Function to execute
|
|
68
|
+
items: Items to process
|
|
69
|
+
**kwargs: Additional arguments for function
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
List of results in same order as items
|
|
73
|
+
"""
|
|
74
|
+
if self.config.backend == "thread":
|
|
75
|
+
executor_class = concurrent.futures.ThreadPoolExecutor
|
|
76
|
+
else:
|
|
77
|
+
executor_class = concurrent.futures.ProcessPoolExecutor
|
|
78
|
+
|
|
79
|
+
with executor_class(max_workers=self.config.max_workers) as executor:
|
|
80
|
+
# Submit all tasks
|
|
81
|
+
futures = []
|
|
82
|
+
for item in items:
|
|
83
|
+
future = executor.submit(func, item, **kwargs) if kwargs else executor.submit(func, item)
|
|
84
|
+
futures.append(future)
|
|
85
|
+
|
|
86
|
+
# Collect results
|
|
87
|
+
results = []
|
|
88
|
+
for future in concurrent.futures.as_completed(
|
|
89
|
+
futures,
|
|
90
|
+
timeout=self.config.timeout,
|
|
91
|
+
):
|
|
92
|
+
try:
|
|
93
|
+
result = future.result()
|
|
94
|
+
results.append(result)
|
|
95
|
+
except Exception as e:
|
|
96
|
+
results.append(ParallelExecutionError(str(e)))
|
|
97
|
+
|
|
98
|
+
return results
|
|
99
|
+
|
|
100
|
+
def starmap(
|
|
101
|
+
self,
|
|
102
|
+
func: Callable,
|
|
103
|
+
items: list[tuple],
|
|
104
|
+
) -> list[Any]:
|
|
105
|
+
"""Execute function with multiple arguments in parallel.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
func: Function to execute
|
|
109
|
+
items: List of tuples (each tuple is arguments for one call)
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
List of results
|
|
113
|
+
"""
|
|
114
|
+
if self.config.backend == "thread":
|
|
115
|
+
executor_class = concurrent.futures.ThreadPoolExecutor
|
|
116
|
+
else:
|
|
117
|
+
executor_class = concurrent.futures.ProcessPoolExecutor
|
|
118
|
+
|
|
119
|
+
with executor_class(max_workers=self.config.max_workers) as executor:
|
|
120
|
+
futures = [executor.submit(func, *args) for args in items]
|
|
121
|
+
|
|
122
|
+
results = []
|
|
123
|
+
for future in concurrent.futures.as_completed(
|
|
124
|
+
futures,
|
|
125
|
+
timeout=self.config.timeout,
|
|
126
|
+
):
|
|
127
|
+
try:
|
|
128
|
+
result = future.result()
|
|
129
|
+
results.append(result)
|
|
130
|
+
except Exception as e:
|
|
131
|
+
results.append(ParallelExecutionError(str(e)))
|
|
132
|
+
|
|
133
|
+
return results
|
|
134
|
+
|
|
135
|
+
def execute_parallel_steps(
|
|
136
|
+
self,
|
|
137
|
+
steps: list[Callable],
|
|
138
|
+
inputs: dict[str, Any] | None = None,
|
|
139
|
+
) -> list[Any]:
|
|
140
|
+
"""Execute multiple independent steps in parallel.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
steps: List of step functions to execute
|
|
144
|
+
inputs: Shared inputs for all steps
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
List of results from each step
|
|
148
|
+
"""
|
|
149
|
+
inputs = inputs or {}
|
|
150
|
+
|
|
151
|
+
if self.config.backend == "thread":
|
|
152
|
+
executor_class = concurrent.futures.ThreadPoolExecutor
|
|
153
|
+
else:
|
|
154
|
+
executor_class = concurrent.futures.ProcessPoolExecutor
|
|
155
|
+
|
|
156
|
+
with executor_class(max_workers=self.config.max_workers) as executor:
|
|
157
|
+
futures = {executor.submit(step, **inputs): step for step in steps}
|
|
158
|
+
|
|
159
|
+
results = []
|
|
160
|
+
for future in concurrent.futures.as_completed(
|
|
161
|
+
futures,
|
|
162
|
+
timeout=self.config.timeout,
|
|
163
|
+
):
|
|
164
|
+
try:
|
|
165
|
+
result = future.result()
|
|
166
|
+
results.append(result)
|
|
167
|
+
except Exception as e:
|
|
168
|
+
results.append(ParallelExecutionError(str(e)))
|
|
169
|
+
|
|
170
|
+
return results
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ParallelExecutionError:
|
|
174
|
+
"""Marker for failed parallel execution."""
|
|
175
|
+
|
|
176
|
+
def __init__(self, error_message: str):
|
|
177
|
+
self.error_message = error_message
|
|
178
|
+
self.failed = True
|
|
179
|
+
|
|
180
|
+
def __repr__(self):
|
|
181
|
+
return f"<ParallelExecutionError: {self.error_message}>"
|
|
182
|
+
|
|
183
|
+
def __bool__(self):
|
|
184
|
+
return False
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def parallel_map(
    func: Callable,
    items: list[Any],
    max_workers: int | None = None,
    backend: str = "thread",
) -> list[Any]:
    """Apply *func* to every item using a throwaway ParallelExecutor.

    Args:
        func: Function to apply
        items: Items to process
        max_workers: Maximum workers
        backend: "thread" or "process"

    Returns:
        List of results

    Example:
        ```python
        from flowyml.core.parallel import parallel_map

        results = parallel_map(process_item, items, max_workers=4)
        ```
    """
    return ParallelExecutor(max_workers=max_workers, backend=backend).map_items(func, items)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class DataParallelExecutor:
|
|
216
|
+
"""Execute operations on data-parallel partitions.
|
|
217
|
+
|
|
218
|
+
Example:
|
|
219
|
+
```python
|
|
220
|
+
from flowyml.core.parallel import DataParallelExecutor
|
|
221
|
+
|
|
222
|
+
executor = DataParallelExecutor(num_partitions=4)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@step
|
|
226
|
+
def process_partition(data_partition):
|
|
227
|
+
return train_on_partition(data_partition)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
results = executor.execute_data_parallel(process_partition, large_dataset)
|
|
231
|
+
```
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
def __init__(self, num_partitions: int = 4):
|
|
235
|
+
"""Initialize data parallel executor.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
num_partitions: Number of data partitions
|
|
239
|
+
"""
|
|
240
|
+
self.num_partitions = num_partitions
|
|
241
|
+
self.executor = ParallelExecutor(max_workers=num_partitions)
|
|
242
|
+
|
|
243
|
+
def partition_data(self, data: Any) -> list[Any]:
|
|
244
|
+
"""Partition data for parallel processing.
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
data: Data to partition
|
|
248
|
+
|
|
249
|
+
Returns:
|
|
250
|
+
List of data partitions
|
|
251
|
+
"""
|
|
252
|
+
try:
|
|
253
|
+
# Try to partition list-like data
|
|
254
|
+
if hasattr(data, "__len__"):
|
|
255
|
+
n = len(data)
|
|
256
|
+
partition_size = n // self.num_partitions
|
|
257
|
+
partitions = []
|
|
258
|
+
|
|
259
|
+
for i in range(self.num_partitions):
|
|
260
|
+
start = i * partition_size
|
|
261
|
+
end = start + partition_size if i < self.num_partitions - 1 else n
|
|
262
|
+
partitions.append(data[start:end])
|
|
263
|
+
|
|
264
|
+
return partitions
|
|
265
|
+
except Exception:
|
|
266
|
+
pass
|
|
267
|
+
|
|
268
|
+
# If partitioning fails, return single partition
|
|
269
|
+
return [data]
|
|
270
|
+
|
|
271
|
+
def execute_data_parallel(
|
|
272
|
+
self,
|
|
273
|
+
func: Callable,
|
|
274
|
+
data: Any,
|
|
275
|
+
) -> list[Any]:
|
|
276
|
+
"""Execute function on data partitions in parallel.
|
|
277
|
+
|
|
278
|
+
Args:
|
|
279
|
+
func: Function to execute on each partition
|
|
280
|
+
data: Data to partition and process
|
|
281
|
+
|
|
282
|
+
Returns:
|
|
283
|
+
List of results from each partition
|
|
284
|
+
"""
|
|
285
|
+
partitions = self.partition_data(data)
|
|
286
|
+
return self.executor.map_items(func, partitions)
|
|
287
|
+
|
|
288
|
+
def reduce_results(
|
|
289
|
+
self,
|
|
290
|
+
results: list[Any],
|
|
291
|
+
reduce_func: Callable | None = None,
|
|
292
|
+
) -> Any:
|
|
293
|
+
"""Reduce parallel results to single output.
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
results: List of partition results
|
|
297
|
+
reduce_func: Function to reduce results (default: concatenate)
|
|
298
|
+
|
|
299
|
+
Returns:
|
|
300
|
+
Reduced result
|
|
301
|
+
"""
|
|
302
|
+
if reduce_func:
|
|
303
|
+
return reduce_func(results)
|
|
304
|
+
|
|
305
|
+
# Default: try to concatenate
|
|
306
|
+
try:
|
|
307
|
+
# Try list concatenation
|
|
308
|
+
if all(isinstance(r, list) for r in results):
|
|
309
|
+
result = []
|
|
310
|
+
for r in results:
|
|
311
|
+
result.extend(r)
|
|
312
|
+
return result
|
|
313
|
+
|
|
314
|
+
# Try dict merge
|
|
315
|
+
if all(isinstance(r, dict) for r in results):
|
|
316
|
+
result = {}
|
|
317
|
+
for r in results:
|
|
318
|
+
result.update(r)
|
|
319
|
+
return result
|
|
320
|
+
|
|
321
|
+
# Return as-is
|
|
322
|
+
return results
|
|
323
|
+
|
|
324
|
+
except Exception:
|
|
325
|
+
return results
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def distribute_across_gpus(
    func: Callable,
    items: list[Any],
    gpu_ids: list[int] | None = None,
) -> list[Any]:
    """Distribute work across multiple GPUs.

    Args:
        func: Function to execute
        items: Items to process
        gpu_ids: List of GPU IDs to use (None = use all available)

    Returns:
        List of results

    Note:
        Workers run in *threads*, not processes: a process pool must
        pickle its callables, and neither the local worker closure below
        nor an arbitrary ``func`` is guaranteed picklable. Thread-based
        dispatch also works with ``torch.cuda.set_device``, which is
        per-thread, and CUDA kernels release the GIL while running.

    Example:
        ```python
        from flowyml.core.parallel import distribute_across_gpus

        results = distribute_across_gpus(train_on_gpu, data_shards, gpu_ids=[0, 1, 2, 3])
        ```
    """
    try:
        import torch

        if gpu_ids is None:
            if torch.cuda.is_available():
                gpu_ids = list(range(torch.cuda.device_count()))
            else:
                gpu_ids = [-1]  # CPU fallback

    except ImportError:
        gpu_ids = [-1]  # CPU fallback

    # Guard against an explicitly empty list (would divide by zero below).
    if not gpu_ids:
        gpu_ids = [-1]  # CPU fallback

    num_gpus = len(gpu_ids)

    def execute_on_gpu(item_and_gpu):
        """Pin this worker thread to its assigned device, then run func."""
        item, gpu_id = item_and_gpu
        if gpu_id >= 0:
            try:
                import torch

                torch.cuda.set_device(gpu_id)
            except Exception:
                # Best effort: fall back to the default device.
                pass
        return func(item)

    # Pair items with GPU IDs in round-robin fashion
    items_with_gpus = [(item, gpu_ids[i % num_gpus]) for i, item in enumerate(items)]

    # Thread backend (see Note): the local closure cannot be pickled for
    # a process pool, so backend="process" would fail at submit time.
    executor = ParallelExecutor(max_workers=num_gpus, backend="thread")
    return executor.map_items(execute_on_gpu, items_with_gpus)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class BatchExecutor:
|
|
386
|
+
"""Execute function on batches in parallel.
|
|
387
|
+
|
|
388
|
+
Example:
|
|
389
|
+
```python
|
|
390
|
+
from flowyml.core.parallel import BatchExecutor
|
|
391
|
+
|
|
392
|
+
executor = BatchExecutor(batch_size=32, max_workers=4)
|
|
393
|
+
|
|
394
|
+
results = executor.execute_batches(inference_func, large_dataset)
|
|
395
|
+
```
|
|
396
|
+
"""
|
|
397
|
+
|
|
398
|
+
def __init__(
|
|
399
|
+
self,
|
|
400
|
+
batch_size: int = 32,
|
|
401
|
+
max_workers: int | None = None,
|
|
402
|
+
):
|
|
403
|
+
"""Initialize batch executor.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
batch_size: Size of each batch
|
|
407
|
+
max_workers: Maximum parallel workers
|
|
408
|
+
"""
|
|
409
|
+
self.batch_size = batch_size
|
|
410
|
+
self.executor = ParallelExecutor(max_workers=max_workers)
|
|
411
|
+
|
|
412
|
+
def create_batches(self, items: list[Any]) -> list[list[Any]]:
|
|
413
|
+
"""Create batches from items.
|
|
414
|
+
|
|
415
|
+
Args:
|
|
416
|
+
items: Items to batch
|
|
417
|
+
|
|
418
|
+
Returns:
|
|
419
|
+
List of batches
|
|
420
|
+
"""
|
|
421
|
+
batches = []
|
|
422
|
+
for i in range(0, len(items), self.batch_size):
|
|
423
|
+
batch = items[i : i + self.batch_size]
|
|
424
|
+
batches.append(batch)
|
|
425
|
+
return batches
|
|
426
|
+
|
|
427
|
+
def execute_batches(
|
|
428
|
+
self,
|
|
429
|
+
func: Callable,
|
|
430
|
+
items: list[Any],
|
|
431
|
+
) -> list[Any]:
|
|
432
|
+
"""Execute function on batches in parallel.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
func: Function to execute on each batch
|
|
436
|
+
items: Items to process
|
|
437
|
+
|
|
438
|
+
Returns:
|
|
439
|
+
List of all results (flattened)
|
|
440
|
+
"""
|
|
441
|
+
batches = self.create_batches(items)
|
|
442
|
+
batch_results = self.executor.map_items(func, batches)
|
|
443
|
+
|
|
444
|
+
# Flatten results
|
|
445
|
+
results = []
|
|
446
|
+
for batch_result in batch_results:
|
|
447
|
+
if isinstance(batch_result, list):
|
|
448
|
+
results.extend(batch_result)
|
|
449
|
+
else:
|
|
450
|
+
results.append(batch_result)
|
|
451
|
+
|
|
452
|
+
return results
|