flowyml 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/utils/git.py ADDED
@@ -0,0 +1,319 @@
1
+ """Git integration utilities for experiment tracking."""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+
8
+ class GitInfo:
9
+ """Git repository information."""
10
+
11
+ def __init__(
12
+ self,
13
+ commit_hash: str | None = None,
14
+ branch: str | None = None,
15
+ is_dirty: bool = False,
16
+ remote_url: str | None = None,
17
+ author: str | None = None,
18
+ commit_message: str | None = None,
19
+ commit_time: str | None = None,
20
+ ):
21
+ self.commit_hash = commit_hash
22
+ self.branch = branch
23
+ self.is_dirty = is_dirty
24
+ self.remote_url = remote_url
25
+ self.author = author
26
+ self.commit_message = commit_message
27
+ self.commit_time = commit_time
28
+
29
+ def to_dict(self) -> dict[str, Any]:
30
+ """Convert to dictionary."""
31
+ return {
32
+ "commit_hash": self.commit_hash,
33
+ "branch": self.branch,
34
+ "is_dirty": self.is_dirty,
35
+ "remote_url": self.remote_url,
36
+ "author": self.author,
37
+ "commit_message": self.commit_message,
38
+ "commit_time": self.commit_time,
39
+ }
40
+
41
+ @property
42
+ def is_available(self) -> bool:
43
+ """Check if git info is available."""
44
+ return self.commit_hash is not None
45
+
46
+
47
+ def run_git_command(command: list, cwd: Path | None = None) -> str | None:
48
+ """Run a git command and return output.
49
+
50
+ Args:
51
+ command: Git command as list of strings
52
+ cwd: Working directory
53
+
54
+ Returns:
55
+ Command output or None if failed
56
+ """
57
+ try:
58
+ result = subprocess.run(
59
+ ["git"] + command,
60
+ cwd=cwd or Path.cwd(),
61
+ capture_output=True,
62
+ text=True,
63
+ timeout=5,
64
+ )
65
+ if result.returncode == 0:
66
+ return result.stdout.strip()
67
+ return None
68
+ except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
69
+ return None
70
+
71
+
72
def is_git_repo(path: Path | None = None) -> bool:
    """Tell whether ``path`` lies inside a git repository.

    Args:
        path: Directory to probe

    Returns:
        True when ``git rev-parse --git-dir`` succeeds in that directory
    """
    git_dir = run_git_command(["rev-parse", "--git-dir"], cwd=path)
    if git_dir is None:
        return False
    return True
83
+
84
+
85
def get_commit_hash(path: Path | None = None) -> str | None:
    """Return the full hash of ``HEAD``.

    Args:
        path: Repository path

    Returns:
        Output of ``git rev-parse HEAD``, or None if the command failed
    """
    git_args = ["rev-parse", "HEAD"]
    commit_hash = run_git_command(git_args, cwd=path)
    return commit_hash
95
+
96
+
97
def get_short_commit_hash(path: Path | None = None) -> str | None:
    """Return the abbreviated hash of ``HEAD``.

    Args:
        path: Repository path

    Returns:
        Output of ``git rev-parse --short HEAD``, or None if the command failed
    """
    git_args = ["rev-parse", "--short", "HEAD"]
    short_hash = run_git_command(git_args, cwd=path)
    return short_hash
107
+
108
+
109
def get_branch_name(path: Path | None = None) -> str | None:
    """Return the abbreviated ref name of ``HEAD``.

    Args:
        path: Repository path

    Returns:
        Output of ``git rev-parse --abbrev-ref HEAD``, or None if the command
        failed
    """
    git_args = ["rev-parse", "--abbrev-ref", "HEAD"]
    branch = run_git_command(git_args, cwd=path)
    return branch
119
+
120
+
121
def is_dirty(path: Path | None = None) -> bool:
    """Tell whether the working tree has uncommitted changes.

    Args:
        path: Repository path

    Returns:
        True when ``git status --porcelain`` produced any output; False when
        it produced none or the command failed
    """
    porcelain = run_git_command(["status", "--porcelain"], cwd=path)
    if porcelain:
        return True
    return False
132
+
133
+
134
def get_remote_url(path: Path | None = None, remote: str = "origin") -> str | None:
    """Return the configured URL of a remote.

    Args:
        path: Repository path
        remote: Name of the remote to look up

    Returns:
        Value of the ``remote.<remote>.url`` config key, or None if the
        lookup failed
    """
    config_key = f"remote.{remote}.url"
    url = run_git_command(["config", "--get", config_key], cwd=path)
    return url
145
+
146
+
147
def get_commit_author(path: Path | None = None) -> str | None:
    """Return the author of the most recent commit.

    Args:
        path: Repository path

    Returns:
        Author formatted as ``Name <email>``, or None if the command failed
    """
    author_format = "--format=%an <%ae>"
    author = run_git_command(["log", "-1", author_format], cwd=path)
    return author
157
+
158
+
159
def get_commit_message(path: Path | None = None) -> str | None:
    """Return the subject line of the most recent commit.

    Args:
        path: Repository path

    Returns:
        Commit subject (``%s``), or None if the command failed
    """
    subject_format = "--format=%s"
    subject = run_git_command(["log", "-1", subject_format], cwd=path)
    return subject
169
+
170
+
171
def get_commit_time(path: Path | None = None) -> str | None:
    """Return the committer date of the most recent commit.

    Args:
        path: Repository path

    Returns:
        Committer date as printed by git's ``%ci`` placeholder, or None if
        the command failed
    """
    date_format = "--format=%ci"
    committed_at = run_git_command(["log", "-1", date_format], cwd=path)
    return committed_at
181
+
182
+
183
def get_diff(path: Path | None = None, staged: bool = False) -> str | None:
    """Return the output of ``git diff`` for the repository.

    Args:
        path: Repository path
        staged: When True, pass ``--cached`` so only staged changes are shown

    Returns:
        Stripped diff text, or None if the command failed
    """
    git_args = ["diff", "--cached"] if staged else ["diff"]
    return run_git_command(git_args, cwd=path)
198
+
199
+
200
def get_git_info(path: Path | None = None) -> GitInfo:
    """Collect commit, branch, remote and dirty-state information.

    Args:
        path: Repository path

    Returns:
        A populated GitInfo, or an empty GitInfo when ``path`` is not inside
        a git repository
    """
    if is_git_repo(path):
        # Keyword arguments are evaluated in the order written, so the git
        # commands run in the same sequence as the fields are listed.
        details = GitInfo(
            commit_hash=get_commit_hash(path),
            branch=get_branch_name(path),
            is_dirty=is_dirty(path),
            remote_url=get_remote_url(path),
            author=get_commit_author(path),
            commit_message=get_commit_message(path),
            commit_time=get_commit_time(path),
        )
    else:
        details = GitInfo()
    return details
221
+
222
+
223
def save_git_snapshot(output_dir: Path, path: Path | None = None) -> None:
    """Write the repository's git state into ``output_dir``.

    Creates ``output_dir`` if needed, then writes ``git_info.json`` when a
    commit hash is available and ``git_diff.patch`` when the working tree is
    dirty and ``git diff`` produced output.

    Args:
        output_dir: Directory to save the snapshot in
        path: Repository path
    """
    import json

    output_dir.mkdir(parents=True, exist_ok=True)

    # Save git info
    git_info = get_git_info(path)
    if git_info.is_available:
        with open(output_dir / "git_info.json", "w", encoding="utf-8") as f:
            json.dump(git_info.to_dict(), f, indent=2)

    # Save diff if dirty
    if git_info.is_dirty:
        diff = get_diff(path)
        if diff:
            # Explicit UTF-8 so non-ASCII diff content does not depend on the
            # platform's default encoding.
            with open(output_dir / "git_diff.patch", "w", encoding="utf-8") as f:
                # The diff text arrives stripped; restore the final newline so
                # the last patch line is properly terminated.
                f.write(diff + "\n")
246
+
247
+
248
def get_file_commit_history(
    file_path: str,
    max_count: int = 10,
    path: Path | None = None,
) -> list[dict[str, str]]:
    """Get commit history for a specific file.

    Args:
        file_path: Path to file
        max_count: Maximum number of commits to return
        path: Repository path

    Returns:
        List of dictionaries with the keys ``hash``, ``author_name``,
        ``author_email``, ``timestamp`` and ``message``, in the order git
        prints them. Empty when ``path`` is not a repository, the command
        fails, or there is no history for the file.
    """
    if not is_git_repo(path):
        return []

    # Fields are separated by a NUL byte (git expands %x00). Unlike "|", a
    # NUL cannot occur inside author names or e-mail addresses, so those
    # values can never shift the field boundaries.
    keys = ("hash", "author_name", "author_email", "timestamp", "message")
    log_format = "%x00".join(("%H", "%an", "%ae", "%ci", "%s"))
    output = run_git_command(
        ["log", f"--max-count={max_count}", f"--format={log_format}", "--", file_path],
        cwd=path,
    )
    if not output:
        return []

    commits = []
    for line in output.split("\n"):
        if not line:
            continue
        fields = line.split("\x00", len(keys) - 1)
        # Skip anything that does not have exactly one value per key.
        if len(fields) == len(keys):
            commits.append(dict(zip(keys, fields)))
    return commits
293
+
294
+
295
def get_tags(path: Path | None = None) -> list[str]:
    """Return the names of all tags in the repository.

    Args:
        path: Repository path

    Returns:
        Non-empty lines of ``git tag --list``; an empty list when there are
        no tags or the command failed
    """
    listing = run_git_command(["tag", "--list"], cwd=path)
    if not listing:
        return []

    tags = []
    for name in listing.split("\n"):
        if name:
            tags.append(name)
    return tags
308
+
309
+
310
def get_current_tag(path: Path | None = None) -> str | None:
    """Return the tag that points exactly at the current commit.

    Args:
        path: Repository path

    Returns:
        Output of ``git describe --exact-match --tags``, or None if the
        command failed
    """
    git_args = ["describe", "--exact-match", "--tags"]
    tag = run_git_command(git_args, cwd=path)
    return tag
@@ -0,0 +1,61 @@
1
+ """Logging utilities for flowyml."""
2
+
3
+ import logging
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
+ def setup_logger(
9
+ name: str = "flowyml",
10
+ level: int = logging.INFO,
11
+ log_file: str = None,
12
+ ) -> logging.Logger:
13
+ """Setup logger for flowyml.
14
+
15
+ Args:
16
+ name: Logger name
17
+ level: Logging level
18
+ log_file: Optional log file path
19
+
20
+ Returns:
21
+ Configured logger
22
+ """
23
+ logger = logging.getLogger(name)
24
+ logger.setLevel(level)
25
+
26
+ # Console handler
27
+ console_handler = logging.StreamHandler(sys.stdout)
28
+ console_handler.setLevel(level)
29
+ console_format = logging.Formatter(
30
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
31
+ )
32
+ console_handler.setFormatter(console_format)
33
+ logger.addHandler(console_handler)
34
+
35
+ # File handler if specified
36
+ if log_file:
37
+ log_path = Path(log_file)
38
+ log_path.parent.mkdir(parents=True, exist_ok=True)
39
+
40
+ file_handler = logging.FileHandler(log_file)
41
+ file_handler.setLevel(level)
42
+ file_handler.setFormatter(console_format)
43
+ logger.addHandler(file_handler)
44
+
45
+ return logger
46
+
47
+
48
# Default logger: built once at import time via setup_logger() with its
# default arguments (name "flowyml", INFO level, console output, no log file).
logger = setup_logger()
50
+
51
+
52
def get_logger(name: str = "flowyml") -> logging.Logger:
    """Look up a logger by name.

    Args:
        name: Logger name

    Returns:
        The logger registered under ``name`` in the standard logging module
    """
    requested = logging.getLogger(name)
    return requested
@@ -0,0 +1,314 @@
1
+ """Performance optimization utilities."""
2
+
3
+ import functools
4
+ from typing import Any
5
+ from collections.abc import Callable
6
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
7
+ import multiprocessing as mp
8
+
9
+
10
class LazyValue:
    """Wrapper that defers a computation until its result is first read.

    The wrapped callable runs at most once per successful evaluation; the
    result is stored and returned on every later access.

    Examples:
        >>> lazy = LazyValue(lambda: expensive_computation())
        >>> # Not computed yet
        >>> result = lazy.value  # Now computed
    """

    def __init__(self, compute_func: Callable):
        self._compute_func = compute_func
        self._computed = False
        self._value = None

    @property
    def value(self):
        """Return the result, computing it on first access."""
        if self._computed:
            return self._value

        # The flag is set only after the callable returned without raising.
        self._value = self._compute_func()
        self._computed = True
        return self._value

    def __repr__(self):
        return f"LazyValue(computed={self._value})" if self._computed else "LazyValue(not computed)"
38
+
39
+
40
def lazy_property(func):
    """Turn a method into a property that is evaluated once per instance.

    The result of the first access is stored on the instance under
    ``_lazy_<method name>`` and returned from there afterwards.

    Example:
        >>> class Model:
        ...     @lazy_property
        ...     def expensive_data(self):
        ...         return load_large_dataset()
    """
    cache_attr = f"_lazy_{func.__name__}"
    not_set = object()  # marks "nothing stored on the instance yet"

    @functools.wraps(func)
    def getter(self):
        stored = getattr(self, cache_attr, not_set)
        if stored is not_set:
            setattr(self, cache_attr, func(self))
            stored = getattr(self, cache_attr)
        return stored

    return property(getter)
59
+
60
+
61
+ class ParallelExecutor:
62
+ """Smart parallelization for pipeline steps.
63
+
64
+ Automatically determines best parallelization strategy.
65
+
66
+ Examples:
67
+ >>> executor = ParallelExecutor(max_workers=4)
68
+ >>> results = executor.map_items(process_item, items)
69
+ """
70
+
71
+ def __init__(
72
+ self,
73
+ max_workers: int | None = None,
74
+ mode: str = "thread", # 'thread' or 'process'
75
+ ):
76
+ self.max_workers = max_workers or mp.cpu_count()
77
+ self.mode = mode
78
+
79
+ def map_items(self, func: Callable, items: list) -> list:
80
+ """Parallel map operation.
81
+
82
+ Args:
83
+ func: Function to apply
84
+ items: Items to process
85
+
86
+ Returns:
87
+ List of results
88
+ """
89
+ executor_class = ProcessPoolExecutor if self.mode == "process" else ThreadPoolExecutor
90
+
91
+ with executor_class(max_workers=self.max_workers) as executor:
92
+ results = list(executor.map(func, items))
93
+
94
+ return results
95
+
96
+ def submit(self, func: Callable, *args, **kwargs):
97
+ """Submit a single task."""
98
+ executor = ProcessPoolExecutor(max_workers=1) if self.mode == "process" else ThreadPoolExecutor(max_workers=1)
99
+
100
+ future = executor.submit(func, *args, **kwargs)
101
+ return future
102
+
103
+
104
+ class IncrementalComputation:
105
+ """Incremental computation for data processing.
106
+
107
+ Processes data in chunks and caches intermediate results.
108
+
109
+ Examples:
110
+ >>> computer = IncrementalComputation(chunk_size=1000)
111
+ >>> result = computer.compute(large_dataset, process_chunk)
112
+ """
113
+
114
+ def __init__(
115
+ self,
116
+ chunk_size: int = 1000,
117
+ cache_dir: str = ".flowyml/incremental",
118
+ ):
119
+ self.chunk_size = chunk_size
120
+ from pathlib import Path
121
+
122
+ self.cache_dir = Path(cache_dir)
123
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
124
+
125
+ def compute(
126
+ self,
127
+ data: list,
128
+ func: Callable,
129
+ aggregate_func: Callable | None = None,
130
+ ) -> Any:
131
+ """Compute incrementally over data chunks.
132
+
133
+ Args:
134
+ data: Input data
135
+ func: Function to apply to each chunk
136
+ aggregate_func: Optional function to aggregate chunk results
137
+
138
+ Returns:
139
+ Aggregated result
140
+ """
141
+ results = []
142
+
143
+ # Process in chunks
144
+ for i in range(0, len(data), self.chunk_size):
145
+ chunk = data[i : i + self.chunk_size]
146
+
147
+ # Check cache
148
+ cache_key = f"chunk_{i}"
149
+ cache_file = self.cache_dir / f"{cache_key}.pkl"
150
+
151
+ if cache_file.exists():
152
+ import pickle
153
+
154
+ with open(cache_file, "rb") as f:
155
+ chunk_result = pickle.load(f)
156
+ else:
157
+ # Compute
158
+ chunk_result = func(chunk)
159
+
160
+ # Cache result
161
+ import pickle
162
+
163
+ with open(cache_file, "wb") as f:
164
+ pickle.dump(chunk_result, f)
165
+
166
+ results.append(chunk_result)
167
+
168
+ # Aggregate
169
+ if aggregate_func:
170
+ return aggregate_func(results)
171
+ return results
172
+
173
+
174
class GPUResourceManager:
    """GPU resource management and allocation.

    Detects GPUs through PyTorch and TensorFlow when those packages can be
    imported, and offers helpers for device selection and memory inspection.

    Examples:
        >>> gpu = GPUResourceManager()
        >>> if gpu.has_gpu():
        ...     with gpu.allocate_device(0):
        ...         # Run GPU operations
        ...         pass
    """

    def __init__(self):
        self._check_gpu_availability()

    def _check_gpu_availability(self) -> None:
        """Probe PyTorch and TensorFlow for GPUs and record the findings."""
        try:
            import torch

            self.has_torch = True
            self.torch_available = torch.cuda.is_available()
            self.torch_device_count = torch.cuda.device_count() if self.torch_available else 0
        except ImportError:
            self.has_torch = False
            self.torch_available = False
            self.torch_device_count = 0

        try:
            import tensorflow as tf

            self.has_tf = True
            gpus = tf.config.list_physical_devices("GPU")
            self.tf_available = len(gpus) > 0
            self.tf_device_count = len(gpus)
        except ImportError:
            self.has_tf = False
            self.tf_available = False
            self.tf_device_count = 0

    def has_gpu(self) -> bool:
        """Check if either framework reported a GPU."""
        return self.torch_available or self.tf_available

    def get_device_count(self) -> int:
        """Get the larger of the two frameworks' GPU counts."""
        return max(self.torch_device_count, self.tf_device_count)

    def allocate_device(self, device_id: int = 0):
        """Context manager that points ``CUDA_VISIBLE_DEVICES`` at one GPU.

        On entry the variable is set to ``str(device_id)`` and ``device_id``
        is returned. On exit the value that was present before entry is
        restored, or the variable is removed if it was not set.

        Args:
            device_id: Index of the device to expose

        Returns:
            A context manager object
        """
        import os

        env_var = "CUDA_VISIBLE_DEVICES"

        class DeviceContext:
            def __enter__(ctx_self):
                # Remember the caller's setting so it is not lost on exit.
                ctx_self._previous = os.environ.get(env_var)
                os.environ[env_var] = str(device_id)
                return device_id

            def __exit__(ctx_self, *args):
                if ctx_self._previous is None:
                    os.environ.pop(env_var, None)
                else:
                    os.environ[env_var] = ctx_self._previous

        return DeviceContext()

    def get_memory_info(self, device_id: int = 0) -> dict:
        """Get GPU memory information.

        Args:
            device_id: Device index passed to the PyTorch memory queries

        Returns:
            ``{"available": False}`` when no GPU was detected. Otherwise a
            dict with ``"available": True`` and, when PyTorch sees a GPU, a
            ``"torch"`` entry holding ``allocated`` and ``cached`` values.
        """
        if not self.has_gpu():
            return {"available": False}

        info = {"available": True}

        if self.has_torch:
            import torch

            if self.torch_available:
                info["torch"] = {
                    "allocated": torch.cuda.memory_allocated(device_id),
                    "cached": torch.cuda.memory_reserved(device_id),
                }

        if self.has_tf and self.tf_available:
            # TensorFlow memory info is not collected yet.
            pass

        return info

    def clear_cache(self, device_id: int = 0) -> None:
        """Clear the PyTorch CUDA cache; ``device_id`` is currently unused."""
        if self.has_torch and self.torch_available:
            import torch

            torch.cuda.empty_cache()
267
+
268
+
269
def optimize_dataframe(df, inplace: bool = False):
    """Optimize pandas DataFrame memory usage.

    Integer and float columns are downcast with ``pd.to_numeric``; object
    columns whose share of distinct values is below 50% are converted to the
    ``category`` dtype.

    Args:
        df: DataFrame to optimize
        inplace: When False (default) a copy is optimized and the input is
            left untouched; when True the columns of ``df`` are replaced

    Returns:
        Optimized DataFrame
    """
    import pandas as pd

    if not inplace:
        df = df.copy()

    # Optimize integers
    for col in df.select_dtypes(include=["int"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="integer")

    # Optimize floats
    for col in df.select_dtypes(include=["float"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="float")

    # Convert objects to categories if appropriate. An empty frame has no
    # meaningful uniqueness ratio (and would divide by zero), so skip it.
    num_total = len(df)
    if num_total:
        for col in df.select_dtypes(include=["object"]).columns:
            if df[col].nunique() / num_total < 0.5:  # Less than 50% unique
                df[col] = df[col].astype("category")

    return df
301
+
302
+
303
def batch_iterator(items: list, batch_size: int):
    """Yield consecutive slices of ``items``.

    Args:
        items: List of items
        batch_size: Maximum number of items per batch

    Yields:
        Slices of ``items`` of length ``batch_size``; the last one may be
        shorter
    """
    offsets = range(0, len(items), batch_size)
    yield from (items[offset : offset + batch_size] for offset in offsets)