flowyml-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/utils/git.py
ADDED
@@ -0,0 +1,319 @@
"""Git integration utilities for experiment tracking."""

import subprocess
from pathlib import Path
from typing import Any


class GitInfo:
    """Git repository information."""

    def __init__(
        self,
        commit_hash: str | None = None,
        branch: str | None = None,
        is_dirty: bool = False,
        remote_url: str | None = None,
        author: str | None = None,
        commit_message: str | None = None,
        commit_time: str | None = None,
    ):
        self.commit_hash = commit_hash
        self.branch = branch
        self.is_dirty = is_dirty
        self.remote_url = remote_url
        self.author = author
        self.commit_message = commit_message
        self.commit_time = commit_time

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "commit_hash": self.commit_hash,
            "branch": self.branch,
            "is_dirty": self.is_dirty,
            "remote_url": self.remote_url,
            "author": self.author,
            "commit_message": self.commit_message,
            "commit_time": self.commit_time,
        }

    @property
    def is_available(self) -> bool:
        """Check if git info is available."""
        return self.commit_hash is not None


def run_git_command(command: list[str], cwd: Path | None = None) -> str | None:
    """Run a git command and return output.

    Args:
        command: Git command as list of strings
        cwd: Working directory

    Returns:
        Command output or None if failed
    """
    try:
        result = subprocess.run(
            ["git"] + command,
            cwd=cwd or Path.cwd(),
            capture_output=True,
            text=True,
            timeout=5,
        )
        if result.returncode == 0:
            return result.stdout.strip()
        return None
    except Exception:  # covers TimeoutExpired, FileNotFoundError, and any other failure
        return None


def is_git_repo(path: Path | None = None) -> bool:
    """Check if directory is a git repository.

    Args:
        path: Directory to check

    Returns:
        True if directory is a git repository
    """
    result = run_git_command(["rev-parse", "--git-dir"], cwd=path)
    return result is not None


def get_commit_hash(path: Path | None = None) -> str | None:
    """Get current commit hash.

    Args:
        path: Repository path

    Returns:
        Commit hash or None
    """
    return run_git_command(["rev-parse", "HEAD"], cwd=path)


def get_short_commit_hash(path: Path | None = None) -> str | None:
    """Get short commit hash.

    Args:
        path: Repository path

    Returns:
        Short commit hash or None
    """
    return run_git_command(["rev-parse", "--short", "HEAD"], cwd=path)


def get_branch_name(path: Path | None = None) -> str | None:
    """Get current branch name.

    Args:
        path: Repository path

    Returns:
        Branch name or None
    """
    return run_git_command(["rev-parse", "--abbrev-ref", "HEAD"], cwd=path)


def is_dirty(path: Path | None = None) -> bool:
    """Check if repository has uncommitted changes.

    Args:
        path: Repository path

    Returns:
        True if repository is dirty
    """
    result = run_git_command(["status", "--porcelain"], cwd=path)
    return bool(result)


def get_remote_url(path: Path | None = None, remote: str = "origin") -> str | None:
    """Get remote repository URL.

    Args:
        path: Repository path
        remote: Remote name

    Returns:
        Remote URL or None
    """
    return run_git_command(["config", "--get", f"remote.{remote}.url"], cwd=path)


def get_commit_author(path: Path | None = None) -> str | None:
    """Get author of current commit.

    Args:
        path: Repository path

    Returns:
        Commit author or None
    """
    return run_git_command(["log", "-1", "--format=%an <%ae>"], cwd=path)


def get_commit_message(path: Path | None = None) -> str | None:
    """Get message of current commit.

    Args:
        path: Repository path

    Returns:
        Commit message or None
    """
    return run_git_command(["log", "-1", "--format=%s"], cwd=path)


def get_commit_time(path: Path | None = None) -> str | None:
    """Get timestamp of current commit.

    Args:
        path: Repository path

    Returns:
        Commit timestamp or None
    """
    return run_git_command(["log", "-1", "--format=%ci"], cwd=path)


def get_diff(path: Path | None = None, staged: bool = False) -> str | None:
    """Get diff of uncommitted changes.

    Args:
        path: Repository path
        staged: Get staged changes only

    Returns:
        Diff output or None
    """
    command = ["diff"]
    if staged:
        command.append("--cached")

    return run_git_command(command, cwd=path)


def get_git_info(path: Path | None = None) -> GitInfo:
    """Get comprehensive git information.

    Args:
        path: Repository path

    Returns:
        GitInfo object with repository information
    """
    if not is_git_repo(path):
        return GitInfo()

    return GitInfo(
        commit_hash=get_commit_hash(path),
        branch=get_branch_name(path),
        is_dirty=is_dirty(path),
        remote_url=get_remote_url(path),
        author=get_commit_author(path),
        commit_message=get_commit_message(path),
        commit_time=get_commit_time(path),
    )


def save_git_snapshot(output_dir: Path, path: Path | None = None) -> None:
    """Save git repository snapshot.

    Args:
        output_dir: Directory to save snapshot
        path: Repository path
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save git info
    git_info = get_git_info(path)
    if git_info.is_available:
        import json

        with open(output_dir / "git_info.json", "w") as f:
            json.dump(git_info.to_dict(), f, indent=2)

    # Save diff if dirty
    if git_info.is_dirty:
        diff = get_diff(path)
        if diff:
            with open(output_dir / "git_diff.patch", "w") as f:
                f.write(diff)


def get_file_commit_history(
    file_path: str,
    max_count: int = 10,
    path: Path | None = None,
) -> list[dict[str, str]]:
    """Get commit history for a specific file.

    Args:
        file_path: Path to file
        max_count: Maximum number of commits to return
        path: Repository path

    Returns:
        List of commit dictionaries
    """
    if not is_git_repo(path):
        return []

    log_format = "%H|%an|%ae|%ci|%s"
    result = run_git_command(
        ["log", f"--max-count={max_count}", f"--format={log_format}", "--", file_path],
        cwd=path,
    )

    if not result:
        return []

    commits = []
    for line in result.split("\n"):
        if not line:
            continue

        parts = line.split("|", 4)
        if len(parts) == 5:
            commits.append(
                {
                    "hash": parts[0],
                    "author_name": parts[1],
                    "author_email": parts[2],
                    "timestamp": parts[3],
                    "message": parts[4],
                },
            )

    return commits


def get_tags(path: Path | None = None) -> list[str]:
    """Get list of git tags.

    Args:
        path: Repository path

    Returns:
        List of tag names
    """
    result = run_git_command(["tag", "--list"], cwd=path)
    if result:
        return [tag for tag in result.split("\n") if tag]
    return []


def get_current_tag(path: Path | None = None) -> str | None:
    """Get tag pointing to current commit.

    Args:
        path: Repository path

    Returns:
        Tag name or None
    """
    return run_git_command(["describe", "--exact-match", "--tags"], cwd=path)
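Taken together, these helpers let flowyml snapshot repository state alongside an experiment run. A minimal usage sketch, assuming the module is importable as flowyml.utils.git (inferred from the file layout above); the runs/exp-001/git output path is illustrative:

from pathlib import Path

from flowyml.utils.git import get_git_info, save_git_snapshot

# Capture repository state for the current working directory
info = get_git_info()
if info.is_available:
    print(f"{info.branch} @ {info.commit_hash} ({'dirty' if info.is_dirty else 'clean'})")
    # Writes git_info.json, plus git_diff.patch when the tree is dirty
    save_git_snapshot(Path("runs/exp-001/git"))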
flowyml/utils/logging.py
ADDED
@@ -0,0 +1,61 @@
"""Logging utilities for flowyml."""

import logging
import sys
from pathlib import Path


def setup_logger(
    name: str = "flowyml",
    level: int = logging.INFO,
    log_file: str | None = None,
) -> logging.Logger:
    """Set up a logger for flowyml.

    Note: each call attaches fresh handlers, so calling this repeatedly
    for the same name will produce duplicate log lines.

    Args:
        name: Logger name
        level: Logging level
        log_file: Optional log file path

    Returns:
        Configured logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_format = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    console_handler.setFormatter(console_format)
    logger.addHandler(console_handler)

    # File handler if specified
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(console_format)
        logger.addHandler(file_handler)

    return logger


# Default logger
logger = setup_logger()


def get_logger(name: str = "flowyml") -> logging.Logger:
    """Get a logger by name.

    Args:
        name: Logger name

    Returns:
        Logger instance
    """
    return logging.getLogger(name)
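Because setup_logger attaches new handlers on every call, the natural pattern is to configure once at process start and fetch the shared logger elsewhere. A minimal sketch; the import path is inferred from the file layout and the log path is illustrative:

import logging

from flowyml.utils.logging import get_logger, setup_logger

setup_logger(level=logging.DEBUG, log_file="logs/flowyml.log")  # configure once
log = get_logger()  # retrieves the same "flowyml" logger elsewhere
log.info("pipeline starting")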
flowyml/utils/performance.py
ADDED
@@ -0,0 +1,314 @@
"""Performance optimization utilities."""

import functools
from typing import Any
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import multiprocessing as mp


class LazyValue:
    """Lazy evaluation wrapper.

    Computes value only when accessed.

    Examples:
        >>> lazy = LazyValue(lambda: expensive_computation())
        >>> # Not computed yet
        >>> result = lazy.value  # Now computed
    """

    def __init__(self, compute_func: Callable):
        self._compute_func = compute_func
        self._value = None
        self._computed = False

    @property
    def value(self):
        """Get the computed value."""
        if not self._computed:
            self._value = self._compute_func()
            self._computed = True
        return self._value

    def __repr__(self):
        if self._computed:
            return f"LazyValue(computed={self._value})"
        return "LazyValue(not computed)"


def lazy_property(func):
    """Decorator for lazy property evaluation.

    Example:
        >>> class Model:
        ...     @lazy_property
        ...     def expensive_data(self):
        ...         return load_large_dataset()
    """
    attr_name = "_lazy_" + func.__name__

    @property
    @functools.wraps(func)
    def wrapper(self):
        if not hasattr(self, attr_name):
            setattr(self, attr_name, func(self))
        return getattr(self, attr_name)

    return wrapper


class ParallelExecutor:
    """Smart parallelization for pipeline steps.

    Automatically determines best parallelization strategy.

    Examples:
        >>> executor = ParallelExecutor(max_workers=4)
        >>> results = executor.map_items(process_item, items)
    """

    def __init__(
        self,
        max_workers: int | None = None,
        mode: str = "thread",  # 'thread' or 'process'
    ):
        self.max_workers = max_workers or mp.cpu_count()
        self.mode = mode

    def map_items(self, func: Callable, items: list) -> list:
        """Parallel map operation.

        Args:
            func: Function to apply
            items: Items to process

        Returns:
            List of results
        """
        executor_class = ProcessPoolExecutor if self.mode == "process" else ThreadPoolExecutor

        with executor_class(max_workers=self.max_workers) as executor:
            results = list(executor.map(func, items))

        return results

    def submit(self, func: Callable, *args, **kwargs):
        """Submit a single task.

        Note: a single-worker executor is created per call and never
        explicitly shut down; the returned future keeps it alive until
        the task completes.
        """
        executor = ProcessPoolExecutor(max_workers=1) if self.mode == "process" else ThreadPoolExecutor(max_workers=1)

        future = executor.submit(func, *args, **kwargs)
        return future


class IncrementalComputation:
    """Incremental computation for data processing.

    Processes data in chunks and caches intermediate results.

    Examples:
        >>> computer = IncrementalComputation(chunk_size=1000)
        >>> result = computer.compute(large_dataset, process_chunk)
    """

    def __init__(
        self,
        chunk_size: int = 1000,
        cache_dir: str = ".flowyml/incremental",
    ):
        self.chunk_size = chunk_size
        from pathlib import Path

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def compute(
        self,
        data: list,
        func: Callable,
        aggregate_func: Callable | None = None,
    ) -> Any:
        """Compute incrementally over data chunks.

        Args:
            data: Input data
            func: Function to apply to each chunk
            aggregate_func: Optional function to aggregate chunk results

        Returns:
            Aggregated result
        """
        import pickle

        results = []

        # Process in chunks
        for i in range(0, len(data), self.chunk_size):
            chunk = data[i : i + self.chunk_size]

            # Check cache; the key is positional only, so the cache
            # directory must be cleared when the data or function changes
            cache_key = f"chunk_{i}"
            cache_file = self.cache_dir / f"{cache_key}.pkl"

            if cache_file.exists():
                with open(cache_file, "rb") as f:
                    chunk_result = pickle.load(f)
            else:
                # Compute
                chunk_result = func(chunk)

                # Cache result
                with open(cache_file, "wb") as f:
                    pickle.dump(chunk_result, f)

            results.append(chunk_result)

        # Aggregate
        if aggregate_func:
            return aggregate_func(results)
        return results


class GPUResourceManager:
    """GPU resource management and allocation.

    Helps manage GPU memory and device placement.

    Examples:
        >>> gpu = GPUResourceManager()
        >>> if gpu.has_gpu():
        ...     with gpu.allocate_device(0):
        ...         # Run GPU operations
        ...         pass
    """

    def __init__(self):
        self._check_gpu_availability()

    def _check_gpu_availability(self) -> None:
        """Check for GPU availability."""
        try:
            import torch

            self.has_torch = True
            self.torch_available = torch.cuda.is_available()
            self.torch_device_count = torch.cuda.device_count() if self.torch_available else 0
        except ImportError:
            self.has_torch = False
            self.torch_available = False
            self.torch_device_count = 0

        try:
            import tensorflow as tf

            self.has_tf = True
            gpus = tf.config.list_physical_devices("GPU")
            self.tf_available = len(gpus) > 0
            self.tf_device_count = len(gpus)
        except ImportError:
            self.has_tf = False
            self.tf_available = False
            self.tf_device_count = 0

    def has_gpu(self) -> bool:
        """Check if GPU is available."""
        return self.torch_available or self.tf_available

    def get_device_count(self) -> int:
        """Get number of available GPUs."""
        return max(self.torch_device_count, self.tf_device_count)

    def allocate_device(self, device_id: int = 0):
        """Context manager to allocate specific GPU."""
        import os

        class DeviceContext:
            def __enter__(ctx_self):
                # Remember any pre-existing value so it can be restored on exit
                ctx_self._previous = os.environ.get("CUDA_VISIBLE_DEVICES")
                os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)
                return device_id

            def __exit__(ctx_self, *args):
                # Restore the previous value rather than unconditionally deleting it
                if ctx_self._previous is None:
                    os.environ.pop("CUDA_VISIBLE_DEVICES", None)
                else:
                    os.environ["CUDA_VISIBLE_DEVICES"] = ctx_self._previous

        return DeviceContext()

    def get_memory_info(self, device_id: int = 0) -> dict:
        """Get GPU memory information."""
        if not self.has_gpu():
            return {"available": False}

        info = {"available": True}

        if self.has_torch:
            import torch

            if self.torch_available:
                info["torch"] = {
                    "allocated": torch.cuda.memory_allocated(device_id),
                    "cached": torch.cuda.memory_reserved(device_id),
                }

        if self.has_tf and self.tf_available:
            # TensorFlow memory info is not reported yet
            pass

        return info

    def clear_cache(self, device_id: int = 0) -> None:
        """Clear GPU cache.

        Note: empty_cache operates on the current device; device_id is
        accepted for API symmetry but not used.
        """
        if self.has_torch and self.torch_available:
            import torch

            torch.cuda.empty_cache()


def optimize_dataframe(df, inplace: bool = False):
    """Optimize pandas DataFrame memory usage.

    Args:
        df: DataFrame to optimize
        inplace: Whether to modify inplace

    Returns:
        Optimized DataFrame
    """
    import pandas as pd

    if not inplace:
        df = df.copy()

    # Optimize integers
    for col in df.select_dtypes(include=["int"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="integer")

    # Optimize floats
    for col in df.select_dtypes(include=["float"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="float")

    # Convert objects to categories if appropriate
    for col in df.select_dtypes(include=["object"]).columns:
        num_unique = df[col].nunique()
        num_total = len(df[col])

        if num_total and num_unique / num_total < 0.5:  # Less than 50% unique
            df[col] = df[col].astype("category")

    return df


def batch_iterator(items: list, batch_size: int):
    """Iterate over items in batches.

    Args:
        items: List of items
        batch_size: Size of each batch

    Yields:
        Batches of items
    """
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]
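As a rough illustration of how these pieces compose, the sketch below batches a workload with batch_iterator and fans each batch across a ParallelExecutor thread pool. The process_item function, the sizes, and the flowyml.utils.performance import path are assumptions inferred from the file layout, not part of the package:

from flowyml.utils.performance import ParallelExecutor, batch_iterator

def process_item(x: int) -> int:
    # Hypothetical per-item work
    return x * x

items = list(range(10_000))
executor = ParallelExecutor(max_workers=4, mode="thread")

results = []
for batch in batch_iterator(items, batch_size=1_000):
    results.extend(executor.map_items(process_item, batch))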