harmony-client 0.1.0 (cp312-cp312-win_amd64.whl)
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- harmony_client/__init__.py +78 -0
- harmony_client/artifacts/__init__.py +5 -0
- harmony_client/artifacts/custom_artifact.py +46 -0
- harmony_client/artifacts/dataset_artifact.py +268 -0
- harmony_client/artifacts/model_artifact.py +34 -0
- harmony_client/file_storage.py +378 -0
- harmony_client/harmony_client.cp312-win_amd64.pyd +0 -0
- harmony_client/harmony_client.pyi +1615 -0
- harmony_client/internal/__init__.py +7 -0
- harmony_client/internal/eval_samples_html.py +122 -0
- harmony_client/internal/utils.py +9 -0
- harmony_client/logging_table.py +121 -0
- harmony_client/parameters/__init__.py +295 -0
- harmony_client/parameters/dataset_kinds.py +49 -0
- harmony_client/parameters/model_kinds.py +13 -0
- harmony_client/py.typed +0 -0
- harmony_client/runtime/__init__.py +29 -0
- harmony_client/runtime/context.py +191 -0
- harmony_client/runtime/data.py +76 -0
- harmony_client/runtime/decorators.py +19 -0
- harmony_client/runtime/dto/AdaptiveDataset.py +23 -0
- harmony_client/runtime/dto/AdaptiveGrader.py +68 -0
- harmony_client/runtime/dto/AdaptiveModel.py +19 -0
- harmony_client/runtime/dto/DatasetSampleFormats.py +93 -0
- harmony_client/runtime/dto/__init__.py +2 -0
- harmony_client/runtime/dto/base.py +7 -0
- harmony_client/runtime/model_artifact_save.py +23 -0
- harmony_client/runtime/runner.py +368 -0
- harmony_client/runtime/simple_notifier.py +21 -0
- harmony_client-0.1.0.dist-info/METADATA +38 -0
- harmony_client-0.1.0.dist-info/RECORD +32 -0
- harmony_client-0.1.0.dist-info/WHEEL +4 -0
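Since a wheel is an ordinary zip archive, the file listing above can be reproduced from a local copy of the package. A minimal sketch (the wheel filename below is the conventional normalized form and is an assumption, not taken from this page):

import zipfile

# Hypothetical local copy of the wheel; .whl files are plain zip archives.
WHEEL_PATH = "harmony_client-0.1.0-cp312-cp312-win_amd64.whl"

with zipfile.ZipFile(WHEEL_PATH) as whl:
    for info in whl.infolist():
        print(f"{info.filename:60s} {info.file_size:>10d} bytes")

Of the files listed, only harmony_client/file_storage.py (+378 lines) is reproduced in full below; the compiled extension module (.pyd) is binary and has no textual diff.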
harmony_client/file_storage.py
@@ -0,0 +1,378 @@
"""
Pure Python file storage implementation supporting local filesystem and S3.
Based on the Rust file-storage crate API.
"""

import os
import shutil
import tempfile
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse

try:
    import boto3
    from botocore.client import Config
    from botocore.exceptions import ClientError

    HAS_BOTO3 = True
except ImportError:
    HAS_BOTO3 = False


@dataclass
class FileStorageConfig:
    """Configuration for file storage backends."""

    @staticmethod
    def local(path: str | Path) -> "LocalFileStorageConfig":
        """Create a local filesystem storage configuration."""
        return LocalFileStorageConfig(path=Path(path))

    @staticmethod
    def s3(
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
        region: Optional[str] = None,
        url: Optional[str] = None,
        endpoint: Optional[str] = None,
        force_path_style: bool = False,
    ) -> "S3FileStorageConfig":
        """Create an S3 storage configuration."""
        return S3FileStorageConfig(
            bucket=bucket, prefix=prefix, region=region, url=url, endpoint=endpoint, force_path_style=force_path_style
        )

    @staticmethod
    def from_url(
        url: str,
        bucket: Optional[str] = None,
        prefix: Optional[str] = None,
        region: Optional[str] = None,
        endpoint: Optional[str] = None,
        force_path_style: bool = False,
    ) -> Union["LocalFileStorageConfig", "S3FileStorageConfig"]:
        """Create a storage configuration from a URL."""
        parsed = urlparse(url)

        if parsed.scheme == "s3":
            # Extract bucket and prefix from s3:// URL
            bucket_from_url = parsed.netloc
            prefix_from_url = parsed.path.lstrip("/")

            return S3FileStorageConfig(
                bucket=bucket or bucket_from_url,
                prefix=prefix or prefix_from_url or None,
                region=region,  # from env ?
                url=url,
                endpoint=endpoint or os.environ.get("AWS_ENDPOINT_URL_S3"),
                force_path_style=force_path_style or os.environ.get("S3_FORCE_PATH_STYLE") == "true",
            )
        elif parsed.scheme in ("file", "") or parsed.scheme is None:
            # Local file path
            path = parsed.path if parsed.scheme == "file" else url
            return LocalFileStorageConfig(path=Path(path))
        else:
            raise ValueError(f"Unsupported URL scheme: {parsed.scheme}")


@dataclass
class LocalFileStorageConfig(FileStorageConfig):
    """Configuration for local filesystem storage."""

    path: Path


@dataclass
class S3FileStorageConfig(FileStorageConfig):
    """Configuration for S3 storage."""

    bucket: Optional[str] = None
    prefix: Optional[str] = None
    region: Optional[str] = None
    url: Optional[str] = None
    endpoint: Optional[str] = None
    force_path_style: bool = False


class StoredFile:
    """Represents a file stored in the storage backend."""

    def __init__(self, storage: "FileStorage", path: str):
        self.storage = storage
        self.path = path

    def read(self) -> bytes:
        """Read the file content."""
        return self.storage.read(self.path)

    def __str__(self) -> str:
        return self.storage._format_path(self.path)


class FileStorage(ABC):
    """Abstract base class for file storage backends."""

    @staticmethod
    def new(config: FileStorageConfig) -> "FileStorage":
        """Create a new storage instance from configuration."""
        if isinstance(config, LocalFileStorageConfig):
            return LocalFileStorage(config)
        elif isinstance(config, S3FileStorageConfig):
            return S3FileStorage(config)
        else:
            raise ValueError(f"Unsupported config type: {type(config)}")

    @abstractmethod
    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Read file content and return as bytes."""
        pass

    @abstractmethod
    def write(self, local_file_path: str, destination_path: str) -> str:
        """Write a local file to the storage and return the stored file URL."""
        pass

    @abstractmethod
    def append(self, content: bytes, destination_path: str) -> str:
        """Append content to a file and return the stored file URL."""
        pass

    @abstractmethod
    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Check if a file exists in the storage."""
        pass

    @abstractmethod
    def mk_url(self, file_path: str) -> str:
        """Generate a URL for a file in the storage."""
        pass

    @abstractmethod
    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Download a file to the destination path.

        Args:
            file_path: The file URL to download
            destination_path: Local path where the file should be saved
            use_raw_path: If True, use the S3 path as-is without prepending prefix (for accessing shared resources like recipes)
        """
        pass

    @abstractmethod
    def _format_path(self, path: str) -> str:
        """Format a path for display/return."""
        pass


class LocalFileStorage(FileStorage):
    """Local filesystem storage implementation."""

    def __init__(self, config: LocalFileStorageConfig):
        self.base_path = config.path
        self.base_path.mkdir(parents=True, exist_ok=True)

    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Read file content from local storage."""
        full_path = self._resolve_path(file_path)
        try:
            return full_path.read_bytes()
        except FileNotFoundError:
            raise FileNotFoundError(f"File not found: {file_path}")

    def write(self, local_file_path: str, destination_path: str) -> str:
        """Copy a local file to the storage."""
        src_path = Path(local_file_path)
        dest_path = self._resolve_path(destination_path)

        # Create parent directories
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # Copy the file
        shutil.copy2(src_path, dest_path)

        return self._format_path(destination_path)

    def append(self, content: bytes, destination_path: str) -> str:
        """Append content to a file in local storage."""
        full_path = self._resolve_path(destination_path)

        # Create parent directories
        full_path.parent.mkdir(parents=True, exist_ok=True)

        # Append content
        with open(full_path, "ab") as f:
            f.write(content)

        return self._format_path(destination_path)

    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Check if file exists in local storage."""
        full_path = self._resolve_path(file_path)
        return full_path.exists()

    def mk_url(self, file_path: str) -> str:
        """Generate a URL for a file in the storage."""
        return f"file://{file_path}"

    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Download a file to the destination path"""
        full_path = self._resolve_path(file_path)
        dest_path = Path(destination_path)
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(full_path, dest_path)
        return str(dest_path)

    def _resolve_path(self, file_path: str) -> Path:
        """Resolve a relative file path to absolute path within base directory."""
        # Handle file:// URLs
        if file_path.startswith("file://"):
            file_path = file_path[7:]

        # Handle absolute paths
        if file_path.startswith("/"):
            return Path(file_path)

        return self.base_path / file_path

    def _format_path(self, path: str) -> str:
        """Format path for display."""
        full_path = self._resolve_path(path)
        return f"file://{full_path}"


class S3FileStorage(FileStorage):
    """S3 storage implementation."""

    def __init__(self, config: S3FileStorageConfig):
        if not HAS_BOTO3:
            raise ImportError("boto3 is required for S3 storage. Install with: pip install boto3")

        self.config = config
        # Determine bucket and prefix
        if config.url and config.url.startswith("s3://"):
            parsed = urlparse(config.url)
            self.bucket = config.bucket or parsed.netloc
            self.prefix = config.prefix or parsed.path.lstrip("/") or ""
        else:
            if not config.bucket:
                raise ValueError("S3 storage requires either a bucket name or s3:// URL")
            self.bucket = config.bucket
            self.prefix = config.prefix or ""

        # Create S3 client
        session = boto3.Session()  # type: ignore[possibly-unbound-variable]

        client_kwargs = {}
        if config.region:
            client_kwargs["region_name"] = config.region
        if config.endpoint:
            client_kwargs["endpoint_url"] = config.endpoint
        if config.force_path_style:
            client_kwargs["config"] = Config(s3={"addressing_style": "path"})  # type: ignore[possibly-unbound-variable]

        self.s3_client = session.client("s3", **client_kwargs)

    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Read file content from S3."""
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)

        try:
            response = self.s3_client.get_object(Bucket=self.bucket, Key=s3_key)
            return response["Body"].read()
        except ClientError as e:  # type: ignore[possibly-unbound-variable]
            if e.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError(f"File not found: {file_path}")
            raise

    def write(self, local_file_path: str, destination_path: str) -> str:
        """Upload a local file to S3."""
        s3_key = self._get_s3_key(destination_path)
        self.s3_client.upload_file(local_file_path, self.bucket, s3_key)

        return self._format_path(destination_path)

    def append(self, content: bytes, destination_path: str) -> str:
        """Append content to a file in S3 (read-modify-write)."""
        # S3 doesn't support native append, so we need to read-modify-write
        existing_content = b""

        if self.exists(destination_path):
            existing_content = self.read(destination_path)

        # Combine existing content with new content
        combined_content = existing_content + content

        # Write to a temporary file and upload
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file.write(combined_content)
            temp_file.flush()

        result = self.write(temp_file.name, destination_path)

        # Clean up temp file
        os.unlink(temp_file.name)

        return result

    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Check if file exists in S3."""
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)

        try:
            self.s3_client.head_object(Bucket=self.bucket, Key=s3_key)
            return True
        except ClientError as e:  # type: ignore[possibly-unbound-variable]
            if e.response["Error"]["Code"] == "404":
                return False
            raise

    def mk_url(self, file_path: str) -> str:
        """Generate a URL for a file in the storage."""
        return f"s3://{self.bucket}/{file_path}"

    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Download a file to the destination path"""
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)
        dest_path = Path(destination_path)
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        self.s3_client.download_file(self.bucket, s3_key, str(dest_path))
        return str(dest_path)

    def _get_s3_key(self, file_path: str, use_raw_path: bool = False) -> str:
        """Convert file path to S3 key.

        Args:
            file_path: The S3 URL (e.g., s3://bucket/path)
            use_raw_path: If True, don't prepend the prefix (for accessing shared resources)

        The prefix is only prepended when:
        1. use_raw_path is False
        2. A prefix is configured
        3. The path doesn't already start with the prefix
        """
        # Handle non s3:// URLs
        if not file_path.startswith("s3://"):
            raise ValueError(f"File path {file_path} is not an S3 URL")
        parsed = urlparse(file_path)
        if parsed.netloc != self.bucket:
            raise ValueError(f"File path bucket {parsed.netloc} doesn't match configured bucket {self.bucket}")

        path = parsed.path.lstrip("/")

        if use_raw_path:
            return path

        # Only prepend prefix if we have one and the path doesn't already start with it
        if self.prefix:
            prefix_normalized = self.prefix.rstrip("/")
            return f"{prefix_normalized}/{path}"

        return path

    def _format_path(self, path: str) -> str:
        """Format path for display."""
        s3_key = self._get_s3_key(path)
        return f"s3://{self.bucket}/{s3_key}"
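For orientation, a minimal usage sketch of the file-storage API shown above. The paths and bucket name are hypothetical, Unix-style paths are assumed, and the S3 branch additionally requires boto3 plus valid AWS credentials; the behavior is inferred from the module in this diff rather than from package documentation.

from pathlib import Path

from harmony_client.file_storage import FileStorage, FileStorageConfig

# Local backend: relative destination paths resolve under the configured base directory.
local = FileStorage.new(FileStorageConfig.local("/tmp/harmony-demo"))  # hypothetical base dir

src = Path("/tmp/hello.txt")  # hypothetical input file
src.write_text("hello")

stored_url = local.write(str(src), "runs/run-1/hello.txt")
print(stored_url)                           # file:///tmp/harmony-demo/runs/run-1/hello.txt
print(local.read("runs/run-1/hello.txt"))   # b'hello'

# S3 backend: read/write/exists expect full s3:// URLs in the configured bucket,
# and the configured prefix ("team-prefix") is prepended to the object key.
s3 = FileStorage.new(FileStorageConfig.from_url("s3://example-bucket/team-prefix"))  # hypothetical bucket
s3.write(str(src), "s3://example-bucket/runs/run-1/hello.txt")
print(s3.exists("s3://example-bucket/runs/run-1/hello.txt"))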