harmony-client 0.1.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,378 @@
1
+ """
2
+ Pure Python file storage implementation supporting local filesystem and S3.
3
+ Based on the Rust file-storage crate API.
4
+ """
5
+
6
+ import os
7
+ import shutil
8
+ import tempfile
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Optional, Union
13
+ from urllib.parse import urlparse
14
+
15
+ try:
16
+ import boto3
17
+ from botocore.client import Config
18
+ from botocore.exceptions import ClientError
19
+
20
+ HAS_BOTO3 = True
21
+ except ImportError:
22
+ HAS_BOTO3 = False
23
+
24
+
25
+ @dataclass
26
+ class FileStorageConfig:
27
+ """Configuration for file storage backends."""
28
+
29
+ @staticmethod
30
+ def local(path: str | Path) -> "LocalFileStorageConfig":
31
+ """Create a local filesystem storage configuration."""
32
+ return LocalFileStorageConfig(path=Path(path))
33
+
34
+ @staticmethod
35
+ def s3(
36
+ bucket: Optional[str] = None,
37
+ prefix: Optional[str] = None,
38
+ region: Optional[str] = None,
39
+ url: Optional[str] = None,
40
+ endpoint: Optional[str] = None,
41
+ force_path_style: bool = False,
42
+ ) -> "S3FileStorageConfig":
43
+ """Create an S3 storage configuration."""
44
+ return S3FileStorageConfig(
45
+ bucket=bucket, prefix=prefix, region=region, url=url, endpoint=endpoint, force_path_style=force_path_style
46
+ )
47
+
48
+ @staticmethod
49
+ def from_url(
50
+ url: str,
51
+ bucket: Optional[str] = None,
52
+ prefix: Optional[str] = None,
53
+ region: Optional[str] = None,
54
+ endpoint: Optional[str] = None,
55
+ force_path_style: bool = False,
56
+ ) -> Union["LocalFileStorageConfig", "S3FileStorageConfig"]:
57
+ """Create a storage configuration from a URL."""
58
+ parsed = urlparse(url)
59
+
60
+ if parsed.scheme == "s3":
61
+ # Extract bucket and prefix from s3:// URL
62
+ bucket_from_url = parsed.netloc
63
+ prefix_from_url = parsed.path.lstrip("/")
64
+
65
+ return S3FileStorageConfig(
66
+ bucket=bucket or bucket_from_url,
67
+ prefix=prefix or prefix_from_url or None,
68
+ region=region, # from env ?
69
+ url=url,
70
+ endpoint=endpoint or os.environ.get("AWS_ENDPOINT_URL_S3"),
71
+ force_path_style=force_path_style or os.environ.get("S3_FORCE_PATH_STYLE") == "true",
72
+ )
73
+ elif parsed.scheme in ("file", "") or parsed.scheme is None:
74
+ # Local file path
75
+ path = parsed.path if parsed.scheme == "file" else url
76
+ return LocalFileStorageConfig(path=Path(path))
77
+ else:
78
+ raise ValueError(f"Unsupported URL scheme: {parsed.scheme}")
79
+
80
+
81
@dataclass
class LocalFileStorageConfig(FileStorageConfig):
    """Settings for the local filesystem backend."""

    # Directory used as the base for relative file paths.
    path: Path
86
+
87
+
88
@dataclass
class S3FileStorageConfig(FileStorageConfig):
    """Settings for the S3 backend.

    Every field has a default, so an instance can be built incrementally.
    """

    # Bucket name.
    bucket: Optional[str] = None
    # Key prefix for stored objects.
    prefix: Optional[str] = None
    # Region name.
    region: Optional[str] = None
    # Source URL this configuration was built from, e.g. an s3:// URL.
    url: Optional[str] = None
    # Custom S3 endpoint URL.
    endpoint: Optional[str] = None
    # Whether to request path-style addressing.
    force_path_style: bool = False
98
+
99
+
100
class StoredFile:
    """Handle that pairs a storage backend with the path of one file in it."""

    def __init__(self, storage: "FileStorage", path: str):
        """Remember the owning backend and the file's path."""
        self.storage, self.path = storage, path

    def read(self) -> bytes:
        """Return the file's bytes by delegating to the owning storage."""
        backend = self.storage
        return backend.read(self.path)

    def __str__(self) -> str:
        """Return the path as formatted by the owning storage."""
        format_path = self.storage._format_path
        return format_path(self.path)
113
+
114
+
115
class FileStorage(ABC):
    """Interface that every storage backend implements."""

    @staticmethod
    def new(config: FileStorageConfig) -> "FileStorage":
        """Build the backend that matches the concrete type of *config*.

        Raises:
            ValueError: If *config* is neither a local nor an S3 configuration.
        """
        if isinstance(config, LocalFileStorageConfig):
            return LocalFileStorage(config)
        if isinstance(config, S3FileStorageConfig):
            return S3FileStorage(config)
        raise ValueError(f"Unsupported config type: {type(config)}")

    @abstractmethod
    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Return the content of *file_path* as bytes."""

    @abstractmethod
    def write(self, local_file_path: str, destination_path: str) -> str:
        """Store the local file at *destination_path* and return its stored URL."""

    @abstractmethod
    def append(self, content: bytes, destination_path: str) -> str:
        """Append *content* to *destination_path* and return its stored URL."""

    @abstractmethod
    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Report whether *file_path* exists in the storage."""

    @abstractmethod
    def mk_url(self, file_path: str) -> str:
        """Build a URL for *file_path* in this storage."""

    @abstractmethod
    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Copy a stored file to a local destination.

        Args:
            file_path: URL of the file to download.
            destination_path: Local path the file is saved to.
            use_raw_path: When True, the S3 path is used as-is without the
                configured prefix (for shared resources such as recipes).
        """

    @abstractmethod
    def _format_path(self, path: str) -> str:
        """Render *path* in the form used for display and return values."""
168
+
169
+
170
class LocalFileStorage(FileStorage):
    """Storage backend backed by a directory on the local filesystem."""

    def __init__(self, config: LocalFileStorageConfig):
        """Adopt ``config.path`` as the base directory, creating it if needed."""
        self.base_path = config.path
        self.base_path.mkdir(parents=True, exist_ok=True)

    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Return the bytes of a stored file.

        ``use_raw_path`` is accepted for interface compatibility and is not
        used by this backend.

        Raises:
            FileNotFoundError: If the resolved path does not exist.
        """
        target = self._resolve_path(file_path)
        try:
            data = target.read_bytes()
        except FileNotFoundError:
            # Re-raise with the caller-supplied path in the message.
            raise FileNotFoundError(f"File not found: {file_path}")
        return data

    def write(self, local_file_path: str, destination_path: str) -> str:
        """Copy *local_file_path* into the storage and return its file:// URL."""
        destination = self._resolve_path(destination_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(Path(local_file_path), destination)
        return self._format_path(destination_path)

    def append(self, content: bytes, destination_path: str) -> str:
        """Append *content* to a stored file, creating it and its parents if absent."""
        target = self._resolve_path(destination_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("ab") as handle:
            handle.write(content)
        return self._format_path(destination_path)

    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Report whether the resolved path exists; ``use_raw_path`` is unused."""
        return self._resolve_path(file_path).exists()

    def mk_url(self, file_path: str) -> str:
        """Prefix *file_path* with ``file://`` without resolving it."""
        return f"file://{file_path}"

    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Copy a stored file to *destination_path* and return that path as a string."""
        source = self._resolve_path(file_path)
        destination = Path(destination_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, destination)
        return str(destination)

    def _resolve_path(self, file_path: str) -> Path:
        """Map a path or file:// URL to a concrete filesystem path.

        Paths starting with ``/`` are returned as-is; anything else is joined
        onto the base directory.
        """
        stripped = file_path.removeprefix("file://")
        return Path(stripped) if stripped.startswith("/") else self.base_path / stripped

    def _format_path(self, path: str) -> str:
        """Return the resolved path as a file:// URL string."""
        return f"file://{self._resolve_path(path)}"
244
+
245
+
246
class S3FileStorage(FileStorage):
    """S3 storage implementation built on a boto3 S3 client."""

    def __init__(self, config: S3FileStorageConfig):
        """Resolve bucket and prefix from *config* and create the S3 client.

        Raises:
            ImportError: If boto3 is not installed.
            ValueError: If no bucket can be determined from the bucket name
                or an s3:// URL.
        """
        if not HAS_BOTO3:
            raise ImportError("boto3 is required for S3 storage. Install with: pip install boto3")

        self.config = config
        # Explicit bucket/prefix win; an s3:// URL only supplies defaults.
        if config.url and config.url.startswith("s3://"):
            parsed = urlparse(config.url)
            bucket = config.bucket or parsed.netloc
            prefix = config.prefix or parsed.path.lstrip("/") or ""
        else:
            bucket = config.bucket
            prefix = config.prefix or ""

        # Fail at construction rather than on the first API call; this also
        # covers an s3:// URL that carries no bucket.
        if not bucket:
            raise ValueError("S3 storage requires either a bucket name or s3:// URL")
        self.bucket = bucket
        self.prefix = prefix

        # Create S3 client
        session = boto3.Session()  # type: ignore[possibly-unbound-variable]

        client_kwargs = {}
        if config.region:
            client_kwargs["region_name"] = config.region
        if config.endpoint:
            client_kwargs["endpoint_url"] = config.endpoint
        if config.force_path_style:
            client_kwargs["config"] = Config(s3={"addressing_style": "path"})  # type: ignore[possibly-unbound-variable]

        self.s3_client = session.client("s3", **client_kwargs)

    def read(self, file_path: str, use_raw_path: bool = False) -> bytes:
        """Read file content from S3.

        Raises:
            FileNotFoundError: If S3 reports ``NoSuchKey`` for the object.
        """
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)

        try:
            response = self.s3_client.get_object(Bucket=self.bucket, Key=s3_key)
            return response["Body"].read()
        except ClientError as e:  # type: ignore[possibly-unbound-variable]
            if e.response["Error"]["Code"] == "NoSuchKey":
                raise FileNotFoundError(f"File not found: {file_path}") from e
            raise

    def write(self, local_file_path: str, destination_path: str) -> str:
        """Upload a local file to S3 and return its s3:// URL."""
        s3_key = self._get_s3_key(destination_path)
        self.s3_client.upload_file(local_file_path, self.bucket, s3_key)

        return self._format_path(destination_path)

    def append(self, content: bytes, destination_path: str) -> str:
        """Append content to a file in S3 and return its s3:// URL.

        This is a read-modify-write of the whole object, so it is not atomic.
        """
        existing_content = self.read(destination_path) if self.exists(destination_path) else b""
        combined_content = existing_content + content

        temp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Leaving the with-block closes (and flushes) the file before upload.
            with temp_file:
                temp_file.write(combined_content)
            return self.write(temp_file.name, destination_path)
        finally:
            # Remove the temp file even when the upload fails.
            os.unlink(temp_file.name)

    def exists(self, file_path: str, use_raw_path: bool = False) -> bool:
        """Check if file exists in S3; errors other than a 404 are re-raised."""
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)

        try:
            self.s3_client.head_object(Bucket=self.bucket, Key=s3_key)
            return True
        except ClientError as e:  # type: ignore[possibly-unbound-variable]
            if e.response["Error"]["Code"] == "404":
                return False
            raise

    def mk_url(self, file_path: str) -> str:
        """Generate an s3:// URL from the bucket and *file_path* (prefix not applied)."""
        return f"s3://{self.bucket}/{file_path}"

    def download_locally(self, file_path: str, destination_path, use_raw_path: bool = False) -> str:
        """Download a file to *destination_path* and return that path as a string."""
        s3_key = self._get_s3_key(file_path, use_raw_path=use_raw_path)
        dest_path = Path(destination_path)
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        self.s3_client.download_file(self.bucket, s3_key, str(dest_path))
        return str(dest_path)

    def _get_s3_key(self, file_path: str, use_raw_path: bool = False) -> str:
        """Convert an s3:// URL to an S3 key.

        Args:
            file_path: The S3 URL (e.g., s3://bucket/path)
            use_raw_path: If True, don't prepend the prefix (for accessing shared resources)

        The prefix is only prepended when:
        1. use_raw_path is False
        2. A prefix is configured
        3. The path doesn't already start with the prefix

        Raises:
            ValueError: If *file_path* is not an s3:// URL or names a bucket
                other than the configured one.
        """
        if not file_path.startswith("s3://"):
            raise ValueError(f"File path {file_path} is not an S3 URL")
        parsed = urlparse(file_path)
        if parsed.netloc != self.bucket:
            raise ValueError(f"File path bucket {parsed.netloc} doesn't match configured bucket {self.bucket}")

        path = parsed.path.lstrip("/")

        if use_raw_path or not self.prefix:
            return path

        prefix_normalized = self.prefix.rstrip("/")
        if not prefix_normalized:
            # A prefix made only of slashes must not yield a key with a leading "/".
            return path

        # URLs returned by write()/append()/_format_path() already contain the
        # prefix; do not prepend it a second time when they are passed back in.
        if path == prefix_normalized or path.startswith(prefix_normalized + "/"):
            return path

        return f"{prefix_normalized}/{path}"

    def _format_path(self, path: str) -> str:
        """Return the s3:// URL of the key that *path* maps to."""
        s3_key = self._get_s3_key(path)
        return f"s3://{self.bucket}/{s3_key}"