cloudfs 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudfs-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: cloudfs
3
+ Version: 0.2.0
4
+ Summary: An interface to interact with cloud storage as if it's a local filesystem.
5
+ License-Expression: Apache-2.0
6
+ License-File: LICENSE
7
+ Author: Allen Chou
8
+ Author-email: f1470891079@gmail.com
9
+ Requires-Python: >=3.11,<4.0
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Provides-Extra: all
16
+ Provides-Extra: azure
17
+ Provides-Extra: google
18
+ Provides-Extra: s3
19
+ Requires-Dist: azure-storage-blob (>=12,<13) ; extra == "all"
20
+ Requires-Dist: azure-storage-blob (>=12,<13) ; extra == "azure"
21
+ Requires-Dist: boto3 (>=1.35,<2) ; extra == "all"
22
+ Requires-Dist: boto3 (>=1.35,<2) ; extra == "s3"
23
+ Requires-Dist: google-cloud-storage (>=3,<4) ; extra == "all"
24
+ Requires-Dist: google-cloud-storage (>=3,<4) ; extra == "google"
25
+ Description-Content-Type: text/markdown
26
+
27
+ # CloudFS
28
+
29
+ Cloud storage that works like your local filesystem.
30
+
31
+ CloudFS gives you a `pathlib.Path`-compatible interface for Google Cloud Storage, AWS S3, and Azure Blob Storage.
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install "cloudfs[google]" # Google Cloud Storage
37
+ pip install "cloudfs[s3]" # AWS S3
38
+ pip install "cloudfs[azure]" # Azure Blob Storage
39
+ pip install "cloudfs[all]" # All backends
40
+ ```
41
+
42
+ ## Quick Start
43
+
44
+ ```python
45
+ from cloudfs import Path
46
+
47
+ # Works the same across all backends
48
+ p = Path("gs://my-bucket/data/report.csv") # GCS
49
+ p = Path("s3://my-bucket/data/report.csv") # S3
50
+ p = Path("az://my-container/data/report.csv") # Azure
51
+
52
+ p.write_text("Hello, CloudFS!")
53
+ print(p.read_text()) # Hello, CloudFS!
54
+ print(p.name) # report.csv
55
+ print(p.parent / "other.csv") # az://my-container/data/other.csv
56
+
57
+ for child in p.parent.iterdir():
58
+ print(child)
59
+ ```
60
+
61
+ ## Documentation
62
+
63
+ Full documentation at **[allen2c.github.io/cloudfs](https://allen2c.github.io/cloudfs/)**.
64
+
65
+ ## License
66
+
67
+ Apache 2.0
68
+
@@ -0,0 +1,41 @@
1
+ # CloudFS
2
+
3
+ Cloud storage that works like your local filesystem.
4
+
5
+ CloudFS gives you a `pathlib.Path`-compatible interface for Google Cloud Storage, AWS S3, and Azure Blob Storage.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install "cloudfs[google]" # Google Cloud Storage
11
+ pip install "cloudfs[s3]" # AWS S3
12
+ pip install "cloudfs[azure]" # Azure Blob Storage
13
+ pip install "cloudfs[all]" # All backends
14
+ ```
15
+
16
+ ## Quick Start
17
+
18
+ ```python
19
+ from cloudfs import Path
20
+
21
+ # Works the same across all backends
22
+ p = Path("gs://my-bucket/data/report.csv") # GCS
23
+ p = Path("s3://my-bucket/data/report.csv") # S3
24
+ p = Path("az://my-container/data/report.csv") # Azure
25
+
26
+ p.write_text("Hello, CloudFS!")
27
+ print(p.read_text()) # Hello, CloudFS!
28
+ print(p.name) # report.csv
29
+ print(p.parent / "other.csv") # az://my-container/data/other.csv
30
+
31
+ for child in p.parent.iterdir():
32
+ print(child)
33
+ ```
34
+
35
+ ## Documentation
36
+
37
+ Full documentation at **[allen2c.github.io/cloudfs](https://allen2c.github.io/cloudfs/)**.
38
+
39
+ ## License
40
+
41
+ Apache 2.0
@@ -0,0 +1,5 @@
1
+ from .base import CloudPath as Path
2
+ from .exceptions import CloudOperationError
3
+ from .version import VERSION as __version__
4
+
5
+ __all__ = ["__version__", "Path", "CloudOperationError"]
File without changes
@@ -0,0 +1,483 @@
1
+ """Azure Blob Storage backend for CloudFS.
2
+
3
+ Azure-specific behavior and known differences from local filesystems:
4
+
5
+ Directories:
6
+ Azure Blob Storage has no real directories — only blob names containing
7
+ slashes. Directories are simulated either by placeholder blobs (a zero-byte
8
+ blob named "prefix/") created by mkdir(), or implicitly by blobs that share
9
+ a common prefix.
10
+
11
+ - mkdir() creates a placeholder blob. If all files under a directory are
12
+ unlinked but the placeholder remains, is_dir() still returns True.
13
+ - Writing directly to a sub-path (write_text, write_bytes) never requires
14
+ a prior mkdir(), unlike local filesystems.
15
+ - If a blob named "foo" and another named "foo/bar" both exist, is_file()
16
+ is True but is_dir() is False for "foo": the exact-name blob takes precedence.
17
+
18
+ rmdir():
19
+ Only removes the placeholder blob created by mkdir(). A "virtual" directory
20
+ that was never explicitly created will raise FileNotFoundError even if
21
+ is_dir() returns True.
22
+
23
+ open():
24
+ Read modes download the full blob into memory. Write modes buffer in memory
25
+ and upload on close.
26
+
27
+ rename():
28
+ Implemented as copy + delete. Not atomic — a crash between the two steps
29
+ leaves both source and destination in place.
30
+
31
+ Consistency:
32
+ Azure Blob Storage provides strong consistency for all operations.
33
+
34
+ Performance:
35
+ Each exists(), is_file(), and is_dir() call makes at least one API request.
36
+ Avoid calling them in tight loops; prefer bulk listing via iterdir() or walk().
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import io
42
+ from typing import IO, Any, Generator, Iterator
43
+
44
+ from cloudfs.base import CloudPath
45
+
46
+
47
class AzurePath(CloudPath):
    """pathlib.Path-compatible interface for Azure Blob Storage.

    URI format: az://container/blob
    Credentials: set AZURE_STORAGE_CONNECTION_STRING in environment.

    A path is a container name plus a blob key. The key is stored without
    leading or trailing slashes; an empty key denotes the container root.
    """

    def __init__(self, container: str, key: str = "", _client=None):
        """Create a path for *key* inside *container*.

        Args:
            container: Name of the blob container.
            key: Blob name; surrounding slashes are stripped.
            _client: Optional pre-built service client. When omitted, one is
                created lazily from AZURE_STORAGE_CONNECTION_STRING.
        """
        self._container_name = container
        self._key = key.strip("/") if key else ""
        self.__client = _client

    @property
    def _client(self):
        """Return the service client, building it on first access."""
        if self.__client is None:
            import os

            from azure.storage.blob import BlobServiceClient

            conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
            self.__client = BlobServiceClient.from_connection_string(conn_str)
        return self.__client

    @property
    def _container(self):
        """Return a container client for this path's container."""
        return self._client.get_container_client(self._container_name)

    def _child(self, key: str) -> "AzurePath":
        """Return a path for *key* in the same container, sharing the client."""
        return AzurePath(self._container_name, key, _client=self.__client)

    def __str__(self) -> str:
        if self._key:
            return f"az://{self._container_name}/{self._key}"
        return f"az://{self._container_name}"

    def __repr__(self) -> str:
        return f"AzurePath('{self}')"

    def __eq__(self, other: object) -> bool:
        if isinstance(other, AzurePath):
            return (
                self._container_name == other._container_name
                and self._key == other._key
            )
        return NotImplemented

    def __hash__(self) -> int:
        return hash((self._container_name, self._key))

    # Ordering compares the URI strings.
    def __lt__(self, other: "CloudPath") -> bool:
        return str(self) < str(other)

    def __le__(self, other: "CloudPath") -> bool:
        return str(self) <= str(other)

    def __gt__(self, other: "CloudPath") -> bool:
        return str(self) > str(other)

    def __ge__(self, other: "CloudPath") -> bool:
        return str(self) >= str(other)

    @property
    def drive(self) -> str:
        """Return ``az://<container>``."""
        return f"az://{self._container_name}"

    @property
    def root(self) -> str:
        """Return ``"/"``."""
        return "/"

    @property
    def anchor(self) -> str:
        """Return ``az://<container>/``."""
        return f"az://{self._container_name}/"

    @property
    def name(self) -> str:
        """Return the final key component, or ``""`` for the container root."""
        return self._key.rpartition("/")[2]

    @property
    def stem(self) -> str:
        """Return the name without its last suffix."""
        n = self.name
        idx = n.rfind(".")
        # idx == 0 means a dot-file such as ".bashrc": the dot is not a suffix.
        return n[:idx] if idx > 0 else n

    @property
    def suffix(self) -> str:
        """Return the last dot-suffix of the name, or ``""`` if there is none."""
        n = self.name
        idx = n.rfind(".")
        return n[idx:] if idx > 0 else ""

    @property
    def suffixes(self) -> list[str]:
        """Return every dot-suffix of the name.

        Leading dots are ignored, so ".bashrc" has no suffixes. This keeps the
        result consistent with ``suffix`` and ``stem``.
        """
        pieces = self.name.lstrip(".").split(".")
        return ["." + p for p in pieces[1:]]

    @property
    def parent(self) -> "AzurePath":
        """Return the enclosing path; the container root is its own parent."""
        return self._child(self._key.rpartition("/")[0])

    @property
    def parents(self) -> list["AzurePath"]:
        """Return all ancestors, nearest first, ending with the container root."""
        parts = self._key.split("/") if self._key else []
        return [
            self._child("/".join(parts[:i])) for i in range(len(parts) - 1, -1, -1)
        ]

    @property
    def parts(self) -> tuple[str, ...]:
        """Return the anchor followed by each key component."""
        root = f"az://{self._container_name}/"
        if not self._key:
            return (root,)
        return (root,) + tuple(self._key.split("/"))

    def __truediv__(self, other: str) -> "AzurePath":
        """Return this path extended by *other* (surrounding slashes stripped)."""
        other = str(other).strip("/")
        new_key = f"{self._key}/{other}" if self._key else other
        return self._child(new_key)

    def joinpath(self, *others: str) -> "AzurePath":
        """Return this path extended by each of *others* in turn."""
        result = self
        for part in others:
            result = result / part
        return result

    def with_name(self, name: str) -> "AzurePath":
        """Return a sibling path whose final component is *name*.

        Raises:
            ValueError: If this path is the container root.
        """
        if not self._key:
            raise ValueError("AzurePath has no name component")
        parent_key = self._key.rpartition("/")[0]
        new_key = f"{parent_key}/{name}" if parent_key else name
        return self._child(new_key)

    def with_stem(self, stem: str) -> "AzurePath":
        """Return a sibling path with the stem replaced and the suffix kept."""
        return self.with_name(stem + self.suffix)

    def with_suffix(self, suffix: str) -> "AzurePath":
        """Return a sibling path with the last suffix replaced by *suffix*.

        Raises:
            ValueError: If *suffix* is non-empty and does not start with ".".
        """
        if suffix and not suffix.startswith("."):
            raise ValueError(f"Invalid suffix: {suffix!r}")
        return self.with_name(self.stem + suffix)

    def is_absolute(self) -> bool:
        """Always True."""
        return True

    def resolve(self, strict: bool = False) -> "AzurePath":
        """Return a copy of this path; *strict* is not used."""
        return self._child(self._key)

    def absolute(self) -> "AzurePath":
        """Return a copy of this path."""
        return self._child(self._key)

    def _blob_exists(self) -> bool:
        """Return True if a blob with exactly this key exists."""
        from azure.core.exceptions import ResourceNotFoundError

        try:
            self._container.get_blob_client(self._key).get_blob_properties()
            return True
        except ResourceNotFoundError:
            return False

    def _has_children(self, prefix: str) -> bool:
        """Return True if at least one blob name starts with *prefix*."""
        items = self._container.list_blobs(name_starts_with=prefix)
        return any(True for _ in items)

    def exists(self) -> bool:
        """Return True if this path names a container, a blob, or a blob prefix."""
        if not self._key:
            try:
                self._container.get_container_properties()
                return True
            except Exception:
                # NOTE: any failure here, not only "container not found", is
                # reported as the container not existing.
                return False
        if self._blob_exists():
            return True
        return self._has_children(self._key.rstrip("/") + "/")

    def is_file(self) -> bool:
        """Return True if a blob with exactly this (non-empty) key exists."""
        return bool(self._key) and self._blob_exists()

    def is_dir(self) -> bool:
        """Return True if this path acts as a directory.

        The container root is a directory when it exists. Any other key is a
        directory when no blob has that exact name and at least one blob name
        starts with ``key/``.
        """
        if not self._key:
            return self.exists()
        if self._blob_exists():
            return False
        return self._has_children(self._key.rstrip("/") + "/")

    def samefile(self, other: "CloudPath") -> bool:
        """Return True if *other* is an AzurePath with the same container and key."""
        if not isinstance(other, AzurePath):
            return False
        return self._container_name == other._container_name and self._key == other._key

    def iterdir(self) -> Iterator["AzurePath"]:
        """Yield the immediate children of this path, each name once."""
        from azure.storage.blob import BlobPrefix

        prefix = (self._key.rstrip("/") + "/") if self._key else ""
        seen: set[str] = set()
        for item in self._container.walk_blobs(name_starts_with=prefix, delimiter="/"):
            if isinstance(item, BlobPrefix):
                rel = item.name[len(prefix) :].rstrip("/")
            else:
                rel = item.name[len(prefix) :]
            rel = rel.split("/")[0]
            # An empty rel is this directory's own placeholder blob.
            if rel and rel not in seen:
                seen.add(rel)
                yield self._child(prefix + rel)

    def glob(self, pattern: str) -> Iterator["AzurePath"]:
        """Yield blobs under this path whose relative name matches *pattern*.

        Matching uses ``fnmatch`` on the whole relative name, so ``*`` also
        matches across ``/`` separators.
        """
        import fnmatch

        prefix = (self._key.rstrip("/") + "/") if self._key else ""
        for blob in self._container.list_blobs(name_starts_with=prefix):
            rel = blob.name[len(prefix) :]
            if not rel:
                # This directory's own placeholder blob; never yield self.
                continue
            if fnmatch.fnmatch(rel, pattern):
                yield self._child(blob.name)

    def rglob(self, pattern: str) -> Iterator["AzurePath"]:
        """Yield blobs under this path matching *pattern* at any depth."""
        import fnmatch

        prefix = (self._key.rstrip("/") + "/") if self._key else ""
        for blob in self._container.list_blobs(name_starts_with=prefix):
            rel = blob.name[len(prefix) :]
            if not rel:
                # This directory's own placeholder blob; never yield self.
                continue
            if fnmatch.fnmatch(rel, "**/" + pattern) or fnmatch.fnmatch(rel, pattern):
                yield self._child(blob.name)

    def walk(
        self,
        top_down: bool = True,
        on_error: Any = None,
    ) -> Generator[tuple["AzurePath", list[str], list[str]], None, None]:
        """Yield ``(dirpath, dirnames, filenames)`` for this path and below.

        The whole subtree is listed in one pass and grouped in memory before
        anything is yielded.

        Args:
            top_down: Yield a directory before (True) or after (False) its
                sub-directories.
            on_error: Optional callable invoked with the exception if the
                listing fails; the walk then ends without yielding.
        """
        from collections import defaultdict

        prefix = (self._key.rstrip("/") + "/") if self._key else ""
        # Maps a directory key to its (dirnames, filenames) lists.
        tree: dict[str, tuple[list[str], list[str]]] = defaultdict(lambda: ([], []))
        tree[self._key]  # make sure the top directory has an entry

        try:
            for blob in self._container.list_blobs(name_starts_with=prefix):
                rel = blob.name[len(prefix) :]
                if not rel:
                    continue
                *dir_parts, filename = rel.split("/")
                dir_key = self._key
                for part in dir_parts:
                    parent_key = dir_key
                    dir_key = f"{dir_key}/{part}" if dir_key else part
                    if part not in tree[parent_key][0]:
                        tree[parent_key][0].append(part)
                    tree[dir_key]  # register the directory even if it has no files
                # A name ending in "/" is a directory placeholder, not a file.
                if filename:
                    tree[dir_key][1].append(filename)
        except Exception as e:
            if on_error:
                on_error(e)
            return

        def _yield(
            key: str,
        ) -> Generator[tuple["AzurePath", list[str], list[str]], None, None]:
            dirnames, filenames = tree[key]
            dirpath = self._child(key)
            if top_down:
                yield dirpath, list(dirnames), list(filenames)
                for d in dirnames:
                    child_key = f"{key}/{d}" if key else d
                    yield from _yield(child_key)
            else:
                for d in dirnames:
                    child_key = f"{key}/{d}" if key else d
                    yield from _yield(child_key)
                yield dirpath, list(dirnames), list(filenames)

        yield from _yield(self._key)

    def open(
        self,
        mode: str = "r",
        buffering: int = -1,
        encoding: str | None = None,
        errors: str | None = None,
        newline: str | None = None,
    ) -> IO:
        """Open the blob and return a file-like object.

        Supported modes are "r", "rb", "w" and "wb". Read modes download the
        whole blob first; write modes buffer in memory and upload on close.
        *buffering* is not used. Text modes default to UTF-8.

        Raises:
            ValueError: If *mode* is not one of the supported modes.
        """
        if mode in ("rb", "r"):
            data = self.read_bytes()
            buf = io.BytesIO(data)
            if mode == "r":
                return io.TextIOWrapper(
                    buf,
                    encoding=encoding or "utf-8",
                    errors=errors,
                    newline=newline,
                )
            return buf
        if mode in ("wb", "w"):
            return _AzureWriteBuffer(
                self._container,
                self._key,
                binary=mode == "wb",
                encoding=encoding or "utf-8",
                errors=errors,
                newline=newline,
            )
        raise ValueError(f"Unsupported mode: {mode!r}")

    def read_bytes(self) -> bytes:
        """Download and return the blob's full content."""
        return self._container.download_blob(self._key).readall()

    def read_text(self, encoding: str = "utf-8") -> str:
        """Download the blob and decode it with *encoding*."""
        return self.read_bytes().decode(encoding)

    def write_bytes(self, data: bytes) -> int:
        """Upload *data* as the blob, overwriting it; return the byte count."""
        self._container.upload_blob(self._key, data, overwrite=True)
        return len(data)

    def write_text(self, data: str, encoding: str = "utf-8") -> int:
        """Encode *data* and upload it; return the number of bytes written."""
        return self.write_bytes(data.encode(encoding))

    def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None:
        """Create an empty blob if none exists; *mode* is not used.

        Raises:
            FileExistsError: If the blob exists and *exist_ok* is False.
        """
        if self._blob_exists():
            if not exist_ok:
                raise FileExistsError(str(self))
            return
        self._container.upload_blob(self._key, b"", overwrite=True)

    def unlink(self, missing_ok: bool = False) -> None:
        """Delete the blob.

        Raises:
            FileNotFoundError: If the blob is missing and *missing_ok* is False.
        """
        if not self._blob_exists():
            if missing_ok:
                return
            raise FileNotFoundError(str(self))
        self._container.delete_blob(self._key)

    def rename(self, target: "AzurePath | str") -> "AzurePath":
        """Copy the blob to *target*, delete the source, and return *target*.

        Not atomic: the copy is started first and the source deleted after.
        A string target is parsed as an ``az://`` URI and reuses this path's
        client, so an injected client is not replaced by one built from the
        environment.
        """
        if isinstance(target, str):
            target = AzurePath.from_uri(target, _client=self.__client)
        src_url = self._container.get_blob_client(self._key).url
        dst_blob = target._container.get_blob_client(target._key)
        # NOTE(review): start_copy_from_url may return before the copy has
        # finished; confirm that deleting the source right away is safe.
        dst_blob.start_copy_from_url(src_url)
        self._container.delete_blob(self._key)
        return target

    def replace(self, target: "AzurePath | str") -> "AzurePath":
        """Same as :meth:`rename`."""
        return self.rename(target)

    def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
        """Create a zero-byte ``key/`` placeholder blob; *parents* is not used.

        Raises:
            FileExistsError: If the path already exists and *exist_ok* is False.
        """
        if self.exists():
            if exist_ok:
                return
            raise FileExistsError(str(self))
        placeholder = self._key.rstrip("/") + "/"
        self._container.upload_blob(placeholder, b"", overwrite=True)

    def rmdir(self) -> None:
        """Delete the ``key/`` placeholder blob of an empty directory.

        Raises:
            FileNotFoundError: If no placeholder blob exists for this path.
            OSError: If any other blob name starts with ``key/``.
        """
        from azure.core.exceptions import ResourceNotFoundError

        placeholder = self._key.rstrip("/") + "/"
        blob_client = self._container.get_blob_client(placeholder)
        try:
            blob_client.get_blob_properties()
        except ResourceNotFoundError:
            raise FileNotFoundError(str(self)) from None
        # Stop at the first real child instead of collecting the whole listing.
        has_children = any(
            b.name != placeholder
            for b in self._container.list_blobs(name_starts_with=placeholder)
        )
        if has_children:
            raise OSError(f"Directory not empty: {self}")
        self._container.delete_blob(placeholder)

    def stat(self, follow_symlinks: bool = True) -> "AzureStatResult":
        """Return the blob's properties as a stat result; *follow_symlinks* is not used."""
        props = self._container.get_blob_client(self._key).get_blob_properties()
        return AzureStatResult(props)

    @classmethod
    def from_uri(cls, uri: str, _client=None) -> "AzurePath":
        """Build a path from an ``az://container/key`` URI.

        Raises:
            ValueError: If *uri* does not start with ``az://``.
        """
        if not uri.startswith("az://"):
            raise ValueError(f"Not an Azure URI: {uri!r}")
        container, _, key = uri[len("az://") :].partition("/")
        return cls(container, key, _client=_client)
427
+
428
+
429
+ class _AzureWriteBuffer(io.RawIOBase):
430
+ def __init__(self, container, key, binary, encoding, errors, newline):
431
+ self._container = container
432
+ self._key = key
433
+ self._binary = binary
434
+ self._buf = io.BytesIO()
435
+ self._text_wrapper = None
436
+ if not binary:
437
+ self._text_wrapper = io.TextIOWrapper(
438
+ self._buf, encoding=encoding, errors=errors, newline=newline
439
+ )
440
+
441
+ def write(self, data) -> int:
442
+ if self._text_wrapper:
443
+ return self._text_wrapper.write(data)
444
+ return self._buf.write(data)
445
+
446
+ def close(self) -> None:
447
+ if not self.closed:
448
+ if self._text_wrapper:
449
+ self._text_wrapper.flush()
450
+ self._buf.seek(0)
451
+ self._container.upload_blob(self._key, self._buf.read(), overwrite=True)
452
+ super().close()
453
+
454
+ def __enter__(self):
455
+ return self
456
+
457
+ def __exit__(self, *args):
458
+ self.close()
459
+
460
+
461
class AzureStatResult:
    """stat()-like view over a blob properties mapping.

    Values are read from the wrapped object through its ``get`` method.
    """

    def __init__(self, props):
        self._props = props

    @property
    def st_size(self) -> int:
        """Return the "size" property, or 0 when it is missing or falsy."""
        size = self._props.get("size", 0)
        if not size:
            return 0
        return size

    @property
    def st_mtime(self) -> float:
        """Return the "last_modified" timestamp, or 0.0 when it is unset."""
        modified = self._props.get("last_modified")
        if not modified:
            return 0.0
        return modified.timestamp()

    @property
    def st_ctime(self) -> float:
        """Return the "creation_time" timestamp, falling back to ``st_mtime``."""
        created = self._props.get("creation_time")
        if created:
            return created.timestamp()
        return self.st_mtime

    def __repr__(self) -> str:
        size, mtime, ctime = self.st_size, self.st_mtime, self.st_ctime
        return f"AzureStatResult(st_size={size}, st_mtime={mtime}, st_ctime={ctime})"