obsideo-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- obsideo/__init__.py +1 -0
- obsideo/__main__.py +4 -0
- obsideo/cli.py +568 -0
- obsideo/manifest.py +59 -0
- obsideo/sync.py +122 -0
- obsideo_cli-0.2.0.dist-info/METADATA +95 -0
- obsideo_cli-0.2.0.dist-info/RECORD +17 -0
- obsideo_cli-0.2.0.dist-info/WHEEL +5 -0
- obsideo_cli-0.2.0.dist-info/entry_points.txt +3 -0
- obsideo_cli-0.2.0.dist-info/top_level.txt +2 -0
- obsideo_core/__init__.py +4 -0
- obsideo_core/config.py +120 -0
- obsideo_core/crypto.py +50 -0
- obsideo_core/identity.py +36 -0
- obsideo_core/login.py +58 -0
- obsideo_core/names.py +65 -0
- obsideo_core/storage.py +265 -0
obsideo_core/storage.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Obsideo storage seam — S3 to the Obsideo gateway (external passthrough).
|
|
2
|
+
|
|
3
|
+
The gateway stores bytes verbatim and holds no keys; the client encrypts before
|
|
4
|
+
calling here (see crypto.py), so the gateway/coord/providers see ciphertext only
|
|
5
|
+
(Principle 1). Objects land on three independent providers (RF=3) via the coord.
|
|
6
|
+
|
|
7
|
+
This is the shared core both the general `obsideo` CLI and the `mlvault` extension
|
|
8
|
+
build on. It generalizes the original mlvault seam with the browse operations a
|
|
9
|
+
file manager needs: list (prefix + delimiter), delete, head, mkdir-marker.
|
|
10
|
+
|
|
11
|
+
Gateway constraints engineered around:
|
|
12
|
+
* No HTTP Range — downloads use a single full-object GET (never
|
|
13
|
+
download_file/download_fileobj, which issue ranged multipart GETs).
|
|
14
|
+
* Path-style only; SigV4; ListObjectsV2 only.
|
|
15
|
+
* Uploads may be multipart (PUT parts, no Range).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from obsideo_core import config
|
|
22
|
+
|
|
23
|
+
# Defaults used when the matching OBSIDEO_S3_* environment variable is unset.
_DEFAULT_ENDPOINT = "https://s3.obsideo.io"
_DEFAULT_REGION = "us-east-1"

# Multipart threshold and part size for uploads: 16 MiB.
_MULTIPART_CHUNK = 16 * (1024 ** 2)

# The gateway rejects empty-body PUTs, so an empty folder is represented by a
# small non-empty placeholder object with this name instead of a zero-byte key.
_FOLDER_MARKER = ".keep"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _names_on() -> bool:
    """Return the `encrypt_names` setting from the loaded config (default True)."""
    settings = config.load_config()
    return settings.get("encrypt_names", True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _skey(key: str) -> str:
    """Translate a real path key into the key used on the server.

    When name-encryption is enabled and `key` is non-empty, the key is passed
    through `names.encrypt_path`; otherwise it is returned unchanged.
    """
    if key and _names_on():
        # Imported lazily, only when encryption is actually needed.
        from obsideo_core import names
        return names.encrypt_path(key)
    return key
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class StorageConfigError(EnvironmentError):
    """Signals that storage cannot be used as configured.

    Raised in this module when the Obsideo access/secret keys are missing or
    empty, and when boto3 cannot be imported.
    """
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _endpoint() -> str:
    """Return the S3 endpoint URL: OBSIDEO_S3_ENDPOINT if set, else the default."""
    url = os.environ.get("OBSIDEO_S3_ENDPOINT", _DEFAULT_ENDPOINT)
    return url
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _region() -> str:
    """Return the signing region: OBSIDEO_S3_REGION if set, else the default."""
    region_name = os.environ.get("OBSIDEO_S3_REGION", _DEFAULT_REGION)
    return region_name
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def bucket() -> str:
    """Return the bucket name to operate on.

    A non-empty OBSIDEO_S3_BUCKET environment variable wins; otherwise the
    `bucket` entry of the loaded config is used, defaulting to "obsideo".
    """
    from_env = os.environ.get("OBSIDEO_S3_BUCKET")
    if from_env:
        return from_env
    return config.load_config().get("bucket", "obsideo")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _require_credentials() -> tuple[str, str]:
    """Return the (access key, secret key) pair read from the environment.

    Reads OBSIDEO_S3_ACCESS_KEY and OBSIDEO_S3_SECRET_KEY.

    Raises:
        StorageConfigError: if either variable is unset or empty.
    """
    ak = os.environ.get("OBSIDEO_S3_ACCESS_KEY")
    sk = os.environ.get("OBSIDEO_S3_SECRET_KEY")
    if not ak or not sk:
        # NOTE(review): the previous text read "(5 GB... actually 3 GB free)",
        # an editing artifact; 3 GB is taken from that wording — confirm the
        # actual free-tier size.
        raise StorageConfigError(
            "You're not logged in. Run `obsideo login` to get started "
            "(3 GB free), or set OBSIDEO_S3_ACCESS_KEY / OBSIDEO_S3_SECRET_KEY."
        )
    return ak, sk
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Module-level cache for the S3 client: populated by _s3() on first use and
# cleared by reset_client().
_client = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _s3():
    """Return the cached S3 client, constructing it on first call.

    The client targets the configured endpoint with path-style addressing and
    SigV4 signing, using credentials from `_require_credentials()`.

    Raises:
        StorageConfigError: if boto3 is not installed or credentials are missing.
    """
    global _client
    if _client is None:
        try:
            import boto3
            from botocore.config import Config
        except ImportError as e:  # pragma: no cover
            raise StorageConfigError("boto3 is required. pip install boto3") from e

        access_key, secret_key = _require_credentials()
        common = {
            "region_name": _region(),
            "signature_version": "s3v4",
            "s3": {"addressing_style": "path"},
            "retries": {"max_attempts": 3, "mode": "standard"},
        }
        # boto3 >=1.36 adds CRC32 checksum trailers by default, which the
        # passthrough gateway doesn't validate and which break SigV4. Ask for
        # checksums only "when_required" where the option exists; older
        # botocore rejects the unknown keyword with TypeError (and does not
        # have the problematic default), so fall back to the plain config.
        try:
            client_config = Config(
                request_checksum_calculation="when_required",
                response_checksum_validation="when_required",
                **common,
            )
        except TypeError:
            client_config = Config(**common)

        _client = boto3.client(
            "s3",
            endpoint_url=_endpoint(),
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            config=client_config,
        )
    return _client
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def reset_client() -> None:
    """Forget the cached S3 client so the next `_s3()` call builds a new one.

    Intended for use after credentials change (e.g. following a login).
    """
    global _client
    _client = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def ensure_bucket() -> None:
    """Make sure the configured bucket exists, creating it if necessary.

    A successful HEAD on the bucket ends the call. Any ClientError from the
    HEAD is treated as "try to create it"; a create that fails because the
    bucket already exists is ignored, and any other ClientError propagates.
    """
    from botocore.exceptions import ClientError

    client = _s3()
    name = bucket()

    try:
        client.head_bucket(Bucket=name)
    except ClientError:
        pass  # fall through and attempt creation
    else:
        return

    try:
        client.create_bucket(Bucket=name)
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code", "")
        if error_code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
            return
        raise
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ── Object ops ──────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
def put(key: str, data: bytes) -> str:
    """Upload `data` under `key` and return `key`.

    The bucket is ensured first. The upload goes through `upload_fileobj`
    with the module's multipart threshold/part size, and the key is mapped
    through `_skey` before being sent.
    """
    import io
    from boto3.s3.transfer import TransferConfig

    client = _s3()
    ensure_bucket()
    transfer_cfg = TransferConfig(
        multipart_threshold=_MULTIPART_CHUNK,
        multipart_chunksize=_MULTIPART_CHUNK,
    )
    payload = io.BytesIO(data)
    client.upload_fileobj(payload, bucket(), _skey(key), Config=transfer_cfg)
    return key
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def upload_file(local_path: Path, key: str) -> str:
    """Upload the file at `local_path` under `key` and return `key`.

    The bucket is ensured first. The file is opened in binary mode and
    streamed through `upload_fileobj` with the module's multipart settings;
    the key is mapped through `_skey` before being sent.
    """
    from boto3.s3.transfer import TransferConfig

    client = _s3()
    ensure_bucket()
    transfer_cfg = TransferConfig(
        multipart_threshold=_MULTIPART_CHUNK,
        multipart_chunksize=_MULTIPART_CHUNK,
    )
    with open(local_path, "rb") as source:
        client.upload_fileobj(source, bucket(), _skey(key), Config=transfer_cfg)
    return key
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def get(key: str) -> bytes:
    """Fetch the whole object stored under `key` with one GET (no Range).

    Raises:
        FileNotFoundError: the service reported NoSuchKey, 404 or NoSuchBucket.
        RuntimeError: any other ClientError from the request.
    """
    from botocore.exceptions import ClientError

    try:
        body = _s3().get_object(Bucket=bucket(), Key=_skey(key))["Body"]
        return body.read()
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code", "")
        if error_code not in ("NoSuchKey", "404", "NoSuchBucket"):
            raise RuntimeError(f"Download failed for '{key}': {err}") from err
        raise FileNotFoundError(f"Not found: {key}") from err
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def download_file(key: str, local_path: Path | str) -> None:
    """Download the object at `key` and write it to `local_path`.

    The object is fetched in full via `get()` before anything is written, so
    a failed download does not create or truncate the destination. Missing
    parent directories are created.

    Args:
        key: Real (unencrypted) object key.
        local_path: Destination file; a `Path` or a plain string path.

    Raises:
        FileNotFoundError, RuntimeError: propagated from `get()`.
    """
    data = get(key)
    # Accept str as well as Path, matching upload_file (which only needs open()).
    target = Path(local_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(data)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def delete(key: str) -> None:
    """Issue a delete_object request for `key` (mapped through `_skey`)."""
    client = _s3()
    client.delete_object(Bucket=bucket(), Key=_skey(key))
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def head(key: str) -> dict | None:
    """Look up object metadata for `key`.

    Returns:
        {'size': ..., 'last_modified': ...} taken from the HEAD response, or
        None when the service reports 404 / NoSuchKey / NotFound.

    Raises:
        botocore.exceptions.ClientError: for any other error code.
    """
    from botocore.exceptions import ClientError

    try:
        meta = _s3().head_object(Bucket=bucket(), Key=_skey(key))
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code", "")
        if error_code not in ("404", "NoSuchKey", "NotFound"):
            raise
        return None
    return {
        "size": meta.get("ContentLength"),
        "last_modified": meta.get("LastModified"),
    }
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def exists(key: str) -> bool:
    """Return True when `head(key)` finds an object, False otherwise."""
    meta = head(key)
    return meta is not None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def list_prefix(prefix: str = "", delimiter: str = "/") -> dict:
    """List one VFS level. Returns {'folders': [name...], 'files': [{name,key,size}]}.

    With delimiter='/', S3 returns CommonPrefixes (folders) + Contents at this
    level. Folder-marker objects (keys ending in '/') and the `.keep`
    placeholder are hidden from files.

    Args:
        prefix: Real (unencrypted) folder path; "" lists the root. A trailing
            '/' is added when missing.
        delimiter: Passed through to ListObjectsV2.

    Returns:
        A dict with 'folders' (sorted names) and 'files' (dicts sorted by
        'name'; each 'key' is the real path, i.e. normalized prefix + name).
    """
    s3 = _s3()
    on = _names_on()
    # bucket() re-reads the environment/config on every call; resolve it once
    # rather than once per page.
    target_bucket = bucket()

    norm = prefix
    if norm and not norm.endswith("/"):
        norm += "/"

    # The server query runs against the ENCRYPTED prefix; the returned tokens are
    # decrypted back to real names for display. Returned `key` is the REAL path so
    # callers (get/rm/cd) can re-encrypt it transparently.
    if on and norm:
        from obsideo_core import names
        enc_prefix = names.encrypt_path(norm) + "/"
    else:
        enc_prefix = norm

    def _name(token: str) -> str:
        # Map a server-side name component back to its display name.
        if not on:
            return token
        from obsideo_core import names
        return names.safe_decrypt_name(token)[0]

    folders, files = [], []
    token = None
    while True:
        kwargs = dict(Bucket=target_bucket, Prefix=enc_prefix, Delimiter=delimiter)
        if token:
            kwargs["ContinuationToken"] = token
        resp = s3.list_objects_v2(**kwargs)

        for cp in resp.get("CommonPrefixes", []):
            enc_name = cp["Prefix"][len(enc_prefix):].rstrip("/")
            if enc_name:
                folders.append(_name(enc_name))

        for obj in resp.get("Contents", []):
            key = obj["Key"]
            if key == enc_prefix or key.endswith("/"):
                continue  # the folder marker itself
            name = _name(key[len(enc_prefix):])
            if name == _FOLDER_MARKER:
                continue  # hide the .keep placeholder that makes empty folders visible
            files.append({"name": name, "key": norm + name, "size": obj.get("Size", 0)})

        token = resp.get("NextContinuationToken")
        # Stop on the last page, and also when a page claims to be truncated
        # but carries no continuation token: without one the next request
        # would fetch the first page again and loop forever.
        if not resp.get("IsTruncated") or not token:
            break

    folders.sort()
    files.sort(key=lambda f: f["name"])
    return {"folders": folders, "files": files}
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def mkdir(prefix: str) -> str:
    """Create a visible empty folder and return its normalized prefix.

    S3 has no real directories, so a small non-empty placeholder object is
    written at '<prefix>/.keep' (the gateway rejects empty bodies). The
    returned prefix always ends with '/'.
    """
    norm = prefix
    if not norm.endswith("/"):
        norm = norm + "/"
    marker_key = norm + _FOLDER_MARKER
    put(marker_key, b".obsideo\n")
    return norm
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def verify_pop(key: str) -> dict:
    """Report whether `key` is stored, with its size and durability posture.

    'stored' and 'size_bytes' come from `head(key)`; 'replication_factor' (3)
    and 'backend' ("obsideo") are fixed values, not queried from the service.
    """
    meta = head(key)
    found = meta is not None
    return {
        "stored": found,
        "size_bytes": meta["size"] if found else None,
        "replication_factor": 3,
        "backend": "obsideo",
    }
|