obsideo-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,265 @@
1
+ """Obsideo storage seam — S3 to the Obsideo gateway (external passthrough).
2
+
3
+ The gateway stores bytes verbatim and holds no keys; the client encrypts before
4
+ calling here (see crypto.py), so the gateway/coord/providers see ciphertext only
5
+ (Principle 1). Objects land on three independent providers (RF=3) via the coord.
6
+
7
+ This is the shared core both the general `obsideo` CLI and the `mlvault` extension
8
+ build on. It generalizes the original mlvault seam with the browse operations a
9
+ file manager needs: list (prefix + delimiter), delete, head, mkdir-marker.
10
+
11
+ Gateway constraints engineered around:
12
+ * No HTTP Range — downloads use a single full-object GET (never
13
+ download_file/download_fileobj, which issue ranged multipart GETs).
14
+ * Path-style only; SigV4; ListObjectsV2 only.
15
+ * Uploads may be multipart (PUT parts, no Range).
16
+ """
17
+
18
+ import os
19
+ from pathlib import Path
20
+
21
+ from obsideo_core import config
22
+
23
# Gateway defaults, used when the matching OBSIDEO_S3_* environment variable is unset.
_DEFAULT_ENDPOINT = "https://s3.obsideo.io"
_DEFAULT_REGION = "us-east-1"

# 16 MiB — serves as both the multipart threshold and the part size for uploads.
_MULTIPART_CHUNK = 16 * 1024**2

# Empty-body PUTs are rejected by the gateway, so an empty folder is represented
# by a small non-empty placeholder object instead of a zero-byte key.
_FOLDER_MARKER = ".keep"
29
+
30
+
31
def _names_on() -> bool:
    """Report whether name encryption is enabled.

    Reads the ``encrypt_names`` entry of the loaded config; a missing entry
    counts as enabled.
    """
    settings = config.load_config()
    return settings.get("encrypt_names", True)
33
+
34
+
35
def _skey(key: str) -> str:
    """Translate a real path key into the key used on the server.

    An empty key, or any key while name encryption is off, is returned
    unchanged. Otherwise the key is passed through ``names.encrypt_path`` so
    the server never sees real names.
    """
    if key and _names_on():
        # Lazy import: only needed when name encryption is actually in use.
        from obsideo_core import names

        return names.encrypt_path(key)
    return key
42
+
43
+
44
class StorageConfigError(OSError):
    """Signals that the Obsideo credentials are missing or incomplete.

    ``OSError`` is the very same class as its ``EnvironmentError`` alias, so
    ``except EnvironmentError`` handlers keep catching this.
    """
46
+
47
+
48
def _endpoint() -> str:
    """Return the S3 endpoint URL: ``OBSIDEO_S3_ENDPOINT`` if set, else the default."""
    try:
        return os.environ["OBSIDEO_S3_ENDPOINT"]
    except KeyError:
        return _DEFAULT_ENDPOINT
50
+
51
+
52
def _region() -> str:
    """Return the signing region: ``OBSIDEO_S3_REGION`` if set, else the default."""
    env = os.environ
    if "OBSIDEO_S3_REGION" in env:
        return env["OBSIDEO_S3_REGION"]
    return _DEFAULT_REGION
54
+
55
+
56
def bucket() -> str:
    """Return the name of the bucket to operate on.

    A non-empty ``OBSIDEO_S3_BUCKET`` environment variable wins; otherwise
    the ``bucket`` config entry is used, defaulting to ``"obsideo"``.
    """
    from_env = os.environ.get("OBSIDEO_S3_BUCKET")
    if from_env:
        return from_env
    return config.load_config().get("bucket", "obsideo")
58
+
59
+
60
def _require_credentials() -> tuple[str, str]:
    """Return ``(access_key, secret_key)`` read from the environment.

    Raises:
        StorageConfigError: if ``OBSIDEO_S3_ACCESS_KEY`` or
            ``OBSIDEO_S3_SECRET_KEY`` is unset or empty.
    """
    ak = os.environ.get("OBSIDEO_S3_ACCESS_KEY")
    sk = os.environ.get("OBSIDEO_S3_SECRET_KEY")
    if not ak or not sk:
        # User-facing text: the previous wording carried a leftover
        # self-correction ("5 GB... actually 3 GB free").
        raise StorageConfigError(
            "You're not logged in. Run `obsideo login` to get started "
            "(3 GB free), or set OBSIDEO_S3_ACCESS_KEY / OBSIDEO_S3_SECRET_KEY."
        )
    return ak, sk
69
+
70
+
71
# Module-level cache for the S3 client; filled on the first _s3() call.
_client = None


def _s3():
    """Return the shared boto3 S3 client, building it on first use.

    The client targets the configured endpoint with path-style addressing,
    SigV4 signing and standard-mode retries (3 attempts).

    Raises:
        StorageConfigError: if boto3 cannot be imported or the credentials
            are missing.
    """
    global _client
    if _client is None:
        try:
            import boto3
            from botocore.config import Config
        except ImportError as e:  # pragma: no cover
            raise StorageConfigError("boto3 is required. pip install boto3") from e

        access_key, secret_key = _require_credentials()
        common = {
            "region_name": _region(),
            "signature_version": "s3v4",
            "s3": {"addressing_style": "path"},
            "retries": {"max_attempts": 3, "mode": "standard"},
        }
        # boto3 >=1.36 adds CRC32 checksum trailers by default; the passthrough
        # gateway doesn't validate them and they break SigV4. Ask for
        # "when_required" where the options exist; older botocore rejects the
        # unknown keywords (and has no such default), so fall back without them.
        try:
            client_config = Config(
                request_checksum_calculation="when_required",
                response_checksum_validation="when_required",
                **common,
            )
        except TypeError:
            client_config = Config(**common)

        _client = boto3.client(
            "s3",
            endpoint_url=_endpoint(),
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            config=client_config,
        )
    return _client
103
+
104
+
105
def reset_client() -> None:
    """Forget the cached S3 client.

    Useful, for example, once a login has swapped the credentials: the cache
    is cleared so a stale client is not reused.
    """
    global _client
    _client = None
109
+
110
+
111
def ensure_bucket() -> None:
    """Make sure the configured bucket exists, creating it if the probe fails.

    Any ``ClientError`` from the HEAD probe is taken to mean "not there yet"
    and leads to a create attempt. A create that fails because the bucket
    already exists is ignored; every other create error propagates.
    """
    from botocore.exceptions import ClientError

    client = _s3()
    name = bucket()

    try:
        client.head_bucket(Bucket=name)
    except ClientError:
        pass  # fall through to the create attempt
    else:
        return

    try:
        client.create_bucket(Bucket=name)
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code", "")
        if error_code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
            return
        raise
125
+
126
+
127
+ # ── Object ops ──────────────────────────────────────────────────────────────
128
+
129
def put(key: str, data: bytes) -> str:
    """Upload ``data`` under ``key`` and return ``key`` unchanged.

    The bucket is ensured first. The transfer uses ``_MULTIPART_CHUNK`` as both
    the multipart threshold and the part size, and the object is written under
    the storage key produced by ``_skey``.
    """
    import io
    from boto3.s3.transfer import TransferConfig

    client = _s3()
    ensure_bucket()
    transfer_config = TransferConfig(
        multipart_threshold=_MULTIPART_CHUNK,
        multipart_chunksize=_MULTIPART_CHUNK,
    )
    payload = io.BytesIO(data)
    client.upload_fileobj(payload, bucket(), _skey(key), Config=transfer_config)
    return key
139
+
140
+
141
def upload_file(local_path: Path, key: str) -> str:
    """Upload the file at ``local_path`` under ``key`` and return ``key``.

    The bucket is ensured first. The file is opened in binary mode and handed
    to the transfer as a stream, with ``_MULTIPART_CHUNK`` as both the
    multipart threshold and the part size.
    """
    from boto3.s3.transfer import TransferConfig

    client = _s3()
    ensure_bucket()
    transfer_config = TransferConfig(
        multipart_threshold=_MULTIPART_CHUNK,
        multipart_chunksize=_MULTIPART_CHUNK,
    )
    with open(local_path, "rb") as source:
        client.upload_fileobj(source, bucket(), _skey(key), Config=transfer_config)
    return key
150
+
151
+
152
def get(key: str) -> bytes:
    """Fetch the whole object stored under ``key`` with one plain GET (no Range).

    Raises:
        FileNotFoundError: if the gateway reports the key or bucket as missing.
        RuntimeError: for any other ``ClientError`` raised by the download.
    """
    from botocore.exceptions import ClientError

    missing_codes = ("NoSuchKey", "404", "NoSuchBucket")
    try:
        response = _s3().get_object(Bucket=bucket(), Key=_skey(key))
        return response["Body"].read()
    except ClientError as e:
        error_code = e.response.get("Error", {}).get("Code", "")
        if error_code not in missing_codes:
            raise RuntimeError(f"Download failed for '{key}': {e}") from e
        raise FileNotFoundError(f"Not found: {key}") from e
163
+
164
+
165
def download_file(key: str, local_path: Path | str) -> None:
    """Download the object at ``key`` and write it to ``local_path``.

    ``local_path`` may be a ``Path`` or a plain string; it is coerced to a
    ``Path`` up front so a bad value fails before any bytes are fetched.
    Missing parent directories are created. The object is fetched whole via
    ``get`` (which propagates its own errors) and then written in one go.
    """
    target = Path(local_path)
    data = get(key)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(data)
169
+
170
+
171
def delete(key: str) -> None:
    """Delete the object stored under ``key`` (mapped through ``_skey``)."""
    client = _s3()
    client.delete_object(Bucket=bucket(), Key=_skey(key))
173
+
174
+
175
def head(key: str) -> dict | None:
    """Look up object metadata without downloading the body.

    Returns ``{'size', 'last_modified'}`` taken from the HEAD response, or
    ``None`` when the gateway reports the key as absent. Any other
    ``ClientError`` is re-raised unchanged.
    """
    from botocore.exceptions import ClientError

    try:
        meta = _s3().head_object(Bucket=bucket(), Key=_skey(key))
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code", "")
        if error_code not in ("404", "NoSuchKey", "NotFound"):
            raise
        return None
    return {
        "size": meta.get("ContentLength"),
        "last_modified": meta.get("LastModified"),
    }
186
+
187
+
188
def exists(key: str) -> bool:
    """Return True when ``head(key)`` finds an object, False otherwise."""
    info = head(key)
    return info is not None
190
+
191
+
192
def list_prefix(prefix: str = "", delimiter: str = "/") -> dict:
    """List one VFS level under ``prefix``.

    Returns ``{'folders': [name, ...], 'files': [{'name', 'key', 'size'}, ...]}``
    with folders sorted and files sorted by name.

    With delimiter='/', S3 returns CommonPrefixes (folders) + Contents at this
    level. Keys ending in '/' (folder-marker objects) and the ``.keep``
    placeholder are hidden from ``files``.

    Raises:
        RuntimeError: if a page is flagged as truncated but carries no
            continuation token; following it would re-request the first page
            forever.
    """
    s3 = _s3()
    on = _names_on()
    norm = prefix
    if norm and not norm.endswith("/"):
        norm += "/"

    # The server query runs against the ENCRYPTED prefix; the returned tokens are
    # decrypted back to real names for display. Returned `key` is the REAL path so
    # callers (get/rm/cd) can re-encrypt it transparently.
    if on and norm:
        from obsideo_core import names
        # NOTE(review): assumes encrypt_path() drops the trailing '/' of `norm`,
        # hence the explicit re-append — confirm against names.encrypt_path.
        enc_prefix = names.encrypt_path(norm) + "/"
    else:
        enc_prefix = norm

    def _name(enc: str) -> str:
        """Turn one server-side name token back into its display name."""
        if not on:
            return enc
        from obsideo_core import names
        return names.safe_decrypt_name(enc)[0]

    # Built once: bucket() re-reads the environment/config on every call, and
    # the prefix length is needed for every returned key.
    request = dict(Bucket=bucket(), Prefix=enc_prefix, Delimiter=delimiter)
    skip = len(enc_prefix)

    folders, files = [], []
    while True:
        resp = s3.list_objects_v2(**request)

        for cp in resp.get("CommonPrefixes", []):
            enc_name = cp["Prefix"][skip:].rstrip("/")
            if enc_name:
                folders.append(_name(enc_name))

        for obj in resp.get("Contents", []):
            key = obj["Key"]
            if key == enc_prefix or key.endswith("/"):
                continue  # the folder marker itself
            name = _name(key[skip:])
            if name == _FOLDER_MARKER:
                continue  # hide the .keep placeholder that makes empty folders visible
            files.append({"name": name, "key": norm + name, "size": obj.get("Size", 0)})

        if not resp.get("IsTruncated"):
            break
        token = resp.get("NextContinuationToken")
        if not token:
            # Without a token the next request would be identical to the first
            # one, so the loop would never end and results would duplicate.
            raise RuntimeError(
                f"Listing of '{prefix}' was truncated but the gateway "
                "returned no continuation token"
            )
        request["ContinuationToken"] = token

    folders.sort()
    files.sort(key=lambda f: f["name"])
    return {"folders": folders, "files": files}
249
+
250
+
251
def mkdir(prefix: str) -> str:
    """Make an empty folder show up in listings and return its normalized prefix.

    S3 has no real directories, so a tiny placeholder object is written at
    ``<prefix>/.keep``. The placeholder is non-empty because the gateway
    rejects empty bodies, and listings hide it.
    """
    folder = prefix
    if not folder.endswith("/"):
        folder += "/"
    put(folder + _FOLDER_MARKER, b".obsideo\n")
    return folder
258
+
259
+
260
def verify_pop(key: str) -> dict:
    """Report whether ``key`` is stored, along with the durability posture.

    ``stored`` and ``size_bytes`` come from ``head(key)``; the replication
    factor (3) and backend name are fixed values.
    """
    info = head(key)
    stored = info is not None
    return {
        "stored": stored,
        "size_bytes": info["size"] if stored else None,
        "replication_factor": 3,
        "backend": "obsideo",
    }