delimit-cli 4.5.13 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/README.md +9 -8
  3. package/bin/delimit-cli.js +179 -4
  4. package/bin/delimit-setup.js +46 -6
  5. package/gateway/ai/_compile_status.py +154 -0
  6. package/gateway/ai/agent_dispatch.py +41 -0
  7. package/gateway/ai/backends/git_health.py +175 -0
  8. package/gateway/ai/backends/tools_infra.py +163 -10
  9. package/gateway/ai/cli_contract.py +185 -0
  10. package/gateway/ai/daemon.py +10 -0
  11. package/gateway/ai/daily_digest.py +1 -2
  12. package/gateway/ai/delimit_daemon.py +67 -0
  13. package/gateway/ai/dispatch_gate.py +399 -0
  14. package/gateway/ai/governance.py +181 -0
  15. package/gateway/ai/heartbeat.py +290 -0
  16. package/gateway/ai/hot_reload.py +1 -2
  17. package/gateway/ai/led193_daemon/executor.py +9 -0
  18. package/gateway/ai/ledger_manager.py +90 -4
  19. package/gateway/ai/ledger_proof.py +127 -0
  20. package/gateway/ai/license.py +132 -47
  21. package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
  22. package/gateway/ai/license_core.pyi +1 -1
  23. package/gateway/ai/notify.py +39 -0
  24. package/gateway/ai/outreach_loop_daemon.py +349 -0
  25. package/gateway/ai/outreach_substantive.py +1437 -0
  26. package/gateway/ai/pro_tools.yaml +167 -0
  27. package/gateway/ai/reaper.py +70 -0
  28. package/gateway/ai/reddit_scanner.py +17 -6
  29. package/gateway/ai/sensing/schema.py +1 -1
  30. package/gateway/ai/sensing/signal_store.py +0 -1
  31. package/gateway/ai/server.py +5490 -1602
  32. package/gateway/ai/social_capability/fit_floor.py +114 -12
  33. package/gateway/ai/social_queue.py +166 -10
  34. package/gateway/ai/tdqs_lint.py +611 -0
  35. package/gateway/ai/tenant_auth.py +329 -0
  36. package/gateway/ai/tenant_data.py +339 -0
  37. package/gateway/ai/tenant_paths.py +150 -0
  38. package/gateway/ai/usage_allowlist.py +198 -0
  39. package/gateway/ai/workers/base.py +2 -2
  40. package/gateway/ai/workers/executor.py +32 -3
  41. package/gateway/ai/workers/outreach_drafter.py +0 -1
  42. package/gateway/ai/workers/pr_drafter.py +0 -1
  43. package/gateway/ai/x_ranker.py +12 -2
  44. package/gateway/core/json_schema_diff.py +25 -1
  45. package/lib/auth-signin.js +136 -0
  46. package/lib/auth-signout.js +169 -0
  47. package/lib/delimit-template.js +11 -0
  48. package/lib/migration-2092-banner.js +213 -0
  49. package/package.json +5 -2
  50. package/server.json +4 -4
  51. package/scripts/build-license-core.sh +0 -85
  52. package/scripts/security-check.sh +0 -66
  53. package/scripts/test-license-core-so.sh +0 -107
@@ -0,0 +1,329 @@
1
+ """LED-2268 P0 Phase 0.1 — gateway-side tenant API key validator.
2
+
3
+ The dashboard at app.delimit.ai (`/dashboard/api-keys`) issues per-user
4
+ keys with the `dlmt_<43-char-base64url>` shape. Only the sha256 of the
5
+ plaintext is stored — see supabase migration 034 + lib/user-api-keys.ts.
6
+
7
+ This module owns the gateway side of that contract:
8
+ - parse `Authorization: ApiKey dlmt_xxx` from an HTTP header
9
+ - sha256-hash the plaintext
10
+ - look up the hash in `user_api_keys` via service-role Supabase REST
11
+ - return `{user_id, scope, key_id}` for a live (non-revoked) match
12
+ - return None for anything else (bad shape, no match, revoked, etc.)
13
+
14
+ Phase 0.1 stays minimal on purpose:
15
+ - no `last_used_at` write on the request path (deferred to Phase 0.2, which bumps it from a background thread — see _fire_last_used_update)
16
+ - no cache (every call hits Supabase; fine at current volume)
17
+ - no JWT, no rotation grace period — soft-delete is hard once set
18
+
19
+ Phase 0.2 will add tenant-scoped data routing (per-user data root under
20
+ ~/.delimit/tenants/<user_id>/); this module only resolves identity.
21
+ """
22
+ from __future__ import annotations
23
+
24
+ import hashlib
25
+ import json
26
+ import logging
27
+ import os
28
+ import threading
29
+ import urllib.error
30
+ import urllib.parse
31
+ import urllib.request
32
+ from datetime import datetime, timezone
33
+ from typing import Optional, TypedDict
34
+
35
+ logger = logging.getLogger("delimit.tenant_auth")
36
+
37
# Running tally of last_used_at PATCH writes that failed in this process.
# It exists so operators (and any future health surface) can tell when the
# fire-and-forget audit write is failing repeatedly — a debug-level log
# line per failure is easy to miss in the journal during an outage.
# The value is only ever reset by restarting the process.
_last_used_dropped_count = 0
_last_used_dropped_lock = threading.Lock()
# The first drop and every Nth drop after it are logged at INFO; the rest
# stay at DEBUG so short blips do not flood the journal.
_LAST_USED_DROP_LOG_EVERY = 10


def get_last_used_dropped_count() -> int:
    """Return how many last_used_at PATCH writes were dropped since start.

    Purely a read accessor for health/metrics tooling. Dropped writes only
    reduce audit completeness; they have no bearing on whether a key was
    authenticated correctly.
    """
    # Take the same lock the writer holds while incrementing so the read
    # never observes a half-updated value.
    _last_used_dropped_lock.acquire()
    try:
        return _last_used_dropped_count
    finally:
        _last_used_dropped_lock.release()
59
+
60
+
61
class TenantIdentity(TypedDict):
    """Identity that a presented tenant API key resolved to.

    Produced by validate_api_key() for a key that matched a live row.
    """
    # Owner of the key, taken from the matched row's `user_id` column.
    user_id: str
    # Scope string stored on the key row; empty when the row has none.
    scope: str
    # Row id of the matched key, stringified; empty when the row has none.
    key_id: str
66
+
67
+
68
# Keys issued by lib/user-api-keys.ts look like `dlmt_` followed by 43
# base64url characters (32 random bytes, encoded). Anything that does not
# fit this rough shape is rejected before hashing, which avoids a Supabase
# round-trip for malformed input.
_KEY_PREFIX = "dlmt_"
# Deliberately lenient lower bound: prefix plus at least 32 characters.
_KEY_PLAINTEXT_LEN_MIN = len(_KEY_PREFIX) + 32
# Upper bound exists only to throw away absurdly long inputs early.
_KEY_PLAINTEXT_LEN_MAX = len(_KEY_PREFIX) + 128
74
+
75
+
76
def parse_auth_header(header: str) -> Optional[tuple[str, str]]:
    """Split an `Authorization` header value into (scheme, token).

    Two schemes are recognised:
      - `Bearer <token>`     — the shared-bearer pattern (founder/system)
      - `ApiKey <plaintext>` — a per-user tenant key

    The scheme is returned lower-cased and the token whitespace-trimmed.
    Anything else (empty header, missing token, unknown scheme) yields
    None. Which scheme is acceptable for a given endpoint is the caller's
    decision, not this function's.
    """
    if not header:
        return None

    # Split on the first run of whitespace only: the token itself may
    # legitimately contain further whitespace.
    pieces = header.split(maxsplit=1)
    if len(pieces) != 2:
        return None

    raw_scheme, raw_token = pieces
    scheme = raw_scheme.strip().lower()
    token = raw_token.strip()
    if scheme not in ("bearer", "apikey") or not token:
        return None
    return (scheme, token)
95
+
96
+
97
def _hash_key(plaintext: str) -> str:
    """Return the lowercase hex sha256 of the UTF-8 encoded plaintext.

    Mirrors the hashing done on the issuing side (lib/user-api-keys.ts).
    """
    digest = hashlib.sha256()
    digest.update(plaintext.encode("utf-8"))
    return digest.hexdigest()
100
+
101
+
102
def _looks_like_tenant_key(plaintext: str) -> bool:
    """Cheap shape test run before any Supabase lookup.

    True only when the value starts with the tenant-key prefix and its
    total length falls inside the accepted [min, max] window.
    """
    has_prefix = plaintext.startswith(_KEY_PREFIX)
    return has_prefix and (
        _KEY_PLAINTEXT_LEN_MIN <= len(plaintext) <= _KEY_PLAINTEXT_LEN_MAX
    )
108
+
109
+
110
def validate_api_key(plaintext: str) -> Optional[TenantIdentity]:
    """Resolve a `dlmt_xxx` plaintext key to a tenant identity, or None.

    Returns None for: malformed input, missing or unusable Supabase
    config, network/HTTP failure, an undecodable or non-JSON response, no
    row matched, or a row that is marked revoked. Callers treat None as
    "unauthorized" and must never leak the specific reason.

    The lookup is synchronous. Failures are logged at debug level and
    swallowed; production audit comes from the request-log layer (each
    endpoint logs the resolved user_id, not the validator).

    On a successful match with a non-empty key id, a background
    last_used_at update is started via _fire_last_used_update().
    """
    # Function-scope import: only needed for the exception type below.
    import http.client

    if not _looks_like_tenant_key(plaintext):
        return None

    supabase_url = os.environ.get("SUPABASE_URL", "").rstrip("/")
    service_key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
    if not supabase_url or not service_key:
        # If the gateway host hasn't been configured for Supabase, tenant
        # auth simply doesn't work — the shared-bearer path stays intact.
        logger.debug("validate_api_key: supabase env not configured")
        return None

    key_hash = _hash_key(plaintext)
    # Active-only lookup: the partial index `idx_user_api_keys_active_hash`
    # makes this O(log n), and the `revoked_at=is.null` filter guarantees
    # revoked keys never match.
    url = (
        f"{supabase_url}/rest/v1/user_api_keys"
        f"?select=id,user_id,scope"
        f"&key_hash=eq.{urllib.parse.quote(key_hash, safe='')}"
        f"&revoked_at=is.null"
        f"&limit=1"
    )
    try:
        # Request() itself raises ValueError for a malformed SUPABASE_URL
        # (e.g. missing scheme), so it lives inside the try as well.
        req = urllib.request.Request(
            url,
            headers={
                "apikey": service_key,
                "Authorization": f"Bearer {service_key}",
                "Accept": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            body = resp.read()
    except urllib.error.HTTPError as e:
        logger.debug("validate_api_key supabase HTTP %s", getattr(e, "code", "?"))
        return None
    except (
        urllib.error.URLError,
        OSError,
        TimeoutError,
        # Protocol-level failures such as IncompleteRead are not OSErrors.
        http.client.HTTPException,
        ValueError,
    ) as e:
        logger.debug("validate_api_key supabase net err: %s", e)
        return None

    try:
        rows = json.loads(body)
    except ValueError:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # json.loads raises when the response bytes are not valid text.
        logger.debug("validate_api_key non-json response")
        return None
    if not isinstance(rows, list) or not rows:
        return None
    row = rows[0]
    if not isinstance(row, dict):
        return None
    user_id = row.get("user_id") or ""
    if not user_id:
        return None
    key_id = str(row.get("id") or "")
    # Phase 0.2: fire-and-forget last_used_at write. Lets operators see
    # "this key was actually used in the last N hours" in the dashboard
    # API-keys list, which is important for rotation hygiene (you can
    # tell which keys are dead before deciding what to revoke).
    # Backgrounded so the validate path stays as fast as it was in 0.1.
    if key_id:
        _fire_last_used_update(supabase_url, service_key, key_id)
    return TenantIdentity(
        user_id=str(user_id),
        scope=str(row.get("scope") or ""),
        key_id=key_id,
    )
187
+
188
+
189
def _fire_last_used_update(supabase_url: str, service_key: str, key_id: str) -> None:
    """Start a background PATCH that bumps last_used_at for `key_id`.

    The caller returns immediately; the HTTP call happens on a separate
    thread and every error there is swallowed. This is a best-effort
    audit signal and plays no part in authorization.

    The worker thread is a daemon thread, so a hung Supabase call cannot
    hold the process open at shutdown.
    """
    def _patch():
        global _last_used_dropped_count
        try:
            url = (
                f"{supabase_url.rstrip('/')}/rest/v1/user_api_keys"
                f"?id=eq.{urllib.parse.quote(key_id, safe='')}"
            )
            stamp = datetime.now(timezone.utc).isoformat()
            payload = json.dumps({"last_used_at": stamp}).encode("utf-8")
            request_headers = {
                "apikey": service_key,
                "Authorization": f"Bearer {service_key}",
                "Content-Type": "application/json",
                # return=minimal: the updated row is not needed back.
                "Prefer": "return=minimal",
            }
            request = urllib.request.Request(
                url, data=payload, method="PATCH", headers=request_headers,
            )
            with urllib.request.urlopen(request, timeout=5):
                pass
        except Exception as exc:  # noqa: BLE001 — fire-and-forget; never raise
            # Count the drop under the lock, then decide the log level
            # outside it: INFO for the first drop and every Nth one so a
            # sustained outage is visible, DEBUG for everything else.
            with _last_used_dropped_lock:
                _last_used_dropped_count += 1
                dropped = _last_used_dropped_count
            noteworthy = dropped == 1 or dropped % _LAST_USED_DROP_LOG_EVERY == 0
            emit = logger.info if noteworthy else logger.debug
            emit(
                "last_used_at update dropped (cum_dropped=%d): %s",
                dropped, exc,
            )

    worker = threading.Thread(
        target=_patch, daemon=True, name="delimit-last-used-update",
    )
    worker.start()
243
+
244
+
245
def authenticate(
    header: str,
    shared_bearer: str = "",
    impersonation_header: str = "",
) -> Optional[dict]:
    """End-to-end auth resolver for an HTTP request.

    Args:
        header: raw `Authorization` header value.
        shared_bearer: the shared bearer secret to accept; when empty,
            every `Bearer` request is rejected.
        impersonation_header: optional tenant id the bearer holder wants
            to act as.

    Returns a dict describing the resolved identity, or None if the
    request should be rejected. Three accepted-request outcomes:

      - `{"auth_mode": "bearer", "is_tenant_scoped": False}` — shared-
        bearer match WITHOUT impersonation. Founder/system access to
        the shared `~/.delimit/` view. No user_id field present.
      - `{"auth_mode": "bearer", "is_tenant_scoped": True, "user_id":
        ..., "scope": "", "key_id": "bearer-impersonation"}` — shared
        bearer match WITH a valid impersonation header. The trusted
        BFF/system is acting on behalf of a specific tenant (LED-2268
        Phase 0.5a).
      - `{"auth_mode": "apikey", "is_tenant_scoped": True, "user_id":
        ..., "scope": ..., "key_id": ...}` — tenant key match.

    Trust model: the shared bearer is held only by a SMALL set of
    trusted clients (Vercel BFF + the gateway host). The impersonation
    header just lets that bearer be more granular per-request; it does
    NOT grant access the bearer didn't already have.

    Order: Bearer first (cheap local compare), then ApiKey (Supabase
    round-trip). A request can only present one Authorization header,
    so the order is which-scheme-wins-when-the-shape-fits.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    parsed = parse_auth_header(header)
    if not parsed:
        return None
    scheme, token = parsed
    if scheme == "bearer":
        if not shared_bearer:
            return None
        # Constant-time comparison: a plain `!=` returns as soon as the
        # first differing character is found, which leaks how much of a
        # guessed secret was correct. Compare bytes because
        # compare_digest only accepts ASCII-only str; surrogatepass
        # keeps odd header bytes from raising during encoding.
        presented = token.encode("utf-8", "surrogatepass")
        expected = shared_bearer.encode("utf-8", "surrogatepass")
        if not hmac.compare_digest(presented, expected):
            return None
        # Phase 0.5a — optional tenant impersonation. If the BFF/system
        # presented a tenant header AND it sanitises to a valid segment,
        # treat as tenant-scoped under that user_id. Validate via the
        # SAME sanitiser tenant_paths uses for filesystem routing so the
        # downstream code sees a consistent identity.
        if impersonation_header:
            # Lazy import to avoid circular: tenant_paths only needed when
            # impersonation is actually requested.
            from . import tenant_paths
            seg = tenant_paths.safe_user_segment(impersonation_header)
            if seg is None:
                # Header was present but garbage. Reject the request
                # entirely rather than silently falling back to shared
                # scope — a confused BFF surfacing here is exactly the
                # class of bug that header validation should catch.
                logger.info(
                    "authenticate: bearer + invalid impersonation header rejected: %r",
                    impersonation_header[:64],
                )
                return None
            # We pass the RAW header value (not the sanitised segment)
            # downstream so callers see the same user_id shape as the
            # ApiKey path. tenant_paths.safe_user_segment runs again
            # inside tenant_data_root for actual fs routing.
            return {
                "auth_mode": "bearer",
                "is_tenant_scoped": True,
                "user_id": impersonation_header,
                "scope": "",
                "key_id": "bearer-impersonation",
            }
        return {"auth_mode": "bearer", "is_tenant_scoped": False}
    if scheme == "apikey":
        identity = validate_api_key(token)
        if identity is None:
            return None
        return {
            "auth_mode": "apikey",
            "is_tenant_scoped": True,
            "user_id": identity["user_id"],
            "scope": identity["scope"],
            "key_id": identity["key_id"],
        }
    return None
@@ -0,0 +1,339 @@
1
+ """LED-2268 P0 Phase 0.3 — first consumer of the tenant_data_root primitive.
2
+
3
+ Provides describe_tenant_data() — the read-only view of what's on disk
4
+ inside a given tenant's data root. Used by the /tenant/data endpoint
5
+ and intended to power the dashboard's "your data lives here" home tile
6
+ for browser-only operators.
7
+
8
+ The describe call is deliberately minimal:
9
+ - data_root: absolute path string the gateway resolved for this tenant
10
+ - exists: has the dir been created yet?
11
+ - files: relative paths of files inside the dir (sorted by path)
12
+ - dirs: relative paths of subdirectories
13
+ - total_size_bytes: sum of all file sizes (sentinel for usage display)
14
+ - cap_bytes: soft cap if configured (Phase 0.3 hard-codes None — no cap)
15
+
16
+ Phase 0.3 itself ONLY reads. The write/read/delete helpers defined further
17
+ down are the Phase 0.4 additions for the dashboard's first "create note / save memory" surface.
18
+
19
+ Founder-data migration is handled by the SEPARATE manual script
20
+ scripts/delimit_seed_tenant_data.py (also in this PR), not by an
21
+ auto-trigger inside describe(). Keeps the read path side-effect-free.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ import os
27
+ from pathlib import Path
28
+ from typing import Optional, TypedDict
29
+
30
+ from . import tenant_paths
31
+
32
+ logger = logging.getLogger("delimit.tenant_data")
33
+
34
+
35
+ # ─────────────────────────────────────────────────────────────────────
36
+ # Phase 0.4 — write/read/delete limits + allowlist
37
+ # ─────────────────────────────────────────────────────────────────────
38
+
39
# Per-file size ceiling for tenant data. Roomy for memory.jsonl /
# ledger.jsonl sized files (typically <100KB per tenant) while still
# stopping a runaway client from filling the disk. This limit is per
# file; any future quota would have to sum across files.
MAX_FILE_BYTES = 1 << 20  # 1 MiB

# Extensions tenants are allowed to write/read. Intentionally narrow —
# text-shaped data only — so .py / .sh / .so / .dll and anything else
# executable can never land in the tenant data root and turn it into a
# code-drop or LD-load source.
_ALLOWED_EXTENSIONS = frozenset((
    ".json",
    ".jsonl",
    ".md",
    ".txt",
    ".csv",
    ".yaml",
    ".yml",
))

# Upper bound on the number of path segments. Deep nesting complicates
# audit and backup, and no legitimate layout needs more than 5 levels.
_MAX_PATH_DEPTH = 5
63
+
64
+
65
class TenantPathError(Exception):
    """Validation failure for a tenant-data path or payload.

    The exception message is a short machine-style diagnostic (for
    example "path_too_deep", "extension_forbidden", "path_escapes_root")
    that is safe to surface to the user. The intended caller pattern is
    `except TenantPathError as e: return 400 ...`.
    """
72
+
73
+
74
def _resolve_tenant_file(user_id: str, rel_path: str, *, create_root: bool = False) -> Path:
    """Validate `rel_path` and resolve it inside the tenant's data root.

    Raises TenantPathError with one of these messages:
      - "path_required"            empty, blank or non-string rel_path
      - "path_invalid"             rel_path contains a nul byte
      - "path_must_be_relative"    rel_path starts with '/'
      - "path_traversal_forbidden" a segment is '.' or '..'
      - "path_too_deep"            more than _MAX_PATH_DEPTH segments
      - "extension_forbidden"      final segment's suffix not allowlisted
      - "tenant_resolve_failed"    no tenant root could be resolved
      - "path_escapes_root"        resolved path is not under the root

    Returns the absolute, resolved Path, which is always under the
    resolved tenant root.
    """
    if not (isinstance(rel_path, str) and rel_path):
        raise TenantPathError("path_required")
    if "\x00" in rel_path:
        raise TenantPathError("path_invalid")

    # Backslashes are treated as separators too, so Windows-style input
    # goes through the same checks as '/'-separated input.
    cleaned = rel_path.replace("\\", "/").strip()
    if not cleaned:
        raise TenantPathError("path_required")
    if cleaned[0] == "/":
        raise TenantPathError("path_must_be_relative")

    # Cheap lexical rejection first; the post-resolve containment check
    # at the bottom is the second line of defence. Empty segments (from
    # doubled or trailing separators) are dropped here.
    segments = list(filter(None, cleaned.split("/")))
    for segment in segments:
        if segment in (".", ".."):
            raise TenantPathError("path_traversal_forbidden")
    if len(segments) > _MAX_PATH_DEPTH:
        raise TenantPathError("path_too_deep")

    # Only the final segment's extension is checked against the allowlist.
    extension = Path(segments[-1]).suffix.lower()
    if extension not in _ALLOWED_EXTENSIONS:
        raise TenantPathError("extension_forbidden")

    tenant_root = tenant_paths.tenant_data_root(user_id, create=create_root)
    if tenant_root is None:
        raise TenantPathError("tenant_resolve_failed")

    # Resolve the candidate and confirm it is still inside the resolved
    # root; this catches symlinks and any separator trick the lexical
    # checks above did not anticipate.
    resolved = tenant_root.joinpath(*segments).resolve()
    try:
        resolved.relative_to(tenant_root.resolve())
    except ValueError as e:
        raise TenantPathError("path_escapes_root") from e
    return resolved
130
+
131
+
132
def write_tenant_file(user_id: str, rel_path: str, content: bytes) -> int:
    """Atomically write `content` to `rel_path` inside the tenant's data root.

    - Creates the tenant root (via _resolve_tenant_file) and any missing
      intermediate directories, each with mode 0o700.
    - Enforces MAX_FILE_BYTES on the byte size of `content`.
    - Writes to a uniquely named temp file in the target directory, then
      renames it over the target (atomic on POSIX). The temp file is
      removed if anything fails before the rename.
    - File mode is 0o600 (gateway-process-owner readable only).

    Returns the number of bytes written. Raises TenantPathError on
    validation failure or OSError on filesystem failure.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    if not isinstance(content, (bytes, bytearray, memoryview)):
        raise TenantPathError("content_must_be_bytes")
    # nbytes is the true byte size; len() of a memoryview counts elements,
    # which under-reports for multi-byte item formats.
    size = memoryview(content).nbytes
    if size > MAX_FILE_BYTES:
        raise TenantPathError("content_too_large")
    target = _resolve_tenant_file(user_id, rel_path, create_root=True)

    # Path.mkdir(parents=True, mode=...) applies `mode` only to the leaf
    # and creates missing parents with default permissions, so create
    # each missing level explicitly with 0o700, outermost first.
    missing = []
    directory = target.parent
    while not directory.exists():
        missing.append(directory)
        directory = directory.parent
    for directory in reversed(missing):
        try:
            os.mkdir(directory, 0o700)
        except FileExistsError:
            # Lost a race with a concurrent writer: fine if it is a dir.
            if not directory.is_dir():
                raise

    # mkstemp creates the file exclusively, with a unique name and mode
    # 0o600, so concurrent writers to the same target never share (and
    # truncate) each other's temp file.
    fd, tmp_name = tempfile.mkstemp(
        prefix=".tenant-write-", suffix=".tmp", dir=str(target.parent),
    )
    try:
        # The buffered file object writes the whole payload; a bare
        # os.write() may write only part of it.
        with os.fdopen(fd, "wb") as handle:
            handle.write(content)
        os.replace(tmp_name, target)
    except BaseException:
        # Don't leave temp files behind in the tenant's directory.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
    return size
159
+
160
+
161
def read_tenant_file(user_id: str, rel_path: str) -> Optional[bytes]:
    """Read a tenant file, or return None if it doesn't exist.

    Also returns None (after logging a warning) when the file on disk is
    larger than MAX_FILE_BYTES, and when the file disappears between the
    existence check and the read.

    Raises TenantPathError on validation failure. Other filesystem
    errors (PermissionError, IsADirectoryError) propagate — those
    indicate a bug or hostile filesystem state, not normal client
    input.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if not target.is_file():
        return None
    try:
        # Stat once so the size that is checked is the size that is logged.
        size = target.stat().st_size
        if size > MAX_FILE_BYTES:
            # Defence in depth: even if a write somehow bypassed the cap,
            # don't echo the over-large content back to a client. Return
            # None and log — caller surfaces as "not found".
            logger.warning(
                "read_tenant_file refusing oversize file: user=%s path=%s size=%d",
                user_id, rel_path, size,
            )
            return None
        return target.read_bytes()
    except FileNotFoundError:
        # A concurrent delete removed the file after the is_file() check;
        # per the contract above that is "doesn't exist", not an error.
        return None
182
+
183
+
184
def delete_tenant_file(user_id: str, rel_path: str) -> bool:
    """Delete a tenant file. Returns True if deleted, False if absent.

    "Absent" includes the case where a concurrent caller removed the
    file between the existence check and the unlink.

    Raises TenantPathError on validation failure, and with
    "path_is_directory" when the path names a directory.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if not target.exists():
        return False
    if target.is_dir():
        # We don't currently support tenant subdirs at the API level
        # (write creates them as a side effect of the file path).
        # Reject directory deletes outright — tenants shouldn't be
        # able to recursively rm their own dir tree via this API.
        raise TenantPathError("path_is_directory")
    try:
        target.unlink()
    except FileNotFoundError:
        # Lost a race with another delete: the file is gone either way,
        # but this call did not remove it.
        return False
    return True
200
+
201
+
202
class TenantDataSummary(TypedDict):
    """Shape of the summary handed back to a /tenant/data caller."""
    # Tenant the summary was built for.
    user_id: str
    # Data root path, as a string.
    data_root: str
    # Whether that root was present on disk when the summary was built.
    exists: bool
    # Root-relative paths of the files found.
    files: list[str]
    # Root-relative paths of the subdirectories found.
    dirs: list[str]
    # Sum of the sizes of the listed files.
    total_size_bytes: int
    # Soft cap on usage, or None when no cap is configured.
    cap_bytes: Optional[int]
211
+
212
+
213
# Ceiling on how many directory entries one summary will list and
# size-sum. Without it, a tenant holding 100k files could force a stat()
# of every one of them on each dashboard refresh. A truncated listing is
# good enough for a "how full is my dir" display; the dashboard can
# surface "(more — refresh to scan)".
_MAX_ENTRIES_PER_SUMMARY = 1000
219
+
220
+
221
def describe_tenant_data(user_id: str, *, create: bool = False) -> Optional[TenantDataSummary]:
    """Build a read-only summary of a tenant's on-disk data.

    Returns None when tenant_paths.tenant_data_root() cannot resolve a
    root for `user_id`; callers treat that as "unauthorised".

    `create` is passed straight through to tenant_data_root(). When the
    root does not exist on disk, the summary has exists=False, empty
    lists and a zero size.

    At most _MAX_ENTRIES_PER_SUMMARY entries (in sorted path order) are
    inspected. A filesystem error part-way through is logged and the
    partial listing is returned rather than raised.
    """
    tenant_root = tenant_paths.tenant_data_root(user_id, create=create)
    if tenant_root is None:
        return None

    found_files: list[str] = []
    found_dirs: list[str] = []
    byte_total = 0
    present = tenant_root.exists()

    if present:
        try:
            # Slicing the sorted listing keeps the first N entries, the
            # same set a counted loop with an early break would visit.
            for entry in sorted(tenant_root.rglob("*"))[:_MAX_ENTRIES_PER_SUMMARY]:
                rel_text = str(entry.relative_to(tenant_root))
                if entry.is_file():
                    found_files.append(rel_text)
                    try:
                        byte_total += entry.stat().st_size
                    except OSError:
                        # The file vanished between listing and stat;
                        # count it as zero bytes and carry on.
                        pass
                elif entry.is_dir():
                    found_dirs.append(rel_text)
        except OSError as e:
            # Return what was gathered so far; the log records why the
            # listing is incomplete.
            logger.warning("describe_tenant_data partial: %s", e)

    return {
        "user_id": user_id,
        "data_root": str(tenant_root),
        "exists": present,
        "files": found_files,
        "dirs": found_dirs,
        "total_size_bytes": byte_total,
        "cap_bytes": None,
    }
280
+
281
+
282
def describe_shared_data() -> dict:
    """Build a read-only summary of the legacy single-tenant data root.

    The root is $DELIMIT_HOME (expanded and resolved) when that variable
    is set, otherwise `~/.delimit`. This serves the shared-bearer
    (founder/system) path on /tenant/data. The result has the same keys
    as describe_tenant_data(), with `user_id` set to "" because the
    shared-bearer caller has no tenant scope.

    Anything under the top-level `tenants/` directory is left out and
    does not count towards the _MAX_ENTRIES_PER_SUMMARY cap.
    """
    configured_home = os.environ.get("DELIMIT_HOME")
    if configured_home:
        root = Path(configured_home).expanduser().resolve()
    else:
        root = Path.home() / ".delimit"

    found_files: list[str] = []
    found_dirs: list[str] = []
    byte_total = 0
    present = root.is_dir()

    if present:
        seen = 0
        try:
            for entry in sorted(root.rglob("*")):
                # The per-tenant tree is not part of the shared view; the
                # founder reaches it through the tenant-list / admin
                # surface instead.
                try:
                    in_tenant_tree = entry.relative_to(root).parts[:1] == ("tenants",)
                except ValueError:
                    in_tenant_tree = False
                if in_tenant_tree:
                    continue
                seen += 1
                if seen > _MAX_ENTRIES_PER_SUMMARY:
                    break
                rel_text = str(entry.relative_to(root))
                if entry.is_file():
                    found_files.append(rel_text)
                    try:
                        byte_total += entry.stat().st_size
                    except OSError:
                        # File vanished between listing and stat.
                        pass
                elif entry.is_dir():
                    found_dirs.append(rel_text)
        except OSError as e:
            logger.warning("describe_shared_data partial: %s", e)

    return {
        "user_id": "",  # shared-bearer: no tenant scope
        "data_root": str(root),
        "exists": present,
        "files": found_files,
        "dirs": found_dirs,
        "total_size_bytes": byte_total,
        "cap_bytes": None,
    }