delimit-cli 4.5.13 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +9 -8
- package/bin/delimit-cli.js +179 -4
- package/bin/delimit-setup.js +46 -6
- package/gateway/ai/_compile_status.py +154 -0
- package/gateway/ai/agent_dispatch.py +41 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/tools_infra.py +163 -10
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/daemon.py +10 -0
- package/gateway/ai/daily_digest.py +1 -2
- package/gateway/ai/delimit_daemon.py +67 -0
- package/gateway/ai/dispatch_gate.py +399 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/hot_reload.py +1 -2
- package/gateway/ai/led193_daemon/executor.py +9 -0
- package/gateway/ai/ledger_manager.py +90 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/notify.py +39 -0
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +1437 -0
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reaper.py +70 -0
- package/gateway/ai/reddit_scanner.py +17 -6
- package/gateway/ai/sensing/schema.py +1 -1
- package/gateway/ai/sensing/signal_store.py +0 -1
- package/gateway/ai/server.py +5490 -1602
- package/gateway/ai/social_capability/fit_floor.py +114 -12
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tdqs_lint.py +611 -0
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/gateway/ai/usage_allowlist.py +198 -0
- package/gateway/ai/workers/base.py +2 -2
- package/gateway/ai/workers/executor.py +32 -3
- package/gateway/ai/workers/outreach_drafter.py +0 -1
- package/gateway/ai/workers/pr_drafter.py +0 -1
- package/gateway/ai/x_ranker.py +12 -2
- package/gateway/core/json_schema_diff.py +25 -1
- package/lib/auth-signin.js +136 -0
- package/lib/auth-signout.js +169 -0
- package/lib/delimit-template.js +11 -0
- package/lib/migration-2092-banner.js +213 -0
- package/package.json +5 -2
- package/server.json +4 -4
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""LED-2268 P0 Phase 0.1 — gateway-side tenant API key validator.
|
|
2
|
+
|
|
3
|
+
The dashboard at app.delimit.ai (`/dashboard/api-keys`) issues per-user
|
|
4
|
+
keys with the `dlmt_<43-char-base64url>` shape. Only the sha256 of the
|
|
5
|
+
plaintext is stored — see supabase migration 034 + lib/user-api-keys.ts.
|
|
6
|
+
|
|
7
|
+
This module owns the gateway side of that contract:
|
|
8
|
+
- parse `Authorization: ApiKey dlmt_xxx` from an HTTP header
|
|
9
|
+
- sha256-hash the plaintext
|
|
10
|
+
- look up the hash in `user_api_keys` via service-role Supabase REST
|
|
11
|
+
- return `{user_id, scope, key_id}` for a live (non-revoked) match
|
|
12
|
+
- return None for anything else (bad shape, no match, revoked, etc.)
|
|
13
|
+
|
|
14
|
+
Phase 0.1 stays minimal on purpose:
|
|
15
|
+
- no synchronous `last_used_at` write (the Phase 0.2 update now runs on a background thread; see `_fire_last_used_update`)
|
|
16
|
+
- no cache (every call hits Supabase; fine at current volume)
|
|
17
|
+
- no JWT, no rotation grace period — soft-delete is hard once set
|
|
18
|
+
|
|
19
|
+
Phase 0.2 will add tenant-scoped data routing (per-user data root under
|
|
20
|
+
~/.delimit/tenants/<user_id>/); this module only resolves identity.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import hashlib
|
|
25
|
+
import json
|
|
26
|
+
import logging
|
|
27
|
+
import os
|
|
28
|
+
import threading
|
|
29
|
+
import urllib.error
|
|
30
|
+
import urllib.parse
|
|
31
|
+
import urllib.request
|
|
32
|
+
from datetime import datetime, timezone
|
|
33
|
+
from typing import Optional, TypedDict
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger("delimit.tenant_auth")
|
|
36
|
+
|
|
37
|
+
# Process-local counter for failed last_used_at PATCH writes. Lets
|
|
38
|
+
# operators (and future /heartbeats-style health surfaces) see whether
|
|
39
|
+
# the audit-write fire-and-forget is silently dropping a sustained
|
|
40
|
+
# burst — debug log on every error is too quiet to notice in journalctl
|
|
41
|
+
# during a Supabase outage. Reset only on process restart by design.
|
|
42
|
+
_last_used_dropped_count = 0
|
|
43
|
+
_last_used_dropped_lock = threading.Lock()
|
|
44
|
+
# Log at INFO every Nth drop so a sustained outage surfaces without
|
|
45
|
+
# flooding the journal on transient blips. First drop is also INFO so
|
|
46
|
+
# the first sign of trouble is visible.
|
|
47
|
+
_LAST_USED_DROP_LOG_EVERY = 10
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_last_used_dropped_count() -> int:
    """Return the number of dropped last_used_at PATCH writes for this process.

    The counter is only ever incremented by the background updater and is
    never reset while the process lives. It is an operational signal for
    health/metrics surfaces, not a security signal: a dropped write affects
    audit completeness, not whether a key authenticates.
    """
    # Take the same lock the writer uses so the read is a consistent snapshot.
    with _last_used_dropped_lock:
        snapshot = _last_used_dropped_count
    return snapshot
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class TenantIdentity(TypedDict):
    """Identity that a presented tenant API key resolved to."""

    # Owner of the key, as stored in the `user_id` column of the matched row.
    user_id: str
    # Scope string stored on the key row; empty when the row has none.
    scope: str
    # Row id of the key itself, stringified; empty when the row has none.
    key_id: str
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# The plaintext shape issued by lib/user-api-keys.ts is `dlmt_` + 43
|
|
69
|
+
# base64url chars (32 random bytes encoded). Reject anything that doesn't
|
|
70
|
+
# fit before hashing — saves a Supabase round-trip on malformed input.
|
|
71
|
+
_KEY_PREFIX = "dlmt_"
|
|
72
|
+
_KEY_PLAINTEXT_LEN_MIN = len(_KEY_PREFIX) + 32 # be lenient on lower bound
|
|
73
|
+
_KEY_PLAINTEXT_LEN_MAX = len(_KEY_PREFIX) + 128 # cap to defeat absurd inputs
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def parse_auth_header(header: str) -> Optional[tuple[str, str]]:
    """Split an `Authorization` header value into (scheme, token).

    Two schemes are recognised:
      - `Bearer <token>`     — the shared-bearer pattern (founder/system)
      - `ApiKey <plaintext>` — a per-user tenant key

    Returns:
        (scheme, token) with the scheme lower-cased and both parts stripped,
        or None when the header is empty, has no token part, or names any
        other scheme. Which scheme is acceptable for a given endpoint is the
        caller's decision.
    """
    if not header:
        return None

    # Split on the first run of whitespace only: the token may contain spaces.
    pieces = header.split(None, 1)
    if len(pieces) != 2:
        return None

    raw_scheme, raw_token = pieces
    scheme = raw_scheme.strip().lower()
    token = raw_token.strip()
    if not token or scheme not in ("bearer", "apikey"):
        return None
    return (scheme, token)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _hash_key(plaintext: str) -> str:
|
|
98
|
+
"""sha256(plaintext) as lowercase hex — matches lib/user-api-keys.ts."""
|
|
99
|
+
return hashlib.sha256(plaintext.encode("utf-8")).hexdigest()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _looks_like_tenant_key(plaintext: str) -> bool:
    """Cheap prefix + length check, run before any network round-trip.

    Only the `_KEY_PREFIX` prefix and the overall length bounds are checked;
    the characters after the prefix are not inspected.
    """
    if plaintext.startswith(_KEY_PREFIX):
        return _KEY_PLAINTEXT_LEN_MIN <= len(plaintext) <= _KEY_PLAINTEXT_LEN_MAX
    return False
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def validate_api_key(plaintext: str) -> Optional[TenantIdentity]:
    """Resolve `dlmt_xxx` plaintext to a tenant identity, or None.

    Returns None for: malformed input, missing or unusable Supabase config,
    network/HTTP failure, undecodable or non-JSON response, no row matched,
    row marked revoked. The caller treats None as "unauthorized" and must
    never leak which of these it was.

    The lookup itself is synchronous. Failures are logged at debug level and
    never raised. On a successful match a background `last_used_at` update
    is started via `_fire_last_used_update`; it does not delay or affect the
    returned identity.

    Args:
        plaintext: The token presented after the `ApiKey` scheme.

    Returns:
        A TenantIdentity for a live key, otherwise None.
    """
    # Local import: only needed for the HTTPException family caught below.
    import http.client

    if not _looks_like_tenant_key(plaintext):
        return None

    supabase_url = os.environ.get("SUPABASE_URL", "").rstrip("/")
    service_key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
    if not supabase_url or not service_key:
        # If the gateway host hasn't been configured for Supabase, tenant
        # auth simply doesn't work — the shared-bearer path stays intact.
        logger.debug("validate_api_key: supabase env not configured")
        return None

    key_hash = _hash_key(plaintext)
    # Active-only lookup: the partial index `idx_user_api_keys_active_hash`
    # makes this O(log n) and guarantees revoked keys never match.
    url = (
        f"{supabase_url}/rest/v1/user_api_keys"
        f"?select=id,user_id,scope"
        f"&key_hash=eq.{urllib.parse.quote(key_hash, safe='')}"
        f"&revoked_at=is.null"
        f"&limit=1"
    )
    try:
        # Request() is built inside the try: it raises ValueError for a
        # SUPABASE_URL without a scheme, which must not escape as a crash.
        req = urllib.request.Request(
            url,
            headers={
                "apikey": service_key,
                "Authorization": f"Bearer {service_key}",
                "Accept": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            body = resp.read()
    except urllib.error.HTTPError as e:
        logger.debug("validate_api_key supabase HTTP %s", getattr(e, "code", "?"))
        return None
    except (urllib.error.URLError, OSError, TimeoutError) as e:
        logger.debug("validate_api_key supabase net err: %s", e)
        return None
    except (ValueError, http.client.HTTPException) as e:
        # ValueError: malformed URL. HTTPException: e.g. IncompleteRead while
        # reading the body, or InvalidURL; neither derives from OSError.
        logger.debug("validate_api_key supabase request failed: %s", e)
        return None

    try:
        rows = json.loads(body)
    except ValueError:
        # Covers json.JSONDecodeError and the UnicodeDecodeError raised when
        # the response bytes are not valid UTF-8/16/32.
        logger.debug("validate_api_key non-json response")
        return None
    if not isinstance(rows, list) or not rows:
        return None
    row = rows[0]
    if not isinstance(row, dict):
        return None
    user_id = row.get("user_id") or ""
    if not user_id:
        return None
    key_id = str(row.get("id") or "")
    # Phase 0.2: fire-and-forget last_used_at write. Lets operators see
    # "this key was actually used in the last N hours" in the dashboard
    # API-keys list, which is important for rotation hygiene (you can
    # tell which keys are dead before deciding what to revoke).
    # Backgrounded so the validate path stays as fast as it was in 0.1.
    if key_id:
        _fire_last_used_update(supabase_url, service_key, key_id)
    return TenantIdentity(
        user_id=str(user_id),
        scope=str(row.get("scope") or ""),
        key_id=key_id,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _fire_last_used_update(supabase_url: str, service_key: str, key_id: str) -> None:
    """Start a background PATCH that sets last_used_at for `key_id`.

    Best-effort audit signal only: every exception raised by the PATCH is
    swallowed inside the worker thread, so the caller's response is not
    affected by the outcome. Each swallowed failure bumps the process-local
    dropped-write counter.

    The worker is a daemon thread, so a hung request cannot keep the process
    alive past shutdown.
    """
    def _patch():
        global _last_used_dropped_count
        try:
            endpoint = (
                f"{supabase_url.rstrip('/')}/rest/v1/user_api_keys"
                f"?id=eq.{urllib.parse.quote(key_id, safe='')}"
            )
            stamp = datetime.now(timezone.utc).isoformat()
            payload = json.dumps({"last_used_at": stamp}).encode("utf-8")
            request_headers = {
                "apikey": service_key,
                "Authorization": f"Bearer {service_key}",
                "Content-Type": "application/json",
                # return=minimal — the updated row is not needed back.
                "Prefer": "return=minimal",
            }
            patch_req = urllib.request.Request(
                endpoint,
                data=payload,
                method="PATCH",
                headers=request_headers,
            )
            with urllib.request.urlopen(patch_req, timeout=5):
                pass
        except Exception as e:  # noqa: BLE001 — fire-and-forget; never raise
            with _last_used_dropped_lock:
                _last_used_dropped_count += 1
                count = _last_used_dropped_count
            # INFO on the first drop and every Nth after it, DEBUG otherwise:
            # a sustained outage surfaces without spamming on blips.
            noteworthy = count == 1 or count % _LAST_USED_DROP_LOG_EVERY == 0
            logger.log(
                logging.INFO if noteworthy else logging.DEBUG,
                "last_used_at update dropped (cum_dropped=%d): %s",
                count, e,
            )

    worker = threading.Thread(
        target=_patch,
        daemon=True,
        name="delimit-last-used-update",
    )
    worker.start()
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def authenticate(
    header: str,
    shared_bearer: str = "",
    impersonation_header: str = "",
) -> Optional[dict]:
    """End-to-end auth resolver for an HTTP request.

    Args:
        header: Raw `Authorization` header value.
        shared_bearer: The configured shared bearer secret; an empty value
            disables the bearer path entirely.
        impersonation_header: Optional tenant id the bearer holder wants to
            act as; empty means no impersonation.

    Returns:
        A dict describing the resolved identity, or None if the request
        should be rejected. Three accepted-request outcomes:

        - `{"auth_mode": "bearer", "is_tenant_scoped": False}` — shared-
          bearer match WITHOUT impersonation. Founder/system access to
          the shared `~/.delimit/` view. No user_id field present.
        - `{"auth_mode": "bearer", "is_tenant_scoped": True, "user_id":
          ..., "scope": "", "key_id": "bearer-impersonation"}` — shared
          bearer match WITH a valid impersonation header. The trusted
          BFF/system is acting on behalf of a specific tenant (LED-2268
          Phase 0.5a, lets the Vercel dashboard read/write tenant data
          on behalf of a NextAuth-authenticated user without the user
          ever exposing their plaintext API key to the BFF).
        - `{"auth_mode": "apikey", "is_tenant_scoped": True, "user_id":
          ..., "scope": ..., "key_id": ...}` — tenant key match.

    Trust model: the shared bearer is held only by a SMALL set of
    trusted clients (Vercel BFF + the gateway host). If it leaks, the
    blast radius is already total (founder-class access to everything
    the gateway serves). The impersonation header just lets that
    bearer be more granular per-request; it does NOT grant access the
    bearer didn't already have.

    Order: Bearer first (cheap string compare), then ApiKey (Supabase
    round-trip). A request can only present one Authorization header,
    so the order is which-scheme-wins-when-the-shape-fits.
    """
    # Local import: used only for the constant-time secret comparison.
    import hmac

    parsed = parse_auth_header(header)
    if not parsed:
        return None
    scheme, token = parsed
    if scheme == "bearer":
        if not shared_bearer:
            return None
        # Constant-time comparison so response timing does not reveal how
        # much of a guessed bearer matched. compare_digest only accepts
        # ASCII str, so compare the encoded bytes; surrogatepass keeps an
        # odd header byte from raising instead of simply not matching.
        presented = token.encode("utf-8", "surrogatepass")
        expected = shared_bearer.encode("utf-8", "surrogatepass")
        if not hmac.compare_digest(presented, expected):
            return None
        # Phase 0.5a — optional tenant impersonation. If the BFF/system
        # presented a tenant header AND it sanitises to a valid segment,
        # treat as tenant-scoped under that user_id. Validate via the
        # SAME sanitiser tenant_paths uses for filesystem routing so the
        # downstream code sees a consistent identity.
        if impersonation_header:
            # Lazy import to avoid circular: tenant_paths only needed when
            # impersonation is actually requested.
            from . import tenant_paths
            seg = tenant_paths.safe_user_segment(impersonation_header)
            if seg is None:
                # Header was present but garbage. Reject the request
                # entirely rather than silently falling back to shared
                # scope — a confused BFF surfacing here is exactly the
                # class of bug that header validation should catch.
                logger.info(
                    "authenticate: bearer + invalid impersonation header rejected: %r",
                    impersonation_header[:64],
                )
                return None
            # We pass the RAW header value (not the sanitised segment)
            # downstream so callers see the same user_id shape as the
            # ApiKey path. tenant_paths.safe_user_segment runs again
            # inside tenant_data_root for actual fs routing.
            return {
                "auth_mode": "bearer",
                "is_tenant_scoped": True,
                "user_id": impersonation_header,
                "scope": "",
                "key_id": "bearer-impersonation",
            }
        return {"auth_mode": "bearer", "is_tenant_scoped": False}
    if scheme == "apikey":
        identity = validate_api_key(token)
        if identity is None:
            return None
        return {
            "auth_mode": "apikey",
            "is_tenant_scoped": True,
            "user_id": identity["user_id"],
            "scope": identity["scope"],
            "key_id": identity["key_id"],
        }
    return None
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""LED-2268 P0 Phase 0.3 — first consumer of the tenant_data_root primitive.
|
|
2
|
+
|
|
3
|
+
Provides describe_tenant_data() — the read-only view of what's on disk
|
|
4
|
+
inside a given tenant's data root. Used by the /tenant/data endpoint
|
|
5
|
+
and intended to power the dashboard's "your data lives here" home tile
|
|
6
|
+
for browser-only operators.
|
|
7
|
+
|
|
8
|
+
The describe call is deliberately minimal:
|
|
9
|
+
- data_root: absolute path string the gateway resolved for this tenant
|
|
10
|
+
- exists: has the dir been created yet?
|
|
11
|
+
- files: relative paths of files inside the dir (sorted by path)
|
|
12
|
+
- dirs: relative paths of subdirectories
|
|
13
|
+
- total_size_bytes: sum of all file sizes (sentinel for usage display)
|
|
14
|
+
- cap_bytes: soft cap if configured (Phase 0.3 hard-codes None — no cap)
|
|
15
|
+
|
|
16
|
+
describe_tenant_data() ONLY reads. The write/read/delete helpers below are Phase 0.4, for when
|
|
17
|
+
the dashboard ships its first "create note / save memory" surface.
|
|
18
|
+
|
|
19
|
+
Founder-data migration is handled by the SEPARATE manual script
|
|
20
|
+
scripts/delimit_seed_tenant_data.py (also in this PR), not by an
|
|
21
|
+
auto-trigger inside describe(). Keeps the read path side-effect-free.
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import logging
|
|
26
|
+
import os
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Optional, TypedDict
|
|
29
|
+
|
|
30
|
+
from . import tenant_paths
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger("delimit.tenant_data")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
36
|
+
# Phase 0.4 — write/read/delete limits + allowlist
|
|
37
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
# Max bytes a single tenant file may contain. Generous enough for
|
|
40
|
+
# memory.jsonl / ledger.jsonl scale (typically <100KB per tenant) but
|
|
41
|
+
# tight enough that a runaway client can't fill the disk. Future quota
|
|
42
|
+
# enforcement will sum across files; this is per-file.
|
|
43
|
+
MAX_FILE_BYTES = 1024 * 1024 # 1 MiB
|
|
44
|
+
|
|
45
|
+
# Allowlist of file extensions tenants may write/read. Restrictive on
|
|
46
|
+
# purpose: text-shaped data files only. Blocks .py / .sh / .so / .dll
|
|
47
|
+
# / anything executable so the tenant data root can never become a
|
|
48
|
+
# code-drop or LD-load source.
|
|
49
|
+
_ALLOWED_EXTENSIONS = frozenset({
|
|
50
|
+
".json",
|
|
51
|
+
".jsonl",
|
|
52
|
+
".md",
|
|
53
|
+
".txt",
|
|
54
|
+
".csv",
|
|
55
|
+
".yaml",
|
|
56
|
+
".yml",
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
# Max path-segment count (depth) to discourage deeply-nested layouts
|
|
60
|
+
# that complicate audit + backup. Practical cap; nothing in the
|
|
61
|
+
# legitimate use case needs >5 levels of subdirectory.
|
|
62
|
+
_MAX_PATH_DEPTH = 5
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TenantPathError(Exception):
    """Signals that a tenant-data path (or its content) failed validation.

    The exception message is a short machine-style diagnostic such as
    "path_too_deep", "extension_forbidden" or "path_escapes_root", meant to
    be surfaced to the user. The expected caller pattern is
    `except TenantPathError as e: return 400 ...`.
    """
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _resolve_tenant_file(user_id: str, rel_path: str, *, create_root: bool = False) -> Path:
    """Validate `rel_path` and resolve it inside the tenant's data root.

    Args:
        user_id: Tenant identifier, passed to tenant_paths.tenant_data_root.
        rel_path: Client-supplied path relative to the tenant root.
        create_root: Forwarded as `create=` to tenant_data_root.

    Returns:
        The absolute, resolved Path, verified to lie under the resolved
        tenant root.

    Raises:
        TenantPathError: with one of these messages —
            "path_required"            empty / non-string / whitespace-only
            "path_invalid"             contains a NUL byte
            "path_must_be_relative"    starts with a separator
            "path_traversal_forbidden" has a '.' or '..' segment
            "path_too_deep"            more than _MAX_PATH_DEPTH segments
            "extension_forbidden"      final suffix not in _ALLOWED_EXTENSIONS
            "tenant_resolve_failed"    tenant_data_root returned None
            "path_escapes_root"        resolved path is outside the root
    """
    if not (isinstance(rel_path, str) and rel_path):
        raise TenantPathError("path_required")
    if "\x00" in rel_path:
        raise TenantPathError("path_invalid")

    # Backslashes count as separators too, so Windows-style input is held to
    # the same rules on a POSIX server.
    cleaned = rel_path.replace("\\", "/").strip()
    if not cleaned:
        raise TenantPathError("path_required")
    if cleaned.startswith("/"):
        raise TenantPathError("path_must_be_relative")

    # Cheap pre-check that rejects traversal segments before the filesystem
    # is touched; the containment check after resolve() is the second line
    # of defence.
    segments = [seg for seg in cleaned.split("/") if seg]
    for seg in segments:
        if seg in ("", ".", ".."):
            raise TenantPathError("path_traversal_forbidden")
    if len(segments) > _MAX_PATH_DEPTH:
        raise TenantPathError("path_too_deep")

    # Only the final segment's extension is checked against the allowlist.
    extension = Path(segments[-1]).suffix.lower()
    if extension not in _ALLOWED_EXTENSIONS:
        raise TenantPathError("extension_forbidden")

    tenant_root = tenant_paths.tenant_data_root(user_id, create=create_root)
    if tenant_root is None:
        raise TenantPathError("tenant_resolve_failed")

    # Resolve (following symlinks) and verify the result is still under the
    # resolved tenant root.
    resolved = tenant_root.joinpath(*segments).resolve()
    try:
        resolved.relative_to(tenant_root.resolve())
    except ValueError as e:
        raise TenantPathError("path_escapes_root") from e
    return resolved
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def write_tenant_file(user_id: str, rel_path: str, content: bytes) -> int:
    """Atomically write `content` to `rel_path` inside the tenant's data root.

    - Asks for the tenant root to be created (`create_root=True`) and
      creates every missing intermediate directory with mode 0o700.
    - Enforces MAX_FILE_BYTES on the byte length of `content`.
    - Writes to a uniquely named sibling `.tmp` file, then renames it over
      the target (atomic on POSIX). The temp file is removed on failure.
    - File mode is 0o600 (gateway-process-owner readable only).

    Args:
        user_id: Tenant identifier.
        rel_path: Path relative to the tenant root; validated by
            `_resolve_tenant_file`.
        content: bytes, bytearray or memoryview to store.

    Returns:
        The number of bytes written.

    Raises:
        TenantPathError: on validation failure ("content_must_be_bytes",
            "content_too_large", or any path error).
        OSError: on filesystem failure.
    """
    # Local import so the file's top-level import block is left untouched.
    import tempfile

    if not isinstance(content, (bytes, bytearray, memoryview)):
        raise TenantPathError("content_must_be_bytes")
    # Measure the materialised bytes: len() of a memoryview counts elements,
    # not bytes, so a multi-byte-item view could otherwise slip past the cap.
    payload = bytes(content)
    if len(payload) > MAX_FILE_BYTES:
        raise TenantPathError("content_too_large")

    target = _resolve_tenant_file(user_id, rel_path, create_root=True)

    # Path.mkdir(parents=True, mode=...) applies `mode` only to the leaf and
    # gives parents default permissions, so create each missing level
    # explicitly, outermost first.
    missing = []
    ancestor = target.parent
    while not ancestor.exists():
        missing.append(ancestor)
        ancestor = ancestor.parent
    for directory in reversed(missing):
        directory.mkdir(mode=0o700, exist_ok=True)

    # mkstemp creates the file with mode 0o600 under a unique name, so
    # concurrent writers to the same target never share a temp file.
    fd, tmp_name = tempfile.mkstemp(
        prefix=target.name + ".",
        suffix=".tmp",
        dir=str(target.parent),
    )
    try:
        # A buffered file object writes the whole payload; a bare os.write
        # may return after a short write.
        with os.fdopen(fd, "wb") as handle:
            handle.write(payload)
        os.replace(tmp_name, target)
    except BaseException:
        # Don't leave a stray temp file behind in the tenant's directory.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
    return len(payload)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def read_tenant_file(user_id: str, rel_path: str) -> Optional[bytes]:
    """Read a tenant file, or None if it doesn't exist.

    Also returns None (and logs a warning) when the file is larger than
    MAX_FILE_BYTES, so an over-large file is never echoed back to a client;
    the caller surfaces that as "not found".

    Args:
        user_id: Tenant identifier.
        rel_path: Path relative to the tenant root; validated by
            `_resolve_tenant_file`.

    Returns:
        The file's bytes, or None when the path is absent, is not a regular
        file, or exceeds the size cap.

    Raises:
        TenantPathError: on validation failure.
        OSError: other filesystem errors (e.g. PermissionError) propagate —
            those indicate a bug or hostile filesystem state, not normal
            client input.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if not target.is_file():
        return None
    try:
        with target.open("rb") as handle:
            # Size the already-open descriptor, so the check and the read
            # refer to the same file even if the path is replaced meanwhile.
            size = os.fstat(handle.fileno()).st_size
            if size <= MAX_FILE_BYTES:
                # Read one byte past the cap so growth after the size check
                # is detected rather than returned.
                data = handle.read(MAX_FILE_BYTES + 1)
                size = len(data)
    except FileNotFoundError:
        # Deleted between the is_file() check and the open: plain "absent".
        return None
    if size > MAX_FILE_BYTES:
        # Defence in depth: even if a write somehow bypassed the cap,
        # don't echo the over-large content back to a client.
        logger.warning(
            "read_tenant_file refusing oversize file: user=%s path=%s size=%d",
            user_id, rel_path, size,
        )
        return None
    return data
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def delete_tenant_file(user_id: str, rel_path: str) -> bool:
    """Delete a tenant file. Returns True if deleted, False if absent.

    Args:
        user_id: Tenant identifier.
        rel_path: Path relative to the tenant root; validated by
            `_resolve_tenant_file`.

    Raises:
        TenantPathError: on validation failure, or "path_is_directory" when
            the path names a directory.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if target.is_dir():
        # We don't currently support tenant subdirs at the API level
        # (write creates them as a side effect of the file path).
        # Reject directory deletes outright — tenants shouldn't be
        # able to recursively rm their own dir tree via this API.
        raise TenantPathError("path_is_directory")
    try:
        target.unlink()
    except FileNotFoundError:
        # Never existed, or a concurrent delete won the race: both are
        # reported as "absent" instead of raising.
        return False
    return True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class TenantDataSummary(TypedDict):
    """Shape of the summary handed back to a /tenant/data caller."""

    # Tenant the summary describes.
    user_id: str
    # Stringified path of the tenant's data root.
    data_root: str
    # Whether that root is present on disk.
    exists: bool
    # Relative paths of regular files found under the root.
    files: list[str]
    # Relative paths of subdirectories found under the root.
    dirs: list[str]
    # Sum of the sizes of the listed files.
    total_size_bytes: int
    # Soft cap, or None when no cap is configured.
    cap_bytes: Optional[int]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Conservative cap on how many entries we'll enumerate / size-sum before
|
|
214
|
+
# bailing out. A tenant with 100k files shouldn't be able to make a
|
|
215
|
+
# single /tenant/data call stat() every one of them on every dashboard
|
|
216
|
+
# refresh. Returning truncated counts is honest enough for "how full is
|
|
217
|
+
# my dir" UX; the dashboard can surface "(more — refresh to scan)".
|
|
218
|
+
_MAX_ENTRIES_PER_SUMMARY = 1000
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def describe_tenant_data(user_id: str, *, create: bool = False) -> Optional[TenantDataSummary]:
    """Read-only summary of a tenant's on-disk data.

    Args:
        user_id: Tenant identifier.
        create: Forwarded to tenant_paths.tenant_data_root; when True the
            root is asked to be created (used by /tenant/setup-style flows).

    Returns:
        None if tenant_data_root cannot resolve `user_id` (caller treats
        that as "unauthorised"). Otherwise a TenantDataSummary. When the
        root does not exist yet, the summary has exists=False and empty
        lists — the normal first-call shape.

    At most _MAX_ENTRIES_PER_SUMMARY directory entries are enumerated; the
    walk stops there, so for larger trees the lists and total are a
    truncated sample (sorted by path) rather than a complete listing.
    """
    # Local import so the file's top-level import block is left untouched.
    import itertools

    root = tenant_paths.tenant_data_root(user_id, create=create)
    if root is None:
        return None

    summary: TenantDataSummary = {
        "user_id": user_id,
        "data_root": str(root),
        "exists": root.exists(),
        "files": [],
        "dirs": [],
        "total_size_bytes": 0,
        "cap_bytes": None,
    }

    if not summary["exists"]:
        return summary

    files: list[str] = []
    dirs: list[str] = []
    total = 0
    try:
        # Apply the cap while walking: sorting the full rglob() first would
        # enumerate every entry and defeat the point of the cap.
        entries = sorted(
            itertools.islice(root.rglob("*"), _MAX_ENTRIES_PER_SUMMARY)
        )
        for entry in entries:
            rel_str = str(entry.relative_to(root))
            if entry.is_file():
                files.append(rel_str)
                try:
                    total += entry.stat().st_size
                except OSError:
                    # Race: file existed in glob but vanished by stat.
                    # Treat as zero-size and continue. Not a fatal error.
                    pass
            elif entry.is_dir():
                dirs.append(rel_str)
    except OSError as e:
        # Don't blow up the response — return what we have so the caller
        # at least sees the root + the readability problem in the log.
        logger.warning("describe_tenant_data partial: %s", e)

    summary["files"] = files
    summary["dirs"] = dirs
    summary["total_size_bytes"] = total
    return summary
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def describe_shared_data() -> dict:
    """Read-only summary of the legacy single-tenant `~/.delimit/` view.

    Used by the shared-bearer (founder/system) path on /tenant/data.
    The root is `$DELIMIT_HOME` (expanded and resolved) when that variable
    is set, otherwise `~/.delimit`.

    Returns:
        A dict with the same keys as describe_tenant_data's summary;
        `user_id` is the empty string because the shared-bearer caller has
        no tenant scope. The top-level `tenants` entry and everything under
        it is excluded.

    At most _MAX_ENTRIES_PER_SUMMARY entries are enumerated; the walk stops
    there, so for larger trees the lists and total are a truncated sample
    (sorted by path) rather than a complete listing.
    """
    # Local import so the file's top-level import block is left untouched.
    import itertools

    home = os.environ.get("DELIMIT_HOME")
    root = Path(home).expanduser().resolve() if home else (Path.home() / ".delimit")
    summary: dict = {
        "user_id": "",  # shared-bearer: no tenant scope
        "data_root": str(root),
        "exists": root.is_dir(),
        "files": [],
        "dirs": [],
        "total_size_bytes": 0,
        "cap_bytes": None,
    }
    if not summary["exists"]:
        return summary

    def _iter_entries():
        """Yield every entry under root, never descending into tenants/."""
        at_top = True
        for dirpath, dirnames, filenames in os.walk(root):
            if at_top:
                # Prune the per-tenant tree in place so os.walk never
                # enters it, and hide a top-level entry of that name
                # whatever its type.
                dirnames[:] = [d for d in dirnames if d != "tenants"]
                filenames = [f for f in filenames if f != "tenants"]
                at_top = False
            base = Path(dirpath)
            for name in dirnames:
                yield base / name
            for name in filenames:
                yield base / name

    files: list[str] = []
    dirs: list[str] = []
    total = 0
    try:
        # Apply the cap while walking: sorting a full listing first would
        # enumerate every entry and defeat the point of the cap.
        entries = sorted(
            itertools.islice(_iter_entries(), _MAX_ENTRIES_PER_SUMMARY)
        )
        for entry in entries:
            rel_str = str(entry.relative_to(root))
            if entry.is_file():
                files.append(rel_str)
                try:
                    total += entry.stat().st_size
                except OSError:
                    # Vanished between listing and stat: count as zero.
                    pass
            elif entry.is_dir():
                dirs.append(rel_str)
    except OSError as e:
        logger.warning("describe_shared_data partial: %s", e)

    summary["files"] = files
    summary["dirs"] = dirs
    summary["total_size_bytes"] = total
    return summary
|