delimit-cli 4.6.0 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -8
- package/bin/delimit-cli.js +17 -3
- package/gateway/ai/agent_dispatch.py +5 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/tools_infra.py +13 -0
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/ledger_manager.py +81 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +768 -7
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reddit_scanner.py +7 -1
- package/gateway/ai/server.py +295 -116
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/package.json +4 -1
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""LED-2268 P0 Phase 0.3 — first consumer of the tenant_data_root primitive.
|
|
2
|
+
|
|
3
|
+
Provides describe_tenant_data() — the read-only view of what's on disk
|
|
4
|
+
inside a given tenant's data root. Used by the /tenant/data endpoint
|
|
5
|
+
and intended to power the dashboard's "your data lives here" home tile
|
|
6
|
+
for browser-only operators.
|
|
7
|
+
|
|
8
|
+
The describe call is deliberately minimal:
|
|
9
|
+
- data_root: absolute path string the gateway resolved for this tenant
|
|
10
|
+
- exists: has the dir been created yet?
|
|
11
|
+
- files: relative paths of files inside the dir (sorted by path; enumeration is capped)
|
|
12
|
+
- dirs: relative paths of subdirectories
|
|
13
|
+
- total_size_bytes: sum of all file sizes (sentinel for usage display)
|
|
14
|
+
- cap_bytes: soft cap if configured (Phase 0.3 hard-codes None — no cap)
|
|
15
|
+
|
|
16
|
+
Phase 0.3 shipped the read-only view; the write/read/delete helpers below are Phase 0.4, added for when
|
|
17
|
+
the dashboard ships its first "create note / save memory" surface.
|
|
18
|
+
|
|
19
|
+
Founder-data migration is handled by the SEPARATE manual script
|
|
20
|
+
scripts/delimit_seed_tenant_data.py (also in this PR), not by an
|
|
21
|
+
auto-trigger inside describe(). Keeps the read path side-effect-free.
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import logging
|
|
26
|
+
import os
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Optional, TypedDict
|
|
29
|
+
|
|
30
|
+
from . import tenant_paths
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger("delimit.tenant_data")


# ─────────────────────────────────────────────────────────────────────
# Phase 0.4 — per-file limits and the extension allowlist shared by the
# write / read / delete helpers
# ─────────────────────────────────────────────────────────────────────

# Upper bound, in bytes, on the content of one tenant file. Roomy enough
# for memory.jsonl / ledger.jsonl (typically <100KB per tenant) yet small
# enough that a runaway client cannot fill the disk. This limit is
# per-file; a future quota would sum across files.
MAX_FILE_BYTES = 1 << 20  # 1 MiB

# File extensions a tenant may write or read. Deliberately restrictive:
# text-shaped data only. Anything executable or loadable (.py / .sh /
# .so / .dll ...) is excluded so the tenant data root can never become a
# code-drop or LD-load source.
_ALLOWED_EXTENSIONS = frozenset(
    (".json", ".jsonl", ".md", ".txt", ".csv", ".yaml", ".yml")
)

# Maximum number of path segments (depth) in a tenant-relative path.
# Deep nesting complicates audit + backup, and no legitimate use case
# needs more than 5 levels.
_MAX_PATH_DEPTH = 5
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TenantPathError(Exception):
    """Signal that a tenant-data path (or payload) failed validation.

    The exception message is a short diagnostic code that is suitable
    for showing to the user, for example "path_too_deep",
    "extension_forbidden" or "path_escapes_root".

    Intended caller pattern: `except TenantPathError as e: return 400 ...`.
    """
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _resolve_tenant_file(user_id: str, rel_path: str, *, create_root: bool = False) -> Path:
    """Turn a tenant-supplied relative path into an absolute path under the tenant root.

    Checks run in this order, each raising TenantPathError with the
    quoted message:
      - "path_required": rel_path is not a str, is empty, or is blank
        after stripping whitespace
      - "path_invalid": rel_path contains a NUL byte
      - "path_must_be_relative": the normalised path starts with '/'
      - "path_traversal_forbidden": any segment is '.' or '..'
      - "path_too_deep": more than _MAX_PATH_DEPTH segments
      - "extension_forbidden": suffix of the final segment (lower-cased)
        is not in _ALLOWED_EXTENSIONS
      - "tenant_resolve_failed": tenant_paths.tenant_data_root() returned
        None for this user_id
      - "path_escapes_root": the resolved candidate is not under the
        resolved tenant root

    `create_root` is forwarded to tenant_paths.tenant_data_root() as
    `create`. Returns the resolved absolute Path.
    """
    if not (isinstance(rel_path, str) and rel_path):
        raise TenantPathError("path_required")
    if "\x00" in rel_path:
        raise TenantPathError("path_invalid")

    # Backslashes count as separators too, so Windows-style input cannot
    # smuggle a traversal past the segment checks on a Linux server.
    cleaned = rel_path.replace("\\", "/").strip()
    if cleaned == "":
        raise TenantPathError("path_required")
    if cleaned[0] == "/":
        raise TenantPathError("path_must_be_relative")

    # Cheap lexical checks first, so obvious attacks are rejected without
    # touching the filesystem. Empty segments ("a//b") are dropped here.
    segments = [seg for seg in cleaned.split("/") if seg]
    for seg in segments:
        if seg in (".", ".."):
            raise TenantPathError("path_traversal_forbidden")
    if len(segments) > _MAX_PATH_DEPTH:
        raise TenantPathError("path_too_deep")

    # The allowlist is applied to the last segment only.
    extension = Path(segments[-1]).suffix.lower()
    if extension not in _ALLOWED_EXTENSIONS:
        raise TenantPathError("extension_forbidden")

    tenant_root = tenant_paths.tenant_data_root(user_id, create=create_root)
    if tenant_root is None:
        raise TenantPathError("tenant_resolve_failed")

    # Second line of defence: after resolution (which follows symlinks)
    # the candidate must still sit under the resolved tenant root.
    resolved = tenant_root.joinpath(*segments).resolve()
    try:
        resolved.relative_to(tenant_root.resolve())
    except ValueError as exc:
        raise TenantPathError("path_escapes_root") from exc
    return resolved
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def write_tenant_file(user_id: str, rel_path: str, content: bytes) -> int:
    """Atomically write `content` to `rel_path` inside the tenant's data root.

    - `content` must be bytes, bytearray or memoryview; its size in BYTES
      must not exceed MAX_FILE_BYTES.
    - The tenant root is created if missing (create_root=True), then the
      target's parent directory is created with mode 0o700 requested.
      pathlib applies `mode` to the leaf directory only; missing
      intermediate directories get default permissions.
    - Data goes to a sibling `<name>.tmp` file opened with mode 0o600,
      which is then renamed over the target with os.replace().
    - If writing or renaming fails, the `.tmp` file is removed before the
      error propagates.

    Returns the number of bytes written. Raises TenantPathError on
    validation failure or OSError on filesystem failure.
    """
    if not isinstance(content, (bytes, bytearray, memoryview)):
        raise TenantPathError("content_must_be_bytes")
    # len() of a memoryview counts elements, not bytes; a view with a
    # multi-byte item format would otherwise slip past the size cap.
    size = content.nbytes if isinstance(content, memoryview) else len(content)
    if size > MAX_FILE_BYTES:
        raise TenantPathError("content_too_large")
    payload = bytes(content)

    target = _resolve_tenant_file(user_id, rel_path, create_root=True)
    target.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
    tmp = target.with_name(target.name + ".tmp")
    # Use os.open so the mode is set at creation time (chmod-after-write
    # would race with a reader that opened between create + chmod).
    fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        try:
            # os.write may write fewer bytes than asked; loop until done.
            remaining = memoryview(payload)
            while remaining:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
        finally:
            os.close(fd)
        os.replace(tmp, target)
    except BaseException:
        # Don't leave a stray .tmp behind on a failed write or rename.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
    return len(payload)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def read_tenant_file(user_id: str, rel_path: str) -> Optional[bytes]:
    """Read a tenant file, or return None if it doesn't exist.

    Returns None when the resolved path is not a regular file, when the
    file disappears while being read, or when its size exceeds
    MAX_FILE_BYTES (logged as a warning; the caller surfaces it as
    "not found").

    Raises TenantPathError on validation failure. Other filesystem
    errors (e.g. PermissionError) propagate — those indicate a bug or
    hostile filesystem state, not normal client input.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if not target.is_file():
        return None
    try:
        size = target.stat().st_size
    except FileNotFoundError:
        # Deleted between the is_file() check and the stat.
        return None
    if size > MAX_FILE_BYTES:
        # Defence in depth: even if a write somehow bypassed the cap,
        # don't echo the over-large content back to a client.
        logger.warning(
            "read_tenant_file refusing oversize file: user=%s path=%s size=%d",
            user_id, rel_path, size,
        )
        return None
    try:
        return target.read_bytes()
    except FileNotFoundError:
        # Deleted between the stat and the read: same as "absent".
        return None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def delete_tenant_file(user_id: str, rel_path: str) -> bool:
    """Delete a tenant file. Returns True if deleted, False if absent.

    A file that is removed by someone else between the existence check
    and the unlink is reported as absent (False) rather than raising.

    Raises TenantPathError on validation failure, or with
    "path_is_directory" when the resolved path is a directory.
    """
    target = _resolve_tenant_file(user_id, rel_path, create_root=False)
    if not target.exists():
        return False
    if target.is_dir():
        # Directories only come into being as a side effect of writing a
        # nested file path; there is no directory-level API. Refuse
        # outright so this call can never remove a directory.
        raise TenantPathError("path_is_directory")
    try:
        target.unlink()
    except FileNotFoundError:
        # Lost a race with a concurrent delete: nothing left to remove.
        return False
    return True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class TenantDataSummary(TypedDict):
    """Shape of the summary dict handed back to /tenant/data callers."""

    # Identity the summary was built for.
    user_id: str
    # Tenant data root, as a string path.
    data_root: str
    # Whether the data root is present on disk.
    exists: bool
    # Relative paths of files found under the root.
    files: list[str]
    # Relative paths of subdirectories found under the root.
    dirs: list[str]
    # Sum of the sizes of the listed files.
    total_size_bytes: int
    # Soft cap, or None when no cap is configured.
    cap_bytes: Optional[int]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Upper bound on how many directory entries a single summary call will
# list and size-sum. Without it, a tenant holding 100k files could force
# a stat() of every one on each dashboard refresh. A truncated listing
# is still honest enough for "how full is my dir" UX; the dashboard can
# surface "(more — refresh to scan)".
_MAX_ENTRIES_PER_SUMMARY = 1_000
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def describe_tenant_data(user_id: str, *, create: bool = False) -> Optional[TenantDataSummary]:
    """Read-only summary of a tenant's on-disk data.

    Returns None if `user_id` is unsanitisable (same failure mode as
    tenant_paths.tenant_data_root). Caller treats that as "unauthorised".

    When `create=False` (default) and the dir doesn't exist yet, returns
    a summary with exists=False and empty lists. This is the normal
    first-call shape — operators see "no data yet, you're brand new."
    When `create=True`, `create` is forwarded to tenant_data_root so the
    dir is created before it is summarised.

    Listing is limited to the first _MAX_ENTRIES_PER_SUMMARY entries
    (files and dirs together) in sorted path order; `total_size_bytes`
    only covers the files that were listed. If enumeration fails with an
    OSError the problem is logged and whatever was collected so far is
    returned.
    """
    # Local import: only the summary path needs heapq.
    import heapq

    root = tenant_paths.tenant_data_root(user_id, create=create)
    if root is None:
        return None

    summary: TenantDataSummary = {
        "user_id": user_id,
        "data_root": str(root),
        # is_dir() rather than exists(): a stray regular file at the root
        # path is not a usable data dir (matches describe_shared_data).
        "exists": root.is_dir(),
        "files": [],
        "dirs": [],
        "total_size_bytes": 0,
        "cap_bytes": None,
    }

    if not summary["exists"]:
        return summary

    files: list[str] = []
    dirs: list[str] = []
    total = 0
    try:
        # nsmallest(k, it) equals sorted(it)[:k] but holds at most k
        # entries in memory instead of materialising the whole tree.
        for entry in heapq.nsmallest(_MAX_ENTRIES_PER_SUMMARY, root.rglob("*")):
            rel_str = str(entry.relative_to(root))
            if entry.is_file():
                files.append(rel_str)
                try:
                    total += entry.stat().st_size
                except OSError:
                    # Race: file existed in glob but vanished by stat.
                    # Treat as zero-size and continue. Not a fatal error.
                    pass
            elif entry.is_dir():
                dirs.append(rel_str)
    except OSError as e:  # PermissionError is a subclass of OSError
        # Don't blow up the response — return what we have so the caller
        # at least sees the root + the readability problem in the log.
        logger.warning("describe_tenant_data partial: %s", e)

    summary["files"] = files
    summary["dirs"] = dirs
    summary["total_size_bytes"] = total
    return summary
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _iter_shared_entries(root: Path):
    """Yield every path below `root`, skipping the top-level `tenants` entry and its subtree."""
    for dirpath, dirnames, filenames in os.walk(root):
        here = Path(dirpath)
        if here == root:
            # Prune in place so os.walk never descends into the
            # per-tenant tree; also hide a top-level non-directory of
            # that name.
            dirnames[:] = [d for d in dirnames if d != "tenants"]
            filenames = [f for f in filenames if f != "tenants"]
        for name in dirnames:
            yield here / name
        for name in filenames:
            yield here / name


def describe_shared_data() -> dict:
    """Read-only summary of the legacy single-tenant `~/.delimit/` view.

    Used by the shared-bearer (founder/system) path on /tenant/data.
    The root is $DELIMIT_HOME (expanded + resolved) when that variable is
    set, otherwise `~/.delimit`. The result has the same keys as
    describe_tenant_data, with `user_id` set to "" because the
    shared-bearer caller has no tenant scope.

    The top-level `tenants/` subtree is excluded: it belongs to the
    per-tenant view, not to the founder's own data. Listing is limited
    to the first _MAX_ENTRIES_PER_SUMMARY remaining entries in sorted
    path order. An OSError during enumeration is logged and whatever was
    collected so far is returned.
    """
    # Local import: only the summary path needs heapq.
    import heapq

    home = os.environ.get("DELIMIT_HOME")
    root = Path(home).expanduser().resolve() if home else (Path.home() / ".delimit")
    summary: dict = {
        "user_id": "",  # shared-bearer: no tenant scope
        "data_root": str(root),
        "exists": root.is_dir(),
        "files": [],
        "dirs": [],
        "total_size_bytes": 0,
        "cap_bytes": None,
    }
    if not summary["exists"]:
        return summary

    files: list[str] = []
    dirs: list[str] = []
    total = 0
    try:
        # nsmallest(k, it) equals sorted(it)[:k] with bounded memory; the
        # pruned walk means tenant data is never even enumerated.
        entries = heapq.nsmallest(_MAX_ENTRIES_PER_SUMMARY, _iter_shared_entries(root))
        for entry in entries:
            rel_str = str(entry.relative_to(root))
            if entry.is_file():
                files.append(rel_str)
                try:
                    total += entry.stat().st_size
                except OSError:
                    # File vanished between listing and stat: count as 0.
                    pass
            elif entry.is_dir():
                dirs.append(rel_str)
    except OSError as e:  # PermissionError is a subclass of OSError
        logger.warning("describe_shared_data partial: %s", e)

    summary["files"] = files
    summary["dirs"] = dirs
    summary["total_size_bytes"] = total
    return summary
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""LED-2268 P0 Phase 0.2 — tenant-scoped filesystem layout.
|
|
2
|
+
|
|
3
|
+
The gateway today stores everything under `~/.delimit/` (memory.jsonl,
|
|
4
|
+
ledger.jsonl, evidence/, etc). That's correct for the single-tenant
|
|
5
|
+
founder install but doesn't generalize once paying customers run their
|
|
6
|
+
own tenants against a shared gateway host.
|
|
7
|
+
|
|
8
|
+
This module owns the path-resolver primitive for the per-tenant layout:
|
|
9
|
+
|
|
10
|
+
~/.delimit/ ← legacy / shared root (unchanged)
|
|
11
|
+
~/.delimit/tenants/
|
|
12
|
+
<safe-user-id>/ ← one dir per resolved API-key user
|
|
13
|
+
memory.jsonl
|
|
14
|
+
ledger.jsonl
|
|
15
|
+
evidence/
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
Phase 0.2 ONLY ships the resolver + sanitiser + base-dir creation. No
|
|
19
|
+
existing storage is migrated; no endpoint is yet rerouted through here.
|
|
20
|
+
Phase 0.3 will add the first endpoint that uses tenant_data_root() and
|
|
21
|
+
copy the founder's existing single-tenant data into her own tenant
|
|
22
|
+
folder.
|
|
23
|
+
|
|
24
|
+
Security note: the user_id segment comes from Supabase
|
|
25
|
+
`user_api_keys.user_id` (which itself comes from NextAuth users.id, a
|
|
26
|
+
GitHub-OAuth-derived string). It's NEVER raw user input from the
|
|
27
|
+
request — but we still sanitise it defensively so a malformed value in
|
|
28
|
+
the DB can't escape into adjacent dirs via `..` or NUL bytes.
|
|
29
|
+
"""
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import string
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Optional
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Base of the whole per-tenant tree. Lives under the existing delimit
|
|
40
|
+
# home so backup/restore tooling sees it without extra wiring.
|
|
41
|
+
def _delimit_home() -> Path:
    """Return the delimit home directory.

    A non-empty DELIMIT_HOME environment variable wins (with `~`
    expanded and the path resolved); otherwise the default is
    `~/.delimit`.
    """
    override = os.environ.get("DELIMIT_HOME")
    if not override:
        return Path.home() / ".delimit"
    return Path(override).expanduser().resolve()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Name of the directory, under the delimit home, that holds one
# sub-directory per tenant.
_TENANTS_DIRNAME = "tenants"

# Characters allowed in a sanitised user-id segment. Conservative on
# purpose: ASCII letters and digits plus '-', '_' and '.'. Nothing the
# shell, the path parser, or a downstream tool could interpret.
_SAFE_CHARS = frozenset(f"{string.ascii_letters}{string.digits}-_.")

# Longest sanitised user-id segment. Filesystems generally allow
# 255-byte basenames; this stays well below that. Longer input is
# prefix-truncated and given a hash suffix so distinct over-long IDs
# don't collide.
_MAX_SEGMENT_LEN = 64
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def safe_user_segment(user_id: str) -> Optional[str]:
    """Map a user_id onto a directory name that is safe to use on disk.

    Returns None — which callers MUST handle instead of falling back to
    some default dir — when the input is not a str, is empty or blank,
    sanitises to nothing but underscores, or sanitises to '.' / '..'.

    Steps:
      - Strip surrounding whitespace and lowercase.
      - Replace every character outside _SAFE_CHARS with '_'
        (one-for-one, so length and readability are preserved).
      - If the result is longer than _MAX_SEGMENT_LEN, keep the first
        (_MAX_SEGMENT_LEN - 9) characters and append `-<8 hex>`, the
        start of the SHA-256 of the stripped, lowercased input, so
        distinct over-long IDs don't collide.

    NOTE(review): the first two steps are lossy. IDs that differ only by
    case or by unsafe characters (e.g. "a/b" vs "a_b") map to the same
    segment, and only over-long IDs get the disambiguating hash. Confirm
    the upstream ID source can never produce such pairs.
    """
    if not isinstance(user_id, str):
        return None
    lowered = user_id.strip().lower()
    if not lowered:
        return None

    cleaned = "".join(ch if ch in _SAFE_CHARS else "_" for ch in lowered)
    if cleaned.strip("_") == "" or cleaned in (".", ".."):
        return None
    if len(cleaned) <= _MAX_SEGMENT_LEN:
        return cleaned

    # Over-long: reserve 9 characters of the budget for "-" + 8 hex digits.
    import hashlib
    suffix = hashlib.sha256(lowered.encode("utf-8")).hexdigest()[:8]
    return cleaned[: _MAX_SEGMENT_LEN - 9] + "-" + suffix
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def tenants_root() -> Path:
    """Return the parent directory of all per-tenant dirs: `<delimit home>/tenants`."""
    return _delimit_home().joinpath(_TENANTS_DIRNAME)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def tenant_data_root(user_id: str, *, create: bool = False) -> Optional[Path]:
    """Resolve the on-disk root for a specific tenant's data.

    Returns None if `user_id` doesn't sanitise to a usable segment, if
    path resolution fails, or if the resolved tenant dir is not strictly
    inside the resolved tenants_root(). Caller treats that as
    "unauthorised" — same shape as the validator.

    If `create=True`, ensures the directory exists (mkdir -p, mode 0o700
    requested) and then chmods tenants_root() to 0o700 on a best-effort
    basis. Errors from mkdir itself propagate as OSError. With the
    default `create=False` nothing is created or modified; resolving the
    paths may still read the filesystem.

    The returned path is the unresolved `tenants_root() / <segment>`.
    """
    seg = safe_user_segment(user_id)
    if seg is None:
        return None
    base = tenants_root()
    root = base / seg
    # Defence in depth against an unforeseen sanitiser bypass or a
    # symlinked tenant dir: the tenant root must resolve to a strict
    # descendant of tenants_root. Resolving to tenants_root itself is
    # rejected too, since that would expose every tenant's data.
    try:
        base_resolved = base.resolve()
        root_resolved = root.resolve()
    except (OSError, RuntimeError):
        return None
    if base_resolved not in root_resolved.parents:
        return None
    if create:
        root.mkdir(parents=True, exist_ok=True, mode=0o700)
        # mkdir applies `mode` to the leaf only, so the first-ever tenant
        # write would leave tenants_root with umask-derived permissions.
        try:
            base.chmod(0o700)
        except OSError:
            # Best effort: failing to tighten the parent must not block
            # the tenant operation.
            pass
    return root
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def list_tenants() -> list[str]:
    """Return the sorted directory names found directly under tenants_root().

    Meant for maintenance / audit / backup tooling. Entries that are not
    directories, and names starting with '.', are left out. When the
    tenants root is not a directory (for example, no tenant has been
    created yet) the result is an empty list.
    """
    base = tenants_root()
    if not base.is_dir():
        return []
    names = [
        child.name
        for child in base.iterdir()
        if child.is_dir() and child.name and not child.name.startswith(".")
    ]
    names.sort()
    return names
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "delimit-cli",
|
|
3
3
|
"mcpName": "io.github.delimit-ai/delimit-mcp-server",
|
|
4
|
-
"version": "4.6.0",
|
|
4
|
+
"version": "4.6.1",
|
|
5
5
|
"description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"files": [
|
|
@@ -42,6 +42,9 @@
|
|
|
42
42
|
"!scripts/demo-v420-clean.sh",
|
|
43
43
|
"!scripts/demo-v420-deliberation.sh",
|
|
44
44
|
"!scripts/sync-gateway.sh",
|
|
45
|
+
"!scripts/build-license-core.sh",
|
|
46
|
+
"!scripts/security-check.sh",
|
|
47
|
+
"!scripts/test-license-core-so.sh",
|
|
45
48
|
"!gateway/ai/continuity.py",
|
|
46
49
|
"server.json",
|
|
47
50
|
"README.md",
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
# LED-1259: Compile gateway/ai/license_core.py to a native .so via Nuitka,
# then strip the plaintext .py from the bundle so customers cannot grep
# the validation logic for bypass identifiers.
#
# Linux-only first ship. Mac/Windows expansion is filed as a follow-up
# ledger item — non-linux customers will hit the Python fallback in
# license.py (degraded Pro features) until we ship per-platform binaries.
#
# Every invocation rebuilds from scratch. NOT re-runnable as-is: the last
# step deletes license_core.py, and the script exits 1 when that source
# is missing, so restore the source before running it again.
#
# Environment: PYTHON selects the interpreter (default: python3).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
NPM_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
AI_DIR="$NPM_ROOT/gateway/ai"
SRC="$AI_DIR/license_core.py"

# ── Platform gate ────────────────────────────────────────────────────
UNAME_S="$(uname -s)"
UNAME_M="$(uname -m)"
if [ "$UNAME_S" != "Linux" ]; then
    echo "⚠️ build-license-core: non-Linux host ($UNAME_S) — skipping compile."
    echo " First ship is linux-only. The bundle will fall back to .py."
    exit 0
fi

if [ ! -f "$SRC" ]; then
    echo "❌ Source not found: $SRC"
    exit 1
fi

# ── Toolchain check ──────────────────────────────────────────────────
PY="${PYTHON:-python3}"
if ! command -v "$PY" >/dev/null 2>&1; then
    echo "❌ python3 not found"
    exit 1
fi

# "$PY" is quoted everywhere so an interpreter path containing spaces works.
PY_VER="$("$PY" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
echo "🔧 build-license-core: python=$PY ($PY_VER), arch=$UNAME_M"

if ! "$PY" -m nuitka --version >/dev/null 2>&1; then
    echo "📦 nuitka not installed — installing via pip..."
    "$PY" -m pip install --quiet --user nuitka
fi

# `|| true`: under pipefail, `head` closing the pipe early must not abort
# the build just because the version banner is multi-line.
NUITKA_VER="$("$PY" -m nuitka --version 2>&1 | head -1 || true)"
echo " nuitka=$NUITKA_VER"

# ── Compile ──────────────────────────────────────────────────────────
echo "🔨 Compiling license_core.py → .so (this takes ~30s)..."
cd "$AI_DIR"
"$PY" -m nuitka --module --quiet --remove-output --output-dir=. license_core.py

# ── Verify output ────────────────────────────────────────────────────
SO_FILE="$(ls -1 license_core.cpython-*-*.so 2>/dev/null | head -1 || true)"
if [ -z "$SO_FILE" ] || [ ! -f "$SO_FILE" ]; then
    echo "❌ Compile failed — no .so produced in $AI_DIR"
    ls -la "$AI_DIR"/license_core* 2>&1 || true
    exit 1
fi

SO_SIZE="$(stat -c%s "$SO_FILE")"
echo " ✅ produced: $SO_FILE ($SO_SIZE bytes)"

# ── Bypass-identifier scan ───────────────────────────────────────────
# Customers must not be able to `strings | grep` the .so for known
# bypass class names. Fail the build if any leak through.
BYPASS_HITS="$(strings "$SO_FILE" | grep -iE 'DELIMIT_TEST_MODE|DELIMIT_INTERNAL_LICENSE_KEY|JAMSONS' || true)"
if [ -n "$BYPASS_HITS" ]; then
    echo "❌ Bypass identifiers found in compiled .so:"
    echo "$BYPASS_HITS"
    exit 1
fi
echo " ✅ strings-grep clean (no bypass identifiers)"

# ── Drop the plaintext source from the bundle ────────────────────────
# .npmignore + package.json will also exclude it, but removing here is
# belt-and-suspenders so dev/test inspection of the bundle dir matches
# what gets packed.
rm -f "$AI_DIR/license_core.py"
echo " ✅ removed plaintext license_core.py from bundle"

echo "✅ build-license-core complete: $SO_FILE"
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
# Pre-publish security check — blocks npm publish if secrets are found
# Run: bash scripts/security-check.sh
#
# Packs the package into a private scratch directory, unpacks the tarball
# and greps the unpacked tree for credential patterns, blocklisted terms,
# PII and proprietary files. Exits 1 if any check matches, 0 otherwise.
#
# NOTE(review): the blocklist below spells out the sensitive terms it
# searches for, so this script itself must stay out of the published
# tarball.

set -euo pipefail

echo "🔍 Delimit pre-publish security scan..."

FAIL=0

# Pack to temp and scan the actual tarball contents.
# A private variable name is used on purpose: TMPDIR is usually exported,
# and reassigning it would change the temp location seen by npm and tar.
SCAN_DIR=$(mktemp -d)
# Remove the scratch dir on every exit path, including `set -e` aborts.
trap 'rm -rf "$SCAN_DIR"' EXIT

if ! npm pack --pack-destination "$SCAN_DIR" --quiet 2>/dev/null; then
    echo "❌ npm pack failed — nothing to scan"
    exit 1
fi
TARBALL=$(ls "$SCAN_DIR"/*.tgz)
tar -xzf "$TARBALL" -C "$SCAN_DIR"

# 1. Credential patterns
echo -n " Credentials... "
if grep -rEi '(password|passwd|secret|api_key|apikey)\s*[:=]\s*["\x27][^"\x27]{4,}' "$SCAN_DIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null | grep -v 'environ\|getenv\|process\.env\|os\.environ\|<configured\|example\|placeholder\|REDACTED\|\${credentials\|credentials\.\|security-scan-ignore'; then
    echo "❌ FOUND CREDENTIALS"
    FAIL=1
else
    echo "✅ clean"
fi

# 2. Blocklist terms
echo -n " Blocklist... "
BLOCKLIST="jamsonsholdings|Bladabah|Domainvested26|Delimit26|home/jamsons|infracore|crypttrx|\.wr_env"
if grep -rEi "$BLOCKLIST" "$SCAN_DIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null; then
    echo "❌ BLOCKED TERMS FOUND"
    FAIL=1
else
    echo "✅ clean"
fi

# 3. PII (email addresses that aren't examples)
echo -n " PII... "
if grep -rEi '[a-z0-9._%+-]+@(gmail|yahoo|hotmail|outlook|proton|jamsons|wire\.report|domainvested)' "$SCAN_DIR/package/" --include="*.py" --include="*.js" --include="*.json" 2>/dev/null | grep -v "example\|placeholder\|<configured\|noreply\|e\.g\.\|docstring\|Args:\|Credential resolution"; then
    echo "❌ PII FOUND"
    FAIL=1
else
    echo "✅ clean"
fi

# 4. Proprietary files that shouldn't ship
echo -n " Proprietary files... "
PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py"
if find "$SCAN_DIR/package/" -name "*.py" | grep -Ei "$PROPRIETARY" 2>/dev/null; then
    echo "❌ PROPRIETARY FILES IN PACKAGE"
    FAIL=1
else
    echo "✅ clean"
fi

# Cleanup of the scratch dir is handled by the EXIT trap above.

if [ $FAIL -ne 0 ]; then
    echo ""
    echo "❌ SECURITY CHECK FAILED — do not publish"
    exit 1
fi

echo ""
echo "✅ All security checks passed"
exit 0
|