delimit-cli 4.5.13 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +9 -8
- package/bin/delimit-cli.js +179 -4
- package/bin/delimit-setup.js +46 -6
- package/gateway/ai/_compile_status.py +154 -0
- package/gateway/ai/agent_dispatch.py +41 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/tools_infra.py +163 -10
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/daemon.py +10 -0
- package/gateway/ai/daily_digest.py +1 -2
- package/gateway/ai/delimit_daemon.py +67 -0
- package/gateway/ai/dispatch_gate.py +399 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/hot_reload.py +1 -2
- package/gateway/ai/led193_daemon/executor.py +9 -0
- package/gateway/ai/ledger_manager.py +90 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/notify.py +39 -0
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +1437 -0
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reaper.py +70 -0
- package/gateway/ai/reddit_scanner.py +17 -6
- package/gateway/ai/sensing/schema.py +1 -1
- package/gateway/ai/sensing/signal_store.py +0 -1
- package/gateway/ai/server.py +5490 -1602
- package/gateway/ai/social_capability/fit_floor.py +114 -12
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tdqs_lint.py +611 -0
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/gateway/ai/usage_allowlist.py +198 -0
- package/gateway/ai/workers/base.py +2 -2
- package/gateway/ai/workers/executor.py +32 -3
- package/gateway/ai/workers/outreach_drafter.py +0 -1
- package/gateway/ai/workers/pr_drafter.py +0 -1
- package/gateway/ai/x_ranker.py +12 -2
- package/gateway/core/json_schema_diff.py +25 -1
- package/lib/auth-signin.js +136 -0
- package/lib/auth-signout.js +169 -0
- package/lib/delimit-template.js +11 -0
- package/lib/migration-2092-banner.js +213 -0
- package/package.json +5 -2
- package/server.json +4 -4
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""LED-2268 P0 Phase 0.2 — tenant-scoped filesystem layout.
|
|
2
|
+
|
|
3
|
+
The gateway today stores everything under `~/.delimit/` (memory.jsonl,
|
|
4
|
+
ledger.jsonl, evidence/, etc). That's correct for the single-tenant
|
|
5
|
+
founder install but doesn't generalize once paying customers run their
|
|
6
|
+
own tenants against a shared gateway host.
|
|
7
|
+
|
|
8
|
+
This module owns the path-resolver primitive for the per-tenant layout:
|
|
9
|
+
|
|
10
|
+
~/.delimit/ ← legacy / shared root (unchanged)
|
|
11
|
+
~/.delimit/tenants/
|
|
12
|
+
<safe-user-id>/ ← one dir per resolved API-key user
|
|
13
|
+
memory.jsonl
|
|
14
|
+
ledger.jsonl
|
|
15
|
+
evidence/
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
Phase 0.2 ONLY ships the resolver + sanitiser + base-dir creation. No
|
|
19
|
+
existing storage is migrated; no endpoint is yet rerouted through here.
|
|
20
|
+
Phase 0.3 will add the first endpoint that uses tenant_data_root() and
|
|
21
|
+
copy the founder's existing single-tenant data into her own tenant
|
|
22
|
+
folder.
|
|
23
|
+
|
|
24
|
+
Security note: the user_id segment comes from Supabase
|
|
25
|
+
`user_api_keys.user_id` (which itself comes from NextAuth users.id, a
|
|
26
|
+
GitHub-OAuth-derived string). It's NEVER raw user input from the
|
|
27
|
+
request — but we still sanitise it defensively so a malformed value in
|
|
28
|
+
the DB can't escape into adjacent dirs via `..` or NUL bytes.
|
|
29
|
+
"""
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import string
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Optional
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Base of the whole per-tenant tree. Lives under the existing delimit
|
|
40
|
+
# home so backup/restore tooling sees it without extra wiring.
|
|
41
|
+
def _delimit_home() -> Path:
    """Return the delimit home directory.

    A non-empty ``DELIMIT_HOME`` environment variable takes precedence and is
    returned user-expanded and fully resolved. Otherwise the result is
    ``.delimit`` under the current user's home directory (not resolved).
    """
    override = os.environ.get("DELIMIT_HOME")
    if not override:
        return Path.home() / ".delimit"
    return Path(override).expanduser().resolve()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
_TENANTS_DIRNAME = "tenants"
# Characters allowed in a sanitised user-id segment. Deliberately
# conservative: ASCII alphanumerics plus "-", "_" and ".". Nothing a shell,
# a path parser, or a downstream tool could interpret specially.
_SAFE_CHARS = frozenset(string.ascii_letters + string.digits + "-_.")
# Upper bound on the length of one user-id segment. Filesystems generally
# allow 255-byte basenames; we stay well below that. Anything longer is
# prefix-truncated and given a hash suffix so distinct ids stay distinct.
_MAX_SEGMENT_LEN = 64


def safe_user_segment(user_id: str) -> Optional[str]:
    """Sanitise a user_id into a filesystem-safe directory name.

    Returns None for unusable input so callers MUST handle the rejection
    instead of silently writing to a default directory.

    Strategy:
      - Reject non-strings and empty / whitespace-only values.
      - Strip surrounding whitespace and lowercase.
      - Replace every character outside ``_SAFE_CHARS`` with "_".
      - Reject a result that is empty, only underscores, "." or "..".
      - If the substitution changed the id (it was lossy), or the result is
        longer than ``_MAX_SEGMENT_LEN``, truncate to ``_MAX_SEGMENT_LEN - 9``
        characters and append ``-<8 hex>`` taken from the SHA-256 of the
        stripped, lowercased id. Without the suffix on lossy ids, distinct
        ids such as "a/b" and "a_b" would share one tenant directory.

    Ids made only of safe characters and within the length cap are returned
    unchanged apart from stripping and lowercasing.

    Args:
        user_id: The identity string to convert.

    Returns:
        A segment of at most ``_MAX_SEGMENT_LEN`` characters, or None.
    """
    if not isinstance(user_id, str) or not user_id:
        return None
    s = user_id.strip().lower()
    if not s:
        return None
    # One-for-one substitution keeps length and readability in the common
    # case where the id is already made of safe characters.
    safe = "".join(c if c in _SAFE_CHARS else "_" for c in s)
    if not safe.strip("_"):
        return None
    if safe in (".", ".."):
        return None
    lossy = safe != s
    if lossy or len(safe) > _MAX_SEGMENT_LEN:
        import hashlib

        # "surrogatepass" yields the same bytes as strict UTF-8 for valid
        # text, but does not raise on a lone surrogate in a malformed id.
        digest = hashlib.sha256(s.encode("utf-8", "surrogatepass")).hexdigest()[:8]
        # Keep (max - 9) characters so the "-<8hex>" suffix fits the budget.
        safe = safe[: _MAX_SEGMENT_LEN - 9] + "-" + digest
    return safe
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def tenants_root() -> Path:
    """Return the directory that holds every per-tenant directory.

    It is the ``_TENANTS_DIRNAME`` child of whatever ``_delimit_home()``
    returns; nothing is created on disk.
    """
    home = _delimit_home()
    return home / _TENANTS_DIRNAME
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def tenant_data_root(user_id: str, *, create: bool = False) -> Optional[Path]:
    """Resolve the on-disk root for a specific tenant's data.

    Args:
        user_id: Identity string; it is passed through ``safe_user_segment``.
        create: When True, create the tenant directory (and any missing
            parents) with ``mode=0o700`` and chmod the tenants root to 0700.

    Returns:
        ``tenants_root() / <segment>`` (not resolved), or None when the id
        does not sanitise to a usable segment, when the resolved path is not
        strictly inside the resolved tenants root, or when resolving raises
        ``OSError`` / ``RuntimeError``.

    With the default ``create=False`` nothing is created or modified.
    ``Path.resolve()`` still consults the filesystem, so the call is
    read-only rather than syscall-free.
    """
    seg = safe_user_segment(user_id)
    if seg is None:
        return None
    # Compute the base once so every check below sees the same directory,
    # even if DELIMIT_HOME changes while this call is running.
    base = tenants_root()
    root = base / seg
    # Defence in depth against an unforeseen sanitiser bypass or a symlink
    # planted under the tenants root: the tenant directory must resolve to a
    # strict descendant of the tenants root. A path that resolves to the
    # tenants root itself is rejected, since it would expose every tenant.
    try:
        if base.resolve() not in root.resolve().parents:
            return None
    except (OSError, RuntimeError):
        return None
    if create:
        root.mkdir(parents=True, exist_ok=True, mode=0o700)
        # mkdir(parents=True) creates missing parents with default
        # permissions, so tighten the tenants root explicitly. Best-effort:
        # a chmod failure must not block the tenant's first write.
        try:
            base.chmod(0o700)
        except OSError:
            pass
    return root
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def list_tenants() -> list[str]:
    """Return the sorted directory names found under the tenants root.

    Only entries that are directories and whose name does not start with
    "." are included. When the tenants root is not a directory (for example
    because no tenant has been created yet) the result is an empty list.
    """
    base = tenants_root()
    if not base.is_dir():
        return []
    names = [
        child.name
        for child in base.iterdir()
        if child.is_dir() and child.name and not child.name.startswith(".")
    ]
    names.sort()
    return names
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Canonical usage allowlist for first-person experience claims in social drafts.
|
|
2
|
+
|
|
3
|
+
Single source of truth for the LED-1334 usage gate (2026-05-12 deliberation,
|
|
4
|
+
substantive unanimous). When a generated reply contains a first-person
|
|
5
|
+
experience clause (e.g. "saved me", "bit me on similar projects") paired with
|
|
6
|
+
a named third-party tool/service NOT on this allowlist, the validator blocks
|
|
7
|
+
the draft. The draft generation prompt is rendered from the same constant so
|
|
8
|
+
the prompt and validator cannot drift.
|
|
9
|
+
|
|
10
|
+
Why this exists: a Reddit draft on r/mcp/1t87arl fabricated "Pinning the
|
|
11
|
+
OpenAPI spec version you generated against and diffing on every Zoom release
|
|
12
|
+
saved me a bunch of mystery 400s." We don't use Zoom. The prior LED-1332
|
|
13
|
+
prompt rules and LED-1333 hardened claude CLI drafter both failed to catch it
|
|
14
|
+
because the OpenAPI-diff tactic IS real, but the claim of having USED IT ON
|
|
15
|
+
ZOOM was invented.
|
|
16
|
+
|
|
17
|
+
How to edit: change USAGE_ALLOWLIST below. The prompt's GROUND-TRUTH section
|
|
18
|
+
re-renders from this constant on every draft; the validator imports the same
|
|
19
|
+
constant; the parity test in tests/test_usage_gate.py asserts both consumers
|
|
20
|
+
see identical entries.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import re
|
|
25
|
+
from typing import Iterable
|
|
26
|
+
|
|
27
|
+
# Canonical usage allowlist. Lowercase, normalized. One entry per tool/service
|
|
28
|
+
# that the founder ACTUALLY uses in their daily work and can speak to from
|
|
29
|
+
# lived experience. Adding an entry is a deliberate code change; PRs editing
|
|
30
|
+
# this file should describe the lived usage that backs the addition.
|
|
31
|
+
USAGE_ALLOWLIST: frozenset[str] = frozenset(
    {
        # --- coding agents ---
        "claude code",
        "claude-code",
        "claude",
        "codex",
        "codex cli",
        "codex-cli",
        "gemini",
        "gemini cli",
        "gemini-cli",
        "cursor",
        # --- protocols / standards ---
        "openapi",
        "openapi spec",
        "openapi schema",
        "mcp",
        "model context protocol",
        "mcp server",
        "github actions",
        "github action",
        # --- attestation stack ---
        "sigstore",
        "cosign",
        "rekor",
        # --- languages / runtimes ---
        "python",
        "typescript",
        "javascript",
        "node",
        "npm",
        # --- cloud / deploy ---
        "vercel",
        # --- API providers ---
        "anthropic api",
        "openai api",
        "vertex ai",
        # --- own product surface ---
        "delimit",
        "delimit-cli",
        "delimit-action",
        "delimit-mcp-server",
    }
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# First-person experience clauses that imply the speaker has lived/used the
|
|
55
|
+
# named subject. Detection here means "the draft is claiming standing to
|
|
56
|
+
# speak from use" — which must be backed by an allowlist match to be allowed.
|
|
57
|
+
_FIRST_PERSON_EXPERIENCE = re.compile(
    # Alternatives are listed one per entry and joined with "|" inside a
    # single non-capturing group anchored by word boundaries.
    r"\b(?:"
    + "|".join(
        (
            r"saved me",
            r"bit me",
            r"got me",
            r"caught me",
            r"surprised me",
            r"burned me",
            r"tripped me up",
            r"what I[’']d do",
            r"the way I caught",
            r"in my experience",
            r"when I (?:ran|used|tried|hit|wrapped|implemented|deployed|shipped|integrated)",
            r"I (?:ran into|tripped over|got bit by|hit (?:this|that|the))",
            r"I personally",
            r"from my own work",
            r"on similar projects",
            r"mine still loads",
            r"mine kept (?:loading|breaking|drifting)",
            r"I had to",
            r"I ended up",
        )
    )
    + r")\b",
    flags=re.IGNORECASE,
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Named-product extraction: proper-noun tokens, optionally compound, optionally
|
|
72
|
+
# with technical suffix (API/SDK/CLI/MCP). Tuned for permissive capture; the
|
|
73
|
+
# stopword set below filters obvious false positives.
|
|
74
|
+
_NAMED_PRODUCT = re.compile(
    r"\b("
    # First word: a capital letter followed by letters, digits, ".", "+", "-".
    r"[A-Z][a-zA-Z0-9.+-]*"
    # Any number of following capitalised words or technical suffixes.
    r"(?:\s+(?:[A-Z][a-zA-Z0-9.+-]*|API|SDK|CLI|MCP))*"
    r")\b"
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Tokens that pass the proper-noun regex but are not actually third-party
|
|
80
|
+
# product names. Keep this conservative — over-stopword'ing weakens the
|
|
81
|
+
# guardrail. Order: sentence-starters, generic verbs, common acronyms,
|
|
82
|
+
# concept words, our own ecosystem.
|
|
83
|
+
_NAMED_PRODUCT_STOPWORDS: frozenset[str] = frozenset(
    {
        # Articles / pronouns / demonstratives
        "the", "a", "an", "i", "it", "they", "we", "you", "this", "that",
        # Question words
        "when", "where", "why", "what", "who", "how", "which",
        # Conjunctions and connectives
        "if", "but", "and", "or", "so", "then", "also",
        "since", "while", "before", "after", "until", "unless",
        "even", "though", "although",
        # Gerunds that commonly open a sentence
        "wrapping", "pinning", "running", "using", "trying", "diffing",
        # Ordinals, sequencing words and short replies
        "once", "first", "second", "third", "next", "yes", "no",
        "yeah", "sure", "ok", "okay", "honestly", "curious", "neat", "cool",
        # Formats and encodings
        "english", "ascii", "json", "yaml", "xml", "html", "css",
        # Protocols
        "http", "https", "rest", "graphql", "websocket", "soap", "grpc",
        "tcp", "udp", "ssh", "tls", "ssl", "dns",
        # Delivery / version-control vocabulary
        "ci", "cd", "pr", "prs", "pull request", "merge", "commit",
        # Operating systems / platforms
        "linux", "windows", "macos", "ios", "android", "unix",
        # Generic architecture nouns
        "gateway", "proxy", "middleware", "scanner", "drafter",
        "markdown", "schema", "endpoint", "endpoints",
        "agent", "agents", "tool", "tools", "server", "servers",
        "client", "clients",
        # Other generic words
        "spend", "auto", "nice", "great", "interesting",
    }
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _normalize(token: str) -> str:
    """Return *token* lowercased with whitespace runs collapsed to one space.

    Leading and trailing whitespace is dropped as a side effect of the
    split/join round trip.
    """
    words = token.lower().split()
    return " ".join(words)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def is_on_allowlist(product: str) -> bool:
    """Return True if `product` matches an entry of ``USAGE_ALLOWLIST``.

    Matching is case-insensitive and works on whole words in both
    directions. Both sides are split into words on whitespace, ".", "+" and
    "-". A match needs the entry's words to appear as a contiguous run
    inside the product's words, or the other way round:

      - "Claude Code SDK" matches the "claude code" entry.
      - "MCP" matches "mcp server" by being one of the entry's words.
      - "Node.js" matches "node".
      - "Java" does NOT match "javascript", and "Git" does NOT match
        "github actions". A raw character-substring test would accept both
        and let an off-allowlist product slip through the gate.

    Args:
        product: Candidate product name as it appeared in the draft.

    Returns:
        True when some allowlist entry matches; False otherwise, including
        when `product` contains no word characters at all.
    """

    def _words(text: str) -> list[str]:
        # Lowercase and split on whitespace plus the punctuation that the
        # product-extraction pattern allows inside a token.
        return [w for w in re.split(r"[\s.+-]+", text.lower()) if w]

    def _contains_run(haystack: list[str], needle: list[str]) -> bool:
        # True when `needle` occurs as a contiguous slice of `haystack`.
        span = len(needle)
        if span == 0 or span > len(haystack):
            return False
        return any(
            haystack[i:i + span] == needle
            for i in range(len(haystack) - span + 1)
        )

    candidate = _words(product)
    if not candidate:
        return False
    for entry in USAGE_ALLOWLIST:
        entry_words = _words(entry)
        if _contains_run(candidate, entry_words) or _contains_run(entry_words, candidate):
            return True
    return False
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def extract_named_products(text: str) -> list[str]:
    """Extract candidate third-party product names from draft text.

    Every ``_NAMED_PRODUCT`` match is examined:

      - A match whose normalized form is a stopword entry (including
        multi-word entries such as "pull request") is dropped.
      - A match with no stopword among its words is kept verbatim.
      - A match that mixes stopwords with other words is split at the
        stopwords and each remaining run of words becomes its own candidate.
        "When I" yields nothing; "The Zoom API" yields "Zoom API". Dropping
        the whole match instead would hide a real product name whenever it
        happens to follow a capitalised sentence-starter.

    Candidates are de-duplicated on their normalized form, keeping the first
    spelling seen and preserving order of appearance.

    Args:
        text: Draft text to scan; falsy input yields an empty list.

    Returns:
        Candidate names for the usage gate to check against the allowlist.
    """
    if not text:
        return []

    out: list[str] = []
    seen: set[str] = set()

    def _keep(candidate: str) -> None:
        # Record `candidate` unless it is empty, a stopword, or a repeat.
        key = _normalize(candidate)
        if not key or key in _NAMED_PRODUCT_STOPWORDS or key in seen:
            return
        seen.add(key)
        out.append(candidate)

    for token in _NAMED_PRODUCT.findall(text):
        if _normalize(token) in _NAMED_PRODUCT_STOPWORDS:
            continue
        words = token.split()
        if not any(w.lower() in _NAMED_PRODUCT_STOPWORDS for w in words):
            # Common case: keep the match exactly as it appeared.
            _keep(token)
            continue
        run: list[str] = []
        for word in words:
            if word.lower() in _NAMED_PRODUCT_STOPWORDS:
                # A stopword ends the current run of product words.
                if run:
                    _keep(" ".join(run))
                    run = []
            else:
                run.append(word)
        if run:
            _keep(" ".join(run))
    return out
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def find_off_allowlist_experience_claims(text: str) -> list[dict]:
    """Pair a first-person experience clause with off-allowlist products.

    If `text` is empty or contains no ``_FIRST_PERSON_EXPERIENCE`` match,
    the result is an empty list (the draft passes). Otherwise every name
    from ``extract_named_products(text)`` that fails ``is_on_allowlist`` is
    reported as ``{"clause": <first matched clause>, "product": <name>}``.

    All violations are returned, not just the first, so the caller can
    surface the complete set.
    """
    if not text:
        return []
    hit = _FIRST_PERSON_EXPERIENCE.search(text)
    if hit is None:
        return []
    phrase = hit.group(0)
    return [
        {"clause": phrase, "product": name}
        for name in extract_named_products(text)
        if not is_on_allowlist(name)
    ]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def format_for_prompt() -> str:
    """Render ``USAGE_ALLOWLIST`` as one bullet line per entry.

    Entries are emitted in sorted order and joined with newlines, so the
    output is the same on every call for a given allowlist.
    """
    bullets = [f" - {entry}" for entry in sorted(USAGE_ALLOWLIST)]
    return "\n".join(bullets)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def allowlist_as_sorted_tuple() -> tuple[str, ...]:
    """Return the entries of ``USAGE_ALLOWLIST`` as a sorted tuple."""
    ordered = sorted(USAGE_ALLOWLIST)
    return tuple(ordered)
|
|
@@ -20,7 +20,7 @@ import time
|
|
|
20
20
|
from abc import ABC, abstractmethod
|
|
21
21
|
from dataclasses import dataclass, field, asdict
|
|
22
22
|
from pathlib import Path
|
|
23
|
-
from typing import Any, Dict, List
|
|
23
|
+
from typing import Any, Dict, List
|
|
24
24
|
|
|
25
25
|
logger = logging.getLogger("delimit.workers")
|
|
26
26
|
|
|
@@ -121,7 +121,7 @@ class Worker(ABC):
|
|
|
121
121
|
Must return a WorkerResult with an artifact (work order).
|
|
122
122
|
Must NOT modify any state — output only.
|
|
123
123
|
"""
|
|
124
|
-
|
|
124
|
+
raise NotImplementedError("Subclasses must implement execute()")
|
|
125
125
|
|
|
126
126
|
def run(self, ledger_item: Dict[str, Any]) -> WorkerResult:
|
|
127
127
|
"""Run the worker with timing + audit trail."""
|
|
@@ -122,9 +122,38 @@ PROPOSE_PR_AUTHOR_NAME = "delimit-bot"
|
|
|
122
122
|
PROPOSE_PR_AUTHOR_EMAIL = "bot@delimit.ai"
|
|
123
123
|
# Hard cap on patch size — rejects accidental mega-diffs that would
|
|
124
124
|
# require a different review workflow anyway.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
125
|
+
#
|
|
126
|
+
# LED-2238: bumped from 256KB → 5MB per file and 1MB → 50MB per PR
|
|
127
|
+
# after the original limits blocked the autonomous-build pipeline on
|
|
128
|
+
# any task touching ai/server.py (542KB), ai/loop_engine.py, or other
|
|
129
|
+
# normal-sized gateway files. The old limits assumed payloads went
|
|
130
|
+
# through GitHub's content-API (which has its own ~50MB cap), but
|
|
131
|
+
# _act_propose_pr writes via local git (`write_text` → `git add` →
|
|
132
|
+
# `git commit` → `git push`), which has no API-payload constraint —
|
|
133
|
+
# real git push supports multi-GB content. The caps stay as defense
|
|
134
|
+
# against runaway model output, just at a level that doesn't reject
|
|
135
|
+
# realistic source files.
|
|
136
|
+
#
|
|
137
|
+
# All three caps are overridable via env vars so an operator with a
|
|
138
|
+
# legitimately-larger payload can opt in without a code change:
|
|
139
|
+
# DELIMIT_PROPOSE_PR_MAX_FILES (default 50)
|
|
140
|
+
# DELIMIT_PROPOSE_PR_MAX_FILE_BYTES (default 5_242_880 = 5MB)
|
|
141
|
+
# DELIMIT_PROPOSE_PR_MAX_TOTAL_BYTES (default 52_428_800 = 50MB)
|
|
142
|
+
def _env_int(name: str, default: int) -> int:
    """Return the positive integer stored in environment variable *name*.

    *default* is returned when the variable is unset, blank, not parseable
    by ``int()``, or parses to zero or a negative number.
    """
    text = os.environ.get(name, "").strip()
    if text:
        try:
            parsed = int(text)
        except ValueError:
            return default
        if parsed > 0:
            return parsed
    return default
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
PROPOSE_PR_MAX_FILES = _env_int("DELIMIT_PROPOSE_PR_MAX_FILES", 50)
|
|
155
|
+
PROPOSE_PR_MAX_FILE_BYTES = _env_int("DELIMIT_PROPOSE_PR_MAX_FILE_BYTES", 5 * 1024 * 1024) # 5 MiB / file
|
|
156
|
+
PROPOSE_PR_MAX_TOTAL_BYTES = _env_int("DELIMIT_PROPOSE_PR_MAX_TOTAL_BYTES", 50 * 1024 * 1024) # 50 MiB / PR
|
|
128
157
|
|
|
129
158
|
|
|
130
159
|
class ActionError(Exception):
|
package/gateway/ai/x_ranker.py
CHANGED
|
@@ -324,13 +324,23 @@ def rank_x_targets(
|
|
|
324
324
|
|
|
325
325
|
if enable_fit_floor and recent_topics is None:
|
|
326
326
|
try:
|
|
327
|
-
from ai.social_capability.fit_floor import _recent_topic_fingerprints
|
|
328
|
-
|
|
327
|
+
from ai.social_capability.fit_floor import _recent_topic_fingerprints, topics_for_scope
|
|
328
|
+
# LED-1356: _recent_topic_fingerprints now returns Dict[scope, Set[str]].
|
|
329
|
+
# X targets share the 'x' scope (twitter platform also normalizes to 'x').
|
|
330
|
+
recent_topics_dict = _recent_topic_fingerprints(cooldown_days=cooldown_days)
|
|
331
|
+
recent_topics = topics_for_scope(recent_topics_dict, "x")
|
|
329
332
|
except Exception as exc: # pragma: no cover — tolerant fallback
|
|
330
333
|
logger.warning("x_ranker: cooldown bootstrap failed (%s) — proceeding without", exc)
|
|
331
334
|
recent_topics = set()
|
|
332
335
|
elif recent_topics is None:
|
|
333
336
|
recent_topics = set()
|
|
337
|
+
elif isinstance(recent_topics, dict):
|
|
338
|
+
# Caller passed the new dict shape; pick the X scope.
|
|
339
|
+
try:
|
|
340
|
+
from ai.social_capability.fit_floor import topics_for_scope
|
|
341
|
+
recent_topics = topics_for_scope(recent_topics, "x")
|
|
342
|
+
except Exception:
|
|
343
|
+
recent_topics = set()
|
|
334
344
|
|
|
335
345
|
survivors: List[Dict[str, Any]] = []
|
|
336
346
|
for t in targets or []:
|
|
@@ -15,7 +15,7 @@ general across any single-file JSON Schema.
|
|
|
15
15
|
|
|
16
16
|
from dataclasses import dataclass, field
|
|
17
17
|
from enum import Enum
|
|
18
|
-
from typing import Any, Dict, List, Optional
|
|
18
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class JSONSchemaChangeType(Enum):
|
|
@@ -107,6 +107,11 @@ class JSONSchemaDiffEngine:
|
|
|
107
107
|
self.changes: List[JSONSchemaChange] = []
|
|
108
108
|
self._old_defs: Dict[str, Any] = {}
|
|
109
109
|
self._new_defs: Dict[str, Any] = {}
|
|
110
|
+
# LED-1395: cycle guard. Self-referential schemas (tree nodes,
|
|
111
|
+
# linked lists, recursive $refs) would otherwise stack-overflow
|
|
112
|
+
# the recursion in _compare_schema. Mirrors oasdiff v1.15.3's
|
|
113
|
+
# mergeProps/resolveItems/resolveContains pointer-dedup.
|
|
114
|
+
self._seen_pairs: Set[Tuple[int, int]] = set()
|
|
110
115
|
|
|
111
116
|
# ------------------------------------------------------------------
|
|
112
117
|
# public API
|
|
@@ -114,6 +119,7 @@ class JSONSchemaDiffEngine:
|
|
|
114
119
|
|
|
115
120
|
def compare(self, old_schema: Dict[str, Any], new_schema: Dict[str, Any]) -> List[JSONSchemaChange]:
|
|
116
121
|
self.changes = []
|
|
122
|
+
self._seen_pairs = set()
|
|
117
123
|
old_schema = old_schema or {}
|
|
118
124
|
new_schema = new_schema or {}
|
|
119
125
|
self._old_defs = old_schema.get("definitions", {}) or {}
|
|
@@ -156,6 +162,17 @@ class JSONSchemaDiffEngine:
|
|
|
156
162
|
def _compare_schema(self, old: Any, new: Any, path: str) -> None:
|
|
157
163
|
if not isinstance(old, dict) or not isinstance(new, dict):
|
|
158
164
|
return
|
|
165
|
+
# LED-1395: cycle guard. Track the pre-resolve identity pair so
|
|
166
|
+
# recursive $ref shapes (tree.children → tree, linked-list
|
|
167
|
+
# node.next → node) terminate after one visit instead of
|
|
168
|
+
# stack-overflowing. Identity-based — if the SAME old node is
|
|
169
|
+
# paired with the SAME new node again at a deeper path, every
|
|
170
|
+
# comparison further down would be redundant by definition.
|
|
171
|
+
pair_key = (id(old), id(new))
|
|
172
|
+
if pair_key in self._seen_pairs:
|
|
173
|
+
return
|
|
174
|
+
self._seen_pairs.add(pair_key)
|
|
175
|
+
|
|
159
176
|
old = self._resolve(old, self._old_defs)
|
|
160
177
|
new = self._resolve(new, self._new_defs)
|
|
161
178
|
|
|
@@ -307,6 +324,13 @@ class JSONSchemaDiffEngine:
|
|
|
307
324
|
self._add(JSONSchemaChangeType.ADDITIONAL_PROPERTIES_LOOSENED, path,
|
|
308
325
|
{"old": old_ap, "new": new_ap},
|
|
309
326
|
f"additionalProperties loosened at {path or '/'}: {old_ap} → {new_ap}")
|
|
327
|
+
# Typed-map class (Dict[str, Model] / FastAPI + Pydantic default):
|
|
328
|
+
# `additionalProperties` is itself a schema. Recurse so required-field
|
|
329
|
+
# add/remove, property changes, and type widening inside the value
|
|
330
|
+
# schema are not silently invisible. Closes the same long-missed
|
|
331
|
+
# class oasdiff fixed in v1.15.3 (2026-05-14).
|
|
332
|
+
if isinstance(old_ap, dict) and isinstance(new_ap, dict):
|
|
333
|
+
self._compare_schema(old_ap, new_ap, f"{path}/additionalProperties")
|
|
310
334
|
|
|
311
335
|
def _compare_required(self, old: Dict, new: Dict, path: str) -> None:
|
|
312
336
|
old_req = set(old.get("required", []) or [])
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// lib/auth-signin.js
|
|
2
|
+
//
|
|
3
|
+
// LED-2100: writes the delimit.ai OAuth bearer token to ~/.delimit/auth.json
|
|
4
|
+
// so the gateway hosted-deliberation tier (LED-2092) can authenticate from
|
|
5
|
+
// the CLI. The gateway reads `delimit_token` or `access_token` from this
|
|
6
|
+
// file (see ai/deliberation.py::_read_oauth_token).
|
|
7
|
+
//
|
|
8
|
+
// Design notes
|
|
9
|
+
// - We MERGE into any existing auth.json rather than overwrite. The legacy
|
|
10
|
+
// `lib/auth-setup.js` writes {configured, timestamp, tools} into the same
|
|
11
|
+
// file for tool-credential bookkeeping; clobbering that would regress
|
|
12
|
+
// existing users. New keys (delimit_token, access_token, signed_in_at,
|
|
13
|
+
// email) live alongside the legacy keys.
|
|
14
|
+
// - File mode is 0600 (owner-only readable). Directory is created at 0700
|
|
15
|
+
// when missing.
|
|
16
|
+
// - Token shape is intentionally lax: accept anything non-empty after
|
|
17
|
+
// trimming. The gateway is the source of truth for token validity; the
|
|
18
|
+
// CLI must not try to second-guess JWT structure (Supabase tokens vs
|
|
19
|
+
// opaque tokens vs future formats).
|
|
20
|
+
//
|
|
21
|
+
// Returns: { path, email, signedInAt, merged } so callers can render a
|
|
22
|
+
// consistent success message.
|
|
23
|
+
|
|
24
|
+
const fs = require('fs');
|
|
25
|
+
const path = require('path');
|
|
26
|
+
const { delimitHome } = require('./delimit-home');
|
|
27
|
+
|
|
28
|
+
const AUTH_FILE_BASENAME = 'auth.json';
|
|
29
|
+
|
|
30
|
+
function authFilePath() {
  // auth.json lives directly inside the directory reported by delimitHome().
  const home = delimitHome();
  return path.join(home, AUTH_FILE_BASENAME);
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Read the existing auth.json if present, returning a plain object. Returns
|
|
36
|
+
* an empty object on any read/parse error (we do not want to corrupt unrelated
|
|
37
|
+
* keys, but a malformed file should not block sign-in either — overwrite it).
|
|
38
|
+
*/
|
|
39
|
+
function readExistingAuth(filePath) {
  // A missing file is the normal first-run case.
  if (!fs.existsSync(filePath)) return {};

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
  } catch {
    // Unreadable or malformed JSON is treated the same as "no prior state".
    return {};
  }

  // Only a plain JSON object is usable; arrays, null and scalars are not.
  const isPlainObject =
    parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Persist a delimit.ai OAuth bearer token to ~/.delimit/auth.json with mode
|
|
57
|
+
* 0600. Existing keys (set by auth-setup.js or other flows) are preserved.
|
|
58
|
+
*
|
|
59
|
+
* @param {object} args
|
|
60
|
+
* @param {string} args.token Bearer token returned by delimit.ai OAuth
|
|
61
|
+
* @param {string} [args.email] Email address associated with the account
|
|
62
|
+
* @param {string} [args.now] Override clock for deterministic tests (ISO8601)
|
|
63
|
+
* @param {string} [args.home] Override DELIMIT_HOME for tests
|
|
64
|
+
* @returns {{ path: string, email: string, signedInAt: string, merged: boolean }}
|
|
65
|
+
*/
|
|
66
|
+
function writeAuthToken(args) {
  // Persist the bearer token into <home>/auth.json, merging with whatever
  // keys are already stored there.
  //
  // Throws an Error with code DELIMIT_SIGNIN_EMPTY_TOKEN when the token is
  // empty after trimming; filesystem errors propagate to the caller.
  // Returns { path, email, signedInAt, merged }.
  const opts = args || {};
  const token = (opts.token || '').toString().trim();
  if (!token) {
    const err = new Error('Empty token; nothing written.');
    err.code = 'DELIMIT_SIGNIN_EMPTY_TOKEN';
    throw err;
  }

  const home = opts.home || delimitHome();
  if (!fs.existsSync(home)) {
    fs.mkdirSync(home, { recursive: true, mode: 0o700 });
  }
  const filePath = path.join(home, AUTH_FILE_BASENAME);

  const existing = readExistingAuth(filePath);
  // "merged" reports whether any pre-existing keys were carried over.
  const merged = Object.keys(existing).length > 0;
  const signedInAt = opts.now || new Date().toISOString();
  const email = (opts.email || '').toString().trim();

  const next = Object.assign({}, existing, {
    delimit_token: token,
    access_token: token,
    signed_in_at: signedInAt,
  });
  if (email) {
    next.email = email;
  }

  // Write to a temp file first, then rename over the target.
  //  - The name is unique per process and timestamp, so two concurrent
  //    sign-ins do not write into the same temp file.
  //  - flag 'wx' (exclusive create) makes the write fail if the path already
  //    exists. `mode` only applies when a file is created, so reusing a
  //    stale or pre-planted temp file would keep its old, possibly wider,
  //    permissions while the token is written into it.
  const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
  try {
    fs.writeFileSync(tmpPath, JSON.stringify(next, null, 2), {
      mode: 0o600,
      flag: 'wx',
    });
    try {
      // Best-effort: non-POSIX filesystems may reject chmod; the file was
      // already created with the requested mode.
      fs.chmodSync(tmpPath, 0o600);
    } catch {
      // Ignored on purpose (see above).
    }
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    // Do not leave a token-bearing temp file behind. EEXIST means the temp
    // file was not created by this call, so it is not ours to delete.
    if (!err || err.code !== 'EEXIST') {
      try {
        fs.unlinkSync(tmpPath);
      } catch {
        // Nothing to clean up, or cleanup failed; surface the original error.
      }
    }
    throw err;
  }

  return {
    path: filePath,
    email,
    signedInAt,
    merged,
  };
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Read the currently stored token, if any. Returns "" when missing or
|
|
120
|
+
* malformed. Mirrors the gateway-side resolver (ai/deliberation.py::
|
|
121
|
+
* _read_oauth_token) so callers can implement `delimit signin --status`.
|
|
122
|
+
*/
|
|
123
|
+
function readCurrentToken() {
  // Prefer `delimit_token`, fall back to `access_token`; "" when neither is
  // stored or the auth file cannot be read.
  const stored = readExistingAuth(authFilePath());
  const raw = stored.delimit_token || stored.access_token || '';
  return raw.toString().trim();
}
|
|
129
|
+
|
|
130
|
+
module.exports = {
|
|
131
|
+
authFilePath,
|
|
132
|
+
readExistingAuth,
|
|
133
|
+
writeAuthToken,
|
|
134
|
+
readCurrentToken,
|
|
135
|
+
AUTH_FILE_BASENAME,
|
|
136
|
+
};
|