delimit-cli 4.5.13 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/README.md +9 -8
- package/bin/delimit-cli.js +162 -1
- package/bin/delimit-setup.js +46 -6
- package/gateway/ai/_compile_status.py +154 -0
- package/gateway/ai/agent_dispatch.py +36 -0
- package/gateway/ai/backends/tools_infra.py +150 -10
- package/gateway/ai/daemon.py +10 -0
- package/gateway/ai/daily_digest.py +1 -2
- package/gateway/ai/delimit_daemon.py +67 -0
- package/gateway/ai/dispatch_gate.py +399 -0
- package/gateway/ai/hot_reload.py +1 -2
- package/gateway/ai/led193_daemon/executor.py +9 -0
- package/gateway/ai/ledger_manager.py +9 -0
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/notify.py +39 -0
- package/gateway/ai/outreach_substantive.py +676 -0
- package/gateway/ai/reaper.py +70 -0
- package/gateway/ai/reddit_scanner.py +10 -5
- package/gateway/ai/sensing/schema.py +1 -1
- package/gateway/ai/sensing/signal_store.py +0 -1
- package/gateway/ai/server.py +5171 -1462
- package/gateway/ai/social_capability/fit_floor.py +114 -12
- package/gateway/ai/tdqs_lint.py +611 -0
- package/gateway/ai/usage_allowlist.py +198 -0
- package/gateway/ai/workers/base.py +2 -2
- package/gateway/ai/workers/executor.py +32 -3
- package/gateway/ai/workers/outreach_drafter.py +0 -1
- package/gateway/ai/workers/pr_drafter.py +0 -1
- package/gateway/ai/x_ranker.py +12 -2
- package/gateway/core/json_schema_diff.py +25 -1
- package/lib/auth-signin.js +136 -0
- package/lib/auth-signout.js +169 -0
- package/lib/delimit-template.js +11 -0
- package/lib/migration-2092-banner.js +213 -0
- package/package.json +2 -2
- package/server.json +4 -4
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
"""Substantive-outreach payload, gate, and dispatch (LED-2214b).
|
|
2
|
+
|
|
3
|
+
Implements the autonomous-github-outreach architecture ratified by the
|
|
4
|
+
2026-05-11 deliberation (A1 + Codex payload amendment, B3 + Claude reg-O
|
|
5
|
+
target-side veto, C1 single-responsibility daemon). Transcript:
|
|
6
|
+
``/home/delimit/delimit-private/deliberations/2026-05-11-autonomous-github-outreach-architecture.md``.
|
|
7
|
+
|
|
8
|
+
The three SHIFT-1 holes this module closes:
|
|
9
|
+
|
|
10
|
+
* **Empty-payload dispatch** — the old generic ``outreach`` task type
|
|
11
|
+
could be dispatched on a bare "engage user" target with no evidence
|
|
12
|
+
anchor. Twenty-nine LEDs (LED-915–965) had to be bulk-cancelled in
|
|
13
|
+
2026-05 because of this class of failure. The dataclass enforces
|
|
14
|
+
required evidence fields at construction time, so empty-payload
|
|
15
|
+
dispatch is structurally impossible.
|
|
16
|
+
* **Reg-O / banking veto** — a perfectly substantive bug report on a
|
|
17
|
+
banking-fintech repo still violates SHIFT-1 (KYC would deanonymize
|
|
18
|
+
the operating account). ``is_banking_adjacent`` runs at both the scanner layer
|
|
19
|
+
(impossible-by-construction) and the submit-time gate (defense in
|
|
20
|
+
depth) so a regulator-adjacent target never reaches dispatch and
|
|
21
|
+
never reaches submission.
|
|
22
|
+
* **Covert commercial outreach** — even with a substantive technical
|
|
23
|
+
anchor, the agent might leak "btw try delimit-cli". The content gate
|
|
24
|
+
rejects forbidden phrases including our own product names, and
|
|
25
|
+
requires at least one concrete technical anchor (commit hash, spec
|
|
26
|
+
path, source file path, issue number, or CVE) before allowing submission.
|
|
27
|
+
|
|
28
|
+
Public surface:
|
|
29
|
+
|
|
30
|
+
* :class:`SubstantiveCandidate` — typed payload schema for dispatch.
|
|
31
|
+
* :func:`is_banking_adjacent` — reg-O / fintech / banking classifier.
|
|
32
|
+
* :func:`extract_technical_anchors` — anchor extraction for content gate.
|
|
33
|
+
* :func:`check_substantive_content` — content-shape gate.
|
|
34
|
+
* :func:`evaluate_substantive_payload` — composite gate (target then content).
|
|
35
|
+
* :func:`build_candidate_from_github_target` — scanner-level constructor.
|
|
36
|
+
* :func:`dispatch_substantive_outreach` — wraps :func:`dispatch_task`
|
|
37
|
+
with task_type='outreach_substantive' and the typed payload.
|
|
38
|
+
|
|
39
|
+
Not part of this module: the daemon (:mod:`ai.outreach_loop_daemon`)
|
|
40
|
+
that ticks scanner → file ledger → dispatch.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from __future__ import annotations
|
|
44
|
+
|
|
45
|
+
import logging
|
|
46
|
+
import re
|
|
47
|
+
from dataclasses import asdict, dataclass, field
|
|
48
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
49
|
+
|
|
50
|
+
logger = logging.getLogger("delimit.ai.outreach_substantive")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# Constants — keep these auditable. Edits require panel deliberation per
|
|
55
|
+
# the CLAUDE.md SHIFT-1 constitutional binding.
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
PROPOSED_ACTIONS = ("comment", "issue", "pr")
|
|
59
|
+
|
|
60
|
+
# CLAUDE.md SHIFT-1 HARD VETO. KYC will deanonymize the operating account
|
|
61
|
+
# on any of these target classes regardless of brand cover, so the target
|
|
62
|
+
# never enters the dispatch queue. Keyword match runs over the repo name +
|
|
63
|
+
# description + topics, plus the target's URL, rationale, content snippet, and source id; any hit blocks the target.
|
|
64
|
+
#
|
|
65
|
+
# Conservative by design — false positives cost zero (we just don't
|
|
66
|
+
# engage), false negatives risk constitutional violation.
|
|
67
|
+
BANKING_ADJACENT_KEYWORDS: Tuple[str, ...] = (
|
|
68
|
+
# Direct
|
|
69
|
+
"bank", "banking", "credit-union", "credit union",
|
|
70
|
+
# Brokerage / capital markets
|
|
71
|
+
"broker", "brokerage", "securities", "custodian", "custody",
|
|
72
|
+
"clearinghouse", "clearing-house", "settlement",
|
|
73
|
+
# Payments / cards
|
|
74
|
+
"payment", "payments", "card-issuer", "card issuer", "issuer-processor",
|
|
75
|
+
"acquirer", "merchant-acquirer", "interchange", "ach ", "swift ",
|
|
76
|
+
# Lending
|
|
77
|
+
"lender", "lending", "mortgage", "underwriting", "underwrite",
|
|
78
|
+
# Insurance (reg-adjacent under McCarran-Ferguson)
|
|
79
|
+
"insurance", "insurer", "reinsurer", "underwriter",
|
|
80
|
+
# Crypto-fiat onramps (FinCEN-regulated MSBs)
|
|
81
|
+
"msb", "money-services-business", "money services business",
|
|
82
|
+
"onramp", "off-ramp", "fiat-onramp",
|
|
83
|
+
# Wealth / advisors (RIA / IAR regulated)
|
|
84
|
+
"wealth-management", "wealth management", "registered investment",
|
|
85
|
+
"ria-firm", "broker-dealer", "broker dealer",
|
|
86
|
+
# Compliance / AML / KYC vendors (likely reg-O downstream)
|
|
87
|
+
"aml-platform", "kyc-platform", "kyc-provider", "kyc provider",
|
|
88
|
+
"bsa-aml", "sanctions-screening", "ofac-screening",
|
|
89
|
+
# Regulator-adjacent
|
|
90
|
+
"regulator", "regulatory-reporting", "fr-y-9c", "call-report",
|
|
91
|
+
"fdic", "occ-supervised", "frb-supervised", "finra", "sec-registered",
|
|
92
|
+
# Reg-O specifically
|
|
93
|
+
"reg-o", "regulation-o", "regulation o", "regulation-w",
|
|
94
|
+
# Stablecoins / fintech with clear bank rails
|
|
95
|
+
"stablecoin", "neobank", "challenger-bank", "core-banking",
|
|
96
|
+
"core banking", "ledger-banking", "open-banking",
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Self-references and commercial phrasing the agent must never emit on
|
|
100
|
+
# a third-party repo. Per panel verdict + Codex amendment, we ban our
|
|
101
|
+
# own product names too — substantive contributions stand on technical
|
|
102
|
+
# merit alone, not on naming the upstream tool.
|
|
103
|
+
#
|
|
104
|
+
# Matching is case-insensitive, word-boundary aware where it matters
|
|
105
|
+
# (e.g. "delimit" must not flag "delimited" or "delimiter").
|
|
106
|
+
FORBIDDEN_PHRASES: Tuple[str, ...] = (
|
|
107
|
+
# Commercial framing
|
|
108
|
+
"we built", "we made", "we created", "we developed", "we ship",
|
|
109
|
+
"our tool", "our product", "our cli", "our service", "our platform",
|
|
110
|
+
"you should try", "you might try", "you may want to try",
|
|
111
|
+
"you could try", "give it a try", "give us a try",
|
|
112
|
+
"check out our", "have a look at our", "take a look at our",
|
|
113
|
+
"btw try", "btw, try", "by the way try",
|
|
114
|
+
# Generic non-substantive
|
|
115
|
+
"thanks for the project", "great project", "love the project",
|
|
116
|
+
"interesting project",
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Word-boundary product names. Ban "delimit" and "delimit-cli" as
|
|
120
|
+
# standalone tokens; don't false-positive on "delimited" or "delimiter".
|
|
121
|
+
FORBIDDEN_PRODUCT_TOKENS: Tuple[str, ...] = (
|
|
122
|
+
"delimit", "delimit-cli", "delimit.ai", "delimitdev",
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
# Minimum content length below which a body cannot be substantive
|
|
126
|
+
# regardless of anchors. Calibrated to "two-sentence bug report".
|
|
127
|
+
MIN_BODY_LENGTH = 200
|
|
128
|
+
|
|
129
|
+
# Patterns for technical-anchor extraction. At least one must hit.
|
|
130
|
+
_COMMIT_HASH_RE = re.compile(r"\b[0-9a-f]{7,40}\b", re.IGNORECASE)
|
|
131
|
+
_ISSUE_REF_RE = re.compile(r"#\d{1,7}\b")
|
|
132
|
+
_CVE_RE = re.compile(r"\bCVE-\d{4}-\d{4,7}\b", re.IGNORECASE)
|
|
133
|
+
_SPEC_PATH_RE = re.compile(
|
|
134
|
+
r"(?:^|[\s`])(?:[A-Za-z0-9_\-/\.]+/)?(?:openapi|swagger|asyncapi)"
|
|
135
|
+
r"[\w\-/]*\.(?:ya?ml|json)\b",
|
|
136
|
+
re.IGNORECASE,
|
|
137
|
+
)
|
|
138
|
+
_FILE_PATH_RE = re.compile(
|
|
139
|
+
r"(?:^|[\s`])[A-Za-z0-9_\-/.]+\.(?:py|ts|tsx|js|jsx|go|rs|java|"
|
|
140
|
+
r"rb|c|cc|cpp|h|md|ya?ml|json|toml|proto)\b"
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Payload schema
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass(frozen=True)
class SubstantiveCandidate:
    """Typed, immutable dispatch payload for substantive GitHub outreach.

    The dataclass is ``frozen=True`` and ``__post_init__`` validates every
    required field, so an instance that exists is fully populated. The
    scanner builds one of these or nothing; the dispatcher refuses to
    fire on anything else.

    Fields:
        repo: ``owner/name`` of the target repository. Required; both
            halves must be non-empty.
        category: One of ``pain_thread``, ``adoption_lead``,
            ``competitor_user``, ``own_repo_activity``. Required.
        target_artifact: Canonical URL of the artifact we'd act on
            (the issue, the PR, the repo root, etc.). Required.
        evidence_refs: Non-empty sequence of concrete technical anchors
            (issue numbers, commit hashes, spec paths, CVE IDs). Any
            iterable of non-blank strings is accepted and stored as a
            tuple. A bare ``str`` is rejected rather than being split
            into single characters.
        proposed_action: One of ``comment``, ``issue``, ``pr``.
        subcategory: Optional finer-grained label (e.g.
            ``openapi_spec``). Allowed to be empty.
        venture: Sourcing venture (e.g. ``delimit``). Default ``delimit``.
        fingerprint: Scanner fingerprint for idempotency. Optional.

    Raises:
        ValueError: If a required field is missing or malformed.
    """

    repo: str
    category: str
    target_artifact: str
    evidence_refs: Tuple[str, ...]
    proposed_action: str
    subcategory: str = ""
    venture: str = "delimit"
    fingerprint: str = ""

    def __post_init__(self):
        # A frozen dataclass cannot assign attributes normally, so the
        # single normalisation (evidence_refs -> tuple) goes through
        # object.__setattr__ once every check has passed.
        if isinstance(self.repo, str):
            owner, sep, name = self.repo.partition("/")
        else:
            owner, sep, name = "", "", ""
        # Require both halves so a lone "/" or "owner/" cannot slip through.
        if not (sep and owner.strip() and name.strip()):
            raise ValueError(
                f"SubstantiveCandidate.repo must be 'owner/name', got {self.repo!r}"
            )
        if self.category not in {
            "pain_thread", "adoption_lead", "competitor_user", "own_repo_activity",
        }:
            raise ValueError(
                f"SubstantiveCandidate.category invalid: {self.category!r}"
            )
        if not self.target_artifact:
            raise ValueError("SubstantiveCandidate.target_artifact is required")

        refs = self.evidence_refs
        if isinstance(refs, (str, bytes)):
            # tuple("issue:#1") would yield one "anchor" per character.
            raise ValueError(
                "SubstantiveCandidate.evidence_refs must be a sequence of "
                f"anchor strings, not a bare {type(refs).__name__}"
            )
        # Materialise before the emptiness check so an exhausted iterator
        # cannot pass as "non-empty".
        refs = tuple(refs) if refs else ()
        if not refs:
            raise ValueError(
                "SubstantiveCandidate.evidence_refs cannot be empty — "
                "empty-payload dispatch is structurally forbidden (LED-2214b)"
            )
        if any(not isinstance(ref, str) or not ref.strip() for ref in refs):
            # A blank anchor is an empty payload in disguise, and a
            # non-string anchor cannot be joined into the task description.
            raise ValueError(
                "SubstantiveCandidate.evidence_refs entries must be "
                "non-empty strings"
            )
        if self.proposed_action not in PROPOSED_ACTIONS:
            raise ValueError(
                f"SubstantiveCandidate.proposed_action must be one of "
                f"{PROPOSED_ACTIONS}, got {self.proposed_action!r}"
            )
        object.__setattr__(self, "evidence_refs", refs)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict copy with ``evidence_refs`` as a list."""
        d = asdict(self)
        d["evidence_refs"] = list(self.evidence_refs)
        return d
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
# Reg-O / banking target-side veto
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def is_banking_adjacent(target: Dict[str, Any]) -> Tuple[bool, str]:
    """Return ``(is_adjacent, matched_keyword)`` for a scanner target.

    Concatenates the string form of ``canonical_url``, ``rationale``,
    ``content_snippet``, ``repo_topics``, ``repo_description``, ``repo``
    and ``source_id`` (whichever are present) and looks for any entry of
    ``BANKING_ADJACENT_KEYWORDS`` as a case-insensitive substring.

    The haystack is whitespace-normalised and padded with one trailing
    space. Several keywords end in a space (``"ach "``, ``"swift "``);
    without this they would be missed at the very end of the text or
    before a newline or tab, and a missed match on this veto is the
    expensive kind of error.

    Only the keyword is returned, not the field it was found in, and the
    first keyword in table order wins (so a short keyword listed before a
    longer one containing it is the one reported).

    Args:
        target: Scanner target dict. Collection-valued fields are
            flattened element by element.

    Returns:
        ``(True, keyword)`` on the first hit, else ``(False, "")``.
        Callers should treat ``True`` as a hard veto.
    """
    haystack_parts: List[str] = []
    for key in (
        "canonical_url", "rationale", "content_snippet",
        "repo_topics", "repo_description", "repo", "source_id",
    ):
        value = target.get(key)
        if isinstance(value, (list, tuple, set, frozenset)):
            haystack_parts.extend(str(v) for v in value)
        elif value is not None:
            haystack_parts.append(str(value))
    # split()/join collapses every whitespace run to a single space; the
    # trailing pad lets space-terminated keywords match at end of text.
    haystack = " ".join(" ".join(haystack_parts).lower().split()) + " "
    for kw in BANKING_ADJACENT_KEYWORDS:
        if kw in haystack:
            return True, kw
    return False, ""
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# ---------------------------------------------------------------------------
|
|
258
|
+
# Technical-anchor extraction + content gate
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def extract_technical_anchors(text: str) -> Dict[str, List[str]]:
    """Extract all technical anchors found in ``text``.

    Returns a dict with keys ``commits``, ``issues``, ``cves``,
    ``spec_paths``, ``file_paths``. An empty list means nothing of that
    type was found; falsy input yields all-empty lists.

    Spec paths (openapi/swagger/asyncapi) are matched explicitly and are
    usually also captured by the broader file-path pattern, so the same
    path can appear under both keys.

    The two path patterns consume the delimiter in front of the path
    (any whitespace character or a backtick). That delimiter is removed
    here, including newlines and tabs, so a path at the start of a line
    is not returned with a leading ``"\\n"``.
    """
    if not text:
        return {"commits": [], "issues": [], "cves": [], "spec_paths": [], "file_paths": []}

    def _clean(match: str) -> str:
        # Drop the leading delimiter the pattern swallowed (whitespace of
        # any kind, or a Markdown backtick).
        return match.strip().strip("`")

    return {
        "commits": _COMMIT_HASH_RE.findall(text),
        "issues": _ISSUE_REF_RE.findall(text),
        "cves": _CVE_RE.findall(text),
        "spec_paths": [_clean(m) for m in _SPEC_PATH_RE.findall(text)],
        "file_paths": [_clean(m) for m in _FILE_PATH_RE.findall(text)],
    }
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _hits_forbidden_product_token(text_lower: str) -> Optional[str]:
    """Find the first banned product name that occurs as a whole word.

    ``text_lower`` is searched as given (callers pass lowercased text).
    Names are tried in ``FORBIDDEN_PRODUCT_TOKENS`` order and wrapped in
    ``\\b`` anchors, so longer words that merely contain a name do not
    count. Returns the matching name, or ``None`` when nothing matches.
    """
    return next(
        (
            name
            for name in FORBIDDEN_PRODUCT_TOKENS
            if re.search(rf"\b{re.escape(name)}\b", text_lower)
        ),
        None,
    )
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def check_substantive_content(
    body: str,
    proposed_action: str,
) -> Dict[str, Any]:
    """Validate a draft body against the SHIFT-1 content rules.

    Order of checks (load-bearing — do not reorder without panel
    deliberation):

    1. Type / emptiness — a non-string or whitespace-only body blocks
       immediately with reason ``empty_body``.
    2. Action — ``proposed_action`` must be in ``PROPOSED_ACTIONS``.
    3. Length floor — measured on the *stripped* body, so whitespace
       padding cannot satisfy ``MIN_BODY_LENGTH``.
    4. Forbidden product tokens — bans our own names (defends against
       the "btw try delimit-cli" class).
    5. Forbidden commercial phrases — each phrase must start at a word
       boundary, so "our tool" is caught but "your tool" is not.
       Suffixes still match ("we ship" catches "we shipped").
    6. Technical anchor — at least one commit hash, issue ref, CVE,
       spec path, or file path.

    Checks 3-6 accumulate; every violation is reported. Each violation
    is formatted ``code: detail`` and ``reason`` is the code of the
    first one, so ``reason`` is always a stable identifier.

    This function does NOT enforce the target-side reg-O veto; that is
    done by :func:`is_banking_adjacent`.

    Returns:
        Dict with keys ``verdict`` (``"allow"`` | ``"block"``),
        ``reason``, ``violations`` (list of strings), ``anchors``
        (the extracted-anchors dict; ``{}`` on the two early exits).
    """
    violations: List[str] = []
    if not isinstance(body, str) or not body.strip():
        return {
            "verdict": "block",
            "reason": "empty_body",
            "violations": ["body is empty"],
            "anchors": {},
        }
    if proposed_action not in PROPOSED_ACTIONS:
        return {
            "verdict": "block",
            "reason": "invalid_proposed_action",
            "violations": [f"proposed_action must be one of {PROPOSED_ACTIONS}"],
            "anchors": {},
        }

    # Count only real content: leading/trailing whitespace is padding.
    content_length = len(body.strip())
    if content_length < MIN_BODY_LENGTH:
        violations.append(
            f"body_too_short: body length {content_length} "
            f"< MIN_BODY_LENGTH={MIN_BODY_LENGTH}"
        )

    body_lower = body.lower()
    product_hit = _hits_forbidden_product_token(body_lower)
    if product_hit:
        violations.append(f"forbidden_product_token: {product_hit!r}")
    for phrase in FORBIDDEN_PHRASES:
        # (?<!\w) anchors the phrase start to a word boundary without
        # constraining what follows it.
        if re.search(r"(?<!\w)" + re.escape(phrase), body_lower):
            violations.append(f"forbidden_phrase: {phrase!r}")

    anchors = extract_technical_anchors(body)
    if not any(anchors.values()):
        violations.append(
            "no_technical_anchor: body must cite a commit hash, "
            "issue number, CVE, spec path, or source file path"
        )

    if violations:
        return {
            "verdict": "block",
            "reason": violations[0].split(":")[0],
            "violations": violations,
            "anchors": anchors,
        }
    return {
        "verdict": "allow",
        "reason": "ok",
        "violations": [],
        "anchors": anchors,
    }
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
# ---------------------------------------------------------------------------
|
|
374
|
+
# Composite gate: target-side veto BEFORE content
|
|
375
|
+
# ---------------------------------------------------------------------------
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def evaluate_substantive_payload(
    body: str,
    proposed_action: str,
    target: Optional[Dict[str, Any]] = None,
    repo: str = "",
    repo_description: str = "",
    repo_topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Full pre-submit gate: reg-O target veto, then content shape.

    Target classification is checked FIRST: a substantive bug report on
    a banking-adjacent repo is still refused, regardless of content
    quality.

    Callers can pass either:
      * a full ``target`` dict (forwarded to :func:`is_banking_adjacent`),
      * the discrete ``repo`` / ``repo_description`` / ``repo_topics``
        fields, which are wrapped in a synthetic target,
      * or both, in which case the keyword scan sees the UNION: a
        discrete value is added alongside the target's own value for
        that key and never replaces it, so passing a field cannot hide
        text the target already carried.

    The caller's ``target`` dict is not mutated. If no target
    information is supplied at all, the target stage has nothing to scan
    and passes.

    Returns:
        Dict with ``verdict``, ``reason``, ``violations``, ``anchors``,
        and ``stage`` — ``"target"`` when the veto fired, otherwise
        ``"content"`` (set on both allow and block results from the
        content check).
    """

    def _as_list(value: Any) -> List[Any]:
        # Flatten collections so is_banking_adjacent sees each element.
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [value]

    supplied: Dict[str, Any] = {
        "repo": repo,
        "repo_description": repo_description,
        "repo_topics": list(repo_topics) if repo_topics else [],
    }

    if target is None:
        scan_target: Dict[str, Any] = supplied
    else:
        scan_target = dict(target)
        for key, extra in supplied.items():
            if not extra:
                continue
            existing = scan_target.get(key)
            if existing:
                # Keep both: list values are scanned element by element.
                scan_target[key] = _as_list(existing) + _as_list(extra)
            else:
                scan_target[key] = extra

    adjacent, matched = is_banking_adjacent(scan_target)
    if adjacent:
        return {
            "verdict": "block",
            "reason": "banking_adjacent_target",
            "violations": [f"banking_adjacent_target: matched keyword {matched!r}"],
            "anchors": {},
            "stage": "target",
        }

    content_result = check_substantive_content(body, proposed_action)
    content_result["stage"] = "content"
    return content_result
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ---------------------------------------------------------------------------
|
|
434
|
+
# Scanner-level constructor
|
|
435
|
+
# ---------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
_FINGERPRINT_REPO_RE = re.compile(
|
|
439
|
+
r"^github:(?:issue|repo|fork|star|outreach):([^:]+/[^:]+)(?::|$)"
|
|
440
|
+
)
|
|
441
|
+
_URL_REPO_RE = re.compile(
|
|
442
|
+
r"^https?://github\.com/([^/]+/[^/]+?)(?:/|$|#|\?)"
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _repo_from_target(target: Dict[str, Any]) -> str:
|
|
447
|
+
repo = (target.get("repo") or "").strip()
|
|
448
|
+
if repo and "/" in repo:
|
|
449
|
+
return repo
|
|
450
|
+
fingerprint = target.get("fingerprint", "")
|
|
451
|
+
m = _FINGERPRINT_REPO_RE.match(fingerprint)
|
|
452
|
+
if m:
|
|
453
|
+
return m.group(1)
|
|
454
|
+
url = target.get("canonical_url", "")
|
|
455
|
+
m = _URL_REPO_RE.match(url)
|
|
456
|
+
if m:
|
|
457
|
+
return m.group(1)
|
|
458
|
+
return ""
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
# Default action proposed for each scanner category.
_CATEGORY_TO_ACTION = {
    "pain_thread": "comment",
    "adoption_lead": "issue",
    "competitor_user": "comment",
    "own_repo_activity": "comment",
}


def build_candidate_from_github_target(
    target: Dict[str, Any],
    category: str,
    subcategory: str = "",
) -> Optional[SubstantiveCandidate]:
    """Build a :class:`SubstantiveCandidate` or return None.

    The function returns None — *not* raises — when the target cannot
    yield a substantive payload. Callers that get None must NOT dispatch.

    Reasons for a None return (each one is logged):
      * Target classified banking-adjacent (SHIFT-1 hard veto).
      * Repo could not be derived from the repo field, fingerprint or URL.
      * Category not in the mapped action table.
      * No technical anchor extractable from snippet + rationale.
      * Neither ``canonical_url`` nor ``fingerprint`` is set.
      * ``SubstantiveCandidate`` construction rejected the values.

    The reg-O check runs here as well as at submit time, so a
    banking-adjacent target is dropped before a candidate exists.

    A ``venture`` that is missing, ``None`` or empty falls back to
    ``"delimit"``, because the dispatcher upper-cases the venture for
    the task title.
    """
    adjacent, matched = is_banking_adjacent(target)
    if adjacent:
        logger.info(
            "build_candidate: banking-adjacent veto fingerprint=%s matched=%s",
            target.get("fingerprint"), matched,
        )
        return None

    repo = _repo_from_target(target)
    if not repo:
        logger.info(
            "build_candidate: repo unresolved fingerprint=%s url=%s",
            target.get("fingerprint"), target.get("canonical_url"),
        )
        return None

    if category not in _CATEGORY_TO_ACTION:
        logger.info("build_candidate: unmapped category=%s", category)
        return None

    snippet = target.get("content_snippet", "") or ""
    rationale = target.get("rationale", "") or ""
    anchors = extract_technical_anchors(f"{snippet}\n{rationale}")
    evidence_refs: List[str] = []
    # Key order fixes the priority of anchors in the payload.
    for key in ("issues", "spec_paths", "cves", "commits", "file_paths"):
        for ref in anchors.get(key, []):
            ref = ref.strip()  # never carry stray whitespace into a label
            if not ref:
                continue
            label = f"{key[:-1] if key.endswith('s') else key}:{ref}"
            if label not in evidence_refs:
                evidence_refs.append(label)
    if not evidence_refs:
        logger.info(
            "build_candidate: no_technical_anchor fingerprint=%s category=%s",
            target.get("fingerprint"), category,
        )
        return None

    target_artifact = target.get("canonical_url") or target.get("fingerprint", "")
    if not target_artifact:
        logger.info(
            "build_candidate: no target artifact (canonical_url/fingerprint) "
            "repo=%s category=%s",
            repo, category,
        )
        return None

    try:
        return SubstantiveCandidate(
            repo=repo,
            category=category,
            target_artifact=target_artifact,
            evidence_refs=tuple(evidence_refs),
            proposed_action=_CATEGORY_TO_ACTION[category],
            subcategory=subcategory or "",
            # `or` (not a .get default) so an explicit None/"" also falls back.
            venture=target.get("venture") or "delimit",
            fingerprint=target.get("fingerprint", "") or "",
        )
    except ValueError as exc:
        logger.warning(
            "build_candidate: construction failed for fingerprint=%s: %s",
            target.get("fingerprint"), exc,
        )
        return None
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# ---------------------------------------------------------------------------
|
|
550
|
+
# Dispatch wrapper
|
|
551
|
+
# ---------------------------------------------------------------------------
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
OUTREACH_SUBSTANTIVE_TASK_TYPE = "outreach_substantive"


def dispatch_substantive_outreach(
    candidate: SubstantiveCandidate,
    target: Dict[str, Any],
    ledger_item_id: str = "",
) -> Dict[str, Any]:
    """Hand a validated candidate to ``dispatch_task`` and return its result.

    Only a :class:`SubstantiveCandidate` instance is accepted; anything
    else raises ``TypeError`` rather than being coerced. The task is
    filed under ``OUTREACH_SUBSTANTIVE_TASK_TYPE``, a type separate from
    the legacy ``outreach`` one.

    The task carries constraint and tool lists naming
    ``delimit_substantive_content_check`` (always) and
    ``delimit_external_pr_check`` (only when the proposed action is
    ``pr``); the picking agent is expected to call them before
    submitting.

    When the dispatch result has a ``task_id`` and ``ledger_item_id`` is
    given, the two are linked on a best-effort basis: a linking failure
    is logged as a warning and does not change the return value.
    """
    if not isinstance(candidate, SubstantiveCandidate):
        # A stray dict must be refused, never silently converted.
        raise TypeError(
            "dispatch_substantive_outreach requires a SubstantiveCandidate "
            f"instance, got {type(candidate).__name__}"
        )

    # Imported here, not at module top, to keep this module light and
    # avoid an import cycle.
    from ai.agent_dispatch import dispatch_task, link_ledger_item

    wants_pr = candidate.proposed_action == "pr"

    constraints = [
        "no-deploy", "no-secrets", "no-destructive",
        "shift-1-quiet-attraction",
        "must-call-delimit_substantive_content_check-before-submit",
    ]
    tools_needed = [
        "delimit_substantive_content_check",
        "delimit_sensor_github_issue",
    ]
    if wants_pr:
        constraints.append("must-call-delimit_external_pr_check-before-submit")
        tools_needed.append("delimit_external_pr_check")

    variables: Dict[str, Any] = {
        "candidate": candidate.to_dict(),
        "venture": candidate.venture,
        "repo": candidate.repo,
        "category": candidate.category,
        "subcategory": candidate.subcategory,
        "target_artifact": candidate.target_artifact,
        "evidence_refs": list(candidate.evidence_refs),
        "proposed_action": candidate.proposed_action,
        "source_url": target.get("canonical_url", ""),
        "source_fingerprint": candidate.fingerprint,
        "author": target.get("author", ""),
        "rationale": target.get("rationale", ""),
    }

    title = "".join((
        f"[{candidate.venture.upper()}] Substantive {candidate.proposed_action} ",
        f"on {candidate.repo} ({candidate.category})",
    ))

    description = "".join((
        "Substantive-outreach task (LED-2214b architecture).\n",
        f"Repo: {candidate.repo}\n",
        f"Category: {candidate.category}",
        f"{' / ' + candidate.subcategory if candidate.subcategory else ''}\n",
        f"Action: {candidate.proposed_action}\n",
        f"Target: {candidate.target_artifact}\n",
        f"Evidence: {', '.join(candidate.evidence_refs)}\n",
        "\n",
        "SHIFT-1 constraints:\n",
        " - Pseudonymous account only; no founder identity.\n",
        " - Real technical contribution only. No 'we built' / 'our tool' / ",
        "'btw try' framing. Never name our own product in the body.\n",
        " - delimit_substantive_content_check is MANDATORY pre-submit.\n",
        " - delimit_external_pr_check is MANDATORY when proposed_action='pr'.\n",
    ))

    context = "".join((
        "Substantive autonomous outreach via the LED-2214b architecture. ",
        "The pseudonymous-substantive-contribution carve-out (CLAUDE.md SHIFT-1, ",
        "2026-05-04) permits this provided the activity is a genuine technical ",
        "contribution. The pre-submit gate stack enforces that. If the gate ",
        "blocks, file the rejection reason on the linked ledger item and stop.",
    ))

    # Idempotency key: prefer the scanner fingerprint, else repo + artifact.
    if candidate.fingerprint:
        external_key = f"outreach_substantive:{candidate.fingerprint}"
    else:
        external_key = f"outreach_substantive:{candidate.repo}:{candidate.target_artifact}"

    result = dispatch_task(
        title=title,
        description=description,
        assignee="any",
        priority="P1",
        tools_needed=tools_needed,
        constraints=constraints,
        context=context,
        task_type=OUTREACH_SUBSTANTIVE_TASK_TYPE,
        venture=candidate.venture,
        variables=variables,
        external_key=external_key,
    )

    task_id = result.get("task_id", "")
    if not (task_id and ledger_item_id):
        return result
    try:
        link_ledger_item(task_id, ledger_item_id)
    except Exception as exc:  # link is best-effort
        logger.warning(
            "dispatch_substantive_outreach: link_ledger_item failed "
            "task=%s ledger=%s err=%s",
            task_id, ledger_item_id, exc,
        )
    return result
|