delimit-cli 4.5.13 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +9 -8
  3. package/bin/delimit-cli.js +162 -1
  4. package/bin/delimit-setup.js +46 -6
  5. package/gateway/ai/_compile_status.py +154 -0
  6. package/gateway/ai/agent_dispatch.py +36 -0
  7. package/gateway/ai/backends/tools_infra.py +150 -10
  8. package/gateway/ai/daemon.py +10 -0
  9. package/gateway/ai/daily_digest.py +1 -2
  10. package/gateway/ai/delimit_daemon.py +67 -0
  11. package/gateway/ai/dispatch_gate.py +399 -0
  12. package/gateway/ai/hot_reload.py +1 -2
  13. package/gateway/ai/led193_daemon/executor.py +9 -0
  14. package/gateway/ai/ledger_manager.py +9 -0
  15. package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
  16. package/gateway/ai/notify.py +39 -0
  17. package/gateway/ai/outreach_substantive.py +676 -0
  18. package/gateway/ai/reaper.py +70 -0
  19. package/gateway/ai/reddit_scanner.py +10 -5
  20. package/gateway/ai/sensing/schema.py +1 -1
  21. package/gateway/ai/sensing/signal_store.py +0 -1
  22. package/gateway/ai/server.py +5171 -1462
  23. package/gateway/ai/social_capability/fit_floor.py +114 -12
  24. package/gateway/ai/tdqs_lint.py +611 -0
  25. package/gateway/ai/usage_allowlist.py +198 -0
  26. package/gateway/ai/workers/base.py +2 -2
  27. package/gateway/ai/workers/executor.py +32 -3
  28. package/gateway/ai/workers/outreach_drafter.py +0 -1
  29. package/gateway/ai/workers/pr_drafter.py +0 -1
  30. package/gateway/ai/x_ranker.py +12 -2
  31. package/gateway/core/json_schema_diff.py +25 -1
  32. package/lib/auth-signin.js +136 -0
  33. package/lib/auth-signout.js +169 -0
  34. package/lib/delimit-template.js +11 -0
  35. package/lib/migration-2092-banner.js +213 -0
  36. package/package.json +2 -2
  37. package/server.json +4 -4
@@ -0,0 +1,676 @@
1
+ """Substantive-outreach payload, gate, and dispatch (LED-2214b).
2
+
3
+ Implements the autonomous-github-outreach architecture ratified by the
4
+ 2026-05-11 deliberation (A1 + Codex payload amendment, B3 + Claude reg-O
5
+ target-side veto, C1 single-responsibility daemon). Transcript:
6
+ ``/home/delimit/delimit-private/deliberations/2026-05-11-autonomous-github-outreach-architecture.md``.
7
+
8
+ The three SHIFT-1 holes this module closes:
9
+
10
+ * **Empty-payload dispatch** — the old generic ``outreach`` task type
11
+ could be dispatched on a bare "engage user" target with no evidence
12
+ anchor. Twenty-nine LEDs (LED-915–965) had to be bulk-cancelled in
13
+ 2026-05 because of this class of failure. The dataclass enforces
14
+ required evidence fields at construction time, so empty-payload
15
+ dispatch is structurally impossible.
16
+ * **Reg-O / banking veto** — a perfectly substantive bug report on a
17
+ banking-fintech repo still violates SHIFT-1 (KYC would deanonymize
18
+ the operating account). ``is_banking_adjacent`` runs at both the scanner layer
19
+ (impossible-by-construction) and the submit-time gate (defense in
20
+ depth) so a regulator-adjacent target never reaches dispatch and
21
+ never reaches submission.
22
+ * **Covert commercial outreach** — even with a substantive technical
23
+ anchor, the agent might leak "btw try delimit-cli". The content gate
24
+ rejects forbidden phrases including our own product names, and
25
+ requires at least one concrete technical anchor (commit hash, spec
26
+ path, issue number, or CVE) before allowing submission.
27
+
28
+ Public surface:
29
+
30
+ * :class:`SubstantiveCandidate` — typed payload schema for dispatch.
31
+ * :func:`is_banking_adjacent` — reg-O / fintech / banking classifier.
32
+ * :func:`extract_technical_anchors` — anchor extraction for content gate.
33
+ * :func:`check_substantive_content` — content-shape gate.
34
+ * :func:`evaluate_substantive_payload` — composite gate (target then content).
35
+ * :func:`build_candidate_from_github_target` — scanner-level constructor.
36
+ * :func:`dispatch_substantive_outreach` — wraps :func:`dispatch_task`
37
+ with task_type='outreach_substantive' and the typed payload.
38
+
39
+ Not part of this module: the daemon (:mod:`ai.outreach_loop_daemon`)
40
+ that ticks scanner → file ledger → dispatch.
41
+ """
42
+
43
+ from __future__ import annotations
44
+
45
+ import logging
46
+ import re
47
+ from dataclasses import asdict, dataclass, field
48
+ from typing import Any, Dict, List, Optional, Tuple
49
+
50
+ logger = logging.getLogger("delimit.ai.outreach_substantive")
51
+
52
+
53
# ---------------------------------------------------------------------------
# Constants — keep these auditable. Edits require panel deliberation per
# the CLAUDE.md SHIFT-1 constitutional binding.
# ---------------------------------------------------------------------------

# Closed set of actions a substantive-outreach task may propose. Both the
# SubstantiveCandidate constructor and check_substantive_content reject
# any value outside this tuple.
PROPOSED_ACTIONS = ("comment", "issue", "pr")

# CLAUDE.md SHIFT-1 HARD VETO. KYC will deanonymize the operating account
# on any of these target classes regardless of brand cover, so the target
# never enters the dispatch queue. The keyword match runs over every text
# field is_banking_adjacent collects from the target (canonical URL,
# rationale, content snippet, repo name, repo description, repo topics,
# source id); any hit blocks the target.
#
# Conservative by design — false positives cost zero (we just don't
# engage), false negatives risk constitutional violation.
#
# All entries are lowercase because they are compared against a lowercased
# haystack with a plain substring test.
BANKING_ADJACENT_KEYWORDS: Tuple[str, ...] = (
    # Direct
    "bank", "banking", "credit-union", "credit union",
    # Brokerage / capital markets
    "broker", "brokerage", "securities", "custodian", "custody",
    "clearinghouse", "clearing-house", "settlement",
    # Payments / cards
    # NOTE(review): "ach " and "swift " carry a trailing space as part of
    # the literal. Under plain substring matching they also hit inside
    # ordinary words ("each ", "approach ") — confirm that is acceptable.
    "payment", "payments", "card-issuer", "card issuer", "issuer-processor",
    "acquirer", "merchant-acquirer", "interchange", "ach ", "swift ",
    # Lending
    "lender", "lending", "mortgage", "underwriting", "underwrite",
    # Insurance (reg-adjacent under McCarran-Ferguson)
    "insurance", "insurer", "reinsurer", "underwriter",
    # Crypto-fiat onramps (FinCEN-regulated MSBs)
    "msb", "money-services-business", "money services business",
    "onramp", "off-ramp", "fiat-onramp",
    # Wealth / advisors (RIA / IAR regulated)
    "wealth-management", "wealth management", "registered investment",
    "ria-firm", "broker-dealer", "broker dealer",
    # Compliance / AML / KYC vendors (likely reg-O downstream)
    "aml-platform", "kyc-platform", "kyc-provider", "kyc provider",
    "bsa-aml", "sanctions-screening", "ofac-screening",
    # Regulator-adjacent
    "regulator", "regulatory-reporting", "fr-y-9c", "call-report",
    "fdic", "occ-supervised", "frb-supervised", "finra", "sec-registered",
    # Reg-O specifically
    "reg-o", "regulation-o", "regulation o", "regulation-w",
    # Stablecoins / fintech with clear bank rails
    "stablecoin", "neobank", "challenger-bank", "core-banking",
    "core banking", "ledger-banking", "open-banking",
)

# Self-references and commercial phrasing the agent must never emit on
# a third-party repo. Per panel verdict + Codex amendment, we ban our
# own product names too — substantive contributions stand on technical
# merit alone, not on naming the upstream tool.
#
# Matching is case-insensitive. The phrases in this tuple are matched as
# plain substrings of the lowercased body; word-boundary matching applies
# only to FORBIDDEN_PRODUCT_TOKENS below (e.g. "delimit" must not flag
# "delimited" or "delimiter").
FORBIDDEN_PHRASES: Tuple[str, ...] = (
    # Commercial framing
    "we built", "we made", "we created", "we developed", "we ship",
    "our tool", "our product", "our cli", "our service", "our platform",
    "you should try", "you might try", "you may want to try",
    "you could try", "give it a try", "give us a try",
    "check out our", "have a look at our", "take a look at our",
    "btw try", "btw, try", "by the way try",
    # Generic non-substantive
    "thanks for the project", "great project", "love the project",
    "interesting project",
)

# Word-boundary product names. Ban "delimit" and "delimit-cli" as
# standalone tokens; don't false-positive on "delimited" or "delimiter".
# Each token is wrapped in \b...\b by _hits_forbidden_product_token and
# searched against the lowercased body, so entries must stay lowercase.
FORBIDDEN_PRODUCT_TOKENS: Tuple[str, ...] = (
    "delimit", "delimit-cli", "delimit.ai", "delimitdev",
)
124
+
125
+ # Minimum content length below which a body cannot be substantive
126
+ # regardless of anchors. Calibrated to "two-sentence bug report".
127
+ MIN_BODY_LENGTH = 200
128
+
129
+ # Patterns for technical-anchor extraction. At least one must hit.
130
+ _COMMIT_HASH_RE = re.compile(r"\b[0-9a-f]{7,40}\b", re.IGNORECASE)
131
+ _ISSUE_REF_RE = re.compile(r"#\d{1,7}\b")
132
+ _CVE_RE = re.compile(r"\bCVE-\d{4}-\d{4,7}\b", re.IGNORECASE)
133
+ _SPEC_PATH_RE = re.compile(
134
+ r"(?:^|[\s`])(?:[A-Za-z0-9_\-/\.]+/)?(?:openapi|swagger|asyncapi)"
135
+ r"[\w\-/]*\.(?:ya?ml|json)\b",
136
+ re.IGNORECASE,
137
+ )
138
+ _FILE_PATH_RE = re.compile(
139
+ r"(?:^|[\s`])[A-Za-z0-9_\-/.]+\.(?:py|ts|tsx|js|jsx|go|rs|java|"
140
+ r"rb|c|cc|cpp|h|md|ya?ml|json|toml|proto)\b"
141
+ )
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Payload schema
146
+ # ---------------------------------------------------------------------------
147
+
148
+
149
@dataclass(frozen=True)
class SubstantiveCandidate:
    """Typed dispatch payload for substantive github outreach.

    The dataclass is ``frozen=True`` (immutable) and the constructor
    enforces every required field — there is no path to a partially
    populated ``SubstantiveCandidate``, which is the entire point of
    the Codex amendment to A1. The scanner builds one of these or
    nothing; the dispatcher refuses to fire on anything else.

    Fields:
        repo: ``owner/name`` of the target repository. Required; both
            sides of the slash must be non-empty.
        category: One of ``pain_thread``, ``adoption_lead``,
            ``competitor_user``, ``own_repo_activity``. Required.
        target_artifact: Canonical URL of the artifact we'd act on
            (the issue, the PR, the repo root, etc.). Required.
        evidence_refs: Non-empty sequence of concrete technical anchors
            extracted from the target — issue numbers, commit hashes,
            spec paths, CVE IDs. Stored as a tuple. A bare string, an
            empty sequence, or a sequence with no non-blank string
            raises at construction.
        proposed_action: One of ``comment``, ``issue``, ``pr``.
        subcategory: Optional finer-grained label (e.g.
            ``openapi_spec``). Allowed to be empty.
        venture: Sourcing venture (e.g. ``delimit``). Default ``delimit``.
        fingerprint: Scanner fingerprint for idempotency. Optional.

    Raises:
        ValueError: if any required field fails validation.
    """

    repo: str
    category: str
    target_artifact: str
    evidence_refs: Tuple[str, ...]
    proposed_action: str
    subcategory: str = ""
    venture: str = "delimit"
    fingerprint: str = ""

    def __post_init__(self):
        # Mirror normal validate-on-construct ergonomics for a frozen
        # dataclass. object.__setattr__ is used only to normalise
        # evidence_refs; every other step just validates and raises.
        owner, _, name = (
            self.repo.partition("/") if isinstance(self.repo, str) else ("", "", "")
        )
        if not owner.strip() or not name.strip():
            raise ValueError(
                f"SubstantiveCandidate.repo must be 'owner/name', got {self.repo!r}"
            )
        if self.category not in {
            "pain_thread", "adoption_lead", "competitor_user", "own_repo_activity",
        }:
            raise ValueError(
                f"SubstantiveCandidate.category invalid: {self.category!r}"
            )
        if not self.target_artifact:
            raise ValueError("SubstantiveCandidate.target_artifact is required")

        # Normalise evidence_refs BEFORE the emptiness check. Checking the
        # raw value first would let an empty iterator through (iterators
        # are always truthy) and would explode a bare string into
        # single-character "refs" when coerced.
        refs = self.evidence_refs
        if isinstance(refs, str):
            raise ValueError(
                "SubstantiveCandidate.evidence_refs must be a sequence of "
                f"strings, not a bare string: {refs!r}"
            )
        if refs is None:
            refs = ()
        if not isinstance(refs, tuple):
            try:
                refs = tuple(refs)
            except TypeError:
                raise ValueError(
                    "SubstantiveCandidate.evidence_refs must be iterable, "
                    f"got {type(refs).__name__}"
                ) from None
            # frozen dataclasses don't auto-coerce; go through
            # object.__setattr__ to store the tuple form.
            object.__setattr__(self, "evidence_refs", refs)
        if not any(isinstance(ref, str) and ref.strip() for ref in refs):
            raise ValueError(
                "SubstantiveCandidate.evidence_refs cannot be empty — "
                "empty-payload dispatch is structurally forbidden (LED-2214b)"
            )

        if self.proposed_action not in PROPOSED_ACTIONS:
            raise ValueError(
                f"SubstantiveCandidate.proposed_action must be one of "
                f"{PROPOSED_ACTIONS}, got {self.proposed_action!r}"
            )

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict copy with ``evidence_refs`` as a list."""
        d = asdict(self)
        d["evidence_refs"] = list(self.evidence_refs)
        return d
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # Reg-O / banking target-side veto
223
+ # ---------------------------------------------------------------------------
224
+
225
+
226
def is_banking_adjacent(target: Dict[str, Any]) -> Tuple[bool, str]:
    """Return ``(is_adjacent, matched_keyword)``.

    Scans a target dict for any banking / fintech / regulator-adjacent
    keyword across the fields the scanner emits today (``canonical_url``,
    ``rationale``, ``content_snippet``, ``repo``, ``source_id`` and the
    optional ``repo_topics`` + ``repo_description`` if present). Match is
    substring + case insensitive on the lowercased haystack.

    Keywords that carry padding whitespace (``"ach "``, ``"swift "``) are
    additionally matched as whole words with the padding removed. A pure
    substring test would miss them when the token is the last thing in
    the haystack or is followed by punctuation ("settles via ACH.") — a
    false negative in a hard-veto gate. The extra check only ever adds
    vetoes; everything the substring test matched still matches.

    The first-match-wins return makes the logged reason actionable.
    Callers should treat any True return as a hard veto — no override
    path exists at the scanner layer, by design.

    Args:
        target: Scanner target dict. Missing keys and ``None`` values are
            skipped; collection values contribute each element.

    Returns:
        ``(True, keyword)`` for the first keyword in
        ``BANKING_ADJACENT_KEYWORDS`` order that hits, else ``(False, "")``.
    """
    haystack_parts: List[str] = []
    for key in (
        "canonical_url", "rationale", "content_snippet",
        "repo_topics", "repo_description", "repo", "source_id",
    ):
        value = target.get(key)
        if isinstance(value, (list, tuple, set, frozenset)):
            haystack_parts.extend(str(v) for v in value)
        elif value is not None:
            haystack_parts.append(str(value))
    haystack = " ".join(haystack_parts).lower()
    for kw in BANKING_ADJACENT_KEYWORDS:
        if kw in haystack:
            return True, kw
        bare = kw.strip()
        # Only whitespace-padded keywords get the whole-word fallback;
        # the matched keyword is reported in its original (padded) form.
        if bare and bare != kw and re.search(
            r"\b" + re.escape(bare) + r"\b", haystack
        ):
            return True, kw
    return False, ""
255
+
256
+
257
+ # ---------------------------------------------------------------------------
258
+ # Technical-anchor extraction + content gate
259
+ # ---------------------------------------------------------------------------
260
+
261
+
262
def extract_technical_anchors(text: str) -> Dict[str, List[str]]:
    """Extract all technical anchors found in ``text``.

    Returns a dict with keys ``commits``, ``issues``, ``cves``,
    ``spec_paths``, ``file_paths``. Empty lists mean nothing of that
    type was found. A non-empty union across any key is sufficient to
    satisfy the substantive-content gate.

    Spec paths are matched explicitly (openapi/swagger/asyncapi) and
    are also captured by the broader file-path regex, but the spec
    list is the load-bearing signal for adoption-lead targets.

    Args:
        text: Free text to scan. Falsy input yields all-empty lists.

    Returns:
        Dict mapping each anchor kind to the list of matches, in order
        of appearance (duplicates are kept).
    """
    if not text:
        return {"commits": [], "issues": [], "cves": [], "spec_paths": [], "file_paths": []}

    def _clean(match: str) -> str:
        # The path regexes consume one leading delimiter, which may be ANY
        # whitespace character (newline, tab, ...) or a backtick. Stripping
        # only spaces and backticks left anchors such as "\nopenapi.yaml".
        return match.strip().strip("`").strip()

    return {
        "commits": _COMMIT_HASH_RE.findall(text),
        "issues": _ISSUE_REF_RE.findall(text),
        "cves": _CVE_RE.findall(text),
        "spec_paths": [_clean(m) for m in _SPEC_PATH_RE.findall(text)],
        "file_paths": [_clean(m) for m in _FILE_PATH_RE.findall(text)],
    }
283
+
284
+
285
def _hits_forbidden_product_token(text_lower: str) -> Optional[str]:
    """Find the first banned product name that appears as a whole word.

    Tokens are tried in ``FORBIDDEN_PRODUCT_TOKENS`` order and each one is
    searched between ``\\b`` word boundaries, so "delimited" / "delimiter"
    do not count as a hit for "delimit". The caller is expected to pass
    already-lowercased text; no case folding happens here.

    Returns the matching token, or ``None`` when no token is present.
    """
    hits = (
        candidate
        for candidate in FORBIDDEN_PRODUCT_TOKENS
        if re.search(rf"\b{re.escape(candidate)}\b", text_lower)
    )
    return next(hits, None)
292
+
293
+
294
def check_substantive_content(
    body: str,
    proposed_action: str,
) -> Dict[str, Any]:
    """Validate a draft body against the SHIFT-1 content rules.

    Order of checks (load-bearing — do not reorder without panel
    deliberation):

    1. Type / length floor — empty or under-length bodies block. Length
       is measured on the body with surrounding whitespace removed, so
       padding cannot lift a short body over the floor.
    2. Forbidden product tokens — bans our own names (defends against
       "btw try delimit-cli" class).
    3. Forbidden commercial phrases — bans the broader "we built /
       our tool / you should try" class. Runs of whitespace are collapsed
       to a single space before matching, so a line break or double space
       inside a phrase does not evade the check.
    4. Technical anchor — must have at least one commit hash, issue
       ref, CVE, spec path, or file path. Without an anchor the body
       is "thanks for the project" by definition.

    The function does NOT enforce target-side reg-O veto — that lives
    at :func:`is_banking_adjacent`, called separately by
    :func:`evaluate_substantive_payload`. Splitting them keeps the
    failure modes distinguishable in logs and ledger entries.

    Args:
        body: Draft text the agent intends to submit.
        proposed_action: Must be one of ``PROPOSED_ACTIONS``.

    Returns:
        Dict with keys ``verdict`` (``"allow"`` | ``"block"``),
        ``reason``, ``violations`` (list of strings), ``anchors``
        (the extracted-anchors dict). On a block, ``reason`` is the
        machine-readable code before the first ``:`` of the first
        violation (``body_too_short``, ``forbidden_product_token``,
        ``forbidden_phrase``, ``no_technical_anchor``).
    """
    violations: List[str] = []
    if not isinstance(body, str) or not body.strip():
        return {
            "verdict": "block",
            "reason": "empty_body",
            "violations": ["body is empty"],
            "anchors": {},
        }
    if proposed_action not in PROPOSED_ACTIONS:
        return {
            "verdict": "block",
            "reason": "invalid_proposed_action",
            "violations": [f"proposed_action must be one of {PROPOSED_ACTIONS}"],
            "anchors": {},
        }
    effective_length = len(body.strip())
    if effective_length < MIN_BODY_LENGTH:
        # The "body_too_short:" prefix gives this violation a stable reason
        # code like every other one; without it ``reason`` became the whole
        # sentence, including the variable length.
        violations.append(
            f"body_too_short: body length {effective_length} "
            f"< MIN_BODY_LENGTH={MIN_BODY_LENGTH}"
        )

    body_lower = body.lower()
    product_hit = _hits_forbidden_product_token(body_lower)
    if product_hit:
        violations.append(f"forbidden_product_token: {product_hit!r}")
    # Collapse whitespace so "we\nbuilt" / "we  built" match "we built".
    # Anything the raw substring test matched still matches here.
    body_collapsed = " ".join(body_lower.split())
    for phrase in FORBIDDEN_PHRASES:
        if phrase in body_collapsed:
            violations.append(f"forbidden_phrase: {phrase!r}")

    anchors = extract_technical_anchors(body)
    has_anchor = any(anchors.values())
    if not has_anchor:
        violations.append(
            "no_technical_anchor: body must cite a commit hash, "
            "issue number, CVE, spec path, or source file path"
        )

    if violations:
        return {
            "verdict": "block",
            "reason": violations[0].split(":")[0],
            "violations": violations,
            "anchors": anchors,
        }
    return {
        "verdict": "allow",
        "reason": "ok",
        "violations": [],
        "anchors": anchors,
    }
371
+
372
+
373
+ # ---------------------------------------------------------------------------
374
+ # Composite gate: target-side veto BEFORE content
375
+ # ---------------------------------------------------------------------------
376
+
377
+
378
def evaluate_substantive_payload(
    body: str,
    proposed_action: str,
    target: Optional[Dict[str, Any]] = None,
    repo: str = "",
    repo_description: str = "",
    repo_topics: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Full pre-submit gate: reg-O target veto, then content shape.

    Per the 2026-05-11 panel verdict + Claude's reg-O target-side veto
    amendment: target classification is checked FIRST. A perfectly
    substantive bug report on a banking-adjacent repo still violates
    SHIFT-1, so the gate refuses regardless of content quality.

    Callers can pass either:
        * a full ``target`` dict (forwarded to :func:`is_banking_adjacent`),
        * or the discrete ``repo`` / ``repo_description`` / ``repo_topics``
          fields, which we wrap in a synthetic target,
        * or both, in which case BOTH are scanned and a hit in either
          one vetoes.

    Args:
        body: Draft text to validate once the target passes.
        proposed_action: Forwarded to :func:`check_substantive_content`.
        target: Optional scanner target dict.
        repo: Optional ``owner/name`` of the target repository.
        repo_description: Optional repository description text.
        repo_topics: Optional list of repository topics.

    Returns:
        Dict with ``verdict``, ``reason``, ``violations``, ``anchors``,
        and ``stage`` (``"target"`` or ``"content"``) indicating where
        the gate fired.
    """
    # Scan the caller's target and the discrete fields independently.
    # Merging them into one dict let a discrete field REPLACE the same key
    # in ``target``, so a banking keyword in target["repo_description"]
    # could be masked by passing a clean repo_description argument.
    scan_targets: List[Dict[str, Any]] = []
    if target is not None:
        scan_targets.append(target)
    if target is None or repo or repo_description or repo_topics:
        scan_targets.append({
            "repo": repo,
            "repo_description": repo_description,
            "repo_topics": repo_topics or [],
        })

    for scan_target in scan_targets:
        adjacent, matched = is_banking_adjacent(scan_target)
        if adjacent:
            return {
                "verdict": "block",
                "reason": "banking_adjacent_target",
                "violations": [f"banking_adjacent_target: matched keyword {matched!r}"],
                "anchors": {},
                "stage": "target",
            }

    content_result = check_substantive_content(body, proposed_action)
    content_result["stage"] = "content"
    return content_result
431
+
432
+
433
+ # ---------------------------------------------------------------------------
434
+ # Scanner-level constructor
435
+ # ---------------------------------------------------------------------------
436
+
437
+
438
+ _FINGERPRINT_REPO_RE = re.compile(
439
+ r"^github:(?:issue|repo|fork|star|outreach):([^:]+/[^:]+)(?::|$)"
440
+ )
441
+ _URL_REPO_RE = re.compile(
442
+ r"^https?://github\.com/([^/]+/[^/]+?)(?:/|$|#|\?)"
443
+ )
444
+
445
+
446
+ def _repo_from_target(target: Dict[str, Any]) -> str:
447
+ repo = (target.get("repo") or "").strip()
448
+ if repo and "/" in repo:
449
+ return repo
450
+ fingerprint = target.get("fingerprint", "")
451
+ m = _FINGERPRINT_REPO_RE.match(fingerprint)
452
+ if m:
453
+ return m.group(1)
454
+ url = target.get("canonical_url", "")
455
+ m = _URL_REPO_RE.match(url)
456
+ if m:
457
+ return m.group(1)
458
+ return ""
459
+
460
+
461
# Default action proposed for each scanner category. A category missing
# from this table cannot produce a candidate.
_CATEGORY_TO_ACTION = {
    "pain_thread": "comment",
    "adoption_lead": "issue",
    "competitor_user": "comment",
    "own_repo_activity": "comment",
}


def build_candidate_from_github_target(
    target: Dict[str, Any],
    category: str,
    subcategory: str = "",
) -> Optional[SubstantiveCandidate]:
    """Build a :class:`SubstantiveCandidate` or return None.

    The function returns None — *not* raises — when the target cannot
    yield a substantive payload. This is the structural-impossibility
    guarantee: callers that get None must NOT dispatch.

    Reasons for None return (each one is logged):
        * Target classified banking-adjacent (SHIFT-1 hard veto).
        * Repo could not be derived from fingerprint or URL.
        * Category not in the mapped action table.
        * No technical anchor extractable from snippet + rationale.
        * Neither ``canonical_url`` nor ``fingerprint`` is available to
          serve as the target artifact.
        * The candidate constructor rejected the assembled fields.

    The reg-O check happens here too, not just at submit time, so
    banking-adjacent targets never reach the agent prompt at all.
    Defense in depth: scanner + submit gate both veto.

    Args:
        target: Scanner target dict.
        category: Scanner category; selects the proposed action.
        subcategory: Optional finer-grained label.
    """
    adjacent, matched = is_banking_adjacent(target)
    if adjacent:
        logger.info(
            "build_candidate: banking-adjacent veto fingerprint=%s matched=%s",
            target.get("fingerprint"), matched,
        )
        return None

    repo = _repo_from_target(target)
    if not repo:
        logger.info(
            "build_candidate: repo unresolved fingerprint=%s url=%s",
            target.get("fingerprint"), target.get("canonical_url"),
        )
        return None

    if category not in _CATEGORY_TO_ACTION:
        logger.info("build_candidate: unmapped category=%s", category)
        return None

    snippet = target.get("content_snippet", "") or ""
    rationale = target.get("rationale", "") or ""
    anchors = extract_technical_anchors(f"{snippet}\n{rationale}")
    # Label each anchor with its singular kind ("issue:#12", "cve:...") and
    # de-duplicate while keeping first-seen order; issues and spec paths
    # come first because they are the strongest evidence.
    evidence_refs: List[str] = []
    for key in ("issues", "spec_paths", "cves", "commits", "file_paths"):
        for ref in anchors.get(key, []):
            label = f"{key[:-1] if key.endswith('s') else key}:{ref}"
            if label not in evidence_refs:
                evidence_refs.append(label)
    if not evidence_refs:
        logger.info(
            "build_candidate: no_technical_anchor fingerprint=%s category=%s",
            target.get("fingerprint"), category,
        )
        return None

    target_artifact = target.get("canonical_url") or target.get("fingerprint", "")
    if not target_artifact:
        # Log this rejection like every other one so it is traceable.
        logger.info(
            "build_candidate: no target artifact repo=%s category=%s",
            repo, category,
        )
        return None

    try:
        return SubstantiveCandidate(
            repo=repo,
            category=category,
            target_artifact=target_artifact,
            evidence_refs=tuple(evidence_refs),
            proposed_action=_CATEGORY_TO_ACTION[category],
            subcategory=subcategory or "",
            # ``or`` rather than a .get default: an explicit ``None`` /
            # empty venture on the target must still fall back, otherwise
            # dispatch later fails on ``candidate.venture.upper()``.
            venture=target.get("venture") or "delimit",
            fingerprint=target.get("fingerprint", "") or "",
        )
    except ValueError as exc:
        logger.warning(
            "build_candidate: construction failed for fingerprint=%s: %s",
            target.get("fingerprint"), exc,
        )
        return None
547
+
548
+
549
+ # ---------------------------------------------------------------------------
550
+ # Dispatch wrapper
551
+ # ---------------------------------------------------------------------------
552
+
553
+
554
OUTREACH_SUBSTANTIVE_TASK_TYPE = "outreach_substantive"


def dispatch_substantive_outreach(
    candidate: SubstantiveCandidate,
    target: Dict[str, Any],
    ledger_item_id: str = "",
) -> Dict[str, Any]:
    """Hand a validated candidate to the task dispatcher.

    The payload is the :class:`SubstantiveCandidate` itself; anything
    else is refused with ``TypeError`` rather than coerced. The task is
    filed under the dedicated ``outreach_substantive`` task type, kept
    separate from the legacy ``outreach`` type so a regression on the old
    path does not silently route to the new agent.

    The task text tells the receiving agent to call
    ``delimit_substantive_content_check`` before submitting any draft,
    and ``delimit_external_pr_check`` as well when the action is ``pr``.

    Args:
        candidate: The validated payload to dispatch.
        target: Scanner target; only ``canonical_url``, ``author`` and
            ``rationale`` are read from it.
        ledger_item_id: Optional ledger item to link to the new task.
            Linking is best-effort — a failure is logged, not raised.

    Returns:
        Whatever ``dispatch_task`` returns.
    """
    if not isinstance(candidate, SubstantiveCandidate):
        # A stray dict must never be treated as a payload.
        raise TypeError(
            "dispatch_substantive_outreach requires a SubstantiveCandidate "
            f"instance, got {type(candidate).__name__}"
        )

    # Imported lazily to keep module import light and avoid import cycles.
    from ai.agent_dispatch import dispatch_task, link_ledger_item

    is_pr = candidate.proposed_action == "pr"

    constraints = [
        "no-deploy", "no-secrets", "no-destructive",
        "shift-1-quiet-attraction",
        "must-call-delimit_substantive_content_check-before-submit",
    ]
    tools_needed = [
        "delimit_substantive_content_check",
        "delimit_sensor_github_issue",
    ]
    if is_pr:
        constraints.append("must-call-delimit_external_pr_check-before-submit")
        tools_needed.append("delimit_external_pr_check")

    variables: Dict[str, Any] = dict(
        candidate=candidate.to_dict(),
        venture=candidate.venture,
        repo=candidate.repo,
        category=candidate.category,
        subcategory=candidate.subcategory,
        target_artifact=candidate.target_artifact,
        evidence_refs=list(candidate.evidence_refs),
        proposed_action=candidate.proposed_action,
        source_url=target.get("canonical_url", ""),
        source_fingerprint=candidate.fingerprint,
        author=target.get("author", ""),
        rationale=target.get("rationale", ""),
    )

    title = (
        f"[{candidate.venture.upper()}] Substantive {candidate.proposed_action} "
        f"on {candidate.repo} ({candidate.category})"
    )

    category_label = f"{candidate.category}"
    if candidate.subcategory:
        category_label += " / " + candidate.subcategory
    evidence_label = ", ".join(candidate.evidence_refs)
    description = "".join([
        "Substantive-outreach task (LED-2214b architecture).\n",
        f"Repo: {candidate.repo}\n",
        f"Category: {category_label}\n",
        f"Action: {candidate.proposed_action}\n",
        f"Target: {candidate.target_artifact}\n",
        f"Evidence: {evidence_label}\n",
        "\n",
        "SHIFT-1 constraints:\n",
        " - Pseudonymous account only; no founder identity.\n",
        " - Real technical contribution only. No 'we built' / 'our tool' / "
        "'btw try' framing. Never name our own product in the body.\n",
        " - delimit_substantive_content_check is MANDATORY pre-submit.\n",
        " - delimit_external_pr_check is MANDATORY when proposed_action='pr'.\n",
    ])

    context = (
        "Substantive autonomous outreach via the LED-2214b architecture. "
        "The pseudonymous-substantive-contribution carve-out (CLAUDE.md SHIFT-1, "
        "2026-05-04) permits this provided the activity is a genuine technical "
        "contribution. The pre-submit gate stack enforces that. If the gate "
        "blocks, file the rejection reason on the linked ledger item and stop."
    )

    # Prefer the scanner fingerprint as the idempotency key; fall back to
    # repo + artifact when the candidate has none.
    if candidate.fingerprint:
        external_key = f"outreach_substantive:{candidate.fingerprint}"
    else:
        external_key = (
            f"outreach_substantive:{candidate.repo}:{candidate.target_artifact}"
        )

    result = dispatch_task(
        title=title,
        description=description,
        assignee="any",
        priority="P1",
        tools_needed=tools_needed,
        constraints=constraints,
        context=context,
        task_type=OUTREACH_SUBSTANTIVE_TASK_TYPE,
        venture=candidate.venture,
        variables=variables,
        external_key=external_key,
    )

    task_id = result.get("task_id", "")
    if not (task_id and ledger_item_id):
        return result
    try:
        link_ledger_item(task_id, ledger_item_id)
    except Exception as exc:  # link is best-effort
        logger.warning(
            "dispatch_substantive_outreach: link_ledger_item failed "
            "task=%s ledger=%s err=%s",
            task_id, ledger_item_id, exc,
        )
    return result