delimit-cli 4.6.0 → 4.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -8
- package/bin/delimit-cli.js +59 -9
- package/bin/delimit-setup.js +7 -3
- package/gateway/ai/agent_dispatch.py +5 -0
- package/gateway/ai/backends/gateway_core.py +6 -0
- package/gateway/ai/backends/git_health.py +175 -0
- package/gateway/ai/backends/memory_bridge.py +210 -53
- package/gateway/ai/backends/tools_infra.py +93 -0
- package/gateway/ai/backends/tools_real.py +53 -7
- package/gateway/ai/cli_contract.py +185 -0
- package/gateway/ai/governance.py +181 -0
- package/gateway/ai/heartbeat.py +290 -0
- package/gateway/ai/ledger_manager.py +81 -4
- package/gateway/ai/ledger_proof.py +127 -0
- package/gateway/ai/license.py +132 -47
- package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
- package/gateway/ai/license_core.pyi +1 -1
- package/gateway/ai/outreach_loop_daemon.py +349 -0
- package/gateway/ai/outreach_substantive.py +768 -7
- package/gateway/ai/pro_tools.yaml +167 -0
- package/gateway/ai/reddit_scanner.py +7 -1
- package/gateway/ai/server.py +295 -116
- package/gateway/ai/session_phoenix.py +121 -0
- package/gateway/ai/social_queue.py +166 -10
- package/gateway/ai/tenant_auth.py +329 -0
- package/gateway/ai/tenant_data.py +339 -0
- package/gateway/ai/tenant_paths.py +150 -0
- package/gateway/core/diff_engine_v2.py +517 -54
- package/gateway/core/semver_classifier.py +52 -6
- package/package.json +4 -1
- package/scripts/build-license-core.sh +0 -85
- package/scripts/security-check.sh +0 -66
- package/scripts/test-license-core-so.sh +0 -107
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Implements the autonomous-github-outreach architecture ratified by the
|
|
4
4
|
2026-05-11 deliberation (A1 + Codex payload amendment, B3 + Claude reg-O
|
|
5
|
-
target-side veto, C1 single-responsibility daemon). Transcript
|
|
6
|
-
|
|
5
|
+
target-side veto, C1 single-responsibility daemon). Transcript stored
|
|
6
|
+
privately.
|
|
7
7
|
|
|
8
8
|
The three SHIFT-1 holes this module closes:
|
|
9
9
|
|
|
@@ -42,14 +42,63 @@ that ticks scanner → file ledger → dispatch.
|
|
|
42
42
|
|
|
43
43
|
from __future__ import annotations
|
|
44
44
|
|
|
45
|
+
import json as _json
|
|
45
46
|
import logging
|
|
47
|
+
import os as _os
|
|
46
48
|
import re
|
|
49
|
+
import subprocess as _subprocess
|
|
50
|
+
import time as _time
|
|
47
51
|
from dataclasses import asdict, dataclass, field
|
|
52
|
+
from pathlib import Path as _Path
|
|
48
53
|
from typing import Any, Dict, List, Optional, Tuple
|
|
49
54
|
|
|
50
55
|
logger = logging.getLogger("delimit.ai.outreach_substantive")
|
|
51
56
|
|
|
52
57
|
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# LED-2266: env-configurable thresholds for the outreach gate stack.
|
|
60
|
+
#
|
|
61
|
+
# Each defense layer has a default value chosen during initial deployment
|
|
62
|
+
# (PR #179 anti-spam, PR #180 engagement-floor). Operators can tune any
|
|
63
|
+
# of them via env var without code changes — useful for trying tighter
|
|
64
|
+
# thresholds on a new venture, or loosening when scanner yield is low.
|
|
65
|
+
#
|
|
66
|
+
# Defaults are conservative: they reproduce the PR-as-shipped behavior
|
|
67
|
+
# when no env var is set. The lookup helpers below are the single source
|
|
68
|
+
# of truth — module constants below resolve through them at import time
|
|
69
|
+
# so each threshold is documented in one place.
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _env_int(name: str, default: int, minimum: int = 0) -> int:
    """Resolve an integer setting from the environment variable ``name``.

    Returns ``default`` when the variable is unset, blank, not parseable
    as an int, or smaller than ``minimum``. Any other value is returned
    as-is. A WARNING is logged whenever a supplied value is rejected, and
    also when an accepted value differs from ``default``, so the effective
    configuration is visible in the logs.
    """
    text = _os.environ.get(name, "").strip()
    if text == "":
        return default

    try:
        parsed = int(text)
    except ValueError:
        logger.warning(
            "config: %s=%r is not an integer — using default=%d", name, text, default,
        )
        return default

    if parsed >= minimum:
        # Accepted. Only announce it when it actually changes behavior.
        if parsed != default:
            logger.warning("config: %s overridden default=%d -> %d", name, default, parsed)
        return parsed

    # Below the floor: refuse the override rather than weaken the setting.
    logger.warning(
        "config: %s=%d below floor %d — using default=%d",
        name, parsed, minimum, default,
    )
    return default
|
|
100
|
+
|
|
101
|
+
|
|
53
102
|
# ---------------------------------------------------------------------------
|
|
54
103
|
# Constants — keep these auditable. Edits require panel deliberation per
|
|
55
104
|
# the CLAUDE.md SHIFT-1 constitutional binding.
|
|
@@ -232,10 +281,19 @@ def is_banking_adjacent(target: Dict[str, Any]) -> Tuple[bool, str]:
|
|
|
232
281
|
+ ``repo_description`` if present). Match is substring + case
|
|
233
282
|
insensitive on the lowercased haystack.
|
|
234
283
|
|
|
284
|
+
LED-2265: also checks the org/username portion of the canonical URL
|
|
285
|
+
for typo-squat impersonation of known regulated entities (e.g.
|
|
286
|
+
``JPM0RGAN`` for ``jpmorgan``, ``g0ldman`` for ``goldman``). The
|
|
287
|
+
raw keyword pass above misses these because the user-facing string
|
|
288
|
+
isn't a banking-noun; the impersonation IS the signal. Defense in
|
|
289
|
+
depth — the substantive engagement path should never land on a
|
|
290
|
+
spoofed-bank account regardless of the repo's content topic.
|
|
291
|
+
|
|
235
292
|
The first-match-wins return makes the logged reason actionable
|
|
236
|
-
("matched 'broker-dealer' in repo_description"
|
|
237
|
-
|
|
238
|
-
|
|
293
|
+
("matched 'broker-dealer' in repo_description" or "matched
|
|
294
|
+
typosquat:jpmorgan in author=JPM0RGAN"). Callers should treat any
|
|
295
|
+
True return as a hard veto — no override path exists at the scanner
|
|
296
|
+
layer, by design.
|
|
239
297
|
"""
|
|
240
298
|
haystack_parts: List[str] = []
|
|
241
299
|
for key in (
|
|
@@ -251,9 +309,110 @@ def is_banking_adjacent(target: Dict[str, Any]) -> Tuple[bool, str]:
|
|
|
251
309
|
for kw in BANKING_ADJACENT_KEYWORDS:
|
|
252
310
|
if kw in haystack:
|
|
253
311
|
return True, kw
|
|
312
|
+
|
|
313
|
+
# LED-2265: typo-squat impersonation of known regulated orgs.
|
|
314
|
+
typosquat = _is_typosquat_impersonation(target)
|
|
315
|
+
if typosquat:
|
|
316
|
+
return True, f"typosquat:{typosquat}"
|
|
317
|
+
|
|
254
318
|
return False, ""
|
|
255
319
|
|
|
256
320
|
|
|
321
|
+
# LED-2265: known-regulated-entity org names. Used by the typo-squat
|
|
322
|
+
# impersonation check below. Names are lowercased and stored without
|
|
323
|
+
# common suffixes (`-bank`, `-chase`, etc.). Conservative list — false
|
|
324
|
+
# positives cost zero (we just don't engage), false negatives risk
|
|
325
|
+
# substantive engagement with a malicious impersonator.
|
|
326
|
+
_KNOWN_REGULATED_ORGS: Tuple[str, ...] = (
    # NOTE: candidates are stripped of every non-alphanumeric character
    # before comparison, so the hyphenated spellings below can never match
    # on their own; their unhyphenated twins carry the match.
    # Tier-1 US banks
    "jpmorgan", "jpmorganchase", "chase", "goldman", "goldmansachs",
    "morganstanley", "citi", "citigroup", "citibank",
    "bankofamerica", "bofa", "wellsfargo", "usbank", "pnc", "truist",
    "capitalone",
    # Foreign G-SIBs
    "hsbc", "barclays", "deutschebank", "credit-suisse", "creditsuisse",
    "ubs", "santander", "bnpparibas", "societegenerale", "ing", "lloyds",
    # US clearing / capital markets
    "blackrock", "vanguard", "fidelity", "schwab", "interactive-brokers",
    "interactivebrokers", "nyse", "nasdaq",
    # Crypto / fintech with bank rails
    "coinbase", "kraken", "circle", "tether", "binance",
    # Card networks
    "visa", "mastercard", "amex", "americanexpress",
    # Regulators
    "fdic", "occ", "frb", "federalreserve", "finra", "secgov",
)


# LED-2265: simple homoglyph map for digit-for-letter substitutions.
# Keys are digits commonly used as letter substitutes; values are the
# letter they impersonate. Asymmetric on purpose (we transform a
# candidate username INTO a likely impersonated name, then compare).
_HOMOGLYPH_DIGITS: Dict[str, str] = {
    "0": "o", "1": "i", "3": "e", "4": "a", "5": "s", "7": "t",
}

# Additional letters a digit may stand for, beyond its primary mapping in
# _HOMOGLYPH_DIGITS. "1" is used for "l" at least as often as for "i"
# (``go1dman``, ``b1ackrock``); with only the primary mapping those names
# normalize to ``goidman`` / ``biackrock`` and slip through.
_HOMOGLYPH_ALTERNATES: Dict[str, Tuple[str, ...]] = {
    "1": ("l",),
}

# Cap on how many ambiguous digits get expanded per name, so the number of
# generated spellings stays bounded (2**6 = 64 for the table above).
_MAX_AMBIGUOUS_EXPANSIONS = 6


def _normalize_for_typosquat(name: str) -> str:
    """Return ``name`` lowercased, stripped to ``[a-z0-9]``, with digits
    mapped to letters through ``_HOMOGLYPH_DIGITS``.

    Example: ``JPM0RCHASE`` -> ``jpm0rchase`` (lowercase + strip) ->
    ``jpmorchase`` (digit mapping). Digits without an entry in the table
    are kept unchanged.
    """
    alphanum = re.sub(r"[^a-z0-9]", "", name.lower())
    return "".join(_HOMOGLYPH_DIGITS.get(c, c) for c in alphanum)


def _typosquat_variants(name: str) -> List[str]:
    """Return every plausible letter-spelling of ``name``.

    The first element always equals ``_normalize_for_typosquat(name)``.
    Further elements cover digits listed in ``_HOMOGLYPH_ALTERNATES``; only
    the first ``_MAX_AMBIGUOUS_EXPANSIONS`` such digits are expanded, later
    ones fall back to their primary mapping.
    """
    alphanum = re.sub(r"[^a-z0-9]", "", name.lower())
    variants: List[str] = [""]
    expanded = 0
    for ch in alphanum:
        primary = _HOMOGLYPH_DIGITS.get(ch, ch)
        alternates = _HOMOGLYPH_ALTERNATES.get(ch, ())
        if alternates and expanded < _MAX_AMBIGUOUS_EXPANSIONS:
            expanded += 1
            options = (primary,) + tuple(alternates)
            # Prefix-major order keeps the all-primary spelling at index 0.
            variants = [prefix + opt for prefix in variants for opt in options]
        else:
            variants = [prefix + primary for prefix in variants]
    return variants


def _match_known_org(normalized: str) -> str:
    """Return the first ``_KNOWN_REGULATED_ORGS`` entry contained in
    ``normalized`` (substring match), or "" when none is."""
    for org in _KNOWN_REGULATED_ORGS:
        if org in normalized:
            return org
    return ""


def _is_typosquat_impersonation(target: Dict[str, Any]) -> str:
    """Return the matched known-org name if one of the target's name parts
    looks like a digit-for-letter impersonation of a regulated entity.
    Returns "" when nothing matches.

    Name parts examined: ``target["author"]``, the owner and repo segments
    of a ``https://github.com/<owner>/<repo>`` ``canonical_url``, and the
    owner / repo segments of a ``github:<kind>:<owner>/<repo>...``
    ``fingerprint``. Only parts containing at least one digit are checked.

    Matching runs in two passes so results for previously-detected names
    are unchanged: first the primary homoglyph spelling of every part,
    then the alternate spellings (e.g. ``1`` read as ``l``).
    """
    # Collect the candidate name parts: author (github username) and the
    # owner/name segment of the canonical_url.
    candidates: List[str] = []
    author = target.get("author") or ""
    if isinstance(author, str) and author:
        candidates.append(author)
    url = target.get("canonical_url") or ""
    if isinstance(url, str) and url:
        m = re.match(r"^https?://github\.com/([^/]+)/([^/?#]+)", url)
        if m:
            candidates.append(m.group(1))  # org/user
            candidates.append(m.group(2))  # repo name
    fp = target.get("fingerprint") or ""
    if isinstance(fp, str) and fp:
        m = re.match(r"^github:[^:]+:([^/:]+)(?:/([^:]+))?", fp)
        if m:
            candidates.append(m.group(1))
            if m.group(2):
                candidates.append(m.group(2))

    # Only digit-bearing candidates can be homoglyph typosquats. A
    # pure-letter name gives no positive evidence of impersonation intent
    # here; digits are the disambiguator.
    digit_bearing = [c for c in candidates if any(ch.isdigit() for ch in c)]

    # Pass 1: primary spelling.
    for cand in digit_bearing:
        hit = _match_known_org(_normalize_for_typosquat(cand))
        if hit:
            return hit

    # Pass 2: alternate spellings (index 0 is the primary, already checked).
    for cand in digit_bearing:
        for variant in _typosquat_variants(cand)[1:]:
            hit = _match_known_org(variant)
            if hit:
                return hit

    return ""
|
|
414
|
+
|
|
415
|
+
|
|
257
416
|
# ---------------------------------------------------------------------------
|
|
258
417
|
# Technical-anchor extraction + content gate
|
|
259
418
|
# ---------------------------------------------------------------------------
|
|
@@ -466,6 +625,496 @@ _CATEGORY_TO_ACTION = {
|
|
|
466
625
|
}
|
|
467
626
|
|
|
468
627
|
|
|
628
|
+
# ---------------------------------------------------------------------------
|
|
629
|
+
# Issue-body fetch + cache (LED-2214b followup)
|
|
630
|
+
#
|
|
631
|
+
# The scanner truncates issue bodies to 200 chars before they reach the
|
|
632
|
+
# substantive gate (see ai/social_target.py:_scan_github phase 2). 200
|
|
633
|
+
# chars covers the title + opening summary but almost always strips the
|
|
634
|
+
# tail where anchors live — stack traces, file paths in error messages,
|
|
635
|
+
# references to other issues/commits. Result: every issue target gets
|
|
636
|
+
# rejected as no-anchor even when the issue body is anchor-rich.
|
|
637
|
+
#
|
|
638
|
+
# This block fetches the FULL issue body + first N comments via gh CLI
|
|
639
|
+
# when the snippet-derived extraction comes up empty. Per-issue 7-day
|
|
640
|
+
# disk cache; daily tick at max_dispatch=3 means worst-case ~3 API calls
|
|
641
|
+
# per day after cache warms.
|
|
642
|
+
# ---------------------------------------------------------------------------
|
|
643
|
+
|
|
644
|
+
_ISSUE_BODY_CACHE_DIR = _Path.home() / ".delimit" / "cache" / "outreach_issue_bodies"
|
|
645
|
+
# LED-2266: env-overridable via DELIMIT_OUTREACH_ISSUE_BODY_CACHE_TTL_S.
|
|
646
|
+
# Default 7 days. Minimum 60s (don't disable caching outright; would
|
|
647
|
+
# spam the github api on every tick).
|
|
648
|
+
_ISSUE_BODY_CACHE_TTL_S = _env_int(
|
|
649
|
+
"DELIMIT_OUTREACH_ISSUE_BODY_CACHE_TTL_S", 7 * 24 * 3600, minimum=60,
|
|
650
|
+
)
|
|
651
|
+
_ISSUE_COMMENTS_FETCH_LIMIT = 5
|
|
652
|
+
_GH_API_TIMEOUT_S = 30
|
|
653
|
+
_ISSUE_FP_RE = re.compile(r"^github:issue:([^/:]+/[^/:]+):(\d+)$")


def _issue_fp_parts(fingerprint: str) -> Optional[Tuple[str, int]]:
    """Split a ``github:issue:owner/name:N`` fingerprint into ``(repo, N)``.

    Any fingerprint that does not have that shape (including an empty or
    None value) yields None, which callers can use as the signal to skip
    the issue-body fetch.
    """
    found = _ISSUE_FP_RE.match(fingerprint or "")
    if found is None:
        return None
    repo, raw_number = found.groups()
    try:
        number = int(raw_number)
    except (TypeError, ValueError):
        return None
    return repo, number
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _issue_cache_path(repo: str, number: int) -> _Path:
    """Return the cache file for one issue under ``_ISSUE_BODY_CACHE_DIR``.

    Slashes in ``repo`` are replaced by ``__`` so the name stays a single
    path component: ``owner/name`` + 7 -> ``owner__name_7.json``.
    """
    flattened = "__".join(repo.split("/"))
    filename = f"{flattened}_{number}.json"
    return _ISSUE_BODY_CACHE_DIR / filename
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def _read_cached_issue_body(repo: str, number: int) -> Optional[str]:
    """Return the cached issue text, or None if missing/expired/corrupt.

    "Corrupt" covers unreadable files, invalid JSON, JSON that is not an
    object, a missing or non-numeric ``ts`` and a non-string ``body``.
    An entry is expired once it is older than ``_ISSUE_BODY_CACHE_TTL_S``
    seconds.
    """
    cache_file = _issue_cache_path(repo, number)
    try:
        # A missing file raises FileNotFoundError (an OSError); invalid
        # bytes raise UnicodeDecodeError (a ValueError).
        data = _json.loads(cache_file.read_text())
    except (OSError, ValueError):
        return None
    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list or bare string):
        # treat as corrupt instead of crashing on .get().
        return None
    ts = data.get("ts")
    if not isinstance(ts, (int, float)) or _time.time() - ts > _ISSUE_BODY_CACHE_TTL_S:
        return None
    body = data.get("body")
    return body if isinstance(body, str) else None
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def _write_cached_issue_body(repo: str, number: int, body: str) -> None:
    """Store ``body`` in the issue cache together with the current time.

    Best-effort: an OSError while creating the directory or writing the
    file is logged at WARNING and otherwise ignored.
    """
    try:
        _ISSUE_BODY_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        destination = _issue_cache_path(repo, number)
        record = {"ts": _time.time(), "body": body}
        destination.write_text(_json.dumps(record))
    except OSError as exc:
        logger.warning(
            "issue-body cache write failed for %s#%d: %s", repo, number, exc,
        )
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
_RATE_LIMIT_KILL_FILE = _Path.home() / ".delimit" / "outreach_pause"
# Lowercase markers looked for in ``gh`` stderr. All-digit entries are HTTP
# status codes and are matched as standalone numbers (see
# _stderr_has_rate_limit_signature); the rest are plain substrings.
_RATE_LIMIT_SIGNATURES = (
    "rate limit", "rate-limit", "secondary rate",
    "403", "429", "abuse detection", "too many requests",
)


def _stderr_has_rate_limit_signature(stderr_lower: str) -> bool:
    """Return True if ``stderr_lower`` contains any rate-limit signature.

    Numeric signatures must not be embedded in a longer run of digits, so
    a 404 that happens to mention ``issues/1403`` does not count as a 403.
    """
    for sig in _RATE_LIMIT_SIGNATURES:
        if sig.isdigit():
            if re.search(rf"(?<!\d){sig}(?!\d)", stderr_lower):
                return True
        elif sig in stderr_lower:
            return True
    return False


def _maybe_halt_on_rate_limit(endpoint: str, stderr: str) -> None:
    """Write the outreach kill-switch file when ``stderr`` looks like a
    GitHub rate-limit / abuse-detection / forbidden response.

    Does nothing when ``stderr`` is empty or carries no signature from
    ``_RATE_LIMIT_SIGNATURES``. On a match, ``_RATE_LIMIT_KILL_FILE`` is
    (over)written with a UTC timestamp, the endpoint and the first 400
    characters of stderr, and an ERROR is logged. This function itself
    sends no external notification.

    Best-effort: an OSError while writing the file is logged at ERROR and
    swallowed, so the caller's own failure handling still runs.
    """
    if not stderr:
        return
    if not _stderr_has_rate_limit_signature(stderr.lower()):
        return
    try:
        _RATE_LIMIT_KILL_FILE.parent.mkdir(parents=True, exist_ok=True)
        _RATE_LIMIT_KILL_FILE.write_text(
            f"halted by _maybe_halt_on_rate_limit at "
            f"{_time.strftime('%Y-%m-%dT%H:%M:%SZ', _time.gmtime())}\n"
            f"endpoint: {endpoint}\n"
            f"stderr: {stderr[:400]}\n"
        )
        logger.error(
            "outreach RATE LIMIT detected — wrote kill-switch %s "
            "(endpoint=%s)", _RATE_LIMIT_KILL_FILE, endpoint,
        )
    except OSError as exc:
        logger.error(
            "outreach rate-limit halt failed to write kill-switch: %s", exc,
        )
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
def _gh_api_call(endpoint: str) -> Any:
    """Run ``gh api <endpoint>`` and return the parsed JSON, or None on
    any failure.

    Local copy of the same idiom in ai.social_target — duplicated to keep
    this module importable without pulling in the much larger
    social_target dependency graph.

    Failure cases, all returning None:
      * the ``gh`` process cannot be started (missing or non-executable
        binary, other OS-level errors) or exceeds ``_GH_API_TIMEOUT_S``;
      * ``gh`` exits non-zero — stderr is first handed to
        ``_maybe_halt_on_rate_limit`` so a rate-limit signature can trip
        the kill-switch before this function returns;
      * stdout is not valid JSON.
    """
    try:
        proc = _subprocess.run(
            ["gh", "api", endpoint],
            capture_output=True,
            text=True,
            timeout=_GH_API_TIMEOUT_S,
        )
    except (_subprocess.TimeoutExpired, OSError) as exc:
        # OSError covers FileNotFoundError (gh not installed) as well as
        # PermissionError and other spawn failures; none of them should
        # propagate out of a best-effort fetch.
        logger.warning("gh api %s failed: %s", endpoint, exc)
        return None
    if proc.returncode != 0:
        # LED-2214b followup: halt the outreach daemon on rate-limit
        # signatures BEFORE returning. Defense in depth against escalating
        # github enforcement (warn -> block -> ban).
        _maybe_halt_on_rate_limit(endpoint, proc.stderr or "")
        logger.info(
            "gh api %s returned %d: %s",
            endpoint, proc.returncode, (proc.stderr or "")[:160],
        )
        return None
    try:
        return _json.loads(proc.stdout)
    except ValueError as exc:
        logger.warning("gh api %s returned non-JSON: %s", endpoint, exc)
        return None
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
# ---------------------------------------------------------------------------
|
|
785
|
+
# Engagement-floor check (LED-2214b followup, found 2026-05-17 when first
|
|
786
|
+
# autonomous engagement landed on a same-day-created 0-star 4-follower
|
|
787
|
+
# personal scratchpad). Substantive content gate passed (anchors were
|
|
788
|
+
# valid) but engagement value was near zero — no readership, no community.
|
|
789
|
+
#
|
|
790
|
+
# This block fetches lightweight repo metadata (1 gh api call, 7-day
|
|
791
|
+
# cached) and enforces a stars + age + not-archived + not-fork floor
|
|
792
|
+
# BEFORE the anchor check. Sits parallel to the existing repo-search
|
|
793
|
+
# filter in ai/social_target.py:_scan_github line 2024 ("stars == 0 and
|
|
794
|
+
# no description: continue") which only catches REPO targets — issue
|
|
795
|
+
# targets bypass it entirely, which was the gap.
|
|
796
|
+
#
|
|
797
|
+
# Fail-closed: if we can't fetch the metadata, we DON'T engage. Better
|
|
798
|
+
# to skip a real target than spam a maintainer on stale / missing data.
|
|
799
|
+
# ---------------------------------------------------------------------------
|
|
800
|
+
|
|
801
|
+
_REPO_META_CACHE_DIR = _Path.home() / ".delimit" / "cache" / "outreach_repo_meta"
|
|
802
|
+
# LED-2266: env-overridable engagement-floor thresholds.
|
|
803
|
+
# Defaults reproduce PR #180 shipped behavior. Floors enforce sanity
|
|
804
|
+
# (no zero or negative values that would silently disable the gate).
|
|
805
|
+
_REPO_META_CACHE_TTL_S = _env_int(
|
|
806
|
+
"DELIMIT_OUTREACH_REPO_META_CACHE_TTL_S", 7 * 24 * 3600, minimum=60,
|
|
807
|
+
)
|
|
808
|
+
_MIN_REPO_STARS = _env_int("DELIMIT_OUTREACH_MIN_STARS", 50, minimum=1)
|
|
809
|
+
_MIN_REPO_AGE_DAYS = _env_int("DELIMIT_OUTREACH_MIN_AGE_DAYS", 30, minimum=1)
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _repo_meta_cache_path(repo: str) -> _Path:
    """Return the metadata cache file for ``repo`` under
    ``_REPO_META_CACHE_DIR``; ``owner/name`` maps to ``owner__name.json``.
    """
    flattened = "__".join(repo.split("/"))
    return _REPO_META_CACHE_DIR / (flattened + ".json")
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
def _read_cached_repo_meta(repo: str) -> Optional[Dict[str, Any]]:
    """Return the cached metadata dict for ``repo``, or None.

    None is returned when the cache file is missing, unreadable, not valid
    JSON, not a JSON object, lacks a numeric ``_cached_ts``, is older than
    ``_REPO_META_CACHE_TTL_S`` seconds, or its ``meta`` value is not a
    dict.
    """
    cache_file = _repo_meta_cache_path(repo)
    try:
        # Missing file -> FileNotFoundError (OSError); bad bytes or bad
        # JSON -> ValueError.
        data = _json.loads(cache_file.read_text())
    except (OSError, ValueError):
        return None
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape would otherwise crash on .get().
        return None
    ts = data.get("_cached_ts")
    if not isinstance(ts, (int, float)) or _time.time() - ts > _REPO_META_CACHE_TTL_S:
        return None
    meta = data.get("meta")
    return meta if isinstance(meta, dict) else None
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def _write_cached_repo_meta(repo: str, meta: Dict[str, Any]) -> None:
    """Store ``meta`` for ``repo`` together with the current time.

    Best-effort: an OSError from creating the directory or writing the
    file is logged at WARNING and not re-raised.
    """
    try:
        _REPO_META_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        destination = _repo_meta_cache_path(repo)
        record = {"_cached_ts": _time.time(), "meta": meta}
        destination.write_text(_json.dumps(record))
    except OSError as exc:
        logger.warning("repo-meta cache write failed for %s: %s", repo, exc)
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
def fetch_repo_metadata(repo: str) -> Optional[Dict[str, Any]]:
    """Return lightweight metadata for ``repo``, from cache or from
    ``gh api repos/{repo}``.

    The returned dict has the keys stargazers_count, forks_count,
    open_issues_count, created_at, pushed_at, archived, fork, description
    and owner_login. A fresh result is written to the repo-meta cache.
    Returns None when the API call does not yield a JSON object; nothing
    is cached in that case so the next attempt can retry.
    """
    hit = _read_cached_repo_meta(repo)
    if hit is not None:
        return hit

    payload = _gh_api_call(f"repos/{repo}")
    if not isinstance(payload, dict):
        # Failures are deliberately not cached.
        return None

    owner = payload.get("owner") or {}
    # The owner login is carried along so callers can compare it against
    # an item's author.
    owner_login = owner.get("login", "") if isinstance(owner, dict) else ""

    meta = {
        "stargazers_count": payload.get("stargazers_count", 0),
        "forks_count": payload.get("forks_count", 0),
        "open_issues_count": payload.get("open_issues_count", 0),
        "created_at": payload.get("created_at", ""),
        "pushed_at": payload.get("pushed_at", ""),
        "archived": bool(payload.get("archived", False)),
        "fork": bool(payload.get("fork", False)),
        "description": payload.get("description") or "",
        "owner_login": owner_login,
    }
    _write_cached_repo_meta(repo, meta)
    return meta
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
# LED-2214b followup: per-issue state cache. Lighter than fetch_issue_full_text
|
|
875
|
+
# (which pulls body + comments) — we only need the state field. Separate cache
|
|
876
|
+
# because issue state changes more often than repo metadata, so shorter TTL.
|
|
877
|
+
_ISSUE_STATE_CACHE_TTL_S = 6 * 3600 # 6h: catches "open then closed same day"
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def _issue_state_cache_path(repo: str, number: int) -> _Path:
    """Return the state-cache file for one issue. It lives in
    ``_ISSUE_BODY_CACHE_DIR`` next to the body cache, distinguished by the
    ``__state.json`` suffix.
    """
    flattened = "__".join(repo.split("/"))
    filename = f"{flattened}_{number}__state.json"
    return _ISSUE_BODY_CACHE_DIR / filename
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
def _read_cached_issue_state(repo: str, number: int) -> Optional[str]:
    """Return the cached state string for an issue, or None.

    None is returned when the cache file is missing, unreadable, not valid
    JSON, not a JSON object, lacks a numeric ``_cached_ts``, is older than
    ``_ISSUE_STATE_CACHE_TTL_S`` seconds, or its ``state`` is not a string.
    """
    cf = _issue_state_cache_path(repo, number)
    try:
        # Missing file -> FileNotFoundError (OSError); bad bytes or bad
        # JSON -> ValueError.
        data = _json.loads(cf.read_text())
    except (OSError, ValueError):
        return None
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape would otherwise crash on .get().
        return None
    ts = data.get("_cached_ts")
    if not isinstance(ts, (int, float)) or _time.time() - ts > _ISSUE_STATE_CACHE_TTL_S:
        return None
    state = data.get("state")
    return state if isinstance(state, str) else None
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def _write_cached_issue_state(repo: str, number: int, state: str) -> None:
    """Store ``state`` for one issue together with the current time.

    Best-effort: an OSError from creating the directory or writing the
    file is logged at WARNING and not re-raised.
    """
    try:
        _ISSUE_BODY_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        destination = _issue_state_cache_path(repo, number)
        record = {"_cached_ts": _time.time(), "state": state}
        destination.write_text(_json.dumps(record))
    except OSError as exc:
        logger.warning(
            "issue-state cache write failed for %s#%d: %s", repo, number, exc,
        )
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
def fetch_issue_state(repo: str, number: int) -> Optional[str]:
    """Return the state string reported for an issue/PR, or None.

    The state cache is consulted first; on a miss the value comes from
    ``gh api repos/{repo}/issues/{number}`` and is cached when it is a
    non-empty string. None means the state could not be determined, which
    callers should treat as "do not engage".
    """
    hit = _read_cached_issue_state(repo, number)
    if hit is not None:
        return hit

    payload = _gh_api_call(f"repos/{repo}/issues/{number}")
    state = payload.get("state") if isinstance(payload, dict) else None
    if not (isinstance(state, str) and state):
        return None

    _write_cached_issue_state(repo, number, state)
    return state
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
def _repo_age_days(created_at: str) -> Optional[float]:
    """Return the age in days of a UTC ISO-8601 timestamp.

    Accepts ``YYYY-MM-DDTHH:MM:SS`` with an optional fractional part and/or
    trailing ``Z``. Returns None for empty, non-string or unparseable
    input.
    """
    if not created_at:
        return None
    # Local import: calendar is only needed here.
    import calendar

    try:
        # Strip fractional seconds + Z suffix
        clean = created_at.replace("Z", "").split(".")[0]
        parsed = _time.strptime(clean, "%Y-%m-%dT%H:%M:%S")
        # timegm interprets the struct as UTC. mktime() minus time.timezone
        # is off by an hour whenever local DST applies to the parsed date.
        epoch = calendar.timegm(parsed)
    except (AttributeError, ValueError, TypeError, OverflowError):
        return None
    return (_time.time() - epoch) / 86400.0
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def check_engagement_floor(repo: str) -> Tuple[bool, str]:
    """Apply the engagement-worthiness floor to ``repo``.

    Returns ``(passes, reason)``. ``reason`` is ``"ok"`` on success,
    otherwise a short tag for the caller to log:

      * ``no_metadata``        — metadata could not be fetched
      * ``archived`` / ``fork``
      * ``stars<MIN:N``        — fewer than ``_MIN_REPO_STARS`` stars
      * ``age_unknown``        — ``created_at`` missing or unparseable
      * ``age_days<MIN:A``     — younger than ``_MIN_REPO_AGE_DAYS``

    The gate fails closed: when the data needed for a check is missing,
    the repo does not pass. That includes an unknown creation date, which
    would otherwise bypass the age check.
    """
    meta = fetch_repo_metadata(repo)
    if meta is None:
        return False, "no_metadata"
    if meta.get("archived"):
        return False, "archived"
    if meta.get("fork"):
        return False, "fork"
    stars = meta.get("stargazers_count", 0) or 0
    if stars < _MIN_REPO_STARS:
        return False, f"stars<{_MIN_REPO_STARS}:{stars}"
    age = _repo_age_days(meta.get("created_at", ""))
    if age is None:
        # Cannot verify the age floor -> do not engage.
        return False, "age_unknown"
    if age < _MIN_REPO_AGE_DAYS:
        return False, f"age_days<{_MIN_REPO_AGE_DAYS}:{age:.1f}"
    return True, "ok"
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
def fetch_issue_full_text(repo: str, number: int) -> str:
    """Fetch issue body + first N comments concatenated.

    The issue body and up to ``_ISSUE_COMMENTS_FETCH_LIMIT`` comment
    bodies are joined with blank lines. A successful fetch is written to
    the issue-body cache and served from there on later calls.

    Returns "" when the issue itself cannot be fetched — the caller treats
    empty string as 'no anchors available', which blocks dispatch. Such a
    failure is NOT cached: caching it would turn one transient error
    (timeout, rate limit) into a rejection lasting the whole cache TTL.
    If only the comments request fails, the body alone is returned and
    cached.

    Public surface (no underscore prefix) so tests + callers can
    monkeypatch without depending on the private cache helpers.
    """
    cached = _read_cached_issue_body(repo, number)
    if cached is not None:
        return cached

    issue = _gh_api_call(f"repos/{repo}/issues/{number}")
    if not isinstance(issue, dict):
        # Don't poison the cache with a failure; retry on the next call.
        return ""
    parts: List[str] = []
    body = issue.get("body")
    if isinstance(body, str) and body:
        parts.append(body)

    comments = _gh_api_call(
        f"repos/{repo}/issues/{number}/comments?per_page={_ISSUE_COMMENTS_FETCH_LIMIT}"
    )
    if isinstance(comments, list):
        for c in comments[:_ISSUE_COMMENTS_FETCH_LIMIT]:
            if isinstance(c, dict):
                cb = c.get("body")
                if isinstance(cb, str) and cb:
                    parts.append(cb)

    full = "\n\n".join(parts)
    _write_cached_issue_body(repo, number, full)
    return full
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
# ---------------------------------------------------------------------------
|
|
1006
|
+
# Anti-spam — protect the operating account from github enforcement
|
|
1007
|
+
# ---------------------------------------------------------------------------
|
|
1008
|
+
#
|
|
1009
|
+
# Three hard limits on top of the per-tick spam firewall
|
|
1010
|
+
# (DEFAULT_MAX_DISPATCH=3) in the daemon:
|
|
1011
|
+
#
|
|
1012
|
+
# 1. Per-repo cooldown: don't dispatch on a repo we already dispatched
|
|
1013
|
+
# to within the last _DISPATCH_COOLDOWN_DAYS days. Avoids the
|
|
1014
|
+
# "scanner finds 3 issues on the SAME repo in one tick + we
|
|
1015
|
+
# engage on all of them = swarm" failure mode.
|
|
1016
|
+
# 2. Per-day global cap: refuse dispatch once we've crossed
|
|
1017
|
+
# _MAX_DISPATCHES_PER_DAY in the rolling 24-hour window. Catches
|
|
1018
|
+
# multiple-tick scenarios (manual run + scheduled run + retry)
|
|
1019
|
+
# that would multiply the per-tick cap.
|
|
1020
|
+
# 3. Halt on rate-limit (in _gh_api_call): if gh api returns 403/429,
|
|
1021
|
+
# write the kill-switch file and ntfy. GitHub typically warns
|
|
1022
|
+
# before banning; respecting that warning protects the account.
|
|
1023
|
+
#
|
|
1024
|
+
# The dispatch log at _DISPATCH_LOG is the source of truth for #1 and #2.
|
|
1025
|
+
# It's append-only JSONL; each successful dispatch_substantive_outreach
|
|
1026
|
+
# call writes one line.
|
|
1027
|
+
|
|
1028
|
+
_DISPATCH_LOG = _Path.home() / ".delimit" / "state" / "outreach-dispatch-log.jsonl"
|
|
1029
|
+
# LED-2266: env-overridable anti-spam thresholds (PR #179 follow-up
|
|
1030
|
+
# panel-flagged). Defaults reproduce shipped behavior. Floors enforce
|
|
1031
|
+
# sanity (minimum=1 — zero would silently disable the spam protection).
|
|
1032
|
+
_DISPATCH_COOLDOWN_DAYS = _env_int("DELIMIT_OUTREACH_COOLDOWN_DAYS", 7, minimum=1)
|
|
1033
|
+
_MAX_DISPATCHES_PER_DAY = _env_int("DELIMIT_OUTREACH_MAX_PER_DAY", 5, minimum=1)
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
def _read_dispatch_log() -> List[Dict[str, Any]]:
    """Return all dispatch log entries, newest first.

    Reads the JSONL file at ``_DISPATCH_LOG`` and parses one JSON object
    per non-blank line. Best-effort — never raises.

    Returns:
        A list of dict entries sorted by their ``"ts"`` string in
        descending order. Empty when the file is missing or unreadable.
        Lines that are not valid JSON, or that parse to something other
        than a JSON object, are skipped.
    """
    try:
        raw = _DISPATCH_LOG.read_text()
    except FileNotFoundError:
        # No dispatch has been recorded yet — not worth a warning.
        return []
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to
        # be named explicitly to keep the "never raises" contract.
        logger.warning("dispatch log read failed: %s", exc)
        return []

    entries: List[Dict[str, Any]] = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            record = _json.loads(line)
        except ValueError:
            continue
        # A line such as `[]` or `42` is valid JSON but not a log entry;
        # callers use .get() on every element, so keep only dicts.
        if isinstance(record, dict):
            entries.append(record)

    def _ts_key(record: Dict[str, Any]) -> str:
        # Missing / null / non-string timestamps sort as "" (oldest) rather
        # than raising TypeError from a mixed-type comparison.
        ts = record.get("ts")
        return ts if isinstance(ts, str) else ""

    entries.sort(key=_ts_key, reverse=True)
    return entries
|
|
1056
|
+
|
|
1057
|
+
|
|
1058
|
+
def _record_dispatch(repo: str, fingerprint: str, category: str) -> None:
    """Append a single JSON line describing one dispatch to the log.

    The line carries a UTC ``ts`` stamp plus the given ``repo``,
    ``fingerprint`` and ``category``. Best-effort: an ``OSError`` while
    creating the directory or writing is logged as a warning and
    swallowed, so a logging failure cannot crash the dispatch itself.
    """
    try:
        log_dir = _DISPATCH_LOG.parent
        log_dir.mkdir(parents=True, exist_ok=True)
        stamp = _time.strftime("%Y-%m-%dT%H:%M:%SZ", _time.gmtime())
        record = dict(
            ts=stamp,
            repo=repo,
            fingerprint=fingerprint,
            category=category,
        )
        with _DISPATCH_LOG.open("a") as handle:
            handle.write(_json.dumps(record) + "\n")
    except OSError as exc:
        logger.warning("dispatch log write failed: %s", exc)
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def _check_per_repo_cooldown(repo: str, now: float | None = None) -> Optional[str]:
    """Return cooldown-expiry ISO string if repo is in cooldown, else None.

    Scans the dispatch log (newest first) for an entry whose ``repo``
    matches ``repo`` case-insensitively, ignoring surrounding whitespace,
    and whose timestamp falls within the last ``_DISPATCH_COOLDOWN_DAYS``
    days.

    Args:
        repo: Repository identifier to look up. Empty means "no cooldown".
        now: Reference time as a UTC epoch; overridable for tests.
            Defaults to the current time.

    Returns:
        ``"%Y-%m-%dT%H:%M:%SZ"`` string for the moment the cooldown ends
        (entry timestamp + cooldown days), or ``None`` when the repo is
        not in cooldown. Entries with a missing or unparseable ``ts`` are
        ignored.
    """
    # Function-scope import keeps this block self-contained.
    import calendar

    if not repo:
        return None
    if now is None:
        now = _time.time()

    window = _DISPATCH_COOLDOWN_DAYS * 86400
    cutoff = now - window
    wanted = repo.strip().lower()  # loop-invariant, normalize once

    for entry in _read_dispatch_log():
        if (entry.get("repo") or "").strip().lower() != wanted:
            continue
        try:
            # Log timestamps are UTC. calendar.timegm is the exact inverse
            # of time.gmtime; the previous `mktime(...) - time.timezone`
            # was off by an hour whenever local DST was in effect, because
            # mktime applies DST while time.timezone is the non-DST offset.
            entry_epoch = calendar.timegm(
                _time.strptime(entry.get("ts", ""), "%Y-%m-%dT%H:%M:%SZ")
            )
        except (ValueError, TypeError):
            continue
        if entry_epoch >= cutoff:
            # Cooldown expiry = entry timestamp + cooldown window.
            return _time.strftime(
                "%Y-%m-%dT%H:%M:%SZ", _time.gmtime(entry_epoch + window)
            )
    return None
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
def _check_per_day_cap(now: float | None = None) -> int:
    """Return count of dispatches in the rolling 24h window.

    The caller compares the result against ``_MAX_DISPATCHES_PER_DAY``.

    Args:
        now: Reference time as a UTC epoch; overridable for tests.
            Defaults to the current time.

    Returns:
        Number of dispatch-log entries whose ``ts`` is at or after
        ``now - 86400`` seconds. Entries with a missing or unparseable
        ``ts`` are not counted.
    """
    # Function-scope import keeps this block self-contained.
    import calendar

    if now is None:
        now = _time.time()
    cutoff = now - 86400

    count = 0
    for entry in _read_dispatch_log():
        try:
            # Log timestamps are UTC. calendar.timegm converts a UTC
            # struct_time to an epoch directly; the previous
            # `mktime(...) - time.timezone` drifted by an hour under local
            # DST, which shifted the edge of the 24h window.
            entry_epoch = calendar.timegm(
                _time.strptime(entry.get("ts", ""), "%Y-%m-%dT%H:%M:%SZ")
            )
        except (ValueError, TypeError):
            continue
        if entry_epoch >= cutoff:
            count += 1
    return count
|
|
1116
|
+
|
|
1117
|
+
|
|
469
1118
|
def build_candidate_from_github_target(
|
|
470
1119
|
target: Dict[str, Any],
|
|
471
1120
|
category: str,
|
|
@@ -507,9 +1156,108 @@ def build_candidate_from_github_target(
|
|
|
507
1156
|
logger.info("build_candidate: unmapped category=%s", category)
|
|
508
1157
|
return None
|
|
509
1158
|
|
|
1159
|
+
# LED-2214b followup (founder's Niklas-Flaig observation 2026-05-17):
|
|
1160
|
+
# engagement-floor check BEFORE the anchor extraction + body fetch so
|
|
1161
|
+
# we don't pay the per-issue API cost on a target that's a 0-star
|
|
1162
|
+
# personal scratchpad. Existing repo-search filter in social_target
|
|
1163
|
+
# catches `stars==0 AND no description` for repo targets only; issue
|
|
1164
|
+
# targets bypassed it entirely (the gap this closes).
|
|
1165
|
+
floor_ok, floor_reason = check_engagement_floor(repo)
|
|
1166
|
+
if not floor_ok:
|
|
1167
|
+
logger.info(
|
|
1168
|
+
"build_candidate: engagement floor fingerprint=%s repo=%s reason=%s",
|
|
1169
|
+
target.get("fingerprint"), repo, floor_reason,
|
|
1170
|
+
)
|
|
1171
|
+
return None
|
|
1172
|
+
|
|
1173
|
+
# LED-2214b followup (2026-05-17 audit-queue observation): 4 of 7
|
|
1174
|
+
# dispatched tasks today were owner-authored (chore PRs, dev→main
|
|
1175
|
+
# promotions, internal scout reports). Engagement value near zero —
|
|
1176
|
+
# the owner is doing their own work, not seeking community input.
|
|
1177
|
+
# Repo metadata fetch above already populated owner_login; compare
|
|
1178
|
+
# directly to target's author. Cheap check.
|
|
1179
|
+
repo_meta = fetch_repo_metadata(repo)
|
|
1180
|
+
if repo_meta is not None:
|
|
1181
|
+
owner_login = (repo_meta.get("owner_login") or "").strip().lower()
|
|
1182
|
+
target_author = (target.get("author") or "").strip().lower()
|
|
1183
|
+
if owner_login and target_author and owner_login == target_author:
|
|
1184
|
+
logger.info(
|
|
1185
|
+
"build_candidate: owner-authored target fingerprint=%s "
|
|
1186
|
+
"author=%s == owner=%s",
|
|
1187
|
+
target.get("fingerprint"), target_author, owner_login,
|
|
1188
|
+
)
|
|
1189
|
+
return None
|
|
1190
|
+
|
|
1191
|
+
# LED-2214b followup (2026-05-17 audit-queue observation): 3 of 7
|
|
1192
|
+
# dispatched tasks today were on CLOSED issues. Engaging on a closed
|
|
1193
|
+
# thread is noise — the decision is already made. Cheap state check
|
|
1194
|
+
# before paying the body-fetch cost. Only applies to issue targets;
|
|
1195
|
+
# repo targets don't have a state in this sense.
|
|
1196
|
+
fp_parts_state = _issue_fp_parts(target.get("fingerprint", ""))
|
|
1197
|
+
if fp_parts_state is not None:
|
|
1198
|
+
state = fetch_issue_state(fp_parts_state[0], fp_parts_state[1])
|
|
1199
|
+
if state is None:
|
|
1200
|
+
# Fail-closed: can't verify the issue is live → skip
|
|
1201
|
+
logger.info(
|
|
1202
|
+
"build_candidate: issue state unverifiable fingerprint=%s",
|
|
1203
|
+
target.get("fingerprint"),
|
|
1204
|
+
)
|
|
1205
|
+
return None
|
|
1206
|
+
if state != "open":
|
|
1207
|
+
logger.info(
|
|
1208
|
+
"build_candidate: issue state=%s (not open) fingerprint=%s",
|
|
1209
|
+
state, target.get("fingerprint"),
|
|
1210
|
+
)
|
|
1211
|
+
return None
|
|
1212
|
+
|
|
1213
|
+
# LED-2214b followup — anti-spam protection for the operating account.
|
|
1214
|
+
# These checks run AFTER the banking veto + repo-resolve + category
|
|
1215
|
+
# check (so we don't burden the dispatch log with rejected targets
|
|
1216
|
+
# that wouldn't have dispatched anyway) but BEFORE the anchor
|
|
1217
|
+
# extraction + body fetch (so cool-down catches re-targeting on
|
|
1218
|
+
# repos we recently engaged with without paying the API cost to
|
|
1219
|
+
# re-fetch their issue body).
|
|
1220
|
+
|
|
1221
|
+
cooldown_expires = _check_per_repo_cooldown(repo)
|
|
1222
|
+
if cooldown_expires:
|
|
1223
|
+
logger.info(
|
|
1224
|
+
"build_candidate: per-repo cooldown fingerprint=%s repo=%s "
|
|
1225
|
+
"expires=%s",
|
|
1226
|
+
target.get("fingerprint"), repo, cooldown_expires,
|
|
1227
|
+
)
|
|
1228
|
+
return None
|
|
1229
|
+
|
|
1230
|
+
today_count = _check_per_day_cap()
|
|
1231
|
+
if today_count >= _MAX_DISPATCHES_PER_DAY:
|
|
1232
|
+
logger.warning(
|
|
1233
|
+
"build_candidate: per-day cap hit fingerprint=%s "
|
|
1234
|
+
"today_count=%d cap=%d",
|
|
1235
|
+
target.get("fingerprint"), today_count, _MAX_DISPATCHES_PER_DAY,
|
|
1236
|
+
)
|
|
1237
|
+
return None
|
|
1238
|
+
|
|
510
1239
|
snippet = target.get("content_snippet", "") or ""
|
|
511
1240
|
rationale = target.get("rationale", "") or ""
|
|
512
1241
|
anchors = extract_technical_anchors(f"{snippet}\n{rationale}")
|
|
1242
|
+
|
|
1243
|
+
# LED-2214b followup: if the snippet didn't yield anchors AND this is
|
|
1244
|
+
# an issue target, fetch the full issue body + first N comments and
|
|
1245
|
+
# re-extract. The scanner truncates issue bodies to 200 chars (see
|
|
1246
|
+
# ai/social_target.py:_scan_github phase 2) which almost always
|
|
1247
|
+
# strips the part where anchors live. Fetch is cached 7 days per
|
|
1248
|
+
# issue (see fetch_issue_full_text). On any fetch failure the
|
|
1249
|
+
# function returns "" which leaves anchors unchanged → still blocks.
|
|
1250
|
+
fp_parts = _issue_fp_parts(target.get("fingerprint", ""))
|
|
1251
|
+
needs_body_fetch = fp_parts is not None and not any(
|
|
1252
|
+
anchors.get(k) for k in ("issues", "spec_paths", "cves", "commits", "file_paths")
|
|
1253
|
+
)
|
|
1254
|
+
if needs_body_fetch:
|
|
1255
|
+
body = fetch_issue_full_text(fp_parts[0], fp_parts[1])
|
|
1256
|
+
if body:
|
|
1257
|
+
anchors = extract_technical_anchors(
|
|
1258
|
+
f"{snippet}\n{rationale}\n{body}"
|
|
1259
|
+
)
|
|
1260
|
+
|
|
513
1261
|
evidence_refs: List[str] = []
|
|
514
1262
|
for key in ("issues", "spec_paths", "cves", "commits", "file_paths"):
|
|
515
1263
|
for ref in anchors.get(key, []):
|
|
@@ -518,8 +1266,9 @@ def build_candidate_from_github_target(
|
|
|
518
1266
|
evidence_refs.append(label)
|
|
519
1267
|
if not evidence_refs:
|
|
520
1268
|
logger.info(
|
|
521
|
-
"build_candidate: no_technical_anchor fingerprint=%s category=%s"
|
|
522
|
-
|
|
1269
|
+
"build_candidate: no_technical_anchor fingerprint=%s category=%s "
|
|
1270
|
+
"(body_fetched=%s)",
|
|
1271
|
+
target.get("fingerprint"), category, needs_body_fetch,
|
|
523
1272
|
)
|
|
524
1273
|
return None
|
|
525
1274
|
|
|
@@ -673,4 +1422,16 @@ def dispatch_substantive_outreach(
|
|
|
673
1422
|
"task=%s ledger=%s err=%s",
|
|
674
1423
|
task_id, ledger_item_id, exc,
|
|
675
1424
|
)
|
|
1425
|
+
|
|
1426
|
+
# LED-2214b followup — record the dispatch for per-repo cooldown +
|
|
1427
|
+
# per-day cap. Append-only JSONL; subsequent build_candidate calls
|
|
1428
|
+
# read this log via _check_per_repo_cooldown / _check_per_day_cap.
|
|
1429
|
+
# Best-effort; logging failures must not crash a successful dispatch.
|
|
1430
|
+
if task_id:
|
|
1431
|
+
_record_dispatch(
|
|
1432
|
+
repo=candidate.repo,
|
|
1433
|
+
fingerprint=candidate.fingerprint,
|
|
1434
|
+
category=candidate.category,
|
|
1435
|
+
)
|
|
1436
|
+
|
|
676
1437
|
return result
|