npm - delimit-cli - Versions diffs - 4.1.53 → 4.3.0 - Mend

delimit-cli 4.1.53 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/CHANGELOG.md +26 -0
package/README.md +34 -3
package/bin/delimit-cli.js +150 -2
package/bin/delimit-setup.js +22 -7
package/gateway/ai/agent_dispatch.py +79 -0
package/gateway/ai/daily_digest.py +386 -0
package/gateway/ai/ledger_manager.py +32 -0
package/gateway/ai/license_core.py +2 -0
package/gateway/ai/notify.py +17 -11
package/gateway/ai/reddit_proxy.py +28 -9
package/gateway/ai/sensing/__init__.py +35 -0
package/gateway/ai/sensing/schema.py +107 -0
package/gateway/ai/sensing/signal_store.py +348 -0
package/gateway/ai/server.py +419 -6
package/gateway/ai/supabase_sync.py +308 -0
package/gateway/ai/work_order.py +216 -0
package/gateway/ai/workers/__init__.py +32 -0
package/gateway/ai/workers/base.py +154 -0
package/gateway/ai/workers/executor.py +861 -0
package/gateway/ai/workers/outreach_drafter.py +161 -0
package/gateway/ai/workers/pr_drafter.py +148 -0
package/lib/ai-sbom-engine.js +154 -0
package/lib/trust-page-engine.js +179 -0
package/lib/wrap-engine.js +431 -0
package/package.json +14 -1
package/adapters/codex-security.js +0 -64
package/adapters/codex-skill.js +0 -78
package/adapters/cursor-rules.js +0 -73
package/gateway/ai/continuity.py +0 -462
package/gateway/ai/inbox_daemon_runner.py +0 -217
package/gateway/ai/loop_engine.py +0 -1303
package/gateway/ai/social_cache.py +0 -341
package/gateway/ai/social_daemon.py +0 -483
package/gateway/ai/tweet_corpus_schema.sql +0 -76
package/scripts/crosspost_devto.py +0 -304
package/scripts/demo-v420-clean.sh +0 -267
package/scripts/demo-v420-deliberation.sh +0 -217
package/scripts/demo-v420.sh +0 -55
package/scripts/sync-gateway.sh +0 -112

package/gateway/ai/daily_digest.py ADDED Viewed

@@ -0,0 +1,386 @@
+"""Daily digest for the Delimit autonomous loop (LED-966).
+Produces a structured summary of the last 24h:
+  - Cycle count (sense-only daemon ticks)
+  - Signals ingested (count by platform)
+  - Deliberations held (count + transcript refs)
+  - Ledger deltas (items opened, in_progress, done)
+  - Agent dispatches (by assignee, status)
+  - Pending approvals (drafts awaiting founder)
+  - Critical events (errors, timeouts, guard trips)
+Writes:
+  - ~/.delimit/digest/digest-YYYY-MM-DD.md (file artifact, always)
+  - ~/.delimit/digest/digest-YYYY-MM-DD.json (machine-readable)
+  - Email to founder (if DELIMIT_DIGEST_EMAIL=true AND email pipeline healthy)
+Call via MCP: delimit_digest(action="run") or scheduled cron.
+"""
+from __future__ import annotations
+import json
+import time
+from collections import Counter
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+# Output directory for the rendered digest artifacts (markdown + JSON).
+DIGEST_DIR = Path.home() / ".delimit" / "digest"
+# Directory scanned for *.jsonl ledger files when computing ledger deltas.
+LEDGER_DIR = Path.home() / ".delimit" / "ledger"
+# Directory scanned for deliberation_*.json transcripts.
+DELIB_DIR = Path.home() / ".delimit" / "deliberations"
+# Directory of signal shards: *.jsonl files whose stem is an ISO date.
+SIGNALS_DIR = Path.home() / ".delimit" / "intel" / "signals"
+# JSON file of agent task records, iterated as a mapping of task id -> task.
+AGENTS_FILE = Path.home() / ".delimit" / "agents" / "tasks.json"
+def _now() -> datetime:
+    """Return the current moment as a timezone-aware UTC datetime."""
+    utc_now = datetime.now(tz=timezone.utc)
+    return utc_now
+def _ensure_dir():
+    """Create the digest output directory, including missing parents."""
+    target = DIGEST_DIR
+    target.mkdir(exist_ok=True, parents=True)
+def _count_signals(since: datetime) -> Dict[str, Any]:
+    """Count signals ingested at or after ``since``, grouped by platform.
+    Scans every ``*.jsonl`` shard in SIGNALS_DIR whose file stem parses as an
+    ISO date on or after ``since``'s date; shard names starting with ``_``
+    are skipped. Lines that are blank, not valid JSON, or whose
+    ``ingested_at`` cannot be parsed are ignored, as are shards that cannot
+    be read or decoded.
+    An ``ingested_at`` without a UTC offset is treated as UTC when ``since``
+    is timezone-aware, so the comparison cannot raise ``TypeError`` and abort
+    the whole digest.
+    Returns ``{"total": int, "by_platform": {platform: count}}`` with the
+    platforms ordered from most to least frequent.
+    """
+    counts: Counter = Counter()
+    if not SIGNALS_DIR.exists():
+        return {"total": 0, "by_platform": {}}
+    for shard in SIGNALS_DIR.glob("*.jsonl"):
+        if shard.name.startswith("_"):
+            continue
+        try:
+            shard_date = datetime.fromisoformat(shard.stem).date()
+        except ValueError:
+            continue
+        # Cheap pre-filter: a shard dated before the window cannot contribute.
+        if shard_date < since.date():
+            continue
+        try:
+            lines = shard.read_text().splitlines()
+        except (OSError, UnicodeDecodeError):
+            continue
+        for line in lines:
+            if not line.strip():
+                continue
+            try:
+                row = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            try:
+                ts = datetime.fromisoformat(row.get("ingested_at", "").replace("Z", "+00:00"))
+            except Exception:
+                # Non-object rows and missing / malformed timestamps land here.
+                continue
+            if ts.tzinfo is None and since.tzinfo is not None:
+                # Naive and aware datetimes cannot be ordered; assume UTC.
+                ts = ts.replace(tzinfo=timezone.utc)
+            if ts < since:
+                continue
+            counts[row.get("platform", "?")] += 1
+    return {"total": sum(counts.values()), "by_platform": dict(counts.most_common())}
+def _count_deliberations(since: datetime) -> Dict[str, Any]:
+    """Summarize deliberation transcripts modified at or after ``since``.
+    Looks at no more than the first 50 ``deliberation_*.json`` files in
+    DELIB_DIR when sorted by path in descending order. A transcript is
+    selected by its file modification time; any transcript that fails to
+    load or inspect is skipped from the point of failure onward.
+    Returns the total, the number of unanimous and no-consensus / max-rounds
+    verdicts, and up to 10 per-transcript summaries under ``recent``.
+    """
+    summary: Dict[str, Any] = {"total": 0, "unanimous": 0, "no_consensus": 0, "recent": []}
+    if not DELIB_DIR.exists():
+        return summary
+    entries = summary["recent"]
+    candidates = sorted(DELIB_DIR.glob("deliberation_*.json"), reverse=True)
+    for transcript in candidates[:50]:
+        try:
+            modified = datetime.fromtimestamp(transcript.stat().st_mtime, tz=timezone.utc)
+            if modified < since:
+                continue
+            data = json.loads(transcript.read_text())
+            # Counted as soon as it parses, before any field is inspected.
+            summary["total"] += 1
+            verdict_upper = (data.get("final_verdict") or "").upper()
+            if "UNANIMOUS" in verdict_upper:
+                summary["unanimous"] += 1
+            elif "NO CONSENSUS" in verdict_upper or "MAX ROUNDS" in verdict_upper:
+                summary["no_consensus"] += 1
+            rounds_value = data.get("rounds", 0)
+            if isinstance(rounds_value, list):
+                # A list of rounds is reported by its length.
+                rounds_value = len(rounds_value)
+            entries.append({
+                "file": transcript.name,
+                "verdict": (data.get("final_verdict") or "?")[:60],
+                "status": data.get("status", "?"),
+                "rounds": rounds_value,
+            })
+        except Exception:
+            continue
+    summary["recent"] = entries[:10]
+    return summary
+def _count_ledger_deltas(since: datetime) -> Dict[str, Any]:
+    """Count ledger items opened and completed at or after ``since``.
+    Reads every ``*.jsonl`` file in LEDGER_DIR. A row counts as opened when
+    its ``created_at`` is in the window and its ``type`` is not "update". A
+    row counts as completed when it is an "update" with status "done" whose
+    ``updated_at`` (falling back to ``created_at``) is in the window.
+    Timestamps are compared as ISO-8601 strings against ``since``. Rows that
+    are not JSON objects are skipped, and a missing, null or non-string
+    timestamp is treated as outside the window instead of raising
+    ``TypeError`` and aborting the digest.
+    Returns the two counts plus the last 10 entries of each kind under
+    ``new`` and ``completed``.
+    """
+    if not LEDGER_DIR.exists():
+        return {"opened": 0, "done": 0, "new": [], "completed": []}
+    new_items = []
+    done_items = []
+    since_iso = since.isoformat().replace("+00:00", "Z")
+    for ledger_file in LEDGER_DIR.glob("*.jsonl"):
+        try:
+            lines = ledger_file.read_text().splitlines()
+        except OSError:
+            continue
+        for line in lines:
+            if not line.strip():
+                continue
+            try:
+                item = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if not isinstance(item, dict):
+                continue
+            created = item.get("created_at")
+            if not isinstance(created, str):
+                # "" sorts before every timestamp, so the row is out of window.
+                created = ""
+            updated = item.get("updated_at", created)
+            if not isinstance(updated, str):
+                updated = created
+            is_update = item.get("type") == "update"
+            if not is_update and created >= since_iso:
+                new_items.append({
+                    "id": item.get("id"),
+                    "title": str(item.get("title") or "")[:80],
+                    "priority": item.get("priority", "?"),
+                })
+            if is_update and item.get("status") == "done" and updated >= since_iso:
+                done_items.append({
+                    "id": item.get("id"),
+                    "note": str(item.get("note") or "")[:120],
+                })
+    return {
+        "opened": len(new_items),
+        "done": len(done_items),
+        "new": new_items[-10:],
+        "completed": done_items[-10:],
+    }
+def _count_dispatches(since: datetime) -> Dict[str, Any]:
+    """Summarize agent task records stored in AGENTS_FILE.
+    Returns a dict with ``total_tasks``, ``dispatched_last_24h`` (tasks whose
+    ``created_at`` is at or after ``since``), ``by_status``,
+    ``dispatched_by_assignee`` (only tasks whose status is "dispatched") and
+    ``stuck_over_24h`` (dispatched tasks created more than 24 hours ago).
+    The same keys are returned, zeroed, when the file is missing, unreadable,
+    not valid JSON, or not a JSON object. Previously those paths returned
+    differently named keys, which made render_markdown() raise ``KeyError``.
+    """
+    summary: Dict[str, Any] = {
+        "total_tasks": 0,
+        "dispatched_last_24h": 0,
+        "by_status": {},
+        "dispatched_by_assignee": {},
+        "stuck_over_24h": 0,
+    }
+    try:
+        tasks = json.loads(AGENTS_FILE.read_text())
+    except (OSError, ValueError):
+        # Missing / unreadable file, bad encoding, or malformed JSON.
+        return summary
+    if not isinstance(tasks, dict):
+        return summary
+    status_counts: Counter = Counter()
+    assignee_counts: Counter = Counter()
+    stuck = 0
+    dispatched_recent = 0
+    since_iso = since.isoformat().replace("+00:00", "Z")
+    now = _now()
+    for task in tasks.values():
+        if not isinstance(task, dict):
+            continue
+        status = task.get("status", "?")
+        status_counts[status] += 1
+        created_raw = task.get("created_at")
+        if not isinstance(created_raw, str):
+            created_raw = ""
+        if created_raw >= since_iso:
+            dispatched_recent += 1
+        if status != "dispatched":
+            continue
+        assignee_counts[task.get("assignee", "?")] += 1
+        try:
+            created = datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
+        except ValueError:
+            # No usable creation time: cannot decide whether it is stuck.
+            continue
+        if created.tzinfo is None:
+            # Naive timestamps cannot be subtracted from an aware "now".
+            created = created.replace(tzinfo=timezone.utc)
+        if now - created > timedelta(hours=24):
+            stuck += 1
+    summary.update({
+        "total_tasks": len(tasks),
+        "dispatched_last_24h": dispatched_recent,
+        "by_status": dict(status_counts),
+        "dispatched_by_assignee": dict(assignee_counts),
+        "stuck_over_24h": stuck,
+    })
+    return summary
+def _check_health(since: datetime) -> Dict[str, Any]:
+    """Report health indicators for the digest window.
+    Two indicators are computed: whether ``~/.delimit/pause_dispatch``
+    exists, and how many rows of the signal guard shadow log carry a ``ts``
+    at or after ``since``. ``daemon_stopped`` is always reported as
+    ``False``; nothing in this function updates it.
+    Timestamps are compared as ISO-8601 strings. Rows that are not JSON
+    objects, or whose ``ts`` is not a string, are skipped instead of raising
+    and aborting the digest.
+    """
+    delimit_home = Path.home() / ".delimit"
+    health = {
+        "pause_file_exists": (delimit_home / "pause_dispatch").exists(),
+        "signal_guard_shadow_hits": 0,
+        "daemon_stopped": False,
+    }
+    # Signal guard shadow log
+    shadow = delimit_home / "logs" / "signal_guard_shadow.jsonl"
+    if not shadow.exists():
+        return health
+    since_iso = since.isoformat().replace("+00:00", "Z")
+    try:
+        lines = shadow.read_text().splitlines()
+    except OSError:
+        return health
+    for line in lines:
+        if not line.strip():
+            continue
+        try:
+            row = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if not isinstance(row, dict):
+            continue
+        stamp = row.get("ts", "")
+        if isinstance(stamp, str) and stamp >= since_iso:
+            health["signal_guard_shadow_hits"] += 1
+    return health
+def build_digest(window_hours: int = 24) -> Dict[str, Any]:
+    """Assemble the digest dict covering the last ``window_hours`` hours.
+    The result records when it was generated, the window size and start, and
+    one section each for signals, deliberations, ledger deltas, dispatches
+    and health, all measured from the same window start.
+    """
+    window_start = _now() - timedelta(hours=window_hours)
+    digest: Dict[str, Any] = {
+        "generated_at": _now().isoformat(),
+        "window_hours": window_hours,
+        "window_start": window_start.isoformat(),
+    }
+    digest["signals"] = _count_signals(window_start)
+    digest["deliberations"] = _count_deliberations(window_start)
+    digest["ledger"] = _count_ledger_deltas(window_start)
+    digest["dispatches"] = _count_dispatches(window_start)
+    digest["health"] = _check_health(window_start)
+    return digest
+def render_markdown(digest: Dict[str, Any]) -> str:
+    """Render a digest dict as a founder-readable markdown document.
+    ``digest`` is expected to have the shape produced by build_digest().
+    Sections appear in a fixed order: health, signals, deliberations, ledger
+    deltas, swarm dispatches, and pending founder actions. Optional lists
+    (recent transcripts, new / completed items, per-status and per-assignee
+    counts) are only emitted when non-empty.
+    The "New dispatches" label uses the digest's own ``window_hours`` rather
+    than a hard-coded 24h. The dispatch totals are read tolerantly so that a
+    dispatch summary lacking ``total_tasks`` / ``dispatched_last_24h`` renders
+    as zero instead of raising ``KeyError``.
+    Returns the document as a single newline-joined string.
+    """
+    signals = digest["signals"]
+    delibs = digest["deliberations"]
+    ledger = digest["ledger"]
+    dispatches = digest["dispatches"]
+    health = digest["health"]
+    window_hours = digest["window_hours"]
+    generated_at = digest["generated_at"]
+    # Fall back to the legacy "total" key, then to zero.
+    total_tasks = dispatches.get("total_tasks", dispatches.get("total", 0))
+    recent_dispatches = dispatches.get("dispatched_last_24h", 0)
+    lines = [
+        f"# Delimit Daily Digest — {generated_at[:10]}",
+        "",
+        f"Window: last {window_hours}h (since {digest['window_start'][:16]}Z)",
+        "",
+        "## Health",
+        "",
+        f"- Pause file: {'🔴 ACTIVE' if health['pause_file_exists'] else '🟢 clear'}",
+        f"- Signal guard shadow hits: {health['signal_guard_shadow_hits']}",
+        "",
+        "## Signals ingested",
+        "",
+        f"Total: **{signals['total']}** signals",
+    ]
+    lines.extend(
+        f"- {platform}: {count}"
+        for platform, count in signals.get("by_platform", {}).items()
+    )
+    lines.extend([
+        "",
+        "## Deliberations",
+        "",
+        f"- Total: **{delibs['total']}**",
+        f"- Unanimous: {delibs['unanimous']}",
+        f"- No consensus / max rounds: {delibs['no_consensus']}",
+    ])
+    if delibs.get("recent"):
+        lines.append("")
+        lines.append("Recent transcripts:")
+        for entry in delibs["recent"]:
+            lines.append(f"  - `{entry['file']}` — {entry['verdict']} ({entry.get('rounds', '?')} rounds)")
+    lines.extend([
+        "",
+        "## Ledger deltas",
+        "",
+        f"- Items opened: **{ledger['opened']}**",
+        f"- Items completed: **{ledger['done']}**",
+    ])
+    if ledger.get("new"):
+        lines.append("")
+        lines.append("New items:")
+        for item in ledger["new"]:
+            lines.append(f"  - {item['id']} [{item['priority']}] {item['title']}")
+    if ledger.get("completed"):
+        lines.append("")
+        lines.append("Completed:")
+        for item in ledger["completed"]:
+            lines.append(f"  - {item['id']} — {item['note']}")
+    lines.extend([
+        "",
+        "## Swarm dispatches",
+        "",
+        f"- Total tasks ever: {total_tasks}",
+        f"- New dispatches last {window_hours}h: **{recent_dispatches}**",
+        # The "stuck" threshold is a fixed 24 hours, independent of the window.
+        f"- Stuck (dispatched >24h): {dispatches['stuck_over_24h']}",
+    ])
+    if dispatches.get("by_status"):
+        lines.append("")
+        lines.append("By status:")
+        for status, count in dispatches["by_status"].items():
+            lines.append(f"  - {status}: {count}")
+    if dispatches.get("dispatched_by_assignee"):
+        lines.append("")
+        lines.append("Currently dispatched by assignee:")
+        for who, count in dispatches["dispatched_by_assignee"].items():
+            lines.append(f"  - {who}: {count}")
+    lines.extend([
+        "",
+        "## Pending founder actions",
+        "",
+        f"- Stuck dispatches (need worker): {dispatches['stuck_over_24h']}",
+        f"- Pause file present: {'yes' if health['pause_file_exists'] else 'no'}",
+        f"- Guard shadow hits (investigate if >0): {health['signal_guard_shadow_hits']}",
+        "",
+        "---",
+        f"Digest generated at {generated_at}",
+    ])
+    return "\n".join(lines)
+def write_digest(window_hours: int = 24) -> Dict[str, str]:
+    """Generate the digest and write both markdown + json artifacts.
+    Files are named ``digest-YYYY-MM-DD.md`` / ``.json`` after the date part
+    of the digest's ``generated_at``, so a second run on the same date
+    overwrites the earlier files.
+    Both files are written as UTF-8 explicitly: the markdown contains emoji
+    and an em dash, which the platform default encoding cannot always
+    represent.
+    Returns ``markdown_path``, ``json_path`` and a one-line ``summary`` so
+    the founder can inspect the artifacts from the interactive session even
+    without email delivery.
+    """
+    _ensure_dir()
+    digest = build_digest(window_hours=window_hours)
+    date_slug = digest["generated_at"][:10]
+    md_path = DIGEST_DIR / f"digest-{date_slug}.md"
+    json_path = DIGEST_DIR / f"digest-{date_slug}.json"
+    md_path.write_text(render_markdown(digest), encoding="utf-8")
+    json_path.write_text(json.dumps(digest, indent=2), encoding="utf-8")
+    return {
+        "markdown_path": str(md_path),
+        "json_path": str(json_path),
+        "summary": f"{digest['signals']['total']} signals, {digest['deliberations']['total']} deliberations, {digest['ledger']['opened']} new ledger items, {digest['dispatches']['stuck_over_24h']} stuck dispatches",
+    }
+def send_digest_email(to: str = "", from_account: str = "pro@delimit.ai") -> Dict[str, Any]:
+    """Write a fresh 24h digest and optionally email it via the notify pipeline.
+    The digest is always regenerated and written to disk first. Email is
+    attempted only when the DELIMIT_DIGEST_EMAIL environment variable is
+    "true", "1" or "yes" (case-insensitive). This function performs no DMARC
+    or pipeline-health check of its own.
+    The recipient is ``to``, falling back to DELIMIT_SMTP_TO. The markdown is
+    read back as UTF-8 because it contains emoji and an em dash.
+    Returns a dict with ``files`` (the write_digest() result) and either
+    ``error`` ("digest not written") or a ``status`` of "skipped_email",
+    "sent" or "send_failed"; the last also carries the exception text under
+    ``error``. Exceptions raised while writing the digest propagate.
+    """
+    import os
+    result = write_digest(window_hours=24)
+    md_path = Path(result["markdown_path"])
+    if not md_path.exists():
+        return {"error": "digest not written", "files": result}
+    send_enabled = os.environ.get("DELIMIT_DIGEST_EMAIL", "").lower() in ("true", "1", "yes")
+    if not send_enabled:
+        return {
+            "status": "skipped_email",
+            "reason": "DELIMIT_DIGEST_EMAIL not set to true; digest written to disk only",
+            "files": result,
+        }
+    try:
+        # Imported lazily so a missing notify module is reported as a failed send.
+        from ai.notify import send_notification
+        body = md_path.read_text(encoding="utf-8")
+        send_notification(
+            channel="email",
+            message=body,
+            subject=f"[DIGEST] Delimit — {result['summary']}",
+            to=to or os.environ.get("DELIMIT_SMTP_TO", ""),
+            from_account=from_account,
+            event_type="daily_digest",
+        )
+    except Exception as exc:
+        return {
+            "status": "send_failed",
+            "error": str(exc),
+            "files": result,
+        }
+    return {
+        "status": "sent",
+        "files": result,
+    }

package/gateway/ai/ledger_manager.py CHANGED Viewed

@@ -202,7 +202,39 @@ def add_item(
     LED-189: Items can have acceptance_criteria (testable "done when" conditions).
     LED-190: Items can have context, tools_needed, and estimated_complexity
     for agent-executable task format.
+    LED-877: Signal guard — rejects source='social_scan' writes so sensed
+    observations cannot land in the ledger. Observations belong in the intel
+    signal store (ai/sensing/signal_store.py). Bypass via env var for the
+    promote_to_ledger path: _DELIMIT_SIGNAL_PROMOTED_BY=<who>.
     """
+    _src_norm = (source or "").strip().lower()
+    _promoted_by = os.environ.get("_DELIMIT_SIGNAL_PROMOTED_BY", "")
+    _guard_mode = os.environ.get("DELIMIT_SIGNAL_GUARD", "enforce").lower()
+    if _src_norm.startswith("social_scan") or _src_norm.startswith("social_strategy"):
+        if not _promoted_by:
+            msg = (
+                f"LED-877 guard: source={source!r} is a sensed observation, not "
+                f"a ledger item. Use ai.sensing.signal_store.ingest() instead. "
+                f"Promote explicitly via promote_to_ledger(signal_id=...)."
+            )
+            if _guard_mode == "shadow":
+                try:
+                    _shadow_log = Path.home() / ".delimit" / "logs" / "signal_guard_shadow.jsonl"
+                    _shadow_log.parent.mkdir(parents=True, exist_ok=True)
+                    with _shadow_log.open("a") as _f:
+                        _f.write(json.dumps({
+                            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+                            "title": title,
+                            "source": source,
+                            "ledger": ledger,
+                            "msg": msg,
+                        }) + "\n")
+                except Exception:
+                    pass
+                # fall through
+            else:
+                raise ValueError(msg)
     _ensure(project_path)
     venture = _detect_venture(project_path)
     ledger_dir = _project_ledger_dir(project_path)

package/gateway/ai/license_core.py CHANGED Viewed

@@ -37,6 +37,8 @@ PRO_TOOLS = frozenset({
     # Agent orchestration
     "delimit_agent_dispatch", "delimit_agent_status",
     "delimit_agent_complete", "delimit_agent_handoff",
+    # Worker Pool v2 executor (LED-981)
+    "delimit_executor",
 })
 # Free trial limits

package/gateway/ai/notify.py CHANGED Viewed

@@ -1044,17 +1044,23 @@ def _enforce_email_protocol(subject: str, message: str, event_type: str) -> tupl
     # 1. Subject must have a valid prefix bracket
     if not any(subject.startswith(p) for p in _VALID_SUBJECT_PREFIXES):
         # Try to infer from event_type
-        prefix_map = {
-            "social_draft": "[APPROVE]",
-            "outreach": "[OUTREACH]",
-            "deploy": "[DEPLOY]",
-            "gate_failure": "[ALERT]",
-            "digest": "[DIGEST]",
-            "info": "[INFO]",
-        }
-        prefix = prefix_map.get(event_type, "[INFO]")
-        subject = f"{prefix} {subject}"
-        warnings.append(f"Subject prefix added: {prefix}")
+        # LED-969: customer-facing emails should not get bracket prefixes.
+        # Any event_type starting with "customer_" is external-facing and
+        # the subject should be sent as-is (clean, professional).
+        if event_type and event_type.startswith("customer_"):
+            pass  # no prefix for customer emails
+        else:
+            prefix_map = {
+                "social_draft": "[APPROVE]",
+                "outreach": "[OUTREACH]",
+                "deploy": "[DEPLOY]",
+                "gate_failure": "[ALERT]",
+                "digest": "[DIGEST]",
+                "info": "[INFO]",
+            }
+            prefix = prefix_map.get(event_type, "[INFO]")
+            subject = f"{prefix} {subject}"
+            warnings.append(f"Subject prefix added: {prefix}")
     # 2. Check required sections for this event_type
     required = _EMAIL_PROTOCOL.get(event_type, [])

package/gateway/ai/reddit_proxy.py CHANGED Viewed

@@ -9,24 +9,35 @@ from typing import Any, Dict, List, Optional
 logger = logging.getLogger("delimit.ai.reddit_proxy")
 def _get_proxy_config() -> Dict[str, str]:
-    """Load proxy config from private secrets or environment."""
-    config = {"proxy_url": ""}
-    # 1. Check environment variable
+    """Load proxy config from private secrets or environment.
+    Returns {proxy_url, token}. The server-side proxy requires a bearer
+    token (LED-988 follow-up) — clients without a token still populate
+    proxy_url but will fail auth at the server unless the server is run
+    without a token (not recommended).
+    """
+    config = {"proxy_url": "", "token": ""}
+    # 1. Environment variables
     env_url = os.environ.get("DELIMIT_REDDIT_PROXY")
+    env_token = os.environ.get("DELIMIT_REDDIT_PROXY_TOKEN")
     if env_url:
         config["proxy_url"] = env_url
+    if env_token:
+        config["token"] = env_token
+    if config["proxy_url"]:
         return config
-    # 2. Check private secrets file
+    # 2. Secrets file
     secrets_path = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
     if secrets_path.exists():
         try:
             secrets = json.loads(secrets_path.read_text())
-            config["proxy_url"] = secrets.get("proxy_url", "")
+            config["proxy_url"] = secrets.get("proxy_url", "") or config["proxy_url"]
+            config["token"] = secrets.get("token", "") or config["token"]
         except Exception as e:
             logger.debug(f"Failed to load reddit-proxy secrets: {e}")
     return config
 def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
@@ -42,7 +53,11 @@ def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[
     if proxy_url:
         try:
             fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
-            req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
+            headers = {"User-Agent": "Delimit/1.0"}
+            token = proxy_cfg.get("token", "")
+            if token:
+                headers["Authorization"] = f"Bearer {token}"
+            req = urllib.request.Request(fetch_url, headers=headers)
             with urllib.request.urlopen(req, timeout=10) as resp:
                 body = json.loads(resp.read().decode())
                 children = body.get("data", {}).get("children", [])
@@ -84,7 +99,11 @@ def fetch_thread(thread_id: str) -> Optional[Dict[str, Any]]:
     if proxy_url:
         try:
             fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
-            req = urllib.request.Request(fetch_url, headers={"User-Agent": "Delimit/1.0"})
+            headers = {"User-Agent": "Delimit/1.0"}
+            token = proxy_cfg.get("token", "")
+            if token:
+                headers["Authorization"] = f"Bearer {token}"
+            req = urllib.request.Request(fetch_url, headers=headers)
             with urllib.request.urlopen(req, timeout=10) as resp:
                 data = json.loads(resp.read().decode())
                 if isinstance(data, list) and len(data) > 0:

package/gateway/ai/sensing/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Signal sensing layer (LED-877).
+Physically separates observational signals from the ledger. Signals are a
+deliberation corpus, not a task queue — they must never be pulled by
+build_loop as work. Import from ai.sensing.signal_store for ingest/query.
+"""
+from ai.sensing.schema import Signal, ValidationError, normalize_url, fingerprint_of
+from ai.sensing.signal_store import (
+    ingest,
+    query,
+    dedup_check,
+    age_out_to_warm,
+    freeze_cold,
+    promote_to_ledger,
+    SIGNALS_DIR,
+    HOT_WINDOW_DAYS,
+    WARM_WINDOW_DAYS,
+)
+__all__ = [
+    "Signal",
+    "ValidationError",
+    "normalize_url",
+    "fingerprint_of",
+    "ingest",
+    "query",
+    "dedup_check",
+    "age_out_to_warm",
+    "freeze_cold",
+    "promote_to_ledger",
+    "SIGNALS_DIR",
+    "HOT_WINDOW_DAYS",
+    "WARM_WINDOW_DAYS",
+]

package/gateway/ai/sensing/schema.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Signal schema + validation (LED-877).
+A signal is an observation, not a commitment. Schema enforces enough metadata
+for deliberation to work with, rejects empty-identity rows at ingest (killing
+the LED-876 ghost-engage-task class of bug at its source).
+"""
+from __future__ import annotations
+import hashlib
+import re
+from dataclasses import dataclass, field, asdict
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
+class ValidationError(ValueError):
+    """Signal rejected at ingest because it failed schema validation."""
+_UTM_RE = re.compile(r"^utm_")
+def normalize_url(url: str) -> str:
+    """Return a canonical form of ``url`` for comparison and dedup.
+    Query parameters whose name starts with ``utm_`` are dropped, as are the
+    fragment and any trailing slashes on the path (an empty path becomes
+    ``/``). Scheme and network location are lower-cased. An empty input
+    yields ``""``; input that cannot be parsed or has no scheme is returned
+    with only surrounding whitespace removed.
+    """
+    if not url:
+        return ""
+    try:
+        parts = urlparse(url.strip())
+    except Exception:
+        return url.strip()
+    if not parts.scheme:
+        return url.strip()
+    kept_params = []
+    for key, value in parse_qsl(parts.query):
+        if _UTM_RE.match(key):
+            continue
+        kept_params.append((key, value))
+    trimmed_path = parts.path.rstrip("/")
+    if not trimmed_path:
+        trimmed_path = "/"
+    scheme = parts.scheme.lower()
+    host = parts.netloc.lower()
+    return urlunparse((scheme, host, trimmed_path, "", urlencode(kept_params), ""))
+def fingerprint_of(platform: str, canonical_url: str, author: str) -> str:
+    """Derive the dedup key for a signal.
+    The key is the first 16 hex characters of the SHA-256 of the lower-cased
+    platform, the normalized URL and the lower-cased author, joined by ``|``.
+    Missing platform or author values are treated as empty strings.
+    """
+    platform_key = (platform or "").lower()
+    url_key = normalize_url(canonical_url)
+    author_key = (author or "").lower()
+    material = "{}|{}|{}".format(platform_key, url_key, author_key)
+    digest = hashlib.sha256(material.encode("utf-8"))
+    return digest.hexdigest()[:16]
+@dataclass
+class Signal:
+    """A sensed observation from an external platform.
+    Mandatory: canonical_url AND (author OR content_snippet).
+    Anything weaker than that is rejected at ingest because deliberation
+    cannot draw useful conclusions from a row with no identity.
+    The dataclass itself performs no checks; the mandatory-field rules are
+    applied by validate_and_normalize(), which is the intended constructor.
+    """
+    # Dedup key; validate_and_normalize() derives it with fingerprint_of().
+    fingerprint: str
+    platform: str
+    # Stored in normalized form when built by validate_and_normalize().
+    canonical_url: str
+    author: str = ""
+    author_handle: str = ""
+    # validate_and_normalize() truncates this to 500 characters.
+    content_snippet: str = ""
+    posted_at: str = ""
+    # Left empty by validate_and_normalize(); filled by signal_store.ingest.
+    ingested_at: str = ""
+    classification: str = "signal"
+    relevance_score: float = 0.0
+    themes: List[str] = field(default_factory=list)
+    # validate_and_normalize() falls back to source_url, then canonical_url.
+    raw_ref: str = ""
+    # Not set by validate_and_normalize(); presumably assigned by the store - TODO confirm.
+    id: str = ""
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the signal's fields as a plain dict (via dataclasses.asdict)."""
+        return asdict(self)
+def validate_and_normalize(raw: Dict[str, Any]) -> Signal:
+    """Convert a raw target dict from social_target.py into a validated Signal.
+    Accepted aliases: ``url`` for ``canonical_url``, ``title`` for
+    ``content_snippet`` and ``source_url`` for ``raw_ref``. The URL is
+    normalized, the snippet is truncated to 500 characters, and
+    ``ingested_at`` is left empty for the signal store to fill.
+    Raises ValidationError when ``canonical_url`` or ``platform`` is missing,
+    when both ``author`` and ``content_snippet`` are empty, or when
+    ``relevance_score`` is not numeric. Failing loudly at ingest avoids
+    empty-identity rows polluting the corpus (the LED-876 failure mode).
+    """
+    platform = (raw.get("platform") or "").strip()
+    canonical_url = normalize_url(raw.get("canonical_url") or raw.get("url") or "")
+    author = (raw.get("author") or "").strip()
+    content_snippet = (raw.get("content_snippet") or raw.get("title") or "").strip()[:500]
+    if not canonical_url:
+        raise ValidationError("canonical_url is required")
+    if not author and not content_snippet:
+        raise ValidationError("at least one of author or content_snippet is required")
+    if not platform:
+        raise ValidationError("platform is required")
+    raw_score = raw.get("relevance_score")
+    try:
+        relevance_score = float(raw_score or 0.0)
+    except (TypeError, ValueError) as exc:
+        # Surface bad scores as the documented error type, not a bare
+        # ValueError / TypeError that callers catching ValidationError miss.
+        raise ValidationError(f"relevance_score must be numeric, got {raw_score!r}") from exc
+    themes = raw.get("themes") or []
+    if isinstance(themes, str):
+        # list("ai") would yield ["a", "i"]; keep a lone string as one theme.
+        themes = [themes]
+    return Signal(
+        fingerprint=fingerprint_of(platform, canonical_url, author),
+        platform=platform,
+        canonical_url=canonical_url,
+        author=author,
+        author_handle=(raw.get("author_handle") or "").strip(),
+        content_snippet=content_snippet,
+        posted_at=(raw.get("posted_at") or "").strip(),
+        ingested_at="",  # filled by signal_store.ingest
+        classification=(raw.get("classification") or "signal").strip(),
+        relevance_score=relevance_score,
+        themes=list(themes),
+        raw_ref=(raw.get("raw_ref") or raw.get("source_url") or canonical_url).strip(),
+    )