npm - social-autoposter - Versions diffs - 1.6.48 → 1.6.49 - Mend

social-autoposter 1.6.48 → 1.6.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/mcp/dist/index.js +12 -0
package/package.json +1 -1
package/scripts/capture_thread_media.py +169 -0
package/scripts/engage_twitter_helper.py +28 -0
package/scripts/follow_gate_log.py +59 -0
package/scripts/harvest_twitter_following.py +237 -0
package/scripts/log_post.py +23 -0
package/scripts/log_thread_media.py +108 -0
package/scripts/scan_twitter_thread_followups.py +11 -0
package/scripts/score_twitter_candidates.py +77 -1
package/scripts/twitter_browser.py +292 -90
package/scripts/twitter_post_plan.py +50 -0
package/skill/engage-twitter.sh +2 -0
package/skill/refresh-twitter-following.sh +52 -0
package/skill/run-twitter-cycle.sh +36 -0

package/mcp/dist/index.js CHANGED

@@ -650,6 +650,18 @@ server.registerTool("draft_cycle", {
             batch_id: drafted.batchId,
             drafted: count,
             status: "awaiting_decision",
+            // Include the actual draft text here, not just a count. Some hosts
+            // (e.g. Claude Desktop) surface ONLY structuredContent to the model and
+            // drop the human-readable `content` table — which left the agent saying
+            // "drafted: 2" with no way to show the drafts. Carrying the drafts in
+            // structuredContent makes them available regardless of host behavior.
+            drafts: (plan.candidates || []).map((c, i) => ({
+                n: i + 1,
+                author: c.thread_author,
+                tweet_url: c.candidate_url,
+                reply_text: c.reply_text,
+                language: c.language,
+            })),
         },
     };
 });

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "social-autoposter",
-  "version": "1.6.48",
+  "version": "1.6.49",
   "description": "Automated social posting pipeline for Reddit, X/Twitter, LinkedIn, and Moltbook. Install as a Claude Code agent skill.",
   "bin": {
     "social-autoposter": "bin/cli.js"

package/scripts/capture_thread_media.py ADDED

@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""Deterministically capture + persist + format thread media for the prep step.
+Companion to the main Twitter posting cycle (run-twitter-cycle.sh Phase 2b-prep,
+2026-06-03 thread-media feature). The prep prompt forbids the model from calling
+twitter_browser.py, so the SHELL pre-fetches the media of every candidate the
+model is about to draft against, in ONE cheap browser pass, then:
+  1. persists each candidate's media into twitter_candidates.thread_media (so the
+     record survives independent of the model), and
+  2. emits a "MEDIA CONTEXT" prompt block to stdout so the reply-writer can "see"
+     the image / video / GIF / link-card it is replying to instead of replying
+     text-blind.
+Input: a TSV file, one `candidate_id<TAB>tweet_url` per line (built by the
+CANDIDATE_BLOCK loop in run-twitter-cycle.sh).
+Media shape per item: {url, alt, type}, type in image|video|gif|card. An empty
+list [] is valid and meaningful ("captured, none found", distinct from NULL =
+"never captured").
+Usage:
+    python3 scripts/capture_thread_media.py --urls-file /tmp/urls.tsv \\
+        [--scroll 1] [--no-persist]
+Output:
+    stdout  -> the MEDIA CONTEXT prompt block (empty string if no media at all)
+    stderr  -> per-candidate diagnostics + a final JSON summary line
+"""
+import argparse
+import json
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from http_api import api_patch  # noqa: E402
+# Imported lazily inside main() so --help works without a browser / playwright.
+def _load_pairs(urls_file):
+    """Return [(candidate_id:str, url:str)] from a `cid<TAB>url` TSV file."""
+    pairs = []
+    with open(urls_file) as f:
+        for line in f:
+            line = line.rstrip("\n")
+            if not line.strip():
+                continue
+            if "\t" in line:
+                cid, url = line.split("\t", 1)
+            else:
+                # Tolerate a bare-URL line (no cid); skip it, we can't key it.
+                continue
+            cid = cid.strip()
+            url = url.strip()
+            if cid and url:
+                pairs.append((cid, url))
+    return pairs
+def _persist(candidate_id, media):
+    """Persist media onto twitter_candidates.thread_media via the set_media action."""
+    payload = {"id": int(candidate_id), "action": "set_media", "thread_media": media}
+    resp = api_patch(
+        "/api/v1/twitter-candidates/by-id", payload,
+        ok_on_conflict=True, ok_on_404=True,
+    )
+    if (resp or {}).get("_not_found"):
+        return False, "CANDIDATE_NOT_FOUND"
+    if not (resp or {}).get("ok"):
+        return False, (resp or {}).get("error") or "SET_MEDIA_FAILED"
+    return True, None
+def _format_item(item):
+    """One '  - <type>: "<alt>" (<url>)' line for the prompt block."""
+    t = (item.get("type") or "media").strip()
+    alt = (item.get("alt") or "").strip()
+    url = (item.get("url") or "").strip()
+    alt_part = f'"{alt}"' if alt else "[no description]"
+    return f"  - {t}: {alt_part} ({url})"
+def _build_block(captured):
+    """captured: list of (candidate_id, media_list). Returns prompt block str."""
+    sections = []
+    for cid, media in captured:
+        if not media:
+            continue
+        lines = "\n".join(_format_item(it) for it in media)
+        sections.append(f"Candidate {cid}:\n{lines}")
+    if not sections:
+        return ""
+    header = (
+        "## MEDIA IN THESE THREADS\n"
+        "Some candidate threads contain images, videos, GIFs, or link-cards. "
+        "This is part of the content you are replying to: react to what the tweet "
+        "VISUALLY shows, not just its text. A candidate NOT listed here had no "
+        "media (or capture was skipped); reply to its text as usual. Descriptions "
+        "marked [no description] mean the media had no alt-text, so infer from the "
+        "thread text and the media type."
+    )
+    return header + "\n\n" + "\n".join(sections) + "\n"
+def main():
+    p = argparse.ArgumentParser()
+    p.add_argument("--urls-file", required=True,
+                   help="TSV: one candidate_id<TAB>tweet_url per line.")
+    p.add_argument("--scroll", type=int, default=1,
+                   help="scroll_count passed to the batch scraper (default 1).")
+    p.add_argument("--no-persist", action="store_true",
+                   help="Skip writing thread_media to the DB (format only).")
+    args = p.parse_args()
+    pairs = _load_pairs(args.urls_file)
+    if not pairs:
+        # Nothing to do; emit empty block, exit clean so the shell continues.
+        print("", end="")
+        print(json.dumps({"captured": 0, "persisted": 0, "with_media": 0}), file=sys.stderr)
+        return
+    # Lazy import so an empty/short-circuit run never pays the playwright cost.
+    from twitter_browser import scrape_many_thread_media
+    urls = [url for _cid, url in pairs]
+    try:
+        batch = scrape_many_thread_media(urls, scroll_count=args.scroll)
+    except Exception as e:
+        # Browser failure must NOT break the cycle: emit empty block, log, exit 0.
+        print("", end="")
+        print(json.dumps({"error": "SCRAPE_FAILED", "detail": str(e)}), file=sys.stderr)
+        return
+    # Map url -> media (results echo the input url verbatim as thread_url).
+    by_url = {}
+    for r in (batch or {}).get("results", []):
+        by_url[r.get("thread_url")] = r.get("media") or []
+    captured = []          # (cid, media) for ALL pairs (media may be [])
+    persisted = 0
+    with_media = 0
+    for cid, url in pairs:
+        media = by_url.get(url, [])
+        captured.append((cid, media))
+        if media:
+            with_media += 1
+        if not args.no_persist:
+            ok, err = _persist(cid, media)
+            if ok:
+                persisted += 1
+            else:
+                print(f"[capture_thread_media] persist failed cid={cid}: {err}",
+                      file=sys.stderr)
+    block = _build_block(captured)
+    # stdout = the prompt block ONLY (shell captures it verbatim).
+    sys.stdout.write(block)
+    print(json.dumps({
+        "captured": len(captured),
+        "persisted": persisted,
+        "with_media": with_media,
+        "urls_visited": (batch or {}).get("urls_visited", 0),
+    }), file=sys.stderr)
+if __name__ == "__main__":
+    main()

package/scripts/engage_twitter_helper.py CHANGED

@@ -117,6 +117,33 @@ def cmd_reply_counts() -> int:
     return 0
+def _render_media_block(media) -> str:
+    """Render replies.their_media ([{url,alt,type}]) into a short, self-titled
+    text block for the Phase B prompt (2026-06-03 thread-media feature). Empty
+    string when the comment had no media (or media was never captured), so it
+    stays invisible in the embedded JSON for text-only comments.
+    """
+    if not isinstance(media, list) or not media:
+        return ""
+    lines = []
+    for it in media:
+        if not isinstance(it, dict):
+            continue
+        t = (it.get("type") or "media").strip()
+        alt = (it.get("alt") or "").strip()
+        url = (it.get("url") or "").strip()
+        alt_part = f'"{alt}"' if alt else "[no description]"
+        lines.append(f"  - {t}: {alt_part} ({url})")
+    if not lines:
+        return ""
+    return (
+        "## Media in the comment you are replying to\n"
+        "React to what it VISUALLY shows, not just the text. "
+        "[no description] = no alt-text; infer from the comment + media type.\n"
+        + "\n".join(lines)
+    )
 def cmd_pending_data(batch_size: int) -> int:
     try:
         from account_resolver import resolve as _resolve_account  # noqa: WPS433
@@ -204,6 +231,7 @@ def cmd_pending_data(batch_size: int) -> int:
             "is_our_original_post": int(r.get("is_our_original_post") or 0),
             "project_name": r.get("project_name"),
             "counterparty_history_block": history_block,
+            "their_media_block": _render_media_block(r.get("their_media")),
         })
     # json_agg(...) returns null when the array is empty; engage-twitter.sh's
     # downstream prompt-template expects an empty array instead, which is

package/scripts/follow_gate_log.py ADDED

@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Dedicated, isolated logging for the Twitter follow-gate.
+The follow-gate in score_twitter_candidates.py drops candidate threads whose
+author we already follow. Its `[follow_gate]` stderr markers land in the giant
+mixed twitter-cycle log; this helper ALSO writes a clean, timestamped, greppable
+record to skill/logs/follow-gate.log so you can `tail -f` exactly what the filter
+loads and catches each cycle, without digging through 20MB of cycle output.
+All functions are best-effort: they NEVER raise, so logging can never break the
+fail-open gate. If the log can't be written, the gate proceeds silently.
+Line formats (one CYCLE line per scoring run, one SKIP line per dropped author):
+  <iso8601> <our_account> CYCLE loaded=<N> source=<ok|404|error|unresolved> checked=<M> skipped=<K> batch=<id>
+  <iso8601> <our_account> SKIP @<handle> url=<url> batch=<id>
+Read it with:  tail -f ~/social-autoposter/skill/logs/follow-gate.log
+"""
+from __future__ import annotations
+import os
+from datetime import datetime, timezone
+LOG_PATH = os.path.expanduser("~/social-autoposter/skill/logs/follow-gate.log")
+def _now() -> str:
+    try:
+        return datetime.now(timezone.utc).astimezone().strftime("%Y-%m-%dT%H:%M:%S%z")
+    except Exception:
+        return "?"
+def _append(line: str) -> None:
+    try:
+        os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
+        with open(LOG_PATH, "a") as fh:
+            fh.write(line.rstrip("\n") + "\n")
+    except Exception:
+        # Best-effort: never let logging break the fail-open gate.
+        pass
+def record_cycle(our_account, loaded, source, checked, skipped, batch_id=None) -> None:
+    """One line per scoring run: did the gate load the set (loaded>0, source=ok),
+    how many candidates it checked, and how many it skipped this run."""
+    _append(
+        f"{_now()} {our_account or '(unresolved)'} CYCLE "
+        f"loaded={loaded} source={source} checked={checked} "
+        f"skipped={skipped} batch={batch_id or '-'}"
+    )
+def record_skip(our_account, handle, url, batch_id=None) -> None:
+    """One line per dropped candidate (author we already follow)."""
+    _append(
+        f"{_now()} {our_account or '(unresolved)'} SKIP "
+        f"@{handle} url={url} batch={batch_id or '-'}"
+    )

package/scripts/harvest_twitter_following.py ADDED

@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+"""harvest_twitter_following.py — cache the list of accounts WE follow on X.
+The Twitter reply pipeline (score_twitter_candidates.py) drops candidate threads
+whose author is someone we already follow. fxtwitter can't supply that edge — it's
+an unauthenticated public API with no concept of "us" — so the follow relationship
+has to be read from our own logged-in session. This script scrapes
+`x.com/<handle>/following` via the harness Chrome (CDP, port 9555, same browser the
+cycle uses) and uploads the set to /api/v1/followed-accounts.
+Read-only: ONE navigation + DOM reads + scrolls. No clicks, no posting, no
+/voyager. Runs under the shared "twitter-browser" lock (held by the shell wrapper
+skill/refresh-twitter-following.sh) so it never races a live cycle.
+Completeness guard: we only upload when the scroll reached the end of the list
+(the deduped set stopped growing for STABLE_PASSES passes). A partial scrape is
+discarded, never uploaded — otherwise the un-scrolled tail would wrongly age out
+of the server's freshness window.
+Usage:
+    python3 scripts/harvest_twitter_following.py            # scrape + upload
+    python3 scripts/harvest_twitter_following.py --dry-run  # scrape + print, no upload
+    python3 scripts/harvest_twitter_following.py --out /tmp/following.json
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+import time
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+CDP_URL = os.environ.get("TWITTER_CDP_URL", "http://127.0.0.1:9555").strip()
+PLATFORM = "twitter"
+# Scroll/scrape tuning (env-overridable for slow boxes / very large lists).
+STABLE_PASSES = int(os.environ.get("FOLLOW_HARVEST_STABLE_PASSES", "5"))
+MAX_PASSES = int(os.environ.get("FOLLOW_HARVEST_MAX_PASSES", "800"))
+PAUSE_MS = int(os.environ.get("FOLLOW_HARVEST_PAUSE_MS", "900"))
+UPLOAD_CHUNK = int(os.environ.get("FOLLOW_HARVEST_UPLOAD_CHUNK", "1000"))
+# Each row on the Following tab is a [data-testid="UserCell"]. The profile link
+# href is exactly `/<screen_name>`; grab the first anchor matching that shape
+# (X handles are 1-15 chars of [A-Za-z0-9_]) that isn't a reserved app route.
+SCRAPE_JS = r"""
+(() => {
+  const RESERVED = new Set(['home','explore','notifications','messages','i',
+    'settings','search','compose','hashtag','intent','login','signup','tos',
+    'privacy','about']);
+  const cells = Array.from(document.querySelectorAll('[data-testid="UserCell"]'));
+  const out = [];
+  for (const c of cells) {
+    let handle = null;
+    for (const a of c.querySelectorAll('a[href^="/"]')) {
+      const m = (a.getAttribute('href') || '').match(/^\/([A-Za-z0-9_]{1,15})$/);
+      if (m && !RESERVED.has(m[1].toLowerCase())) { handle = m[1]; break; }
+    }
+    if (!handle) continue;
+    let name = null;
+    const un = c.querySelector('[data-testid="User-Name"]');
+    if (un) {
+      // User-Name mashes "Display Name@handle…"; the display name is the text
+      // before the first '@'.
+      name = ((un.textContent || '').split('@')[0]).trim().slice(0, 120) || null;
+    }
+    out.push({ screen_name: handle, name });
+  }
+  return JSON.stringify(out);
+})()
+"""
+def _resolve_handle() -> str:
+    try:
+        import account_resolver
+        h = account_resolver.resolve("twitter")
+        if h:
+            return h.lstrip("@").strip().lower()
+    except Exception as e:
+        print(f"[harvest] account_resolver failed ({e}); falling back to m13v_",
+              file=sys.stderr)
+    return "m13v_"
+def _looks_logged_out(url: str) -> bool:
+    u = (url or "").lower()
+    return ("/login" in u) or ("i/flow/login" in u) or ("/account/access" in u)
+def scrape_following(handle: str) -> tuple[dict, bool]:
+    """Return (handle->name dict, complete). complete=True means the scroll
+    reached the end (set stopped growing) rather than hitting the pass cap."""
+    from playwright.sync_api import sync_playwright
+    seen: dict[str, str] = {}
+    complete = False
+    with sync_playwright() as p:
+        browser = p.chromium.connect_over_cdp(CDP_URL)
+        contexts = browser.contexts
+        if not contexts:
+            raise RuntimeError("no browser context on harness Chrome — is it logged in?")
+        context = contexts[0]
+        # Reuse an existing tab (tab hygiene); fall back to a fresh page.
+        page = context.pages[0] if context.pages else context.new_page()
+        url = f"https://x.com/{handle}/following"
+        page.goto(url, wait_until="domcontentloaded", timeout=45000)
+        page.wait_for_timeout(2500)
+        if _looks_logged_out(page.url):
+            raise RuntimeError(f"session looks logged out (url={page.url})")
+        # Wait for at least one row to render before scrolling.
+        try:
+            page.wait_for_selector('[data-testid="UserCell"]', timeout=20000)
+        except Exception:
+            # No cells at all — empty list, protected, or a block page. Treat as
+            # incomplete so we never upload an empty/partial set.
+            print(f"[harvest] no UserCell rendered for @{handle} (url={page.url})",
+                  file=sys.stderr)
+            return seen, False
+        last = 0
+        stable = 0
+        for i in range(MAX_PASSES):
+            try:
+                raw = page.evaluate(SCRAPE_JS)
+                rows = json.loads(raw) if isinstance(raw, str) else (raw or [])
+            except Exception as e:
+                print(f"[harvest] evaluate failed on pass {i} ({e})", file=sys.stderr)
+                rows = []
+            for r in rows:
+                sn = (r.get("screen_name") or "").strip().lower()
+                if not sn or sn == handle:  # never list ourselves
+                    continue
+                if sn not in seen:
+                    seen[sn] = r.get("name") or ""
+            if len(seen) == last:
+                stable += 1
+                if stable >= STABLE_PASSES:
+                    complete = True
+                    break
+            else:
+                stable = 0
+                last = len(seen)
+            page.evaluate(
+                "window.scrollBy(0, Math.round(document.documentElement.clientHeight * 0.85));"
+            )
+            page.wait_for_timeout(PAUSE_MS)
+        # Disconnect the CDP client without closing the shared Chrome/tab.
+        try:
+            browser.close()
+        except Exception:
+            pass
+    print(
+        f"[harvest] @{handle}: collected {len(seen)} followed handles "
+        f"(complete={complete}, passes_stable={stable}/{STABLE_PASSES})",
+        file=sys.stderr,
+    )
+    return seen, complete
+def upload(handle: str, seen: dict) -> int:
+    from http_api import api_post
+    accounts = [{"handle": h, "name": n} for h, n in seen.items()]
+    posted = 0
+    for i in range(0, len(accounts), UPLOAD_CHUNK):
+        chunk = accounts[i:i + UPLOAD_CHUNK]
+        api_post(
+            "/api/v1/followed-accounts",
+            {
+                "platform": PLATFORM,
+                "our_account": handle,
+                "accounts": chunk,
+                "complete": True,
+            },
+        )
+        posted += len(chunk)
+    return posted
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Scrape and report but do not upload.")
+    parser.add_argument("--out", help="Also write the scraped set to this JSON path.")
+    parser.add_argument("--handle", help="Override the resolved posting handle.")
+    args = parser.parse_args()
+    handle = (args.handle or _resolve_handle()).lstrip("@").strip().lower()
+    print(f"[harvest] resolving following list for @{handle} via {CDP_URL}",
+          file=sys.stderr)
+    try:
+        seen, complete = scrape_following(handle)
+    except Exception as e:
+        print(f"[harvest] FAILED: {e}", file=sys.stderr)
+        return 1
+    if args.out:
+        try:
+            with open(args.out, "w") as fh:
+                json.dump({"handle": handle, "complete": complete,
+                           "accounts": seen}, fh, indent=2)
+            print(f"[harvest] wrote scrape to {args.out}", file=sys.stderr)
+        except OSError as e:
+            print(f"[harvest] could not write {args.out}: {e}", file=sys.stderr)
+    if not seen:
+        print("[harvest] scraped 0 handles; nothing to upload.", file=sys.stderr)
+        return 2
+    if not complete:
+        print(
+            f"[harvest] scrape INCOMPLETE (hit {MAX_PASSES}-pass cap at "
+            f"{len(seen)} handles); NOT uploading, to avoid aging out the "
+            f"un-scrolled tail. Re-run will retry.",
+            file=sys.stderr,
+        )
+        return 3
+    if args.dry_run:
+        print(f"[harvest] dry-run: would upload {len(seen)} handles for @{handle}.")
+        return 0
+    posted = upload(handle, seen)
+    print(f"[harvest] uploaded {posted} followed handles for @{handle}.")
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/log_post.py CHANGED

@@ -455,6 +455,14 @@ def main():
                              "No live refresh, no extra API calls; whatever the "
                              "candidate row already had under *_t0 is what gets "
                              "recorded. Capped at 2 KB by the API.")
+    parser.add_argument("--thread-media", default=None,
+                        help="JSON array snapshot of the original thread's media "
+                             "([{\"url\":...,\"alt\":...,\"type\":\"image|video|gif|card\"}]) "
+                             "captured at draft time. Stored in posts.thread_media "
+                             "(JSONB) as the immutable record of what the thread "
+                             "visually showed when we replied. An empty array [] is "
+                             "valid (captured-none). Omitted/None leaves the column "
+                             "NULL (never captured). 2026-06-03 thread-media feature.")
     args = parser.parse_args()
     if args.mark_self_reply:
@@ -541,6 +549,21 @@ def main():
         body["length_arm"] = args.length_arm
     if args.thread_engagement:
         body["thread_engagement"] = args.thread_engagement
+    # Thread media snapshot (2026-06-03): the media of the thread we replied to,
+    # frozen onto posts.thread_media as an immutable audit record. Read from the
+    # candidate row by twitter_post_plan.py and forwarded here as a JSON array
+    # string. Parse defensively: a malformed value must NOT block the post, so on
+    # any parse error we skip the field (column stays NULL) rather than failing.
+    if args.thread_media is not None:
+        try:
+            parsed_media = json.loads(args.thread_media)
+            if isinstance(parsed_media, list):
+                body["thread_media"] = parsed_media
+        except (TypeError, ValueError) as e:
+            print(json.dumps({
+                "warning": "THREAD_MEDIA_PARSE_FAILED",
+                "message": f"could not parse --thread-media: {e}",
+            }), file=sys.stderr)
     # autoposter_version: stamped on every write so we can attribute
     # engagement back to the release of the autoposter code that produced
     # this row. None when package.json + env are both missing.

package/scripts/log_thread_media.py ADDED

@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Persist captured thread media on a twitter_candidates row.
+Deterministic, model-free companion to the main posting cycle (2026-06-03
+thread-media feature). The cycle pre-fetches the media of every candidate it is
+about to draft against (twitter_browser.py thread-media-batch), then calls this
+script once per candidate to persist the media into
+twitter_candidates.thread_media so the reply-writer prompt can "see" the
+image / video / GIF / link-card it is replying to, and the record survives
+independent of the model.
+Media shape: a JSON array of {url, alt, type} objects, type in
+image|video|gif|card. An empty array [] is valid and meaningful ("captured,
+none found", distinct from NULL = "never captured").
+Usage:
+    # Pass media JSON inline:
+    python3 scripts/log_thread_media.py --candidate-id 12345 \\
+        --media '[{"url":"https://pbs.twimg.com/...","alt":"Image","type":"image"}]'
+    # Or read the media JSON array from a file (handy for batch wiring):
+    python3 scripts/log_thread_media.py --candidate-id 12345 --media-file /tmp/m.json
+Output (JSON):
+    {"logged": true, "candidate_id": 12345, "media_count": 1}
+    {"error": "CANDIDATE_NOT_FOUND", ...}
+    {"error": "BAD_MEDIA_JSON", ...}
+"""
+import argparse
+import json
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from http_api import api_patch
+def _load_media(args):
+    """Return a parsed media list (or raise ValueError) from --media/--media-file."""
+    raw = None
+    if args.media_file:
+        with open(args.media_file) as f:
+            raw = f.read()
+    elif args.media is not None:
+        raw = args.media
+    else:
+        raise ValueError("one of --media or --media-file is required")
+    raw = (raw or "").strip()
+    if raw == "":
+        # Treat an empty arg as "captured, none found" -> [].
+        return []
+    parsed = json.loads(raw)
+    if not isinstance(parsed, list):
+        raise ValueError("media must be a JSON array")
+    return parsed
+def main():
+    p = argparse.ArgumentParser()
+    p.add_argument("--candidate-id", type=int, required=True)
+    p.add_argument(
+        "--media", default=None,
+        help='JSON array of {url,alt,type}. Empty/"" means captured-none ([]).',
+    )
+    p.add_argument(
+        "--media-file", default=None,
+        help="Path to a file containing the media JSON array (alternative to --media).",
+    )
+    args = p.parse_args()
+    try:
+        media = _load_media(args)
+    except Exception as e:
+        print(json.dumps({"error": "BAD_MEDIA_JSON", "detail": str(e)}))
+        sys.exit(1)
+    payload = {
+        "id": args.candidate_id,
+        "action": "set_media",
+        "thread_media": media,
+    }
+    resp = api_patch(
+        "/api/v1/twitter-candidates/by-id", payload,
+        ok_on_conflict=True, ok_on_404=True,
+    )
+    if (resp or {}).get("_not_found"):
+        print(json.dumps({"error": "CANDIDATE_NOT_FOUND", "candidate_id": args.candidate_id}))
+        sys.exit(1)
+    if not (resp or {}).get("ok"):
+        print(json.dumps({
+            "error": "SET_MEDIA_FAILED",
+            "candidate_id": args.candidate_id,
+            "detail": (resp or {}).get("error"),
+        }))
+        sys.exit(1)
+    print(json.dumps({
+        "logged": True,
+        "candidate_id": args.candidate_id,
+        "media_count": len(media),
+    }))
+if __name__ == "__main__":
+    main()