social-autoposter 1.6.48 → 1.6.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/mcp/dist/index.js CHANGED
@@ -650,6 +650,18 @@ server.registerTool("draft_cycle", {
650
650
  batch_id: drafted.batchId,
651
651
  drafted: count,
652
652
  status: "awaiting_decision",
653
+ // Include the actual draft text here, not just a count. Some hosts
654
+ // (e.g. Claude Desktop) surface ONLY structuredContent to the model and
655
+ // drop the human-readable `content` table — which left the agent saying
656
+ // "drafted: 2" with no way to show the drafts. Carrying the drafts in
657
+ // structuredContent makes them available regardless of host behavior.
658
+ drafts: (plan.candidates || []).map((c, i) => ({
659
+ n: i + 1,
660
+ author: c.thread_author,
661
+ tweet_url: c.candidate_url,
662
+ reply_text: c.reply_text,
663
+ language: c.language,
664
+ })),
653
665
  },
654
666
  };
655
667
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "social-autoposter",
3
- "version": "1.6.48",
3
+ "version": "1.6.49",
4
4
  "description": "Automated social posting pipeline for Reddit, X/Twitter, LinkedIn, and Moltbook. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "social-autoposter": "bin/cli.js"
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env python3
2
+ """Deterministically capture + persist + format thread media for the prep step.
3
+
4
+ Companion to the main Twitter posting cycle (run-twitter-cycle.sh Phase 2b-prep,
5
+ 2026-06-03 thread-media feature). The prep prompt forbids the model from calling
6
+ twitter_browser.py, so the SHELL pre-fetches the media of every candidate the
7
+ model is about to draft against, in ONE cheap browser pass, then:
8
+
9
+ 1. persists each candidate's media into twitter_candidates.thread_media (so the
10
+ record survives independent of the model), and
11
+ 2. emits a "MEDIA CONTEXT" prompt block to stdout so the reply-writer can "see"
12
+ the image / video / GIF / link-card it is replying to instead of replying
13
+ text-blind.
14
+
15
+ Input: a TSV file, one `candidate_id<TAB>tweet_url` per line (built by the
16
+ CANDIDATE_BLOCK loop in run-twitter-cycle.sh).
17
+
18
+ Media shape per item: {url, alt, type}, type in image|video|gif|card. An empty
19
+ list [] is valid and meaningful ("captured, none found", distinct from NULL =
20
+ "never captured").
21
+
22
+ Usage:
23
+ python3 scripts/capture_thread_media.py --urls-file /tmp/urls.tsv \\
24
+ [--scroll 1] [--no-persist]
25
+
26
+ Output:
27
+ stdout -> the MEDIA CONTEXT prompt block (empty string if no media at all)
28
+ stderr -> per-candidate diagnostics + a final JSON summary line
29
+ """
30
+
31
+ import argparse
32
+ import json
33
+ import os
34
+ import sys
35
+
36
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
37
+ from http_api import api_patch # noqa: E402
38
+
39
+ # Imported lazily inside main() so --help works without a browser / playwright.
40
+
41
+
42
def _load_pairs(urls_file):
    """Parse a `cid<TAB>url` TSV file into a list of (candidate_id, url) string pairs.

    Lines without a tab (blank lines, bare URLs) are dropped because there is
    no candidate id to key them by; so are lines whose id or url is empty
    after trimming whitespace.
    """
    result = []
    with open(urls_file) as handle:
        for raw in handle:
            left, sep, right = raw.rstrip("\n").partition("\t")
            if not sep:
                # No tab at all: blank line or bare URL, nothing to key on.
                continue
            key, link = left.strip(), right.strip()
            if key and link:
                result.append((key, link))
    return result
60
+
61
+
62
def _persist(candidate_id, media):
    """Persist media onto twitter_candidates.thread_media via the set_media action.

    Args:
        candidate_id: candidate row id as read from the TSV (a string that
            should parse as an integer).
        media: list of media items to store; an empty list is sent as-is.

    Returns:
        (True, None) on success, otherwise (False, <error code or message>).
    """
    try:
        numeric_id = int(candidate_id)
    except (TypeError, ValueError):
        # The id comes from a shell-built TSV. A malformed one must be
        # reported like any other persist failure instead of raising and
        # aborting the caller before it has written its prompt block.
        return False, "BAD_CANDIDATE_ID"

    payload = {"id": numeric_id, "action": "set_media", "thread_media": media}
    resp = api_patch(
        "/api/v1/twitter-candidates/by-id", payload,
        ok_on_conflict=True, ok_on_404=True,
    ) or {}
    if resp.get("_not_found"):
        return False, "CANDIDATE_NOT_FOUND"
    if not resp.get("ok"):
        return False, resp.get("error") or "SET_MEDIA_FAILED"
    return True, None
74
+
75
+
76
def _format_item(item):
    """Render one media item as a ' - <type>: "<alt>" (<url>)' prompt line.

    A missing or empty type falls back to "media"; a missing or empty alt is
    shown as the literal marker [no description].
    """
    kind = (item.get("type") or "media").strip()
    description = (item.get("alt") or "").strip()
    link = (item.get("url") or "").strip()
    if description:
        shown = f'"{description}"'
    else:
        shown = "[no description]"
    return f" - {kind}: {shown} ({link})"
83
+
84
+
85
def _build_block(captured):
    """Build the prompt block from a list of (candidate_id, media_list) pairs.

    Candidates with no media are left out. Returns "" when no candidate has
    any media, otherwise the fixed header followed by one section per
    candidate.
    """
    sections = [
        f"Candidate {cid}:\n" + "\n".join(_format_item(entry) for entry in media)
        for cid, media in captured
        if media
    ]
    if not sections:
        return ""
    header = (
        "## MEDIA IN THESE THREADS\n"
        "Some candidate threads contain images, videos, GIFs, or link-cards. "
        "This is part of the content you are replying to: react to what the tweet "
        "VISUALLY shows, not just its text. A candidate NOT listed here had no "
        "media (or capture was skipped); reply to its text as usual. Descriptions "
        "marked [no description] mean the media had no alt-text, so infer from the "
        "thread text and the media type."
    )
    body = "\n".join(sections)
    return f"{header}\n\n{body}\n"
105
+
106
+
107
def main():
    """Scrape media for every candidate in --urls-file, persist it, print the prompt block.

    stdout receives only the prompt block (nothing when no candidate has
    media); per-candidate diagnostics and a final JSON summary line go to
    stderr. A scrape failure or a persist failure is logged and does not
    raise, so the calling shell can continue.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--urls-file", required=True,
                   help="TSV: one candidate_id<TAB>tweet_url per line.")
    p.add_argument("--scroll", type=int, default=1,
                   help="scroll_count passed to the batch scraper (default 1).")
    p.add_argument("--no-persist", action="store_true",
                   help="Skip writing thread_media to the DB (format only).")
    args = p.parse_args()

    pairs = _load_pairs(args.urls_file)
    if not pairs:
        # Nothing to do; emit empty block, exit clean so the shell continues.
        print("", end="")
        print(json.dumps({"captured": 0, "persisted": 0, "with_media": 0}), file=sys.stderr)
        return

    # Lazy import so an empty/short-circuit run never pays the playwright cost.
    from twitter_browser import scrape_many_thread_media

    urls = [url for _cid, url in pairs]
    try:
        batch = scrape_many_thread_media(urls, scroll_count=args.scroll)
    except Exception as e:
        # Browser failure must NOT break the cycle: emit empty block, log, exit 0.
        print("", end="")
        print(json.dumps({"error": "SCRAPE_FAILED", "detail": str(e)}), file=sys.stderr)
        return

    # Map url -> media (results echo the input url verbatim as thread_url).
    by_url = {}
    for r in (batch or {}).get("results", []):
        by_url[r.get("thread_url")] = r.get("media") or []

    captured = []  # (cid, media) for ALL pairs (media may be [])
    persisted = 0
    with_media = 0
    for cid, url in pairs:
        media = by_url.get(url, [])
        captured.append((cid, media))
        if media:
            with_media += 1
        if args.no_persist:
            continue
        try:
            ok, err = _persist(cid, media)
        except Exception as e:
            # The scrape has already been paid for. One bad id or API error
            # must not abort the run before the prompt block reaches stdout,
            # so treat it as a persist failure for this candidate only.
            ok, err = False, f"PERSIST_EXCEPTION: {e}"
        if ok:
            persisted += 1
        else:
            print(f"[capture_thread_media] persist failed cid={cid}: {err}",
                  file=sys.stderr)

    block = _build_block(captured)
    # stdout = the prompt block ONLY (shell captures it verbatim).
    sys.stdout.write(block)
    print(json.dumps({
        "captured": len(captured),
        "persisted": persisted,
        "with_media": with_media,
        "urls_visited": (batch or {}).get("urls_visited", 0),
    }), file=sys.stderr)


if __name__ == "__main__":
    main()
@@ -117,6 +117,33 @@ def cmd_reply_counts() -> int:
117
117
  return 0
118
118
 
119
119
 
120
def _render_media_block(media) -> str:
    """Turn a list of {url, alt, type} dicts into a titled text block for the prompt.

    Returns "" when `media` is not a non-empty list, or when it contains no
    dict entries, so text-only comments add nothing to the embedded JSON.
    Non-dict entries are ignored.
    """
    if not (isinstance(media, list) and media):
        return ""
    rendered = []
    for entry in media:
        if isinstance(entry, dict):
            kind = (entry.get("type") or "media").strip()
            description = (entry.get("alt") or "").strip()
            link = (entry.get("url") or "").strip()
            shown = f'"{description}"' if description else "[no description]"
            rendered.append(f" - {kind}: {shown} ({link})")
    if not rendered:
        return ""
    heading = (
        "## Media in the comment you are replying to\n"
        "React to what it VISUALLY shows, not just the text. "
        "[no description] = no alt-text; infer from the comment + media type.\n"
    )
    return heading + "\n".join(rendered)
145
+
146
+
120
147
  def cmd_pending_data(batch_size: int) -> int:
121
148
  try:
122
149
  from account_resolver import resolve as _resolve_account # noqa: WPS433
@@ -204,6 +231,7 @@ def cmd_pending_data(batch_size: int) -> int:
204
231
  "is_our_original_post": int(r.get("is_our_original_post") or 0),
205
232
  "project_name": r.get("project_name"),
206
233
  "counterparty_history_block": history_block,
234
+ "their_media_block": _render_media_block(r.get("their_media")),
207
235
  })
208
236
  # json_agg(...) returns null when the array is empty; engage-twitter.sh's
209
237
  # downstream prompt-template expects an empty array instead, which is
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env python3
2
+ """Dedicated, isolated logging for the Twitter follow-gate.
3
+
4
+ The follow-gate in score_twitter_candidates.py drops candidate threads whose
5
+ author we already follow. Its `[follow_gate]` stderr markers land in the giant
6
+ mixed twitter-cycle log; this helper ALSO writes a clean, timestamped, greppable
7
+ record to skill/logs/follow-gate.log so you can `tail -f` exactly what the filter
8
+ loads and catches each cycle, without digging through 20MB of cycle output.
9
+
10
+ All functions are best-effort: they NEVER raise, so logging can never break the
11
+ fail-open gate. If the log can't be written, the gate proceeds silently.
12
+
13
+ Line formats (one CYCLE line per scoring run, one SKIP line per dropped author):
14
+ <iso8601> <our_account> CYCLE loaded=<N> source=<ok|404|error|unresolved> checked=<M> skipped=<K> batch=<id>
15
+ <iso8601> <our_account> SKIP @<handle> url=<url> batch=<id>
16
+
17
+ Read it with: tail -f ~/social-autoposter/skill/logs/follow-gate.log
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ from datetime import datetime, timezone
23
+
24
+ LOG_PATH = os.path.expanduser("~/social-autoposter/skill/logs/follow-gate.log")
25
+
26
+
27
def _now() -> str:
    """Return the current local time as `YYYY-MM-DDTHH:MM:SS<utc offset>`, or "?" on any error."""
    try:
        local_now = datetime.now(timezone.utc).astimezone()
        return local_now.strftime("%Y-%m-%dT%H:%M:%S%z")
    except Exception:
        # Logging helpers must never raise; fall back to a placeholder stamp.
        return "?"
32
+
33
+
34
def _append(line: str) -> None:
    """Append `line` (with exactly one trailing newline) to LOG_PATH, creating its directory.

    Every exception is swallowed on purpose: a logging failure must never
    break the fail-open gate that calls this.
    """
    try:
        log_dir = os.path.dirname(LOG_PATH)
        os.makedirs(log_dir, exist_ok=True)
        with open(LOG_PATH, "a") as sink:
            sink.write(line.rstrip("\n") + "\n")
    except Exception:
        return
42
+
43
+
44
def record_cycle(our_account, loaded, source, checked, skipped, batch_id=None) -> None:
    """Write the per-run CYCLE line: set size loaded, load source, candidates checked and skipped.

    A falsy account is logged as '(unresolved)' and a falsy batch id as '-'.
    """
    account = our_account or '(unresolved)'
    batch = batch_id or '-'
    entry = (
        f"{_now()} {account} CYCLE "
        f"loaded={loaded} source={source} checked={checked} "
        f"skipped={skipped} batch={batch}"
    )
    _append(entry)
52
+
53
+
54
def record_skip(our_account, handle, url, batch_id=None) -> None:
    """Write one SKIP line for a candidate dropped because we already follow its author.

    A falsy account is logged as '(unresolved)' and a falsy batch id as '-'.
    """
    account = our_account or '(unresolved)'
    batch = batch_id or '-'
    entry = f"{_now()} {account} SKIP @{handle} url={url} batch={batch}"
    _append(entry)
@@ -0,0 +1,237 @@
1
+ #!/usr/bin/env python3
2
+ """harvest_twitter_following.py — cache the list of accounts WE follow on X.
3
+
4
+ The Twitter reply pipeline (score_twitter_candidates.py) drops candidate threads
5
+ whose author is someone we already follow. fxtwitter can't supply that edge — it's
6
+ an unauthenticated public API with no concept of "us" — so the follow relationship
7
+ has to be read from our own logged-in session. This script scrapes
8
+ `x.com/<handle>/following` via the harness Chrome (CDP, port 9555, same browser the
9
+ cycle uses) and uploads the set to /api/v1/followed-accounts.
10
+
11
+ Read-only: ONE navigation + DOM reads + scrolls. No clicks, no posting, no
12
+ /voyager. Runs under the shared "twitter-browser" lock (held by the shell wrapper
13
+ skill/refresh-twitter-following.sh) so it never races a live cycle.
14
+
15
+ Completeness guard: we only upload when the scroll reached the end of the list
16
+ (the deduped set stopped growing for STABLE_PASSES passes). A partial scrape is
17
+ discarded, never uploaded — otherwise the un-scrolled tail would wrongly age out
18
+ of the server's freshness window.
19
+
20
+ Usage:
21
+ python3 scripts/harvest_twitter_following.py # scrape + upload
22
+ python3 scripts/harvest_twitter_following.py --dry-run # scrape + print, no upload
23
+ python3 scripts/harvest_twitter_following.py --out /tmp/following.json
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import json
29
+ import os
30
+ import sys
31
+ import time
32
+
33
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
34
+
35
# CDP endpoint of the browser to attach to; override with TWITTER_CDP_URL.
CDP_URL = os.getenv("TWITTER_CDP_URL", "http://127.0.0.1:9555").strip()
PLATFORM = "twitter"

# Scroll/scrape tuning. Each value can be overridden through the environment
# (useful on slow machines or for very large following lists).
STABLE_PASSES = int(os.getenv("FOLLOW_HARVEST_STABLE_PASSES", "5"))  # no-growth passes that mean "end of list"
MAX_PASSES = int(os.getenv("FOLLOW_HARVEST_MAX_PASSES", "800"))  # hard cap on scroll passes
PAUSE_MS = int(os.getenv("FOLLOW_HARVEST_PAUSE_MS", "900"))  # wait after each scroll
UPLOAD_CHUNK = int(os.getenv("FOLLOW_HARVEST_UPLOAD_CHUNK", "1000"))  # accounts per upload request
43
+
44
+ # Each row on the Following tab is a [data-testid="UserCell"]. The profile link
45
+ # href is exactly `/<screen_name>`; grab the first anchor matching that shape
46
+ # (X handles are 1-15 chars of [A-Za-z0-9_]) that isn't a reserved app route.
47
+ SCRAPE_JS = r"""
48
+ (() => {
49
+ const RESERVED = new Set(['home','explore','notifications','messages','i',
50
+ 'settings','search','compose','hashtag','intent','login','signup','tos',
51
+ 'privacy','about']);
52
+ const cells = Array.from(document.querySelectorAll('[data-testid="UserCell"]'));
53
+ const out = [];
54
+ for (const c of cells) {
55
+ let handle = null;
56
+ for (const a of c.querySelectorAll('a[href^="/"]')) {
57
+ const m = (a.getAttribute('href') || '').match(/^\/([A-Za-z0-9_]{1,15})$/);
58
+ if (m && !RESERVED.has(m[1].toLowerCase())) { handle = m[1]; break; }
59
+ }
60
+ if (!handle) continue;
61
+ let name = null;
62
+ const un = c.querySelector('[data-testid="User-Name"]');
63
+ if (un) {
64
+ // User-Name mashes "Display Name@handle…"; the display name is the text
65
+ // before the first '@'.
66
+ name = ((un.textContent || '').split('@')[0]).trim().slice(0, 120) || null;
67
+ }
68
+ out.push({ screen_name: handle, name });
69
+ }
70
+ return JSON.stringify(out);
71
+ })()
72
+ """
73
+
74
+
75
def _resolve_handle() -> str:
    """Return our Twitter handle (no '@', lowercased) from account_resolver.

    Falls back to the hard-coded handle "m13v_" when the resolver returns
    nothing, and also (with a stderr note) when importing or calling it raises.
    """
    try:
        import account_resolver
        resolved = account_resolver.resolve("twitter")
        if not resolved:
            return "m13v_"
        return resolved.lstrip("@").strip().lower()
    except Exception as e:
        print(f"[harvest] account_resolver failed ({e}); falling back to m13v_",
              file=sys.stderr)
        return "m13v_"
85
+
86
+
87
def _looks_logged_out(url: str) -> bool:
    """Return True when `url` contains a login or account-access path marker (case-insensitive)."""
    lowered = (url or "").lower()
    markers = ("/login", "i/flow/login", "/account/access")
    return any(marker in lowered for marker in markers)
90
+
91
+
92
def scrape_following(handle: str) -> tuple[dict, bool]:
    """Scroll `x.com/<handle>/following` and collect the followed handles.

    Args:
        handle: our own screen name; rows equal to it are never recorded.
            It is compared against lowercased scraped names, so pass it
            lowercased.

    Returns:
        (seen, complete). `seen` maps lowercased screen name -> display name
        ("" when none was scraped). `complete` is True only when
        STABLE_PASSES successful reads in a row (failed reads are ignored)
        found no new handle, i.e. the scroll appears to have reached the end
        of the list rather than hitting the MAX_PASSES cap.

    Raises:
        RuntimeError: no browser context is available, or the page URL looks
            like a login / account-access page after navigation.
    """
    from playwright.sync_api import sync_playwright

    seen: dict[str, str] = {}
    complete = False
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(CDP_URL)
        contexts = browser.contexts
        if not contexts:
            raise RuntimeError("no browser context on harness Chrome — is it logged in?")
        context = contexts[0]
        # Reuse an existing tab (tab hygiene); fall back to a fresh page.
        page = context.pages[0] if context.pages else context.new_page()

        url = f"https://x.com/{handle}/following"
        page.goto(url, wait_until="domcontentloaded", timeout=45000)
        page.wait_for_timeout(2500)

        if _looks_logged_out(page.url):
            raise RuntimeError(f"session looks logged out (url={page.url})")

        # Wait for at least one row to render before scrolling.
        try:
            page.wait_for_selector('[data-testid="UserCell"]', timeout=20000)
        except Exception:
            # No cells at all — empty list, protected, or a block page. Treat as
            # incomplete so we never upload an empty/partial set.
            print(f"[harvest] no UserCell rendered for @{handle} (url={page.url})",
                  file=sys.stderr)
            return seen, False

        last = 0
        stable = 0
        for i in range(MAX_PASSES):
            read_ok = True
            try:
                raw = page.evaluate(SCRAPE_JS)
                rows = json.loads(raw) if isinstance(raw, str) else (raw or [])
            except Exception as e:
                print(f"[harvest] evaluate failed on pass {i} ({e})", file=sys.stderr)
                rows = []
                read_ok = False
            for r in rows:
                sn = (r.get("screen_name") or "").strip().lower()
                if not sn or sn == handle:  # never list ourselves
                    continue
                if sn not in seen:
                    seen[sn] = r.get("name") or ""

            if not read_ok:
                # A pass that could not read the DOM says nothing about
                # whether the list has ended. Counting it as "no growth"
                # could mark a partial scrape complete, so leave the
                # stability counter untouched and just scroll on.
                pass
            elif len(seen) == last:
                stable += 1
                if stable >= STABLE_PASSES:
                    complete = True
                    break
            else:
                stable = 0
                last = len(seen)

            page.evaluate(
                "window.scrollBy(0, Math.round(document.documentElement.clientHeight * 0.85));"
            )
            page.wait_for_timeout(PAUSE_MS)

        # Disconnect the CDP client without closing the shared Chrome/tab.
        try:
            browser.close()
        except Exception:
            pass

    print(
        f"[harvest] @{handle}: collected {len(seen)} followed handles "
        f"(complete={complete}, passes_stable={stable}/{STABLE_PASSES})",
        file=sys.stderr,
    )
    return seen, complete
167
+
168
+
169
def upload(handle: str, seen: dict) -> int:
    """POST the scraped accounts to /api/v1/followed-accounts in UPLOAD_CHUNK-sized batches.

    Args:
        handle: our own account, sent as `our_account`.
        seen: mapping of followed handle -> display name.

    Returns:
        The number of accounts sent.

    NOTE(review): the return value of api_post is ignored, so the count
    reflects what was sent, not what the server accepted — confirm api_post
    raises on failure. Every batch is flagged `complete: True`; verify the
    server handles that correctly for multi-batch uploads.
    """
    from http_api import api_post

    accounts = [
        {"handle": screen_name, "name": display_name}
        for screen_name, display_name in seen.items()
    ]
    posted = 0
    for start in range(0, len(accounts), UPLOAD_CHUNK):
        batch = accounts[start:start + UPLOAD_CHUNK]
        body = {
            "platform": PLATFORM,
            "our_account": handle,
            "accounts": batch,
            "complete": True,
        }
        api_post("/api/v1/followed-accounts", body)
        posted += len(batch)
    return posted
187
+
188
+
189
def main() -> int:
    """CLI entry point: scrape the following list and, unless told otherwise, upload it.

    Returns the process exit code: 0 after an upload or a dry run, 1 when the
    scrape raised, 2 when nothing was scraped, 3 when the scrape was
    incomplete (nothing is uploaded in that case).
    """
    def _err(message: str) -> None:
        # Progress and diagnostics go to stderr.
        print(message, file=sys.stderr)

    cli = argparse.ArgumentParser()
    cli.add_argument("--dry-run", action="store_true",
                     help="Scrape and report but do not upload.")
    cli.add_argument("--out", help="Also write the scraped set to this JSON path.")
    cli.add_argument("--handle", help="Override the resolved posting handle.")
    opts = cli.parse_args()

    raw_handle = opts.handle or _resolve_handle()
    handle = raw_handle.lstrip("@").strip().lower()
    _err(f"[harvest] resolving following list for @{handle} via {CDP_URL}")

    try:
        followed, finished = scrape_following(handle)
    except Exception as exc:
        _err(f"[harvest] FAILED: {exc}")
        return 1

    if opts.out:
        # The snapshot is written even for empty or incomplete scrapes.
        snapshot = {"handle": handle, "complete": finished, "accounts": followed}
        try:
            with open(opts.out, "w") as fh:
                json.dump(snapshot, fh, indent=2)
            _err(f"[harvest] wrote scrape to {opts.out}")
        except OSError as exc:
            _err(f"[harvest] could not write {opts.out}: {exc}")

    if not followed:
        _err("[harvest] scraped 0 handles; nothing to upload.")
        return 2
    if not finished:
        _err(
            f"[harvest] scrape INCOMPLETE (hit {MAX_PASSES}-pass cap at "
            f"{len(followed)} handles); NOT uploading, to avoid aging out the "
            f"un-scrolled tail. Re-run will retry."
        )
        return 3
    if opts.dry_run:
        print(f"[harvest] dry-run: would upload {len(followed)} handles for @{handle}.")
        return 0

    sent = upload(handle, followed)
    print(f"[harvest] uploaded {sent} followed handles for @{handle}.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -455,6 +455,14 @@ def main():
455
455
  "No live refresh, no extra API calls; whatever the "
456
456
  "candidate row already had under *_t0 is what gets "
457
457
  "recorded. Capped at 2 KB by the API.")
458
+ parser.add_argument("--thread-media", default=None,
459
+ help="JSON array snapshot of the original thread's media "
460
+ "([{\"url\":...,\"alt\":...,\"type\":\"image|video|gif|card\"}]) "
461
+ "captured at draft time. Stored in posts.thread_media "
462
+ "(JSONB) as the immutable record of what the thread "
463
+ "visually showed when we replied. An empty array [] is "
464
+ "valid (captured-none). Omitted/None leaves the column "
465
+ "NULL (never captured). 2026-06-03 thread-media feature.")
458
466
  args = parser.parse_args()
459
467
 
460
468
  if args.mark_self_reply:
@@ -541,6 +549,21 @@ def main():
541
549
  body["length_arm"] = args.length_arm
542
550
  if args.thread_engagement:
543
551
  body["thread_engagement"] = args.thread_engagement
552
+ # Thread media snapshot (2026-06-03): the media of the thread we replied to,
553
+ # frozen onto posts.thread_media as an immutable audit record. Read from the
554
+ # candidate row by twitter_post_plan.py and forwarded here as a JSON array
555
+ # string. Parse defensively: a malformed value must NOT block the post, so on
556
+ # any parse error we skip the field (column stays NULL) rather than failing.
557
+ if args.thread_media is not None:
558
+ try:
559
+ parsed_media = json.loads(args.thread_media)
560
+ if isinstance(parsed_media, list):
561
+ body["thread_media"] = parsed_media
562
+ except (TypeError, ValueError) as e:
563
+ print(json.dumps({
564
+ "warning": "THREAD_MEDIA_PARSE_FAILED",
565
+ "message": f"could not parse --thread-media: {e}",
566
+ }), file=sys.stderr)
544
567
  # autoposter_version: stamped on every write so we can attribute
545
568
  # engagement back to the release of the autoposter code that produced
546
569
  # this row. None when package.json + env are both missing.
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env python3
2
+ """Persist captured thread media on a twitter_candidates row.
3
+
4
+ Deterministic, model-free companion to the main posting cycle (2026-06-03
5
+ thread-media feature). The cycle pre-fetches the media of every candidate it is
6
+ about to draft against (twitter_browser.py thread-media-batch), then calls this
7
+ script once per candidate to persist the media into
8
+ twitter_candidates.thread_media so the reply-writer prompt can "see" the
9
+ image / video / GIF / link-card it is replying to, and the record survives
10
+ independent of the model.
11
+
12
+ Media shape: a JSON array of {url, alt, type} objects, type in
13
+ image|video|gif|card. An empty array [] is valid and meaningful ("captured,
14
+ none found", distinct from NULL = "never captured").
15
+
16
+ Usage:
17
+ # Pass media JSON inline:
18
+ python3 scripts/log_thread_media.py --candidate-id 12345 \\
19
+ --media '[{"url":"https://pbs.twimg.com/...","alt":"Image","type":"image"}]'
20
+
21
+ # Or read the media JSON array from a file (handy for batch wiring):
22
+ python3 scripts/log_thread_media.py --candidate-id 12345 --media-file /tmp/m.json
23
+
24
+ Output (JSON):
25
+ {"logged": true, "candidate_id": 12345, "media_count": 1}
26
+ {"error": "CANDIDATE_NOT_FOUND", ...}
27
+ {"error": "BAD_MEDIA_JSON", ...}
28
+ """
29
+
30
+ import argparse
31
+ import json
32
+ import os
33
+ import sys
34
+
35
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
36
+ from http_api import api_patch
37
+
38
+
39
def _load_media(args):
    """Return the media list given by --media-file or --media.

    Args:
        args: parsed CLI namespace with `media_file` and `media` attributes.
            `media_file` wins when both are set.

    Returns:
        The parsed JSON array. Empty or whitespace-only input yields [],
        meaning "captured, none found".

    Raises:
        ValueError: neither option was given, the text is not valid JSON
            (json.JSONDecodeError is a ValueError subclass), or the JSON
            value is not an array.
        OSError: the media file cannot be opened or read.
    """
    if args.media_file:
        # JSON files are UTF-8 and alt-text is frequently non-ASCII, so do not
        # rely on the locale default. "utf-8-sig" also drops a leading BOM,
        # which json.loads would otherwise reject.
        with open(args.media_file, encoding="utf-8-sig") as f:
            raw = f.read()
    elif args.media is not None:
        raw = args.media
    else:
        raise ValueError("one of --media or --media-file is required")

    raw = (raw or "").strip()
    if not raw:
        # Treat an empty arg as "captured, none found" -> [].
        return []
    parsed = json.loads(raw)
    if not isinstance(parsed, list):
        raise ValueError("media must be a JSON array")
    return parsed
57
+
58
+
59
def main():
    """CLI entry point: store one candidate's media via the set_media action.

    Prints exactly one JSON object to stdout and exits with status 1 on any
    error (unparseable media, candidate not found, or a non-ok API response).
    """
    def _emit(obj):
        # Every outcome is reported as a single JSON line on stdout.
        print(json.dumps(obj))

    cli = argparse.ArgumentParser()
    cli.add_argument("--candidate-id", type=int, required=True)
    cli.add_argument(
        "--media", default=None,
        help='JSON array of {url,alt,type}. Empty/"" means captured-none ([]).',
    )
    cli.add_argument(
        "--media-file", default=None,
        help="Path to a file containing the media JSON array (alternative to --media).",
    )
    opts = cli.parse_args()
    candidate_id = opts.candidate_id

    try:
        media = _load_media(opts)
    except Exception as exc:
        _emit({"error": "BAD_MEDIA_JSON", "detail": str(exc)})
        sys.exit(1)

    request_body = {
        "id": candidate_id,
        "action": "set_media",
        "thread_media": media,
    }
    reply = api_patch(
        "/api/v1/twitter-candidates/by-id", request_body,
        ok_on_conflict=True, ok_on_404=True,
    ) or {}

    if reply.get("_not_found"):
        _emit({"error": "CANDIDATE_NOT_FOUND", "candidate_id": candidate_id})
        sys.exit(1)
    if not reply.get("ok"):
        _emit({
            "error": "SET_MEDIA_FAILED",
            "candidate_id": candidate_id,
            "detail": reply.get("error"),
        })
        sys.exit(1)

    _emit({
        "logged": True,
        "candidate_id": candidate_id,
        "media_count": len(media),
    })


if __name__ == "__main__":
    main()