npm - social-autoposter - Versions diffs - 1.6.2 → 1.6.3 - Mend

social-autoposter 1.6.2 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/bin/server.js +4 -0
package/package.json +1 -1
package/scripts/refresh_instagram_tokens.py +280 -0
package/scripts/scan_instagram_comments.py +481 -0
package/scripts/twitter_browser.py +14 -6
package/skill/refresh-instagram-tokens.sh +57 -0
package/skill/run-instagram-daily.sh +26 -0
package/skill/scan-instagram-replies.sh +61 -0

package/bin/server.js CHANGED Viewed

@@ -71,6 +71,8 @@ const JOBS = [
   { label: 'com.m13v.social-engage-reddit', name: 'Engage Reddit', type: 'Engage', platform: 'Reddit', script: 'engage-reddit.sh', logPrefix: 'engage-reddit-', plist: 'com.m13v.social-engage-reddit.plist' },
   { label: 'com.m13v.social-scan-moltbook-replies', name: 'MoltBook Scan', type: 'Other', platform: 'MoltBook', script: 'run-scan-moltbook-replies.sh', logPrefix: 'run-scan-moltbook-replies-', plist: 'com.m13v.social-scan-moltbook-replies.plist' },
   { label: 'com.m13v.social-scan-twitter-followups', name: 'Twitter Thread Follow-ups', type: 'Other', platform: 'Twitter', script: 'scan-twitter-followups.sh', logPrefix: 'scan-twitter-followups-', plist: 'com.m13v.social-scan-twitter-followups.plist' },
+  { label: 'com.m13v.social-scan-instagram-replies', name: 'Instagram Scan', type: 'Other', platform: 'Instagram', script: 'scan-instagram-replies.sh', logPrefix: 'scan-instagram-replies-', plist: 'com.m13v.social-scan-instagram-replies.plist' },
+  { label: 'com.m13v.social-refresh-instagram-tokens', name: 'IG Token Refresh', type: 'Other', platform: 'Instagram', script: 'refresh-instagram-tokens.sh', logPrefix: 'refresh-instagram-tokens-', plist: 'com.m13v.social-refresh-instagram-tokens.plist' },
   // DM Outreach row (initiate DMs to engaged users)
   { label: 'com.m13v.social-dm-outreach-reddit', name: 'DM Outreach Reddit', type: 'DM Outreach', platform: 'Reddit', script: 'dm-outreach-reddit.sh', logPrefix: 'dm-outreach-reddit-', plist: 'com.m13v.social-dm-outreach-reddit.plist' },
   { label: 'com.m13v.social-dm-outreach-twitter', name: 'DM Outreach Twitter', type: 'DM Outreach', platform: 'Twitter', script: 'dm-outreach-twitter.sh', logPrefix: 'dm-outreach-twitter-', plist: 'com.m13v.social-dm-outreach-twitter.plist' },
@@ -137,6 +139,8 @@ const REQUIRED_LOCKS = {
   'link-edit-github.sh':            ['link-edit-github'],
   'stats-reddit.sh':                ['reddit-browser'],
   'stats-instagram.sh':             ['instagram-poster'],
+  'scan-instagram-replies.sh':      ['instagram-poster'],
+  'refresh-instagram-tokens.sh':    ['instagram-poster'],
   'run-instagram-daily.sh':         ['instagram-poster'],
   'run-instagram-render.sh':        ['instagram-render'],
   'audit-reddit.sh':                ['reddit-browser', 'audit-reddit'],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "social-autoposter",
-  "version": "1.6.2",
+  "version": "1.6.3",
   "description": "Automated social posting pipeline for Reddit, X/Twitter, LinkedIn, and Moltbook. Install as a Claude Code agent skill.",
   "bin": {
     "social-autoposter": "bin/cli.js"

package/scripts/refresh_instagram_tokens.py ADDED Viewed

@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""Refresh Instagram Graph API long-lived tokens before they expire.
+Instagram long-lived user tokens are valid for ~60 days. Each call to the
+refresh_access_token endpoint extends the lifetime by another 60 days. The
+token must be at least 24 hours old to be refreshable, and Meta recommends
+refreshing well before expiry (we use a 14-day buffer).
+This script:
+  1. Iterates over every account in config.json -> instagram.accounts[].
+  2. Reads the current token + expiry from ~/instagram-graph-api/.env via the
+     ig_long_token_env / derived IG_TOKEN_EXPIRES_<suffix> key.
+  3. If the token expires within REFRESH_BUFFER_DAYS, calls the Graph API
+     refresh_access_token endpoint and rewrites the .env file in place
+     (atomic: write to tempfile then os.replace).
+  4. Prints a machine-readable SUMMARY line for the wrapper to log via
+     scripts/log_run.py.
+The .env file is the SINGLE source of truth — update_instagram_stats.py and
+scan_instagram_comments.py both read it on every invocation, so a refreshed
+token is picked up by the next pipeline run with no daemon-restart needed.
+Usage:
+    python3 scripts/refresh_instagram_tokens.py [--quiet] [--force] [--dry-run]
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+import tempfile
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
+GRAPH = "https://graph.instagram.com"
+SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"
+# Refresh tokens that expire within this many days. 14 days gives us 2 weeks
+# of headroom for cron failures, network outages, or attention lapses.
+REFRESH_BUFFER_DAYS = 14
+# Meta requires tokens to be at least 24h old before they can be refreshed.
+MIN_TOKEN_AGE_HOURS = 24
+def load_env_lines() -> list[str]:
+    """Read the Instagram .env file as a list of raw lines.
+    Comments and blank lines are kept so that individual keys can be rewritten
+    later without reformatting the rest of the file. Returns an empty list
+    when the file does not exist."""
+    env_file = IG_ENV_PATH
+    return env_file.read_text().splitlines() if env_file.exists() else []
+def env_dict_from_lines(lines: list[str]) -> dict[str, str]:
+    """Turn raw .env lines into a {key: value} mapping.
+    Blank lines, `#` comments and lines without `=` are ignored. Keys and
+    values are whitespace-stripped; a later duplicate key overrides an earlier
+    one."""
+    parsed = {}
+    for raw in lines:
+        entry = raw.strip()
+        if entry and not entry.startswith("#") and "=" in entry:
+            name, _, value = entry.partition("=")
+            parsed[name.strip()] = value.strip()
+    return parsed
+def write_env_atomic(lines: list[str]):
+    """Replace the .env file with `lines`, one entry per line.
+    The content is first written to a temp file in the same directory and then
+    moved over the target with os.replace, so an interrupted run cannot leave
+    a truncated .env behind."""
+    target_dir = IG_ENV_PATH.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    handle, scratch = tempfile.mkstemp(prefix=".env.tmp.", dir=str(target_dir))
+    try:
+        payload = "\n".join(lines)
+        # Terminate the file with a newline unless the last entry already has one.
+        if lines and not lines[-1].endswith("\n"):
+            payload += "\n"
+        with os.fdopen(handle, "w") as out:
+            out.write(payload)
+        # The file holds access tokens: keep it owner-readable only.
+        os.chmod(scratch, 0o600)
+        os.replace(scratch, IG_ENV_PATH)
+    finally:
+        # Only still present when something above failed before the replace.
+        if os.path.exists(scratch):
+            try:
+                os.unlink(scratch)
+            except OSError:
+                pass
+def expires_key_for(token_key: str) -> str:
+    """Map a token env-var name to its matching expiry env-var name.
+    IG_LONG_TOKEN -> IG_TOKEN_EXPIRES
+    IG_LONG_TOKEN_MATTHEWHEARTFUL -> IG_TOKEN_EXPIRES_MATTHEWHEARTFUL
+    IG_LONG_TOKEN_OMIDOTME -> IG_TOKEN_EXPIRES_OMIDOTME
+    Returns "" when `token_key` does not start with IG_LONG_TOKEN.
+    """
+    marker = "IG_LONG_TOKEN"
+    if token_key.startswith(marker):
+        suffix = token_key[len(marker):]
+        return "IG_TOKEN_EXPIRES" + suffix
+    return ""
+def parse_expires(s: str | None) -> datetime | None:
+    """Parse an ISO-8601 expiry string into a timezone-aware datetime.
+    Accepts both "2026-07-05T23:06:44Z" and "2026-07-05T23:06:44+00:00".
+    A value without any UTC offset is interpreted as UTC, so the result can
+    always be compared with an aware "now" (subtracting a naive datetime from
+    an aware one raises TypeError). Returns None for empty or unparseable
+    input."""
+    text = (s or "").strip()
+    if not text:
+        return None
+    if text.endswith("Z"):
+        # datetime.fromisoformat() before Python 3.11 rejects the "Z" suffix.
+        text = text[:-1] + "+00:00"
+    try:
+        parsed = datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return parsed
+def format_expires(dt: datetime) -> str:
+    """Render `dt` the way the .env file stores expiries: ISO-8601 in UTC
+    with a trailing Z."""
+    as_utc = dt.astimezone(timezone.utc)
+    return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
+def refresh_token(long_token: str) -> dict:
+    """Call the Graph API refresh_access_token endpoint for `long_token`.
+    Returns the decoded JSON response body. An HTTP error status is re-raised
+    as RefreshError carrying the status code and the first 300 characters of
+    the response body; other failures propagate unchanged."""
+    params = {
+        "grant_type": "ig_refresh_token",
+        "access_token": long_token,
+    }
+    endpoint = GRAPH + "/refresh_access_token?" + urllib.parse.urlencode(params)
+    try:
+        with urllib.request.urlopen(endpoint, timeout=30) as resp:
+            payload = resp.read()
+    except urllib.error.HTTPError as err:
+        detail = err.read().decode(errors="replace")
+        raise RefreshError(f"HTTP {err.code}: {detail[:300]}") from err
+    return json.loads(payload)
+class RefreshError(Exception):
+    """Raised by refresh_token() when the Graph API answers with an HTTP
+    error status."""
+    pass
+def update_line(lines: list[str], key: str, value: str) -> list[str]:
+    """Return a new list with every assignment of `key` replaced by
+    `<key>=<value>`; if the key isn't present, `<key>=<value>` is appended.
+    A line counts as an assignment of `key` using the same rules
+    env_dict_from_lines applies when reading: not blank, not a `#` comment,
+    and the whitespace-stripped text before the first `=` equals `key`. This
+    keeps reading and writing in agreement for lines such as `KEY = value`,
+    which would otherwise be left in place with a stale value while a
+    duplicate is appended."""
+    replacement = f"{key}={value}"
+    out = []
+    found = False
+    for line in lines:
+        stripped = line.strip()
+        name, sep, _ = stripped.partition("=")
+        if sep and not stripped.startswith("#") and name.strip() == key:
+            out.append(replacement)
+            found = True
+        else:
+            out.append(line)
+    if not found:
+        out.append(replacement)
+    return out
+def main():
+    """CLI entry point: refresh every configured Instagram token that is
+    close to expiry and rewrite the .env file once at the end.
+    Flags: --quiet suppresses per-account log lines, --force ignores the
+    expiry buffer, --dry-run reports what would be refreshed without calling
+    the API, --account restricts the run to a single username. Always prints
+    a final machine-readable SUMMARY line."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--quiet", action="store_true")
+    parser.add_argument("--force", action="store_true",
+                        help="Refresh every token regardless of expiry buffer")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Print what would be refreshed but don't call the API")
+    parser.add_argument("--account", default=None,
+                        help="Only refresh this account (default: all accounts)")
+    args = parser.parse_args()
+    def log(msg: str):
+        if not args.quiet:
+            print(msg)
+    if not IG_ENV_PATH.exists():
+        print(f"[refresh-ig-tokens] env file missing: {IG_ENV_PATH}")
+        print("SUMMARY:REFRESHED=0 SKIPPED=0 FAILED=0 ACCOUNTS=0")
+        sys.exit(0)
+    try:
+        cfg = json.loads(SA_CONFIG.read_text())
+    except FileNotFoundError:
+        cfg = {}
+    accounts_cfg = ((cfg.get("instagram") or {}).get("accounts") or [])
+    if args.account:
+        accounts_cfg = [a for a in accounts_cfg
+                        if a.get("username", "").lower() == args.account.lower()]
+    if not accounts_cfg:
+        print("[refresh-ig-tokens] no instagram accounts in config")
+        print("SUMMARY:REFRESHED=0 SKIPPED=0 FAILED=0 ACCOUNTS=0")
+        sys.exit(0)
+    lines = load_env_lines()
+    env = env_dict_from_lines(lines)
+    now = datetime.now(timezone.utc)
+    buffer_secs = REFRESH_BUFFER_DAYS * 86400
+    refreshed = 0
+    skipped = 0
+    failed = 0
+    for account_cfg in accounts_cfg:
+        username = account_cfg.get("username", "")
+        token_key = account_cfg.get("ig_long_token_env", "IG_LONG_TOKEN")
+        exp_key = expires_key_for(token_key)
+        if not exp_key:
+            log(f"[refresh-ig-tokens] {username}: cannot derive expires key from {token_key}; skipping")
+            skipped += 1
+            continue
+        cur_token = env.get(token_key)
+        if not cur_token:
+            log(f"[refresh-ig-tokens] {username}: no value for {token_key}; skipping")
+            skipped += 1
+            continue
+        cur_exp_raw = env.get(exp_key)
+        cur_exp = parse_expires(cur_exp_raw)
+        if cur_exp is not None and cur_exp.tzinfo is None:
+            # An expiry written without a UTC offset parses as a naive
+            # datetime; treat it as UTC so the subtraction against the aware
+            # `now` below cannot raise TypeError.
+            cur_exp = cur_exp.replace(tzinfo=timezone.utc)
+        if cur_exp is None and not args.force:
+            log(f"[refresh-ig-tokens] {username}: {exp_key} unparseable ({cur_exp_raw!r}); skipping (use --force to refresh anyway)")
+            skipped += 1
+            continue
+        if cur_exp is not None and not args.force:
+            remaining = (cur_exp - now).total_seconds()
+            if remaining > buffer_secs:
+                days_left = remaining / 86400
+                log(f"[refresh-ig-tokens] {username}: {days_left:.1f}d remaining (> {REFRESH_BUFFER_DAYS}d buffer); skipping")
+                skipped += 1
+                continue
+            if remaining < 0:
+                log(f"[refresh-ig-tokens] {username}: EXPIRED {(-remaining)/86400:.1f}d ago; attempting refresh anyway (Meta may reject)")
+        if args.dry_run:
+            # Dry-run candidates are counted under REFRESHED in the summary.
+            log(f"[refresh-ig-tokens] {username}: DRY-RUN would refresh {token_key} (exp {cur_exp_raw})")
+            refreshed += 1
+            continue
+        log(f"[refresh-ig-tokens] {username}: refreshing {token_key} (current exp {cur_exp_raw})")
+        try:
+            resp = refresh_token(cur_token)
+        except RefreshError as e:
+            log(f"[refresh-ig-tokens] {username}: REFRESH FAILED: {e}")
+            failed += 1
+            continue
+        except Exception as e:
+            log(f"[refresh-ig-tokens] {username}: REFRESH FAILED (unexpected): {e}")
+            failed += 1
+            continue
+        new_token = resp.get("access_token")
+        try:
+            # Coerce defensively: a non-numeric expires_in must count as a
+            # failed account rather than crash the loop, because tokens
+            # refreshed earlier in this run are only persisted after the loop.
+            expires_in = float(resp.get("expires_in"))
+        except (TypeError, ValueError):
+            expires_in = None
+        if not new_token or not expires_in:
+            log(f"[refresh-ig-tokens] {username}: refresh response missing fields: {resp}")
+            failed += 1
+            continue
+        new_exp_dt = datetime.fromtimestamp(time.time() + expires_in, tz=timezone.utc)
+        new_exp_str = format_expires(new_exp_dt)
+        lines = update_line(lines, token_key, new_token)
+        lines = update_line(lines, exp_key, new_exp_str)
+        env[token_key] = new_token
+        env[exp_key] = new_exp_str
+        log(f"[refresh-ig-tokens] {username}: OK, new expiry {new_exp_str} (~{expires_in/86400:.0f}d)")
+        refreshed += 1
+    if refreshed and not args.dry_run:
+        write_env_atomic(lines)
+        log(f"[refresh-ig-tokens] wrote {IG_ENV_PATH}")
+    print(
+        f"SUMMARY:REFRESHED={refreshed} SKIPPED={skipped} FAILED={failed} "
+        f"ACCOUNTS={len(accounts_cfg)}"
+    )
+if __name__ == "__main__":
+    main()

package/scripts/scan_instagram_comments.py ADDED Viewed

@@ -0,0 +1,481 @@
+#!/usr/bin/env python3
+"""Scan Instagram Graph API for new comments on our posts.
+For each enabled Instagram account in config.json (matt_diak, matthewheartful,
+omidotme), this:
+  1. Fetches /api/v1/posts?platform=instagram&our_account=<username> to build
+     a {shortcode: post_id} map of our DB-tracked IG posts.
+  2. Lists /me/media for the account (reuses the same Graph API call shape
+     update_instagram_stats.py uses).
+  3. For each media item present in our DB, calls /{media-id}/comments with
+     the replies sub-resource expanded.
+  4. Inserts each comment (and its nested replies) into the `replies` table
+     via reply_insert.insert_reply(). Server-side UNIQUE (platform,
+     their_comment_id) handles dedup; this script never SELECTs.
+Filters (mirrors scan_reddit_replies / scan_github_replies behavior):
+  - Skip comments whose author is in config.exclusions.authors
+  - Skip our own usernames (matt_diak / matthewheartful / omidotme) so we
+    don't try to reply to ourselves
+  - Skip backfill-old comments (older than BACKFILL_HOURS) with
+    status='skipped' / skip_reason='backfill_old'
+  - Skip too-short comments (< MIN_WORDS) with skip_reason='too_short'
+This is discovery-only. Posting replies back to Instagram lives in a separate
+engage script (Phase 2, not built yet); for now new rows surface in the
+dashboard replies feed as platform='instagram', status='pending'.
+Usage:
+    python3 scripts/scan_instagram_comments.py [--quiet] [--limit N]
+                                               [--account NAME]
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from http_api import api_get
+from reply_insert import insert_reply as _insert_reply
+IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
+GRAPH = "https://graph.instagram.com/v22.0"
+SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"
+# Discovery filters
+BACKFILL_HOURS = 48
+MIN_WORDS = 5
+# Per-Graph-API-call sleep so we stay polite under the 60/hr, 4800/day caps.
+# 3 accounts * ~10 media * (1 list + 1 comments call) = ~60 calls/cycle;
+# at 0.2s sleep that's ~12s per cycle, well inside 30-minute scheduling.
+GRAPH_SLEEP_SECS = 0.2
+# ── env / config ──────────────────────────────────────────────────────────────
+def load_ig_env() -> dict:
+    """Load KEY=VALUE pairs from the Instagram .env file.
+    Returns {} when the file is missing. Blank lines, `#` comments and lines
+    without `=` are skipped; keys and values are whitespace-stripped."""
+    if not IG_ENV_PATH.exists():
+        return {}
+    pairs = {}
+    for raw in IG_ENV_PATH.read_text().splitlines():
+        entry = raw.strip()
+        if entry and not entry.startswith("#") and "=" in entry:
+            name, _, value = entry.partition("=")
+            pairs[name.strip()] = value.strip()
+    return pairs
+def load_config() -> dict:
+    """Return the parsed config.json, or {} when the file does not exist."""
+    try:
+        raw = SA_CONFIG.read_text()
+    except FileNotFoundError:
+        return {}
+    return json.loads(raw)
+def resolve_account_creds(account_name: str, ig_env: dict, accounts_cfg: list):
+    """Look up (ig_user_id, long_token) for `account_name`.
+    The first config entry whose username matches case-insensitively names
+    the env keys to read (defaults IG_USER_ID / IG_LONG_TOKEN). When there is
+    no such entry, or either of its values is missing, the un-suffixed
+    IG_USER_ID / IG_LONG_TOKEN pair is returned instead; either element may
+    then be None."""
+    wanted = account_name.lower()
+    for acct in accounts_cfg:
+        if acct.get("username", "").lower() != wanted:
+            continue
+        user_id = ig_env.get(acct.get("ig_user_id_env", "IG_USER_ID"))
+        token = ig_env.get(acct.get("ig_long_token_env", "IG_LONG_TOKEN"))
+        if user_id and token:
+            return user_id, token
+        # Only the first matching entry is consulted.
+        break
+    return ig_env.get("IG_USER_ID"), ig_env.get("IG_LONG_TOKEN")
+# ── Graph API helpers ─────────────────────────────────────────────────────────
+def graph_get(path: str, token: str, **params):
+    """GET `{GRAPH}/{path}` with `params` plus the access token as the query
+    string and return the decoded JSON body. urllib errors propagate to the
+    caller."""
+    query = dict(params, access_token=token)
+    endpoint = GRAPH + "/" + path + "?" + urllib.parse.urlencode(query)
+    with urllib.request.urlopen(endpoint, timeout=20) as resp:
+        raw = resp.read()
+    return json.loads(raw)
+def shortcode_from_url(url: str | None) -> str | None:
+    """Pull the shortcode out of an Instagram permalink.
+    https://www.instagram.com/reel/DYkkj8RDo9P/ -> DYkkj8RDo9P
+    Handles /reel/, /p/ and /tv/ links; returns None for anything else,
+    including None or an empty string.
+    """
+    import re
+    found = re.search(r"/(?:reel|p|tv)/([A-Za-z0-9_-]+)", url or "")
+    if found is None:
+        return None
+    return found.group(1)
+def fetch_media_list(ig_user_id: str, token: str, max_pages: int = 5) -> list[dict]:
+    """Page through /{ig_user_id}/media and return the raw items list.
+    Requests id, media_type, media_product_type, permalink and timestamp, 100
+    items per page, and follows `paging.next` for at most `max_pages` pages,
+    sleeping GRAPH_SLEEP_SECS between pages. urllib errors propagate to the
+    caller."""
+    # Build the first URL with urlencode (as graph_get does) so the token is
+    # escaped instead of being interpolated raw into the query string.
+    query = urllib.parse.urlencode({
+        "fields": "id,media_type,media_product_type,permalink,timestamp",
+        "limit": 100,
+        "access_token": token,
+    })
+    url = f"{GRAPH}/{ig_user_id}/media?{query}"
+    out = []
+    pages = 0
+    while url and pages < max_pages:
+        with urllib.request.urlopen(url, timeout=20) as r:
+            data = json.loads(r.read())
+        out.extend(data.get("data", []) or [])
+        # Follow-up pages use the ready-made URL the API hands back.
+        url = (data.get("paging") or {}).get("next")
+        pages += 1
+        if url:
+            time.sleep(GRAPH_SLEEP_SECS)
+    return out
+def fetch_comments(media_id: str, token: str) -> list[dict]:
+    """Return top-level comments for a media item, each with a nested
+    `replies.data[]` list (Graph API caps the sub-list at 25 by default; that
+    matches typical traffic on our posts). Only the first page of up to 50
+    top-level comments is requested.
+    Raises GraphApiError for any request failure: an HTTP error status, a
+    connection error or timeout, or a body that is not valid JSON. Callers
+    therefore need to handle a single exception type per media item."""
+    fields = (
+        "id,username,text,timestamp,"
+        "replies{id,username,text,timestamp}"
+    )
+    try:
+        data = graph_get(f"{media_id}/comments", token, fields=fields, limit=50)
+    except urllib.error.HTTPError as e:
+        body = e.read().decode(errors="replace")[:200]
+        raise GraphApiError(f"HTTP {e.code} on /{media_id}/comments: {body}") from e
+    except (OSError, ValueError) as e:
+        # URLError and socket timeouts are OSError subclasses; a JSON decode
+        # failure is a ValueError. HTTPError is handled above because it is
+        # itself an OSError subclass.
+        raise GraphApiError(f"request to /{media_id}/comments failed: {e}") from e
+    return data.get("data", []) or []
+class GraphApiError(Exception):
+    """Raised by fetch_comments() when a /{media-id}/comments request fails
+    with an HTTP error."""
+    pass
+# ── posts lookup ──────────────────────────────────────────────────────────────
+def fetch_posts_map(account_username: str) -> dict[str, int]:
+    """Build a {shortcode: post_id} map of our tracked Instagram posts.
+    Requests up to 500 instagram posts from /api/v1/posts, keeps the rows
+    whose `our_account` equals `account_username` (case-insensitive), and
+    keys each one by the shortcode parsed from its `our_url`. Rows whose URL
+    yields no shortcode are dropped."""
+    resp = api_get(
+        "/api/v1/posts",
+        query={"platform": "instagram", "limit": 500},
+    )
+    rows = ((resp or {}).get("data") or {}).get("posts") or []
+    mapping: dict[str, int] = {}
+    for row in rows:
+        owner = (row.get("our_account") or "").lower()
+        if owner == account_username.lower():
+            shortcode = shortcode_from_url(row.get("our_url"))
+            if shortcode:
+                mapping[shortcode] = int(row.get("id"))
+    return mapping
+# ── parse / classify ──────────────────────────────────────────────────────────
+def parse_ts(ts: str | None) -> float:
+    """Convert an Instagram ISO-8601 timestamp to a unix timestamp.
+    Returns 0.0 when `ts` is empty or cannot be parsed; callers decide how to
+    treat that sentinel."""
+    if not ts:
+        return 0.0
+    try:
+        # Instagram sends the offset as +0000; add the colon so
+        # datetime.fromisoformat accepts it.
+        normalized = ts.replace("+0000", "+00:00")
+        parsed = datetime.fromisoformat(normalized)
+        return parsed.timestamp()
+    except Exception:
+        return 0.0
+def word_count(text: str | None) -> int:
+    """Count whitespace-separated words in `text`; None counts as empty."""
+    words = (text or "").split()
+    return len(words)
+def build_comment_url(shortcode: str, comment_id: str) -> str:
+    """Compose the URL stored for a comment from its post shortcode and
+    comment id."""
+    return "https://www.instagram.com/p/" + f"{shortcode}/c/{comment_id}/"
+# ── main scan loop ────────────────────────────────────────────────────────────
+class IgCommentScanner:
+    """Scans one Instagram account's media for comments and records them.
+    For every media item whose shortcode is present in `posts_map`, the
+    comments (and their nested replies) are classified as pending or skipped
+    and inserted through reply_insert.insert_reply. Counters accumulated on
+    the instance are exposed via summary()."""
+    def __init__(
+        self,
+        account_username: str,
+        ig_user_id: str,
+        token: str,
+        posts_map: dict[str, int],
+        excluded_authors: set[str],
+        quiet: bool = False,
+        media_limit: int | None = None,
+    ):
+        self.account = account_username
+        self.ig_user_id = ig_user_id
+        self.token = token
+        self.posts_map = posts_map
+        # Compared against author.lower(), so entries are expected lowercase.
+        self.excluded = excluded_authors
+        self.quiet = quiet
+        self.media_limit = media_limit
+        self.discovered = 0
+        self.backfill_skipped = 0
+        self.too_short_skipped = 0
+        self.excluded_skipped = 0
+        self.already_tracked = 0
+        self.media_checked = 0
+        self.media_no_post = 0
+        self.comments_seen = 0
+    def log(self, msg: str):
+        """Print `msg` unless the scanner was created with quiet=True."""
+        if not self.quiet:
+            print(msg)
+    def _insert(
+        self,
+        post_id: int,
+        comment_id: str,
+        author: str,
+        content: str,
+        comment_url: str,
+        depth: int,
+        status: str,
+        skip_reason: str | None = None,
+    ):
+        """Insert one comment row and bump the matching counter.
+        A None result from insert_reply is counted as already tracked;
+        otherwise the returned status selects the counter."""
+        # parent_reply_id is always None here, also for nested (depth=2)
+        # replies.
+        result = _insert_reply(
+            None, post_id, "instagram", comment_id, author, content, comment_url,
+            parent_reply_id=None, depth=depth, status=status, skip_reason=skip_reason,
+        )
+        if result is None:
+            self.already_tracked += 1
+            return
+        if result == "pending":
+            self.discovered += 1
+        elif result == "skipped":
+            if skip_reason == "backfill_old":
+                self.backfill_skipped += 1
+            elif skip_reason and skip_reason.startswith("too_short"):
+                self.too_short_skipped += 1
+            elif skip_reason == "excluded_author":
+                self.excluded_skipped += 1
+    def _classify_and_insert(
+        self,
+        post_id: int,
+        shortcode: str,
+        comment: dict,
+        backfill_cutoff: float,
+        depth: int,
+    ):
+        """Decide pending vs skipped for one comment dict and insert it.
+        Checks run in order: excluded author, older than `backfill_cutoff`,
+        fewer than MIN_WORDS words. Comments without an id are ignored."""
+        comment_id = str(comment.get("id") or "")
+        if not comment_id:
+            return
+        self.comments_seen += 1
+        author = comment.get("username") or ""
+        content = comment.get("text") or ""
+        comment_url = build_comment_url(shortcode, comment_id)
+        created = parse_ts(comment.get("timestamp"))
+        if author.lower() in self.excluded:
+            self._insert(
+                post_id, comment_id, author, content, comment_url, depth,
+                status="skipped", skip_reason="excluded_author",
+            )
+            return
+        # parse_ts yields 0.0 for a missing/unparseable timestamp; the
+        # truthiness test means such comments are NOT treated as backfill-old.
+        if created and created < backfill_cutoff:
+            self._insert(
+                post_id, comment_id, author, content, comment_url, depth,
+                status="skipped", skip_reason="backfill_old",
+            )
+            return
+        wc = word_count(content)
+        if wc < MIN_WORDS:
+            self._insert(
+                post_id, comment_id, author, content, comment_url, depth,
+                status="skipped", skip_reason=f"too_short ({wc} words)",
+            )
+            return
+        self._insert(
+            post_id, comment_id, author, content, comment_url, depth,
+            status="pending", skip_reason=None,
+        )
+    def scan(self):
+        """Fetch the account's media list and process comments on every item
+        that maps to a DB post. A failure on the media list ends the scan for
+        this account; a failure on one item's comments is logged and the scan
+        moves on to the next item."""
+        self.log(f"[scan-ig-comments] account={self.account} posts_in_db={len(self.posts_map)}")
+        if not self.posts_map:
+            self.log(f"[scan-ig-comments]   no instagram posts in DB for account={self.account}; nothing to scan")
+            return
+        try:
+            media_items = fetch_media_list(self.ig_user_id, self.token)
+        except urllib.error.HTTPError as e:
+            body = e.read().decode(errors="replace")[:200]
+            self.log(f"[scan-ig-comments] /me/media failed for {self.account}: HTTP {e.code} {body}")
+            return
+        except Exception as e:
+            self.log(f"[scan-ig-comments] /me/media failed for {self.account}: {e}")
+            return
+        self.log(f"[scan-ig-comments]   /me/media returned {len(media_items)} items")
+        backfill_cutoff = time.time() - BACKFILL_HOURS * 3600
+        checked = 0
+        for item in media_items:
+            # A media_limit of 0 or None means "no cap".
+            if self.media_limit and checked >= self.media_limit:
+                break
+            permalink = item.get("permalink")
+            shortcode = shortcode_from_url(permalink)
+            if not shortcode:
+                continue
+            post_id = self.posts_map.get(shortcode)
+            if not post_id:
+                self.media_no_post += 1
+                continue
+            media_id = item.get("id")
+            try:
+                comments = fetch_comments(media_id, self.token)
+            except (GraphApiError, OSError, ValueError) as e:
+                # OSError covers URLError and socket timeouts, ValueError a
+                # malformed JSON body. One bad item must not abort the scan.
+                self.log(f"[scan-ig-comments]   media={media_id} shortcode={shortcode} comments fetch failed: {e}")
+                # A failed request still went to the API, so pause as usual.
+                time.sleep(GRAPH_SLEEP_SECS)
+                continue
+            self.media_checked += 1
+            checked += 1
+            self.log(
+                f"[scan-ig-comments]   media={media_id} shortcode={shortcode} "
+                f"top_level_comments={len(comments)}"
+            )
+            for c in comments:
+                self._classify_and_insert(post_id, shortcode, c, backfill_cutoff, depth=1)
+                # Nested replies (replies to top-level comments). Author may
+                # be us (we already replied) or someone else (we got a reply
+                # to OUR reply). The excluded-author filter inside
+                # _classify_and_insert handles the first case.
+                replies = ((c.get("replies") or {}).get("data") or [])
+                for r in replies:
+                    self._classify_and_insert(post_id, shortcode, r, backfill_cutoff, depth=2)
+            time.sleep(GRAPH_SLEEP_SECS)
+    def summary(self) -> dict:
+        """Return the counters collected so far, keyed for reporting."""
+        return {
+            "account": self.account,
+            "media_checked": self.media_checked,
+            "media_no_post_in_db": self.media_no_post,
+            "comments_seen": self.comments_seen,
+            "discovered": self.discovered,
+            "backfill_skipped": self.backfill_skipped,
+            "too_short_skipped": self.too_short_skipped,
+            "excluded_skipped": self.excluded_skipped,
+            "already_tracked": self.already_tracked,
+        }
+def main():
+    """CLI entry point: scan comments for every selected Instagram account,
+    print a per-account line (unless --quiet) and a final SUMMARY line."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--quiet", action="store_true")
+    cli.add_argument("--limit", type=int, default=None,
+                     help="Cap media items inspected per account (debug)")
+    cli.add_argument("--account", default=None,
+                     help="Scan only this account (default: all enabled)")
+    args = cli.parse_args()
+    ig_env = load_ig_env()
+    cfg = load_config()
+    accounts_cfg = ((cfg.get("instagram") or {}).get("accounts") or [])
+    exclusions = cfg.get("exclusions") or {}
+    base_excluded = {name.lower() for name in (exclusions.get("authors") or [])}
+    # Always exclude our own usernames so we don't reply to ourselves.
+    own_usernames = {
+        acct.get("username", "").lower()
+        for acct in accounts_cfg
+        if acct.get("username")
+    }
+    if args.account:
+        # An explicit --account is scanned even when its `enabled` flag is off.
+        wanted = args.account.lower()
+        accounts_to_scan = [
+            acct for acct in accounts_cfg
+            if acct.get("username", "").lower() == wanted
+        ]
+    else:
+        accounts_to_scan = [acct for acct in accounts_cfg if acct.get("enabled", True)]
+    if not accounts_to_scan:
+        print("[scan-ig-comments] no instagram accounts to scan; exiting")
+        print("SUMMARY:DISCOVERED=0 SKIPPED=0 CHECKED=0 ALREADY=0 ACCOUNTS=0")
+        return
+    # Per-scanner counters that are summed across accounts.
+    counter_keys = (
+        "discovered",
+        "backfill_skipped",
+        "too_short_skipped",
+        "excluded_skipped",
+        "already_tracked",
+        "media_checked",
+        "comments_seen",
+    )
+    totals = dict.fromkeys(counter_keys, 0)
+    totals["accounts"] = 0
+    for account_cfg in accounts_to_scan:
+        username = account_cfg.get("username", "")
+        if not username:
+            continue
+        uid, tok = resolve_account_creds(username, ig_env, accounts_cfg)
+        if not (uid and tok):
+            print(f"[scan-ig-comments] missing creds for account={username}; skipping")
+            continue
+        excluded_for_account = base_excluded | own_usernames
+        try:
+            posts_map = fetch_posts_map(username)
+        except Exception as e:
+            print(f"[scan-ig-comments] posts lookup failed for {username}: {e}")
+            continue
+        scanner = IgCommentScanner(
+            username, uid, tok, posts_map, excluded_for_account,
+            quiet=args.quiet, media_limit=args.limit,
+        )
+        scanner.scan()
+        s = scanner.summary()
+        if not args.quiet:
+            print(
+                f"[scan-ig-comments] account={username} done: "
+                f"media_checked={s['media_checked']} comments_seen={s['comments_seen']} "
+                f"discovered={s['discovered']} "
+                f"backfill_skipped={s['backfill_skipped']} "
+                f"too_short_skipped={s['too_short_skipped']} "
+                f"excluded_skipped={s['excluded_skipped']} "
+                f"already_tracked={s['already_tracked']}"
+            )
+        for key in counter_keys:
+            totals[key] += s[key]
+        totals["accounts"] += 1
+    skipped_total = sum(
+        totals[key]
+        for key in ("backfill_skipped", "too_short_skipped", "excluded_skipped")
+    )
+    print(
+        f"SUMMARY:DISCOVERED={totals['discovered']} SKIPPED={skipped_total} "
+        f"CHECKED={totals['media_checked']} ALREADY={totals['already_tracked']} "
+        f"ACCOUNTS={totals['accounts']}"
+    )
+if __name__ == "__main__":
+    main()

package/scripts/twitter_browser.py CHANGED Viewed

@@ -609,8 +609,14 @@ def reply_to_tweet(tweet_url, text, apply_campaigns=True):
             except Exception:
                 pass
-            page.goto(tweet_url, wait_until="domcontentloaded")
-            page.wait_for_timeout(5000)
+            try:
+                page.goto(tweet_url, wait_until="load", timeout=60000)
+            except Exception:
+                try:
+                    page.goto(tweet_url, wait_until="domcontentloaded", timeout=60000)
+                except Exception:
+                    pass
+            page.wait_for_timeout(15000)
             # Check if page exists
             page_text = page.text_content("main") or ""
@@ -620,18 +626,20 @@ def reply_to_tweet(tweet_url, text, apply_campaigns=True):
             # Snapshot our reply links before posting (to detect the new one)
             links_before = _collect_our_reply_links(page)
-            # Find the reply textbox
+            # Find the reply textbox. On slower egress (E2B sandbox VMs) x.com
+            # can need 20-30s to attach the React reply composer; do not lower
+            # these timeouts.
             reply_box = None
             try:
                 reply_box = page.get_by_role("textbox", name="Post text")
-                reply_box.wait_for(timeout=10000)
+                reply_box.wait_for(timeout=30000)
             except Exception:
                 # Scroll down to find the reply box
                 page.evaluate("window.scrollBy(0, 500)")
-                page.wait_for_timeout(2000)
+                page.wait_for_timeout(3000)
                 try:
                     reply_box = page.get_by_role("textbox", name="Post text")
-                    reply_box.wait_for(timeout=5000)
+                    reply_box.wait_for(timeout=15000)
                 except Exception:
                     return {"ok": False, "error": "reply_box_not_found"}

package/skill/refresh-instagram-tokens.sh ADDED Viewed

@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# refresh-instagram-tokens.sh — Refresh Instagram Graph API long-lived tokens
+# before they expire.
+#
+# IG long-lived tokens last ~60 days; this job runs daily and refreshes any
+# token within REFRESH_BUFFER_DAYS (default 14d) of expiry. The .env file at
+# ~/instagram-graph-api/.env is rewritten atomically on success.
+#
+# Lightweight (no lock needed — read+write to a file we own, no browser/MCP)
+# but we take instagram-poster anyway so a poster/stats/scan run that's mid-
+# flight can finish reading the existing token before we swap it.
+#
+# Logs: skill/logs/refresh-instagram-tokens-YYYY-MM-DD_HHMMSS.log
+set -uo pipefail  # pipefail: the python | tee pipeline below reports python's failure, not tee's success
+REPO_DIR="$HOME/social-autoposter"
+LOG_DIR="$REPO_DIR/skill/logs"
+mkdir -p "$LOG_DIR"
+LOG_FILE="$LOG_DIR/refresh-instagram-tokens-$(date +%Y-%m-%d_%H%M%S).log"
+log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
+log "=== refresh-instagram-tokens fire: $(date) ==="
+RUN_START=$(date +%s)
+# shellcheck source=lock.sh
+source "$REPO_DIR/skill/lock.sh"
+acquire_lock instagram-poster 30
+OUTPUT_FILE="/tmp/refresh-instagram-tokens-$$.out"
+if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/refresh_instagram_tokens.py" 2>>"$LOG_FILE" | tee -a "$LOG_FILE" >"$OUTPUT_FILE"; then
+    log "refresh_instagram_tokens.py exited non-zero"
+    REFRESHED=0; SKIPPED=0; FAILED=1; ACCOUNTS=0  # record the aborted run as one failure, not as an all-zero run
+else
+    SUMMARY=$(grep '^SUMMARY:' "$OUTPUT_FILE" | tail -1)
+    REFRESHED=$(echo "$SUMMARY" | sed -n 's/.*REFRESHED=\([0-9]*\).*/\1/p'); REFRESHED=${REFRESHED:-0}
+    SKIPPED=$(echo "$SUMMARY" | sed -n 's/.*SKIPPED=\([0-9]*\).*/\1/p'); SKIPPED=${SKIPPED:-0}
+    FAILED=$(echo "$SUMMARY" | sed -n 's/.*FAILED=\([0-9]*\).*/\1/p'); FAILED=${FAILED:-0}
+    ACCOUNTS=$(echo "$SUMMARY" | sed -n 's/.*ACCOUNTS=\([0-9]*\).*/\1/p'); ACCOUNTS=${ACCOUNTS:-0}
+fi
+rm -f "$OUTPUT_FILE"
+RUN_ELAPSED=$(( $(date +%s) - RUN_START ))
+log "logging run: refreshed=$REFRESHED skipped=$SKIPPED failed=$FAILED accounts=$ACCOUNTS elapsed=${RUN_ELAPSED}s"
+/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/log_run.py" \
+    --script "refresh_instagram_tokens" \
+    --posted "$REFRESHED" \
+    --skipped "$SKIPPED" \
+    --failed "$FAILED" \
+    --cost 0 \
+    --elapsed "$RUN_ELAPSED" >>"$LOG_FILE" 2>&1 || log "log_run.py failed"
+log "=== refresh-instagram-tokens done ==="
+exit 0  # always exit 0; failures are reported only through the log file and log_run.py

package/skill/run-instagram-daily.sh CHANGED Viewed

@@ -38,8 +38,28 @@ fi
 log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
+# Run accounting for dashboard Job History (Post Threads · Instagram).
+# Each exit site updates POSTED_CT / SKIPPED_CT / FAILED_CT; the EXIT trap
+# always emits one log_run.py line so the run shows up under
+# thread_instagram, matching how thread_twitter / thread_reddit log.
+RUN_START_EPOCH=$(date +%s)
+POSTED_CT=0
+SKIPPED_CT=0
+FAILED_CT=0
 cleanup() {
+  local rc=$?  # exit status at trap entry; read first, before any command below overwrites $?
   rm -f "$PICK_FILE"
+  if [ "$POSTED_CT" -eq 0 ] && [ "$SKIPPED_CT" -eq 0 ] && [ "$FAILED_CT" -eq 0 ]; then  # no exit site recorded an outcome
+    if [ "$rc" -eq 0 ]; then SKIPPED_CT=1; else FAILED_CT=1; fi  # so infer one from the exit status
+  fi
+  local elapsed=$(( $(date +%s) - RUN_START_EPOCH ))
+  local cost
+  cost=$(/usr/bin/python3 "$REPO_DIR/scripts/get_run_cost.py" --since "$RUN_START_EPOCH" --scripts "run-instagram-daily" 2>/dev/null || echo "0.0000")  # "0.0000" is echoed when get_run_cost.py exits non-zero
+  /usr/bin/python3 "$REPO_DIR/scripts/log_run.py" \
+      --script "thread_instagram" \
+      --posted "$POSTED_CT" --skipped "$SKIPPED_CT" --failed "$FAILED_CT" \
+      --cost "$cost" --elapsed "$elapsed" >/dev/null 2>&1 || true  # best-effort; NOTE(review): cleanup is also trapped on INT/TERM/HUP, so this may log more than once per run — confirm
 }
 trap cleanup EXIT INT TERM HUP
@@ -65,6 +85,7 @@ else
 fi
 if [ -z "$TARGET_ACCOUNT" ]; then
   log "pick_ig_account.py produced no account — exiting non-zero"
+  FAILED_CT=1
   exit 1
 fi
 log "picker chose account: $TARGET_ACCOUNT"
@@ -76,9 +97,11 @@ if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/ig_post_type_picker.py" \
   rc=$?
   if [ "$rc" -eq 2 ]; then
     log "queue exhausted for account=$TARGET_ACCOUNT (no drafts of either type) — exiting cleanly"
+    SKIPPED_CT=1
     exit 0
   fi
   log "picker failed rc=$rc — exiting non-zero"
+  FAILED_CT=1
   exit 1
 fi
@@ -93,6 +116,7 @@ log "picker reason: ${REASON}"
 if [ ! -f "$VIDEO_PATH" ]; then
   log "ERROR: picker pointed at $VIDEO_PATH but file missing on disk"
+  FAILED_CT=1
   exit 1
 fi
@@ -107,9 +131,11 @@ log "step 3: post_to_ig.py --file $(basename "$VIDEO_PATH") --post-type $POST_TY
 if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/mixer/post_to_ig.py" \
         --file "$VIDEO_PATH" --post-type "$POST_TYPE" --account "$TARGET_ACCOUNT" $DRY_FLAG >>"$LOG_FILE" 2>&1; then
   log "post_to_ig.py failed — exiting non-zero"
+  FAILED_CT=1
   exit 1
 fi
+POSTED_CT=1
 log "=== finished post-${POST_NUMBER} (${POST_TYPE}) on ${TARGET_ACCOUNT} successfully ==="
 # Step 4: mirror the new media_posts row into the cross-platform `posts` table

package/skill/scan-instagram-replies.sh ADDED Viewed

@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# scan-instagram-replies.sh — Discover new inbound comments on our Instagram
+# posts via the Graph API and insert them into the `replies` table.
+#
+# Mirrors the pattern used by stats-instagram.sh: API-only (no browser),
+# instagram-poster lock (so scan, stats, and post can't race for the same
+# token-bucket), then a SUMMARY line parsed below and forwarded to log_run.py
+# for the dashboard Jobs panel.
+#
+# Logs: skill/logs/scan-instagram-replies-YYYY-MM-DD_HHMMSS.log
+set -uo pipefail  # pipefail: the python | tee pipeline below reports python's failure, not tee's success
+REPO_DIR="$HOME/social-autoposter"
+LOG_DIR="$REPO_DIR/skill/logs"
+mkdir -p "$LOG_DIR"
+LOG_FILE="$LOG_DIR/scan-instagram-replies-$(date +%Y-%m-%d_%H%M%S).log"
+log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
+log "=== scan-instagram-replies fire: $(date) ==="
+RUN_START=$(date +%s)
+# instagram-poster lock — stats, scan, daily-post, and render all share this
+# lane so we don't race on the same /me/media token bucket.
+# shellcheck source=lock.sh
+source "$REPO_DIR/skill/lock.sh"
+acquire_lock instagram-poster 30
+OUTPUT_FILE="/tmp/scan-instagram-replies-$$.out"
+if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/scan_instagram_comments.py" 2>>"$LOG_FILE" | tee -a "$LOG_FILE" >"$OUTPUT_FILE"; then
+    log "scan_instagram_comments.py exited non-zero — logging run as failed"
+    DISCOVERED=0; SKIPPED=0; CHECKED=0; ALREADY=0; ACCOUNTS=0; FAILED=1
+else
+    FAILED=0; SUMMARY=$(grep '^SUMMARY:' "$OUTPUT_FILE" | tail -1)
+    DISCOVERED=$(echo "$SUMMARY" | sed -n 's/.*DISCOVERED=\([0-9]*\).*/\1/p'); DISCOVERED=${DISCOVERED:-0}
+    SKIPPED=$(echo "$SUMMARY" | sed -n 's/.*SKIPPED=\([0-9]*\).*/\1/p'); SKIPPED=${SKIPPED:-0}
+    CHECKED=$(echo "$SUMMARY" | sed -n 's/.*CHECKED=\([0-9]*\).*/\1/p'); CHECKED=${CHECKED:-0}
+    ALREADY=$(echo "$SUMMARY" | sed -n 's/.*ALREADY=\([0-9]*\).*/\1/p'); ALREADY=${ALREADY:-0}
+    ACCOUNTS=$(echo "$SUMMARY" | sed -n 's/.*ACCOUNTS=\([0-9]*\).*/\1/p'); ACCOUNTS=${ACCOUNTS:-0}
+fi
+rm -f "$OUTPUT_FILE"
+RUN_ELAPSED=$(( $(date +%s) - RUN_START ))
+log "logging run: discovered=$DISCOVERED skipped=$SKIPPED checked=$CHECKED already=$ALREADY accounts=$ACCOUNTS elapsed=${RUN_ELAPSED}s"
+# discovered -> posted (new pending rows are the productive output of a scan,
+# same convention scan_reddit_replies / scan_github_replies use).
+# skipped -> skipped. checked -> scanned. failed -> 1 only if the scanner exited non-zero.
+/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/log_run.py" \
+    --script "scan_instagram_comments" \
+    --posted "$DISCOVERED" \
+    --skipped "$SKIPPED" \
+    --failed "$FAILED" \
+    --scanned "$CHECKED" \
+    --cost 0 \
+    --elapsed "$RUN_ELAPSED" >>"$LOG_FILE" 2>&1 || log "log_run.py failed"
+log "=== scan-instagram-replies done ==="
+exit 0  # always exit 0; failures are reported only through the log file and log_run.py