social-autoposter 1.6.1 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/server.js CHANGED
@@ -49,6 +49,13 @@ const JOBS = [
49
49
  // Post Threads row (original threads/posts)
50
50
  { label: 'com.m13v.social-reddit-threads', name: 'Reddit Threads', type: 'Post Threads', platform: 'Reddit', script: 'run-reddit-threads.sh', logPrefix: 'run-reddit-threads-', plist: 'com.m13v.social-reddit-threads.plist' },
51
51
  { label: 'com.m13v.social-twitter-threads', name: 'Twitter Threads', type: 'Post Threads', platform: 'Twitter', script: 'run-twitter-threads.sh', logPrefix: 'run-twitter-threads-', plist: 'com.m13v.social-twitter-threads.plist' },
52
+ // Instagram per-account daily posters (5×/day each, FORCE_ACCOUNT pinned).
53
+ { label: 'com.m13v.social-instagram-daily-matt_diak', name: 'IG Daily (matt_diak)', type: 'Post Threads', platform: 'Instagram', script: 'run-instagram-daily.sh', logPrefix: 'instagram-daily-', plist: 'com.m13v.social-instagram-daily-matt_diak.plist' },
54
+ { label: 'com.m13v.social-instagram-daily-matthewheartful', name: 'IG Daily (matthewheartful)', type: 'Post Threads', platform: 'Instagram', script: 'run-instagram-daily.sh', logPrefix: 'instagram-daily-', plist: 'com.m13v.social-instagram-daily-matthewheartful.plist' },
55
+ // Instagram per-account render (upstream of daily-posters; produces the
56
+ // mp4 + caption draft that the daily-poster then uploads).
57
+ { label: 'com.m13v.social-instagram-render-matt_diak', name: 'IG Render (matt_diak)', type: 'Other', platform: 'Instagram', script: 'run-instagram-render.sh', logPrefix: 'instagram-render-', plist: 'com.m13v.social-instagram-render-matt_diak.plist' },
58
+ { label: 'com.m13v.social-instagram-render-matthewheartful', name: 'IG Render (matthewheartful)', type: 'Other', platform: 'Instagram', script: 'run-instagram-render.sh', logPrefix: 'instagram-render-', plist: 'com.m13v.social-instagram-render-matthewheartful.plist' },
52
59
  // Post Comments row (replies/comments on others' content)
53
60
  { label: 'com.m13v.social-reddit-search', name: 'Reddit', type: 'Post Comments', platform: 'Reddit', script: 'run-reddit-search.sh', logPrefix: 'run-reddit-search-', plist: 'com.m13v.social-reddit-search.plist' },
54
61
  { label: 'com.m13v.social-twitter-cycle', name: 'Twitter', type: 'Post Comments', platform: 'Twitter', script: 'run-twitter-cycle.sh', logPrefix: 'twitter-cycle-', plist: 'com.m13v.social-twitter-cycle.plist' },
@@ -64,6 +71,8 @@ const JOBS = [
64
71
  { label: 'com.m13v.social-engage-reddit', name: 'Engage Reddit', type: 'Engage', platform: 'Reddit', script: 'engage-reddit.sh', logPrefix: 'engage-reddit-', plist: 'com.m13v.social-engage-reddit.plist' },
65
72
  { label: 'com.m13v.social-scan-moltbook-replies', name: 'MoltBook Scan', type: 'Other', platform: 'MoltBook', script: 'run-scan-moltbook-replies.sh', logPrefix: 'run-scan-moltbook-replies-', plist: 'com.m13v.social-scan-moltbook-replies.plist' },
66
73
  { label: 'com.m13v.social-scan-twitter-followups', name: 'Twitter Thread Follow-ups', type: 'Other', platform: 'Twitter', script: 'scan-twitter-followups.sh', logPrefix: 'scan-twitter-followups-', plist: 'com.m13v.social-scan-twitter-followups.plist' },
74
+ { label: 'com.m13v.social-scan-instagram-replies', name: 'Instagram Scan', type: 'Other', platform: 'Instagram', script: 'scan-instagram-replies.sh', logPrefix: 'scan-instagram-replies-', plist: 'com.m13v.social-scan-instagram-replies.plist' },
75
+ { label: 'com.m13v.social-refresh-instagram-tokens', name: 'IG Token Refresh', type: 'Other', platform: 'Instagram', script: 'refresh-instagram-tokens.sh', logPrefix: 'refresh-instagram-tokens-', plist: 'com.m13v.social-refresh-instagram-tokens.plist' },
67
76
  // DM Outreach row (initiate DMs to engaged users)
68
77
  { label: 'com.m13v.social-dm-outreach-reddit', name: 'DM Outreach Reddit', type: 'DM Outreach', platform: 'Reddit', script: 'dm-outreach-reddit.sh', logPrefix: 'dm-outreach-reddit-', plist: 'com.m13v.social-dm-outreach-reddit.plist' },
69
78
  { label: 'com.m13v.social-dm-outreach-twitter', name: 'DM Outreach Twitter', type: 'DM Outreach', platform: 'Twitter', script: 'dm-outreach-twitter.sh', logPrefix: 'dm-outreach-twitter-', plist: 'com.m13v.social-dm-outreach-twitter.plist' },
@@ -130,6 +139,10 @@ const REQUIRED_LOCKS = {
130
139
  'link-edit-github.sh': ['link-edit-github'],
131
140
  'stats-reddit.sh': ['reddit-browser'],
132
141
  'stats-instagram.sh': ['instagram-poster'],
142
+ 'scan-instagram-replies.sh': ['instagram-poster'],
143
+ 'refresh-instagram-tokens.sh': ['instagram-poster'],
144
+ 'run-instagram-daily.sh': ['instagram-poster'],
145
+ 'run-instagram-render.sh': ['instagram-render'],
133
146
  'audit-reddit.sh': ['reddit-browser', 'audit-reddit'],
134
147
  'audit-twitter.sh': ['twitter-browser', 'audit-twitter'],
135
148
  'audit-linkedin.sh': ['linkedin-browser', 'audit-linkedin'],
@@ -5234,7 +5247,7 @@ async function handleApi(req, res) {
5234
5247
  const url = new URL(req.url, 'http://localhost');
5235
5248
  const windowHours = Math.max(1, Math.min(720, parseInt(url.searchParams.get('hours') || '24', 10) || 24));
5236
5249
  const rawProject = (url.searchParams.get('project') || '').trim();
5237
- const ALLOWED_COST_PLATFORMS = new Set(['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo', 'email']);
5250
+ const ALLOWED_COST_PLATFORMS = new Set(['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo', 'email', 'instagram']);
5238
5251
  let rawPlat = String(url.searchParams.get('platform') || '').toLowerCase().trim();
5239
5252
  if (rawPlat === 'x') rawPlat = 'twitter';
5240
5253
  const plat = ALLOWED_COST_PLATFORMS.has(rawPlat) ? rawPlat : '';
@@ -5376,7 +5389,7 @@ async function handleApi(req, res) {
5376
5389
  const project = (rawProject === '' || rawProject.toLowerCase() === 'all') ? '' : rawProject;
5377
5390
  const projectOk = project === '' || /^[A-Za-z0-9_\- ]{1,64}$/.test(project);
5378
5391
  if (!projectOk) return json(res, { error: 'invalid project' }, 400);
5379
- const ALLOWED_COST_PLATFORMS = new Set(['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo', 'email']);
5392
+ const ALLOWED_COST_PLATFORMS = new Set(['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo', 'email', 'instagram']);
5380
5393
  let rawPlat = String(url.searchParams.get('platform') || '').toLowerCase().trim();
5381
5394
  if (rawPlat === 'x') rawPlat = 'twitter';
5382
5395
  const plat = ALLOWED_COST_PLATFORMS.has(rawPlat) ? rawPlat : '';
@@ -6751,7 +6764,7 @@ async function handleApi(req, res) {
6751
6764
  const configuredProjects = Array.isArray(config.projects) ? config.projects : [];
6752
6765
  const weighted = configuredProjects.filter(p => (p.weight || 0) > 0);
6753
6766
  const totalWeight = weighted.reduce((a, p) => a + (p.weight || 0), 0) || 1;
6754
- const platforms = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github'];
6767
+ const platforms = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'instagram'];
6755
6768
  // Per-platform eligibility: a project is eligible to be picked for a
6756
6769
  // platform only if it has the data that platform's picker needs. Mirrors
6757
6770
  // scripts/pick_project.py and scripts/pick_thread_target.py. Projects
@@ -8093,6 +8106,7 @@ const HTML = `<!DOCTYPE html>
8093
8106
  <th>LinkedIn</th>
8094
8107
  <th>MoltBook</th>
8095
8108
  <th>GitHub</th>
8109
+ <th>Instagram</th>
8096
8110
  </tr>
8097
8111
  </thead>
8098
8112
  <tbody id="matrix-body"></tbody>
@@ -10574,8 +10588,8 @@ const EVENT_DESCRIPTIONS = {
10574
10588
  page_expired: 'SEO page deleted by the daily expire pipeline because it had zero clicks in the last 30 days. The on-disk source file was removed; Next.js now returns 404 for the URL. Logged for audit/revert in seo_expired_pages.',
10575
10589
  resurrected: 'Previously archived or unavailable item brought back into rotation (e.g., a removed post restored after reappearing).',
10576
10590
  };
10577
- const ACTIVITY_PLATFORMS = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo'];
10578
- const ACTIVITY_PLATFORM_LABELS = { reddit: 'Reddit', twitter: 'Twitter / X', linkedin: 'LinkedIn', moltbook: 'Moltbook', github: 'GitHub', seo: 'SEO' };
10591
+ const ACTIVITY_PLATFORMS = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'seo', 'instagram'];
10592
+ const ACTIVITY_PLATFORM_LABELS = { reddit: 'Reddit', twitter: 'Twitter / X', linkedin: 'LinkedIn', moltbook: 'Moltbook', github: 'GitHub', seo: 'SEO', instagram: 'Instagram' };
10579
10593
  const PROJECT_LABELS = { tenxats: '10xats' };
10580
10594
  const ACTIVITY_PROJECT_NONE = '(none)';
10581
10595
  const ACTIVITY_CAMPAIGN_ORGANIC = '(organic)';
@@ -16515,10 +16529,10 @@ async function loadDeployHealth() {
16515
16529
  // hours by platform against config.json weight targets. Each platform cell
16516
16530
  // shows the count plus that project's share of the platform's posts in
16517
16531
  // brackets, so operators can spot imbalance without a separate deficit field.
16518
- const PROJECT_STATUS_PLATFORMS = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github'];
16532
+ const PROJECT_STATUS_PLATFORMS = ['reddit', 'twitter', 'linkedin', 'moltbook', 'github', 'instagram'];
16519
16533
  const PROJECT_STATUS_PLATFORM_LABELS = {
16520
16534
  reddit: 'Reddit', twitter: 'Twitter', linkedin: 'LinkedIn',
16521
- moltbook: 'MoltBook', github: 'GitHub',
16535
+ moltbook: 'MoltBook', github: 'GitHub', instagram: 'Instagram',
16522
16536
  };
16523
16537
  let _projectStatusLoading = false;
16524
16538
  let _projectStatusData = null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "social-autoposter",
3
- "version": "1.6.1",
3
+ "version": "1.6.3",
4
4
  "description": "Automated social posting pipeline for Reddit, X/Twitter, LinkedIn, and Moltbook. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "social-autoposter": "bin/cli.js"
@@ -0,0 +1,280 @@
1
+ #!/usr/bin/env python3
2
+ """Refresh Instagram Graph API long-lived tokens before they expire.
3
+
4
+ Instagram long-lived user tokens are valid for ~60 days. Each call to the
5
+ refresh_access_token endpoint extends the lifetime by another 60 days. The
6
+ token must be at least 24 hours old to be refreshable, and Meta recommends
7
+ refreshing well before expiry (we use a 14-day buffer).
8
+
9
+ This script:
10
+ 1. Iterates over every account in config.json -> instagram.accounts[].
11
+ 2. Reads the current token + expiry from ~/instagram-graph-api/.env via the
12
+ ig_long_token_env / derived IG_TOKEN_EXPIRES_<suffix> key.
13
+ 3. If the token expires within REFRESH_BUFFER_DAYS, calls the Graph API
14
+ refresh_access_token endpoint and rewrites the .env file in place
15
+ (atomic: write to tempfile then os.replace).
16
+ 4. Prints a machine-readable SUMMARY line for the wrapper to log via
17
+ scripts/log_run.py.
18
+
19
+ The .env file is the SINGLE source of truth — update_instagram_stats.py and
20
+ scan_instagram_comments.py both read it on every invocation, so a refreshed
21
+ token is picked up by the next pipeline run with no daemon-restart needed.
22
+
23
+ Usage:
24
+ python3 scripts/refresh_instagram_tokens.py [--quiet] [--force] [--dry-run] [--account NAME]
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import argparse
30
+ import json
31
+ import os
32
+ import sys
33
+ import tempfile
34
+ import time
35
+ import urllib.error
36
+ import urllib.parse
37
+ import urllib.request
38
+ from datetime import datetime, timezone
39
+ from pathlib import Path
40
+
41
+ IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
42
+ GRAPH = "https://graph.instagram.com"
43
+ SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"
44
+
45
+ # Refresh tokens that expire within this many days. 14 days gives us 2 weeks
46
+ # of headroom for cron failures, network outages, or attention lapses.
47
+ REFRESH_BUFFER_DAYS = 14
48
+ # Meta requires tokens to be at least 24h old before they can be refreshed.
49
+ MIN_TOKEN_AGE_HOURS = 24
50
+
51
+
52
def load_env_lines() -> list[str]:
    """Read the Instagram .env file and hand back its raw lines.

    Comments and blank lines are kept verbatim so that individual keys can be
    rewritten later without reformatting the rest of the file.

    Returns:
        The file split into lines (no trailing newlines), or an empty list
        when the file does not exist.
    """
    return IG_ENV_PATH.read_text().splitlines() if IG_ENV_PATH.exists() else []
58
+
59
+
60
def env_dict_from_lines(lines: list[str]) -> dict[str, str]:
    """Turn raw .env lines into a ``{key: value}`` mapping.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Only the first ``=`` splits key from value; both sides are stripped of
    surrounding whitespace. When a key appears more than once the last
    occurrence wins.
    """
    parsed = {}
    for raw in lines:
        entry = raw.strip()
        is_assignment = bool(entry) and not entry.startswith("#") and "=" in entry
        if is_assignment:
            name, _, val = entry.partition("=")
            parsed[name.strip()] = val.strip()
    return parsed
69
+
70
+
71
def write_env_atomic(lines: list[str]):
    """Replace the .env file with the content of ``lines``.

    The content is first written to a temporary file in the same directory
    and then moved over the real file with ``os.replace``, so an interrupted
    write leaves the previous file in place rather than a truncated one.

    Args:
        lines: The complete new file content, one entry per line, without
            trailing newlines. A final newline is added when needed.
    """
    target_dir = IG_ENV_PATH.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(dir=str(target_dir), prefix=".env.tmp.")
    try:
        with os.fdopen(handle, "w") as out:
            payload = "\n".join(lines)
            needs_newline = bool(lines) and not lines[-1].endswith("\n")
            out.write((payload + "\n") if needs_newline else payload)
        # The file holds access tokens: keep it owner-readable only.
        os.chmod(scratch, 0o600)
        os.replace(scratch, IG_ENV_PATH)
    finally:
        # After a successful replace the scratch path no longer exists; this
        # only cleans up when something above failed.
        if os.path.exists(scratch):
            try:
                os.unlink(scratch)
            except OSError:
                pass
90
+
91
+
92
def expires_key_for(token_key: str) -> str:
    """Map a long-token env-var name to its matching expiry env-var name.

    Examples:
        IG_LONG_TOKEN                  -> IG_TOKEN_EXPIRES
        IG_LONG_TOKEN_MATTHEWHEARTFUL  -> IG_TOKEN_EXPIRES_MATTHEWHEARTFUL
        IG_LONG_TOKEN_OMIDOTME         -> IG_TOKEN_EXPIRES_OMIDOTME

    Returns:
        The derived name, or an empty string when ``token_key`` does not
        start with ``IG_LONG_TOKEN``.
    """
    token_prefix = "IG_LONG_TOKEN"
    if token_key.startswith(token_prefix):
        suffix = token_key[len(token_prefix):]
        return f"IG_TOKEN_EXPIRES{suffix}"
    return ""
102
+
103
+
104
+ def parse_expires(s: str | None) -> datetime | None:
105
+ if not s:
106
+ return None
107
+ s = s.strip()
108
+ if not s:
109
+ return None
110
+ # Accept both "2026-07-05T23:06:44Z" and "2026-07-05T23:06:44+00:00".
111
+ try:
112
+ if s.endswith("Z"):
113
+ s = s[:-1] + "+00:00"
114
+ return datetime.fromisoformat(s)
115
+ except ValueError:
116
+ return None
117
+
118
+
119
def format_expires(dt: datetime) -> str:
    """Render ``dt`` as ISO-8601 UTC with a trailing ``Z``.

    The value is converted to UTC first, then formatted to second precision,
    e.g. ``2026-07-05T23:06:44Z``.
    """
    as_utc = dt.astimezone(timezone.utc)
    return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
122
+
123
+
124
def refresh_token(long_token: str) -> dict:
    """Call the ``refresh_access_token`` endpoint for one long-lived token.

    Args:
        long_token: The current long-lived access token.

    Returns:
        The decoded JSON response body.

    Raises:
        RefreshError: When the endpoint answers with an HTTP error status.
            The message carries the status code and up to 300 characters of
            the response body. Other failures (connection errors, invalid
            JSON) propagate unchanged.
    """
    query = urllib.parse.urlencode({
        "grant_type": "ig_refresh_token",
        "access_token": long_token,
    })
    endpoint = f"{GRAPH}/refresh_access_token?{query}"
    try:
        with urllib.request.urlopen(endpoint, timeout=30) as reply:
            raw = reply.read()
    except urllib.error.HTTPError as err:
        detail = err.read().decode(errors="replace")
        raise RefreshError(f"HTTP {err.code}: {detail[:300]}") from err
    return json.loads(raw)
136
+
137
+
138
class RefreshError(Exception):
    """Raised when the token-refresh endpoint returns an HTTP error status."""
140
+
141
+
142
def update_line(lines: list[str], key: str, value: str) -> list[str]:
    """Return a copy of ``lines`` in which ``key`` is set to ``value``.

    A line counts as an assignment of ``key`` under the same rules used when
    the file is parsed: surrounding whitespace is ignored, ``#`` comment lines
    and lines without ``=`` are skipped, and the text before the first ``=``
    (stripped) is the key. This means ``KEY=old`` and ``KEY = old`` are both
    replaced, instead of the latter being left behind with a duplicate
    appended after it.

    Every matching line is rewritten as ``<key>=<value>``. If no line
    matches, ``<key>=<value>`` is appended at the end. The input list is not
    modified.
    """
    replacement = f"{key}={value}"
    out = []
    found = False
    for line in lines:
        stripped = line.strip()
        is_assignment = bool(stripped) and not stripped.startswith("#") and "=" in stripped
        if is_assignment and stripped.split("=", 1)[0].strip() == key:
            out.append(replacement)
            found = True
        else:
            out.append(line)
    if not found:
        out.append(replacement)
    return out
157
+
158
+
159
def main():
    """Refresh the Instagram long-lived tokens that are close to expiry.

    For each account under ``instagram.accounts`` in config.json (optionally
    narrowed by ``--account``), reads the token and its expiry from the .env
    file, and refreshes the token when the expiry lies within
    REFRESH_BUFFER_DAYS (or always, with ``--force``). Refreshed tokens and
    expiries are written back to the .env file, and a machine-readable
    ``SUMMARY:`` line is always printed last.

    Flags:
        --quiet    suppress per-account log lines
        --force    refresh regardless of the expiry buffer
        --dry-run  report what would be refreshed without calling the API
        --account  only process the account with this username

    NOTE: MIN_TOKEN_AGE_HOURS is not consulted here; a refresh that the API
    rejects is counted as FAILED.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument("--force", action="store_true",
                        help="Refresh every token regardless of expiry buffer")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print what would be refreshed but don't call the API")
    parser.add_argument("--account", default=None,
                        help="Only refresh this account (default: all accounts)")
    args = parser.parse_args()

    def log(msg: str):
        if not args.quiet:
            print(msg)

    if not IG_ENV_PATH.exists():
        print(f"[refresh-ig-tokens] env file missing: {IG_ENV_PATH}")
        print("SUMMARY:REFRESHED=0 SKIPPED=0 FAILED=0 ACCOUNTS=0")
        sys.exit(0)

    try:
        cfg = json.loads(SA_CONFIG.read_text())
    except FileNotFoundError:
        cfg = {}
    accounts_cfg = ((cfg.get("instagram") or {}).get("accounts") or [])

    if args.account:
        accounts_cfg = [a for a in accounts_cfg
                        if a.get("username", "").lower() == args.account.lower()]
    if not accounts_cfg:
        print("[refresh-ig-tokens] no instagram accounts in config")
        print("SUMMARY:REFRESHED=0 SKIPPED=0 FAILED=0 ACCOUNTS=0")
        sys.exit(0)

    lines = load_env_lines()
    env = env_dict_from_lines(lines)
    now = datetime.now(timezone.utc)
    buffer_secs = REFRESH_BUFFER_DAYS * 86400

    refreshed = 0
    skipped = 0
    failed = 0

    try:
        for account_cfg in accounts_cfg:
            username = account_cfg.get("username", "")
            token_key = account_cfg.get("ig_long_token_env", "IG_LONG_TOKEN")
            exp_key = expires_key_for(token_key)
            if not exp_key:
                log(f"[refresh-ig-tokens] {username}: cannot derive expires key from {token_key}; skipping")
                skipped += 1
                continue

            cur_token = env.get(token_key)
            if not cur_token:
                log(f"[refresh-ig-tokens] {username}: no value for {token_key}; skipping")
                skipped += 1
                continue

            cur_exp_raw = env.get(exp_key)
            cur_exp = parse_expires(cur_exp_raw)
            if cur_exp is None and not args.force:
                log(f"[refresh-ig-tokens] {username}: {exp_key} unparseable ({cur_exp_raw!r}); skipping (use --force to refresh anyway)")
                skipped += 1
                continue

            if cur_exp is not None and cur_exp.tzinfo is None:
                # An expiry written without a UTC offset parses as naive;
                # treat it as UTC so the subtraction below cannot raise.
                cur_exp = cur_exp.replace(tzinfo=timezone.utc)

            if cur_exp is not None and not args.force:
                remaining = (cur_exp - now).total_seconds()
                if remaining > buffer_secs:
                    days_left = remaining / 86400
                    log(f"[refresh-ig-tokens] {username}: {days_left:.1f}d remaining (> {REFRESH_BUFFER_DAYS}d buffer); skipping")
                    skipped += 1
                    continue
                if remaining < 0:
                    log(f"[refresh-ig-tokens] {username}: EXPIRED {(-remaining)/86400:.1f}d ago; attempting refresh anyway (Meta may reject)")

            if args.dry_run:
                log(f"[refresh-ig-tokens] {username}: DRY-RUN would refresh {token_key} (exp {cur_exp_raw})")
                refreshed += 1
                continue

            log(f"[refresh-ig-tokens] {username}: refreshing {token_key} (current exp {cur_exp_raw})")
            try:
                resp = refresh_token(cur_token)
            except RefreshError as e:
                log(f"[refresh-ig-tokens] {username}: REFRESH FAILED: {e}")
                failed += 1
                continue
            except Exception as e:
                log(f"[refresh-ig-tokens] {username}: REFRESH FAILED (unexpected): {e}")
                failed += 1
                continue

            # A JSON body that is not an object has no fields to read.
            is_mapping = isinstance(resp, dict)
            new_token = resp.get("access_token") if is_mapping else None
            expires_in = resp.get("expires_in") if is_mapping else None
            if not new_token or not expires_in:
                log(f"[refresh-ig-tokens] {username}: refresh response missing fields: {resp}")
                failed += 1
                continue

            try:
                lifetime_secs = float(expires_in)
            except (TypeError, ValueError):
                # A non-numeric lifetime must not abort the remaining accounts.
                log(f"[refresh-ig-tokens] {username}: refresh response has non-numeric expires_in: {expires_in!r}")
                failed += 1
                continue

            new_exp_dt = datetime.fromtimestamp(time.time() + lifetime_secs, tz=timezone.utc)
            new_exp_str = format_expires(new_exp_dt)

            lines = update_line(lines, token_key, new_token)
            lines = update_line(lines, exp_key, new_exp_str)
            env[token_key] = new_token
            env[exp_key] = new_exp_str

            log(f"[refresh-ig-tokens] {username}: OK, new expiry {new_exp_str} (~{lifetime_secs/86400:.0f}d)")
            refreshed += 1
    finally:
        # Persist whatever was refreshed even if a later account raised, so
        # tokens already obtained from the API are not discarded.
        if refreshed and not args.dry_run:
            write_env_atomic(lines)
            log(f"[refresh-ig-tokens] wrote {IG_ENV_PATH}")

    print(
        f"SUMMARY:REFRESHED={refreshed} SKIPPED={skipped} FAILED={failed} "
        f"ACCOUNTS={len(accounts_cfg)}"
    )
277
+
278
+
279
+ if __name__ == "__main__":
280
+ main()
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/env python3
2
+ """Scan Instagram Graph API for new comments on our posts.
3
+
4
+ For each enabled Instagram account in config.json (matt_diak, matthewheartful,
5
+ omidotme), this:
6
+
7
+ 1. Fetches /api/v1/posts?platform=instagram and keeps the rows whose our_account matches <username>, to build
8
+ a {shortcode: post_id} map of our DB-tracked IG posts.
9
+ 2. Lists /me/media for the account (reuses the same Graph API call shape
10
+ update_instagram_stats.py uses).
11
+ 3. For each media item present in our DB, calls /{media-id}/comments with
12
+ the replies sub-resource expanded.
13
+ 4. Inserts each comment (and its nested replies) into the `replies` table
14
+ via reply_insert.insert_reply(). Server-side UNIQUE (platform,
15
+ their_comment_id) handles dedup; this script never SELECTs.
16
+
17
+ Filters (mirrors scan_reddit_replies / scan_github_replies behavior):
18
+ - Skip comments whose author is in config.exclusions.authors
19
+ - Skip our own usernames (matt_diak / matthewheartful / omidotme) so we
20
+ don't try to reply to ourselves
21
+ - Skip backfill-old comments (older than BACKFILL_HOURS) with
22
+ status='skipped' / skip_reason='backfill_old'
23
+ - Skip too-short comments (< MIN_WORDS) with skip_reason='too_short'
24
+
25
+ This is discovery-only. Posting replies back to Instagram lives in a separate
26
+ engage script (Phase 2, not built yet); for now new rows surface in the
27
+ dashboard replies feed as platform='instagram', status='pending'.
28
+
29
+ Usage:
30
+ python3 scripts/scan_instagram_comments.py [--quiet] [--limit N]
31
+ [--account NAME]
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import json
38
+ import os
39
+ import sys
40
+ import time
41
+ import urllib.error
42
+ import urllib.parse
43
+ import urllib.request
44
+ from datetime import datetime, timezone
45
+ from pathlib import Path
46
+
47
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
48
+ from http_api import api_get
49
+ from reply_insert import insert_reply as _insert_reply
50
+
51
+
52
+ IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
53
+ GRAPH = "https://graph.instagram.com/v22.0"
54
+ SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"
55
+
56
+ # Discovery filters
57
+ BACKFILL_HOURS = 48
58
+ MIN_WORDS = 5
59
+ # Per-Graph-API-call sleep so we stay polite under the 60/hr, 4800/day caps.
60
+ # 3 accounts * ~10 media * (1 list + 1 comments call) = ~60 calls/cycle;
61
+ # at 0.2s sleep that's ~12s per cycle, well inside 30-minute scheduling.
62
+ GRAPH_SLEEP_SECS = 0.2
63
+
64
+
65
+ # ── env / config ──────────────────────────────────────────────────────────────
66
+
67
def load_ig_env() -> dict:
    """Read the Instagram .env file into a ``{key: value}`` dict.

    Blank lines, ``#`` comments and lines without ``=`` are skipped; keys and
    values are stripped of surrounding whitespace. Returns an empty dict when
    the file does not exist.
    """
    env = {}
    if IG_ENV_PATH.exists():
        for raw in IG_ENV_PATH.read_text().splitlines():
            entry = raw.strip()
            if entry and not entry.startswith("#") and "=" in entry:
                name, val = entry.split("=", 1)
                env[name.strip()] = val.strip()
    return env
78
+
79
+
80
def load_config() -> dict:
    """Load config.json, returning an empty dict when the file is absent.

    Invalid JSON is not handled here and propagates to the caller.
    """
    try:
        raw = SA_CONFIG.read_text()
    except FileNotFoundError:
        return {}
    return json.loads(raw)
85
+
86
+
87
def resolve_account_creds(account_name: str, ig_env: dict, accounts_cfg: list):
    """Look up the Graph API credentials for one account.

    The first entry of ``accounts_cfg`` whose ``username`` equals
    ``account_name`` (case-insensitively) names the env keys to read, via
    ``ig_user_id_env`` and ``ig_long_token_env``. If there is no such entry,
    or either of its values is missing from ``ig_env``, the default
    ``IG_USER_ID`` / ``IG_LONG_TOKEN`` pair is returned instead.

    Returns:
        ``(ig_user_id, long_token)``; either element may be ``None`` when the
        fallback keys are absent too.
    """
    for entry in accounts_cfg:
        if entry.get("username", "").lower() != account_name.lower():
            continue
        user_id = ig_env.get(entry.get("ig_user_id_env", "IG_USER_ID"))
        token = ig_env.get(entry.get("ig_long_token_env", "IG_LONG_TOKEN"))
        if user_id and token:
            return user_id, token
        # Only the first matching entry is considered.
        break
    return ig_env.get("IG_USER_ID"), ig_env.get("IG_LONG_TOKEN")
102
+
103
+
104
+ # ── Graph API helpers ─────────────────────────────────────────────────────────
105
+
106
def graph_get(path: str, token: str, **params):
    """GET ``{GRAPH}/{path}`` and return the decoded JSON body.

    Args:
        path: Endpoint path relative to the Graph base URL.
        token: Access token, sent as the ``access_token`` query parameter.
        **params: Further query parameters; they are URL-encoded.

    HTTP and network errors from ``urlopen`` propagate to the caller.
    """
    params["access_token"] = token
    query = urllib.parse.urlencode(params)
    with urllib.request.urlopen(f"{GRAPH}/{path}?{query}", timeout=20) as reply:
        raw = reply.read()
    return json.loads(raw)
111
+
112
+
113
+ def shortcode_from_url(url: str | None) -> str | None:
114
+ """Extract shortcode from an IG permalink.
115
+
116
+ https://www.instagram.com/reel/DYkkj8RDo9P/ -> DYkkj8RDo9P
117
+ """
118
+ import re
119
+ m = re.search(r"/(?:reel|p|tv)/([A-Za-z0-9_-]+)", url or "")
120
+ return m.group(1) if m else None
121
+
122
+
123
def fetch_media_list(ig_user_id: str, token: str, max_pages: int = 5) -> list[dict]:
    """Page through ``/{ig_user_id}/media`` and collect the raw items.

    Args:
        ig_user_id: Instagram user id whose media is listed.
        token: Access token for that user.
        max_pages: Upper bound on the number of pages fetched; items on
            further pages are not returned.

    Returns:
        The concatenated ``data`` arrays of the fetched pages. Each item is
        requested with ``id``, ``media_type``, ``media_product_type``,
        ``permalink`` and ``timestamp``.

    HTTP and network errors from ``urlopen`` propagate to the caller.
    """
    out = []
    # URL-encode the first request the same way graph_get() does, instead of
    # interpolating the token and field list into the URL unescaped.
    query = urllib.parse.urlencode({
        "fields": "id,media_type,media_product_type,permalink,timestamp",
        "limit": 100,
        "access_token": token,
    })
    url = f"{GRAPH}/{ig_user_id}/media?{query}"
    pages = 0
    while url and pages < max_pages:
        with urllib.request.urlopen(url, timeout=20) as r:
            data = json.loads(r.read())
        out.extend(data.get("data", []) or [])
        # Follow-up pages use the complete "next" URL from the response.
        url = (data.get("paging") or {}).get("next")
        pages += 1
        if url:
            time.sleep(GRAPH_SLEEP_SECS)
    return out
141
+
142
+
143
def fetch_comments(media_id: str, token: str) -> list[dict]:
    """Return the top-level comments of a media item.

    Each comment is requested with ``id``, ``username``, ``text``,
    ``timestamp`` and an expanded ``replies`` sub-resource carrying the same
    fields. Only a single page (``limit=50``) is fetched.

    Raises:
        GraphApiError: On any failure to obtain or decode the response: an
            HTTP error status, a network-level error or timeout, or a body
            that is not valid JSON. Wrapping all of these lets a caller that
            handles ``GraphApiError`` skip one media item and carry on.
    """
    fields = (
        "id,username,text,timestamp,"
        "replies{id,username,text,timestamp}"
    )
    try:
        data = graph_get(f"{media_id}/comments", token, fields=fields, limit=50)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        raise GraphApiError(f"HTTP {e.code} on /{media_id}/comments: {body}") from e
    except (OSError, ValueError) as e:
        # OSError covers URLError and socket timeouts; ValueError covers
        # JSON decode errors.
        raise GraphApiError(f"request failed on /{media_id}/comments: {e}") from e
    return data.get("data", []) or []
157
+
158
+
159
class GraphApiError(Exception):
    """Raised when a Graph API request for a media item's comments fails."""
161
+
162
+
163
+ # ── posts lookup ──────────────────────────────────────────────────────────────
164
+
165
def fetch_posts_map(account_username: str) -> dict[str, int]:
    """Build ``{shortcode: post_id}`` for one account's tracked Instagram posts.

    Requests ``/api/v1/posts`` with ``platform=instagram`` and ``limit=500``,
    then keeps the rows whose ``our_account`` equals ``account_username``
    (case-insensitively; the account filter is applied here, not by the
    request). The shortcode is taken from each row's ``our_url``.

    Rows without a recognizable shortcode, or without an integer-convertible
    ``id``, are skipped so that one malformed row cannot abort the lookup.
    """
    out: dict[str, int] = {}
    resp = api_get(
        "/api/v1/posts",
        query={"platform": "instagram", "limit": 500},
    )
    posts = ((resp or {}).get("data") or {}).get("posts") or []
    wanted = account_username.lower()
    for p in posts:
        if (p.get("our_account") or "").lower() != wanted:
            continue
        code = shortcode_from_url(p.get("our_url"))
        if not code:
            continue
        try:
            out[code] = int(p.get("id"))
        except (TypeError, ValueError):
            # Missing or non-numeric id: skip this row.
            continue
    return out
182
+
183
+
184
+ # ── parse / classify ──────────────────────────────────────────────────────────
185
+
186
+ def parse_ts(ts: str | None) -> float:
187
+ """Parse an IG ISO-8601 timestamp to a unix timestamp. Returns 0 on
188
+ failure (which counts as "old" for backfill purposes)."""
189
+ if not ts:
190
+ return 0.0
191
+ try:
192
+ # Instagram returns +0000 (no colon), strip and parse as UTC.
193
+ s = ts.replace("+0000", "+00:00")
194
+ return datetime.fromisoformat(s).timestamp()
195
+ except Exception:
196
+ return 0.0
197
+
198
+
199
+ def word_count(text: str | None) -> int:
200
+ return len((text or "").split())
201
+
202
+
203
def build_comment_url(shortcode: str, comment_id: str) -> str:
    """Compose the ``/p/<shortcode>/c/<comment_id>/`` URL for a comment."""
    return "https://www.instagram.com/p/{}/c/{}/".format(shortcode, comment_id)
205
+
206
+
207
+ # ── main scan loop ────────────────────────────────────────────────────────────
208
+
209
+ class IgCommentScanner:
210
+ def __init__(
211
+ self,
212
+ account_username: str,
213
+ ig_user_id: str,
214
+ token: str,
215
+ posts_map: dict[str, int],
216
+ excluded_authors: set[str],
217
+ quiet: bool = False,
218
+ media_limit: int | None = None,
219
+ ):
220
+ self.account = account_username
221
+ self.ig_user_id = ig_user_id
222
+ self.token = token
223
+ self.posts_map = posts_map
224
+ self.excluded = excluded_authors
225
+ self.quiet = quiet
226
+ self.media_limit = media_limit
227
+
228
+ self.discovered = 0
229
+ self.backfill_skipped = 0
230
+ self.too_short_skipped = 0
231
+ self.excluded_skipped = 0
232
+ self.already_tracked = 0
233
+ self.media_checked = 0
234
+ self.media_no_post = 0
235
+ self.comments_seen = 0
236
+
237
+ def log(self, msg: str):
238
+ if not self.quiet:
239
+ print(msg)
240
+
241
+ def _insert(
242
+ self,
243
+ post_id: int,
244
+ comment_id: str,
245
+ author: str,
246
+ content: str,
247
+ comment_url: str,
248
+ depth: int,
249
+ status: str,
250
+ skip_reason: str | None = None,
251
+ ):
252
+ result = _insert_reply(
253
+ None, post_id, "instagram", comment_id, author, content, comment_url,
254
+ parent_reply_id=None, depth=depth, status=status, skip_reason=skip_reason,
255
+ )
256
+ if result is None:
257
+ self.already_tracked += 1
258
+ return
259
+ if result == "pending":
260
+ self.discovered += 1
261
+ elif result == "skipped":
262
+ if skip_reason == "backfill_old":
263
+ self.backfill_skipped += 1
264
+ elif skip_reason and skip_reason.startswith("too_short"):
265
+ self.too_short_skipped += 1
266
+ elif skip_reason == "excluded_author":
267
+ self.excluded_skipped += 1
268
+
269
+ def _classify_and_insert(
270
+ self,
271
+ post_id: int,
272
+ shortcode: str,
273
+ comment: dict,
274
+ backfill_cutoff: float,
275
+ depth: int,
276
+ ):
277
+ comment_id = str(comment.get("id") or "")
278
+ if not comment_id:
279
+ return
280
+ self.comments_seen += 1
281
+ author = comment.get("username") or ""
282
+ content = comment.get("text") or ""
283
+ comment_url = build_comment_url(shortcode, comment_id)
284
+ created = parse_ts(comment.get("timestamp"))
285
+
286
+ if author.lower() in self.excluded:
287
+ self._insert(
288
+ post_id, comment_id, author, content, comment_url, depth,
289
+ status="skipped", skip_reason="excluded_author",
290
+ )
291
+ return
292
+
293
+ if created and created < backfill_cutoff:
294
+ self._insert(
295
+ post_id, comment_id, author, content, comment_url, depth,
296
+ status="skipped", skip_reason="backfill_old",
297
+ )
298
+ return
299
+
300
+ wc = word_count(content)
301
+ if wc < MIN_WORDS:
302
+ self._insert(
303
+ post_id, comment_id, author, content, comment_url, depth,
304
+ status="skipped", skip_reason=f"too_short ({wc} words)",
305
+ )
306
+ return
307
+
308
+ self._insert(
309
+ post_id, comment_id, author, content, comment_url, depth,
310
+ status="pending", skip_reason=None,
311
+ )
312
+
313
def scan(self):
    """Walk this account's media list and classify every comment found.

    Flow:
      1. Return immediately when ``self.posts_map`` is empty.
      2. Fetch the media list via ``fetch_media_list``; any failure is
         logged and ends the scan (nothing is raised to the caller).
      3. For each media item whose permalink shortcode is present in
         ``self.posts_map``, fetch its comments and hand every top-level
         comment (depth=1) and every nested reply (depth=2) to
         ``self._classify_and_insert``.

    ``self.media_limit`` (when truthy) caps how many media items have their
    comments fetched successfully; items skipped for a missing shortcode or
    a missing DB row do not count towards the cap.

    Returns None. Updates ``self.media_no_post`` and ``self.media_checked``.
    """
    self.log(f"[scan-ig-comments] account={self.account} posts_in_db={len(self.posts_map)}")
    if not self.posts_map:
        self.log(f"[scan-ig-comments] no instagram posts in DB for account={self.account}; nothing to scan")
        return

    try:
        media_items = fetch_media_list(self.ig_user_id, self.token)
    except urllib.error.HTTPError as e:
        # Keep only the first 200 chars of the error body in the log line.
        body = e.read().decode(errors="replace")[:200]
        self.log(f"[scan-ig-comments] /me/media failed for {self.account}: HTTP {e.code} {body}")
        return
    except Exception as e:
        self.log(f"[scan-ig-comments] /me/media failed for {self.account}: {e}")
        return

    self.log(f"[scan-ig-comments] /me/media returned {len(media_items)} items")
    backfill_cutoff = time.time() - BACKFILL_HOURS * 3600

    checked = 0
    for item in media_items:
        if self.media_limit and checked >= self.media_limit:
            break
        permalink = item.get("permalink")
        shortcode = shortcode_from_url(permalink)
        if not shortcode:
            continue
        post_id = self.posts_map.get(shortcode)
        if not post_id:
            # Media exists on Instagram but has no matching row in our DB.
            self.media_no_post += 1
            continue

        media_id = item.get("id")
        try:
            comments = fetch_comments(media_id, self.token)
        except GraphApiError as e:
            self.log(f"[scan-ig-comments] media={media_id} shortcode={shortcode} comments fetch failed: {e}")
            # A failed call still hit the Graph API, so throttle before the
            # next request exactly as on the success path; otherwise a run
            # of failures turns into a burst of back-to-back requests.
            time.sleep(GRAPH_SLEEP_SECS)
            continue

        self.media_checked += 1
        checked += 1
        self.log(
            f"[scan-ig-comments] media={media_id} shortcode={shortcode} "
            f"top_level_comments={len(comments)}"
        )

        for c in comments:
            self._classify_and_insert(post_id, shortcode, c, backfill_cutoff, depth=1)
            # Nested replies (replies to top-level comments). Author may
            # be us (we already replied) or someone else (we got a reply
            # to OUR reply). The excluded-author filter inside
            # _classify_and_insert handles the first case.
            replies = ((c.get("replies") or {}).get("data") or [])
            for r in replies:
                self._classify_and_insert(post_id, shortcode, r, backfill_cutoff, depth=2)

        # Pause between media items to space out Graph API requests.
        time.sleep(GRAPH_SLEEP_SECS)
370
+
371
def summary(self) -> dict:
    """Return this scanner's account name and counters as a plain dict.

    Keys are emitted in a fixed order; ``media_no_post_in_db`` is the only
    key whose name differs from the attribute it reads (``media_no_post``).
    """
    # (output key, attribute name) pairs, in output order.
    counter_fields = (
        ("media_checked", "media_checked"),
        ("media_no_post_in_db", "media_no_post"),
        ("comments_seen", "comments_seen"),
        ("discovered", "discovered"),
        ("backfill_skipped", "backfill_skipped"),
        ("too_short_skipped", "too_short_skipped"),
        ("excluded_skipped", "excluded_skipped"),
        ("already_tracked", "already_tracked"),
    )
    report = {"account": self.account}
    for key, attr in counter_fields:
        report[key] = getattr(self, attr)
    return report
383
+
384
+
385
def main():
    """CLI entry point: scan Instagram comments for the configured accounts.

    Flags:
      --quiet    passed through to each scanner; also suppresses the
                 per-account "done" line printed here.
      --limit    passed to each scanner as ``media_limit``.
      --account  scan only the account with this username (case-insensitive),
                 regardless of its ``enabled`` flag; otherwise every account
                 whose ``enabled`` is truthy or absent is scanned.

    Always finishes by printing a single ``SUMMARY:`` line with the totals
    across all scanned accounts.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--quiet", action="store_true")
    cli.add_argument(
        "--limit", type=int, default=None,
        help="Cap media items inspected per account (debug)",
    )
    cli.add_argument(
        "--account", default=None,
        help="Scan only this account (default: all enabled)",
    )
    args = cli.parse_args()

    ig_env = load_ig_env()
    cfg = load_config()
    accounts_cfg = (cfg.get("instagram") or {}).get("accounts") or []
    exclusion_cfg = cfg.get("exclusions") or {}
    base_excluded = {name.lower() for name in (exclusion_cfg.get("authors") or [])}
    # Always exclude our own usernames so we don't reply to ourselves.
    own_usernames = {
        acct.get("username", "").lower()
        for acct in accounts_cfg
        if acct.get("username")
    }

    if args.account:
        wanted = args.account.lower()
        accounts_to_scan = [
            acct for acct in accounts_cfg
            if acct.get("username", "").lower() == wanted
        ]
    else:
        accounts_to_scan = [acct for acct in accounts_cfg if acct.get("enabled", True)]

    if not accounts_to_scan:
        print("[scan-ig-comments] no instagram accounts to scan; exiting")
        print("SUMMARY:DISCOVERED=0 SKIPPED=0 CHECKED=0 ALREADY=0 ACCOUNTS=0")
        return

    # Counters copied from each scanner summary into the run-wide totals.
    summed_keys = (
        "discovered",
        "backfill_skipped",
        "too_short_skipped",
        "excluded_skipped",
        "already_tracked",
        "media_checked",
        "comments_seen",
    )
    # Order in which counters appear on the per-account "done" line.
    report_keys = (
        "media_checked",
        "comments_seen",
        "discovered",
        "backfill_skipped",
        "too_short_skipped",
        "excluded_skipped",
        "already_tracked",
    )
    totals = dict.fromkeys(summed_keys, 0)
    totals["accounts"] = 0

    for account_cfg in accounts_to_scan:
        username = account_cfg.get("username", "")
        if not username:
            continue

        uid, tok = resolve_account_creds(username, ig_env, accounts_cfg)
        if not (uid and tok):
            print(f"[scan-ig-comments] missing creds for account={username}; skipping")
            continue

        # Build a fresh set per account so scanners never share one object.
        excluded_for_account = base_excluded | own_usernames

        try:
            posts_map = fetch_posts_map(username)
        except Exception as e:
            print(f"[scan-ig-comments] posts lookup failed for {username}: {e}")
            continue

        scanner = IgCommentScanner(
            username, uid, tok, posts_map, excluded_for_account,
            quiet=args.quiet, media_limit=args.limit,
        )
        scanner.scan()
        stats = scanner.summary()

        if not args.quiet:
            counters = " ".join(f"{key}={stats[key]}" for key in report_keys)
            print(f"[scan-ig-comments] account={username} done: {counters}")

        for key in summed_keys:
            totals[key] += stats[key]
        totals["accounts"] += 1

    skipped_total = sum(
        totals[key]
        for key in ("backfill_skipped", "too_short_skipped", "excluded_skipped")
    )

    print(
        f"SUMMARY:DISCOVERED={totals['discovered']} SKIPPED={skipped_total} "
        f"CHECKED={totals['media_checked']} ALREADY={totals['already_tracked']} "
        f"ACCOUNTS={totals['accounts']}"
    )


if __name__ == "__main__":
    main()
@@ -609,8 +609,14 @@ def reply_to_tweet(tweet_url, text, apply_campaigns=True):
609
609
  except Exception:
610
610
  pass
611
611
 
612
- page.goto(tweet_url, wait_until="domcontentloaded")
613
- page.wait_for_timeout(5000)
612
+ try:
613
+ page.goto(tweet_url, wait_until="load", timeout=60000)
614
+ except Exception:
615
+ try:
616
+ page.goto(tweet_url, wait_until="domcontentloaded", timeout=60000)
617
+ except Exception:
618
+ pass
619
+ page.wait_for_timeout(15000)
614
620
 
615
621
  # Check if page exists
616
622
  page_text = page.text_content("main") or ""
@@ -620,18 +626,20 @@ def reply_to_tweet(tweet_url, text, apply_campaigns=True):
620
626
  # Snapshot our reply links before posting (to detect the new one)
621
627
  links_before = _collect_our_reply_links(page)
622
628
 
623
- # Find the reply textbox
629
+ # Find the reply textbox. On slower egress (E2B sandbox VMs) x.com
630
+ # can need 20-30s to attach the React reply composer; do not lower
631
+ # these timeouts.
624
632
  reply_box = None
625
633
  try:
626
634
  reply_box = page.get_by_role("textbox", name="Post text")
627
- reply_box.wait_for(timeout=10000)
635
+ reply_box.wait_for(timeout=30000)
628
636
  except Exception:
629
637
  # Scroll down to find the reply box
630
638
  page.evaluate("window.scrollBy(0, 500)")
631
- page.wait_for_timeout(2000)
639
+ page.wait_for_timeout(3000)
632
640
  try:
633
641
  reply_box = page.get_by_role("textbox", name="Post text")
634
- reply_box.wait_for(timeout=5000)
642
+ reply_box.wait_for(timeout=15000)
635
643
  except Exception:
636
644
  return {"ok": False, "error": "reply_box_not_found"}
637
645
 
@@ -0,0 +1,57 @@
1
#!/usr/bin/env bash
# refresh-instagram-tokens.sh — Refresh Instagram Graph API long-lived tokens
# before they expire.
#
# IG long-lived tokens last ~60 days; this job runs daily and refreshes any
# token within REFRESH_BUFFER_DAYS (default 14d) of expiry. The .env file at
# ~/instagram-graph-api/.env is rewritten atomically on success.
#
# Lightweight (no lock needed — read+write to a file we own, no browser/MCP)
# but we take instagram-poster anyway so a poster/stats/scan run that's mid-
# flight can finish reading the existing token before we swap it.
#
# A non-zero exit from refresh_instagram_tokens.py is always recorded as at
# least one failure in log_run.py, so a crashed refresh is distinguishable
# from a clean "nothing to refresh" run. This wrapper itself always exits 0.
#
# Logs: skill/logs/refresh-instagram-tokens-YYYY-MM-DD_HHMMSS.log

set -uo pipefail

REPO_DIR="$HOME/social-autoposter"
LOG_DIR="$REPO_DIR/skill/logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/refresh-instagram-tokens-$(date +%Y-%m-%d_%H%M%S).log"

log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }

# summary_field NAME — print the integer following "NAME=" in $SUMMARY, or 0
# when the field (or the whole SUMMARY line) is missing.
summary_field() {
  local value
  value=$(echo "$SUMMARY" | sed -n "s/.*$1=\([0-9]*\).*/\1/p")
  echo "${value:-0}"
}

log "=== refresh-instagram-tokens fire: $(date) ==="

RUN_START=$(date +%s)

# shellcheck source=lock.sh
source "$REPO_DIR/skill/lock.sh"
acquire_lock instagram-poster 30

OUTPUT_FILE="/tmp/refresh-instagram-tokens-$$.out"

# pipefail makes the pipeline status non-zero when the Python step fails.
REFRESH_RC=0
/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/refresh_instagram_tokens.py" 2>>"$LOG_FILE" \
  | tee -a "$LOG_FILE" >"$OUTPUT_FILE" || REFRESH_RC=$?

# Parse the SUMMARY line even after a failure: if the script reported counts
# before exiting non-zero, those counts are still worth recording.
SUMMARY=$(grep '^SUMMARY:' "$OUTPUT_FILE" 2>/dev/null | tail -1)
REFRESHED=$(summary_field REFRESHED)
SKIPPED=$(summary_field SKIPPED)
FAILED=$(summary_field FAILED)
ACCOUNTS=$(summary_field ACCOUNTS)
rm -f "$OUTPUT_FILE"

if [ "$REFRESH_RC" -ne 0 ]; then
  log "refresh_instagram_tokens.py exited non-zero"
  # Never record a failed run as failed=0.
  [ "$FAILED" -gt 0 ] || FAILED=1
fi

RUN_ELAPSED=$(( $(date +%s) - RUN_START ))

log "logging run: refreshed=$REFRESHED skipped=$SKIPPED failed=$FAILED accounts=$ACCOUNTS elapsed=${RUN_ELAPSED}s"

/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/log_run.py" \
  --script "refresh_instagram_tokens" \
  --posted "$REFRESHED" \
  --skipped "$SKIPPED" \
  --failed "$FAILED" \
  --cost 0 \
  --elapsed "$RUN_ELAPSED" >>"$LOG_FILE" 2>&1 || log "log_run.py failed"

log "=== refresh-instagram-tokens done ==="
exit 0
@@ -38,8 +38,28 @@ fi
38
38
 
39
39
  log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
40
40
 
41
+ # Run accounting for dashboard Job History (Post Threads · Instagram).
42
+ # Each exit site updates POSTED_CT / SKIPPED_CT / FAILED_CT; the EXIT trap
43
+ # always emits one log_run.py line so the run shows up under
44
+ # thread_instagram, matching how thread_twitter / thread_reddit log.
45
+ RUN_START_EPOCH=$(date +%s)
46
+ POSTED_CT=0
47
+ SKIPPED_CT=0
48
+ FAILED_CT=0
49
+
41
50
cleanup() {
  # Must be the first command: captures the status that triggered the trap
  # before anything else overwrites $?.
  local exit_status=$?
  local duration run_cost

  rm -f "$PICK_FILE"

  # No exit site recorded an outcome: derive one from the exit status so the
  # run is still logged as either skipped (rc 0) or failed (rc != 0).
  if [ "$POSTED_CT" -eq 0 ] && [ "$SKIPPED_CT" -eq 0 ] && [ "$FAILED_CT" -eq 0 ]; then
    case "$exit_status" in
      0) SKIPPED_CT=1 ;;
      *) FAILED_CT=1 ;;
    esac
  fi

  duration=$(( $(date +%s) - RUN_START_EPOCH ))

  # Cost lookup is best-effort; fall back to 0.0000 if the helper fails.
  # NOTE(review): other steps of this script invoke
  # /opt/homebrew/bin/python3.11 — confirm /usr/bin/python3 is intended here.
  run_cost=$(/usr/bin/python3 "$REPO_DIR/scripts/get_run_cost.py" --since "$RUN_START_EPOCH" --scripts "run-instagram-daily" 2>/dev/null || echo "0.0000")

  # Logging is best-effort too: output is discarded and failures are ignored.
  /usr/bin/python3 "$REPO_DIR/scripts/log_run.py" \
    --script "thread_instagram" \
    --posted "$POSTED_CT" --skipped "$SKIPPED_CT" --failed "$FAILED_CT" \
    --cost "$run_cost" --elapsed "$duration" >/dev/null 2>&1 || true
}
44
64
  trap cleanup EXIT INT TERM HUP
45
65
 
@@ -65,6 +85,7 @@ else
65
85
  fi
66
86
  if [ -z "$TARGET_ACCOUNT" ]; then
67
87
  log "pick_ig_account.py produced no account — exiting non-zero"
88
+ FAILED_CT=1
68
89
  exit 1
69
90
  fi
70
91
  log "picker chose account: $TARGET_ACCOUNT"
@@ -76,9 +97,11 @@ if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/ig_post_type_picker.py" \
76
97
  rc=$?
77
98
  if [ "$rc" -eq 2 ]; then
78
99
  log "queue exhausted for account=$TARGET_ACCOUNT (no drafts of either type) — exiting cleanly"
100
+ SKIPPED_CT=1
79
101
  exit 0
80
102
  fi
81
103
  log "picker failed rc=$rc — exiting non-zero"
104
+ FAILED_CT=1
82
105
  exit 1
83
106
  fi
84
107
 
@@ -93,6 +116,7 @@ log "picker reason: ${REASON}"
93
116
 
94
117
  if [ ! -f "$VIDEO_PATH" ]; then
95
118
  log "ERROR: picker pointed at $VIDEO_PATH but file missing on disk"
119
+ FAILED_CT=1
96
120
  exit 1
97
121
  fi
98
122
 
@@ -107,9 +131,11 @@ log "step 3: post_to_ig.py --file $(basename "$VIDEO_PATH") --post-type $POST_TY
107
131
  if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/mixer/post_to_ig.py" \
108
132
  --file "$VIDEO_PATH" --post-type "$POST_TYPE" --account "$TARGET_ACCOUNT" $DRY_FLAG >>"$LOG_FILE" 2>&1; then
109
133
  log "post_to_ig.py failed — exiting non-zero"
134
+ FAILED_CT=1
110
135
  exit 1
111
136
  fi
112
137
 
138
+ POSTED_CT=1
113
139
  log "=== finished post-${POST_NUMBER} (${POST_TYPE}) on ${TARGET_ACCOUNT} successfully ==="
114
140
 
115
141
  # Step 4: mirror the new media_posts row into the cross-platform `posts` table
@@ -1047,7 +1047,7 @@ export CLAUDE_SESSION_ID
1047
1047
 
1048
1048
  PREP_SCHEMA='{"type":"object","properties":{"candidates":{"type":"array","items":{"type":"object","properties":{"candidate_id":{"type":"integer"},"candidate_url":{"type":"string"},"thread_author":{"type":"string"},"thread_text":{"type":"string"},"matched_project":{"type":"string"},"reply_text":{"type":"string"},"engagement_style":{"type":"string"},"language":{"type":"string"},"has_landing_pages":{"type":"boolean"},"link_keyword":{"type":"string"},"link_slug":{"type":"string"}},"required":["candidate_id","candidate_url","matched_project","reply_text","engagement_style","language","has_landing_pages"]}},"rejected":{"type":"array","items":{"type":"object","properties":{"candidate_id":{"type":"integer"},"reason":{"type":"string"},"proposed_excludes":{"type":"array","items":{"type":"string"}}},"required":["candidate_id","reason"]}}},"required":["candidates","rejected"]}'
1049
1049
 
1050
- PREP_OUTPUT=$("$REPO_DIR/scripts/run_claude.sh" "run-twitter-cycle-prep" --strict-mcp-config --mcp-config "$TW_MCP_CONFIG" -p --output-format json --json-schema "$PREP_SCHEMA" "${TW_ENGINE_PREFIX}You are the Social Autoposter prep step.
1050
+ PREP_PROMPT="${TW_ENGINE_PREFIX}You are the Social Autoposter prep step.
1051
1051
 
1052
1052
  Your ONLY job in THIS session:
1053
1053
  1. Read each thread you decide to reply to (browser tools from the BROWSER BACKEND block above, READ-ONLY).
@@ -1130,7 +1130,13 @@ CRITICAL:
1130
1130
  - DO NOT call log_post.py or campaign_bump.py.
1131
1131
  - Browser tools (from the BROWSER BACKEND block) are READ-ONLY in this step.
1132
1132
  - NEVER use em dashes. Use commas, periods, or regular dashes (-).
1133
- - Reply in the SAME LANGUAGE as the parent tweet." 2>&1)
1133
+ - Reply in the SAME LANGUAGE as the parent tweet."
1134
+
1135
+ # Pipe the prep prompt via stdin instead of passing as a shell argument.
1136
+ # Linux caps one argv string at 128 KiB (MAX_ARG_STRLEN), argv+env at ~2 MB
1137
+ # (ARG_MAX); on the VM the assembled prompt (config.json + top_report + styles
1138
+ # + schema + candidates) hits E2BIG "Argument list too long". stdin is uncapped.
1139
+ PREP_OUTPUT=$(printf '%s' "$PREP_PROMPT" | "$REPO_DIR/scripts/run_claude.sh" "run-twitter-cycle-prep" --strict-mcp-config --mcp-config "$TW_MCP_CONFIG" -p --output-format json --json-schema "$PREP_SCHEMA" 2>&1)
1134
1140
 
1135
1141
  echo "$PREP_OUTPUT" >> "$LOG_FILE"
1136
1142
 
@@ -0,0 +1,61 @@
1
#!/usr/bin/env bash
# scan-instagram-replies.sh — Discover new inbound comments on our Instagram
# posts via the Graph API and insert them into the `replies` table.
#
# Mirrors the pattern used by stats-instagram.sh: API-only (no browser),
# instagram-poster lock (so scan, stats, and post can't race for the same
# token-bucket), then a SUMMARY-line parsed by log_run.py for the dashboard
# Jobs panel.
#
# A non-zero exit from scan_instagram_comments.py is recorded as failed=1 in
# log_run.py. This wrapper itself always exits 0.
#
# Logs: skill/logs/scan-instagram-replies-YYYY-MM-DD_HHMMSS.log

set -uo pipefail

REPO_DIR="$HOME/social-autoposter"
LOG_DIR="$REPO_DIR/skill/logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/scan-instagram-replies-$(date +%Y-%m-%d_%H%M%S).log"

log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
log "=== scan-instagram-replies fire: $(date) ==="

RUN_START=$(date +%s)

# instagram-poster lock — stats, scan, daily-post, and render all share this
# lane so we don't race on the same /me/media token bucket.
# shellcheck source=lock.sh
source "$REPO_DIR/skill/lock.sh"
acquire_lock instagram-poster 30

OUTPUT_FILE="/tmp/scan-instagram-replies-$$.out"
# Set to 1 when the scanner exits non-zero so the failure reaches log_run.py.
FAILED=0
if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/scan_instagram_comments.py" 2>>"$LOG_FILE" | tee -a "$LOG_FILE" >"$OUTPUT_FILE"; then
  log "scan_instagram_comments.py exited non-zero — logging run as failed"
  DISCOVERED=0; SKIPPED=0; CHECKED=0; ALREADY=0; ACCOUNTS=0
  FAILED=1
else
  # Each field defaults to 0 when the SUMMARY line or the field is missing.
  SUMMARY=$(grep '^SUMMARY:' "$OUTPUT_FILE" | tail -1)
  DISCOVERED=$(echo "$SUMMARY" | sed -n 's/.*DISCOVERED=\([0-9]*\).*/\1/p'); DISCOVERED=${DISCOVERED:-0}
  SKIPPED=$(echo "$SUMMARY" | sed -n 's/.*SKIPPED=\([0-9]*\).*/\1/p'); SKIPPED=${SKIPPED:-0}
  CHECKED=$(echo "$SUMMARY" | sed -n 's/.*CHECKED=\([0-9]*\).*/\1/p'); CHECKED=${CHECKED:-0}
  ALREADY=$(echo "$SUMMARY" | sed -n 's/.*ALREADY=\([0-9]*\).*/\1/p'); ALREADY=${ALREADY:-0}
  ACCOUNTS=$(echo "$SUMMARY" | sed -n 's/.*ACCOUNTS=\([0-9]*\).*/\1/p'); ACCOUNTS=${ACCOUNTS:-0}
fi
rm -f "$OUTPUT_FILE"

RUN_ELAPSED=$(( $(date +%s) - RUN_START ))

log "logging run: discovered=$DISCOVERED skipped=$SKIPPED checked=$CHECKED already=$ALREADY accounts=$ACCOUNTS failed=$FAILED elapsed=${RUN_ELAPSED}s"

# discovered -> posted (new pending rows are the productive output of a scan,
# same convention scan_reddit_replies / scan_github_replies use).
# skipped -> skipped. checked -> scanned (media items inspected).
# failed -> 1 when the scanner exited non-zero, else 0.
/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/log_run.py" \
  --script "scan_instagram_comments" \
  --posted "$DISCOVERED" \
  --skipped "$SKIPPED" \
  --failed "$FAILED" \
  --scanned "$CHECKED" \
  --cost 0 \
  --elapsed "$RUN_ELAPSED" >>"$LOG_FILE" 2>&1 || log "log_run.py failed"

log "=== scan-instagram-replies done ==="
exit 0