@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,972 @@
1
+ #!/usr/bin/env python3
2
+ """Append one redacted memory/process snapshot as JSONL.
3
+
4
+ This is intentionally short-lived. A scheduler can run it once per minute and
5
+ the process exits after writing a single line, so the observer does not become
6
+ another resident background service.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import datetime as dt
13
+ import json
14
+ import os
15
+ import re
16
+ import socket
17
+ import subprocess
18
+ import sys
19
+ import time
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+
24
# Repository root: the REPO_DIR environment variable when it is set, otherwise
# the directory one level above the directory containing this file.
REPO_DIR = Path(os.getenv("REPO_DIR", Path(__file__).resolve().parents[1]))
# Snapshot log location inside the repository.
DEFAULT_OUTPUT = REPO_DIR.joinpath("skill", "logs", "memory-snapshots.jsonl")
26
+
27
# Regexes for credential-looking substrings.
#
# Patterns without a capture group match the entire secret. Patterns with
# capture groups put the text that should survive (e.g. "Bearer " or
# `api_key=`) in group 1, followed by the secret value itself.
SECRET_PATTERNS = [
    re.compile(r"sk-ant-[A-Za-z0-9_-]+"),
    re.compile(r"sk-[A-Za-z0-9_-]{16,}"),
    re.compile(r"github_pat_[A-Za-z0-9_]+"),
    re.compile(r"gh[pousr]_[A-Za-z0-9_]+"),
    re.compile(r"xox[abprs]-[A-Za-z0-9-]+"),
    re.compile(r"(?i)(bearer\s+)[A-Za-z0-9._~+/=-]+"),
    # key/value credentials such as `token=...` or `"api_key": "..."`.
    # The value runs up to the next quote, whitespace, comma or closing brace.
    # NOTE: inside a raw string the class must use `\s` (whitespace); `\\s`
    # would instead exclude a literal backslash and the letter "s", cutting
    # the match short at the first "s" and leaking the rest of the secret.
    re.compile(
        r'(?i)("?(?:api[_-]?key|token|secret|password|authorization|anthropic_api_key)"?\s*[:=]\s*"?)'
        r'([^"\s,}]+)'
    ),
]
36
+
37
+
38
def run(args: list[str], timeout: float = 5.0) -> str:
    """Execute *args* and return captured stdout as text.

    stderr is discarded and the exit status is not checked. Any exception
    raised while launching or waiting (missing binary, timeout, ...) results
    in an empty string, as does empty output.
    """
    try:
        completed = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=timeout,
            check=False,
        )
    except Exception:
        return ""
    return completed.stdout or ""
51
+
52
+
53
def redact(value: str) -> str:
    """Mask every SECRET_PATTERNS match in *value* and return the result.

    For a pattern that defines capture groups, group 1 is kept and the rest
    of the match is replaced by ``REDACTED``; for a pattern without groups
    the whole match becomes ``REDACTED``.
    """

    def _keep_prefix(match: "re.Match[str]") -> str:
        return f"{match.group(1)}REDACTED"

    masked = value
    for secret_re in SECRET_PATTERNS:
        replacement = _keep_prefix if secret_re.groups else "REDACTED"
        masked = secret_re.sub(replacement, masked)
    return masked
63
+
64
+
65
def shorten(value: str, max_len: int = 360) -> str:
    """Redact *value*, collapse whitespace runs, and cap the length.

    The text is passed through ``redact`` first, then every run of whitespace
    is collapsed to a single space. Results longer than *max_len* are cut and
    terminated with ``...`` so the returned string is exactly *max_len*
    characters long.
    """
    text = " ".join(redact(value).split())
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # No room for the ellipsis: a negative slice bound would otherwise
        # return a string *longer* than max_len, so hard-truncate instead.
        return text[: max(max_len, 0)]
    return text[: max_len - 3] + "..."
70
+
71
+
72
def mb(kb: int | float) -> float:
    """Convert a size in kilobytes to megabytes, rounded to one decimal."""
    megabytes = float(kb) / 1024.0
    return round(megabytes, 1)
74
+
75
+
76
def parse_ps() -> tuple[list[dict[str, Any]], dict[int, dict[str, Any]], dict[int, list[int]]]:
    """Snapshot the process table via ``ps``.

    Returns a triple ``(rows, by_pid, children)``:

    * ``rows``     - one dict per parsed process, in ``ps`` output order;
    * ``by_pid``   - the same dicts keyed by pid;
    * ``children`` - ppid -> list of child pids.

    Each row carries the redacted/shortened command under ``cmd`` and the
    untouched command line under ``_command_raw``. Lines that do not have six
    fields or whose numeric columns fail to parse are skipped.
    """
    listing = run(["ps", "-axo", "pid=,ppid=,pgid=,pcpu=,rss=,command="], timeout=8.0)
    rows: list[dict[str, Any]] = []
    by_pid: dict[int, dict[str, Any]] = {}
    children: dict[int, list[int]] = {}
    for raw_line in listing.splitlines():
        fields = raw_line.strip().split(None, 5)
        if len(fields) < 6:
            continue
        pid_s, ppid_s, pgid_s, cpu_s, rss_s, command = fields
        try:
            pid, ppid, pgid = int(pid_s), int(ppid_s), int(pgid_s)
            cpu = float(cpu_s)
            rss_kb = int(rss_s)
        except ValueError:
            continue
        entry: dict[str, Any] = {
            "pid": pid,
            "ppid": ppid,
            "pgid": pgid,
            "cpu_pct": cpu,
            "rss_mb": mb(rss_kb),
            "rss_kb": rss_kb,
            "cmd": shorten(command),
            "_command_raw": command,
        }
        rows.append(entry)
        by_pid[pid] = entry
        children.setdefault(ppid, []).append(pid)
    return rows, by_pid, children
108
+
109
+
110
def process_tree(root_pid: int, by_pid: dict[int, dict[str, Any]], children: dict[int, list[int]]) -> dict[str, Any] | None:
    """Summarise the process subtree rooted at *root_pid*.

    Walks ``children`` from the root, counting only pids present in
    ``by_pid`` and visiting each pid at most once. Returns ``None`` when the
    root has no (truthy) row in ``by_pid``; otherwise a dict with the root's
    own RSS and command, the summed RSS of the whole tree, the number of
    descendants, and the sorted list of member pids.
    """
    root_row = by_pid.get(root_pid)
    if not root_row:
        return None

    visited: set[int] = set()
    members: list[int] = []
    pending = [root_pid]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        if current in by_pid:
            members.append(current)
            pending.extend(children.get(current, []))

    tree_kb = 0
    for member in members:
        tree_kb += int(by_pid[member]["rss_kb"])

    return {
        "pid": root_pid,
        "rss_mb": root_row["rss_mb"],
        "tree_rss_mb": mb(tree_kb),
        "descendant_count": max(0, len(members) - 1),
        "cmd": root_row["cmd"],
        "pids": sorted(members),
    }
135
+
136
+
137
def parse_vm_stat() -> dict[str, Any]:
    """Parse ``vm_stat`` output into page counts and MB figures.

    Returns ``{}`` when ``vm_stat`` produces no output. The page size is read
    from the first output line (4096 when it cannot be found). Every later
    ``name: number`` line becomes an entry in ``pages`` keyed by the
    lower-cased, underscore-joined name. ``total_mb`` comes from
    ``sysctl -n hw.memsize`` and is ``None`` when that value is not an int.
    """
    text = run(["vm_stat"], timeout=5.0)
    if not text:
        return {}

    lines = text.splitlines()
    header = lines[0] if lines else ""
    size_match = re.search(r"page size of (\d+) bytes", header)
    page_size = int(size_match.group(1)) if size_match else 4096

    pages: dict[str, int] = {}
    for entry in lines[1:]:
        if ":" not in entry:
            continue
        label, _, remainder = entry.partition(":")
        name = label.strip().strip('"').lower().replace(" ", "_")
        # Dots are dropped before searching so the trailing "." after each
        # count does not get in the way.
        digits = re.search(r"(-?\d+)", remainder.replace(".", ""))
        if digits:
            pages[name] = int(digits.group(1))

    sysctl_bin = "/usr/sbin/sysctl" if Path("/usr/sbin/sysctl").exists() else "sysctl"
    raw_total = run([sysctl_bin, "-n", "hw.memsize"], timeout=2.0).strip()
    try:
        total_mb = round(int(raw_total) / 1024 / 1024, 1)
    except ValueError:
        total_mb = None

    result: dict[str, Any] = {"page_size": page_size, "total_mb": total_mb}
    mb_fields = (
        ("free_mb", "pages_free"),
        ("active_mb", "pages_active"),
        ("inactive_mb", "pages_inactive"),
        ("speculative_mb", "pages_speculative"),
        ("wired_mb", "pages_wired_down"),
        ("purgeable_mb", "pages_purgeable"),
        ("compressed_mb", "pages_occupied_by_compressor"),
    )
    for out_key, page_key in mb_fields:
        # Missing counters are reported as 0.0 MB.
        result[out_key] = round(pages.get(page_key, 0) * page_size / 1024 / 1024, 1)
    result["swapins"] = pages.get("swapins")
    result["swapouts"] = pages.get("swapouts")
    result["pages"] = pages
    return result
179
+
180
+
181
def memory_pressure_pct_free() -> float | None:
    """Return the system-wide free-memory percentage from ``memory_pressure``.

    The tool prints "System-wide memory free percentage: N%". Per the
    original author's note this is the figure to trust when asking whether
    the machine is starved, rather than vm_stat's "pages free": macOS keeps
    RAM occupied with cached/inactive/compressed pages, so a tiny "free"
    count is the normal healthy state.

    Best-effort: returns ``None`` when the tool yields no output or the line
    is absent, so the caller can fall back to a pages-based estimate.
    """
    report = run(["/usr/bin/memory_pressure"], timeout=6.0)
    if not report:
        return None
    found = re.search(r"free percentage:\s*([\d.]+)", report)
    if not found:
        return None
    return round(float(found.group(1)), 1)
195
+
196
+
197
def swap_used_mb() -> float | None:
    """Return swap in use (MB) parsed from ``sysctl -n vm.swapusage``.

    Looks for a ``used = <number>M`` field; returns ``None`` when it is not
    found. The original author describes this as a corroborating signal for
    real memory pressure.
    """
    if Path("/usr/sbin/sysctl").exists():
        sysctl_bin = "/usr/sbin/sysctl"
    else:
        sysctl_bin = "sysctl"
    usage = run([sysctl_bin, "-n", "vm.swapusage"], timeout=2.0)
    found = re.search(r"used\s*=\s*([\d.]+)M", usage or "")
    if not found:
        return None
    return round(float(found.group(1)), 1)
203
+
204
+
205
def launchd_jobs(by_pid: dict[int, dict[str, Any]], children: dict[int, list[int]]) -> list[dict[str, Any]]:
    """List ``com.m13v.*`` launchd jobs from ``launchctl list``.

    Each job dict holds ``label``, ``pid`` (``None`` when the pid column is
    not an integer) and ``status`` (int when parseable, otherwise the raw
    text). When the job has a pid that is present in the process table, the
    ``process_tree`` summary is merged into the job dict. Jobs with a pid
    sort first; ties are ordered by label.
    """

    def _int_or(text: str, fallback: Any) -> Any:
        try:
            return int(text)
        except ValueError:
            return fallback

    listing = run(["launchctl", "list"], timeout=5.0)
    jobs: list[dict[str, Any]] = []
    for raw_line in listing.splitlines():
        fields = raw_line.strip().split(None, 2)
        if len(fields) != 3:
            continue
        pid_text, status_text, label = fields
        if not label.startswith("com.m13v."):
            continue
        pid = _int_or(pid_text, None)
        status: int | str = _int_or(status_text, status_text)
        job: dict[str, Any] = {"label": label, "pid": pid, "status": status}
        if pid is not None:
            tree = process_tree(pid, by_pid, children)
            if tree:
                job.update(tree)
        jobs.append(job)
    jobs.sort(key=lambda item: (item.get("pid") is None, item["label"]))
    return jobs
231
+
232
+
233
# Absolute path (as a string) of the MCP server script inside the repository;
# the matchers below look for it in process command lines.
S4L_MCP_ENTRYPOINT = str(REPO_DIR.joinpath("mcp", "dist", "index.js"))
234
+
235
+
236
+ def _command(row: dict[str, Any]) -> str:
237
+ return str(row.get("_command_raw", ""))
238
+
239
+
240
+ def _node_running_script(command: str, script_path: str) -> bool:
241
+ return bool(
242
+ re.search(
243
+ rf"(^|\s)(?:/[^ \t]+/)?node\s+{re.escape(script_path)}(?:\s|$)",
244
+ command,
245
+ )
246
+ )
247
+
248
+
249
def _is_social_autoposter_mcp_server(row: dict[str, Any]) -> bool:
    """True when the process is ``node`` running the repo's MCP entrypoint."""
    command_line = _command(row)
    return _node_running_script(command_line, S4L_MCP_ENTRYPOINT)
251
+
252
+
253
def _is_configured_with_social_autoposter_mcp(row: dict[str, Any]) -> bool:
    """True when the command mentions the MCP entrypoint without being the server.

    That is, the entrypoint path appears somewhere in the command line but
    the process is not itself ``node`` running that script.
    """
    if S4L_MCP_ENTRYPOINT not in _command(row):
        return False
    return not _is_social_autoposter_mcp_server(row)
256
+
257
+
258
def _is_dashboard_server(row: dict[str, Any]) -> bool:
    """True when the process is ``node`` running ``bin/server.js``.

    Accepts either the absolute path under REPO_DIR or the relative
    ``bin/server.js`` form.
    """
    command_line = _command(row)
    if _node_running_script(command_line, str(REPO_DIR / "bin" / "server.js")):
        return True
    relative_form = re.search(
        r"(^|\s)(?:/[^ \t]+/)?node\s+bin/server\.js(?:\s|$)", command_line
    )
    return relative_form is not None
264
+
265
+
266
def _is_claude_cli(row: dict[str, Any]) -> bool:
    """True when the command line runs a ``claude`` executable.

    Matches either the app-bundle binary path or a ``claude`` word (bare or
    with a directory prefix) that stands alone between whitespace/boundaries.
    """
    command_line = _command(row)
    if "/claude.app/Contents/MacOS/claude" in command_line:
        return True
    standalone = re.search(r"(^|\s)(?:/[^ \t]+/)?claude(?:\s|$)", command_line)
    return standalone is not None
272
+
273
+
274
def _is_browser_harness(row: dict[str, Any]) -> bool:
    """True when the command references the browser-harness profile directory."""
    profile_marker = ".claude/browser-profiles/browser-harness"
    return profile_marker in _command(row)
276
+
277
+
278
def _is_remote_macos_mcp_server(row: dict[str, Any]) -> bool:
    """True when the process looks like a macos-use MCP server.

    Checked in order:

    1. the command mentions ``SkyComputerUseService``/``SkyComputerUseClient``;
    2. ``mcp-server-macos-use`` appears as a standalone executable word;
    3. the command starts with ``ssh`` and mentions ``mcp-server-macos-use``;
    4. a word containing ``macos-use-remote`` (optionally run through
       ``/bin/bash``, ``/bin/sh`` or ``/bin/zsh``) appears and the process is
       not a claude CLI.
    """
    command_line = _command(row)

    sky_markers = ("SkyComputerUseService", "SkyComputerUseClient")
    if any(marker in command_line for marker in sky_markers):
        return True

    if re.search(r"(^|\s)(?:/[^ \t]+/)?mcp-server-macos-use(?:\s|$)", command_line) is not None:
        return True

    if "mcp-server-macos-use" in command_line and re.match(
        r"^(?:/[^ \t]+/)?ssh(?:\s|$)", command_line
    ):
        return True

    remote_wrapper = re.search(
        r"(^|\s)(?:/bin/(?:bash|sh|zsh)\s+)?[^ \t]*macos-use-remote[^ \t]*(?:\s|$)",
        command_line,
    )
    if not remote_wrapper:
        return False
    return not _is_claude_cli(row)
293
+
294
+
295
def _is_configured_with_remote_macos_mcp(row: dict[str, Any]) -> bool:
    """True when the command mentions the macos-use MCP without being its server.

    The command must contain one of ``macos-use-remote``,
    ``mcp-server-macos-use`` or ``mcp__computer-use`` and the process must not
    itself be classified as a remote macos MCP server.
    """
    command_line = _command(row)
    needles = ("macos-use-remote", "mcp-server-macos-use", "mcp__computer-use")
    mentions_mcp = any(needle in command_line for needle in needles)
    return mentions_mcp and not _is_remote_macos_mcp_server(row)
304
+
305
+
306
def _is_twitter_browser_pipeline(row: dict[str, Any]) -> bool:
    """True when the command mentions ``twitter_browser.py`` or ``run-twitter-cycle``."""
    command_line = _command(row)
    markers = ("twitter_browser.py", "run-twitter-cycle")
    return any(marker in command_line for marker in markers)
309
+
310
+
311
def _is_social_autoposter_repo_process(row: dict[str, Any]) -> bool:
    """True when the process belongs to this repository.

    Processes that merely reference the MCP entrypoint (without being the
    server) are excluded up front. Otherwise a process qualifies when it is
    the MCP server, the dashboard server, part of the twitter browser
    pipeline, or when its command references a path under one of the repo's
    ``bin``, ``mcp``, ``scripts``, ``setup`` or ``skill`` directories.
    """
    if _is_configured_with_social_autoposter_mcp(row):
        return False

    known_roles = (
        _is_social_autoposter_mcp_server,
        _is_dashboard_server,
        _is_twitter_browser_pipeline,
    )
    if any(matches(row) for matches in known_roles):
        return True

    command_line = _command(row)
    repo_root = str(REPO_DIR)
    for subdir in ("bin", "mcp", "scripts", "setup", "skill"):
        if f"{repo_root}/{subdir}/" in command_line:
            return True
    return False
326
+
327
+
328
# Group name -> predicate over a process row. Insertion order is the order in
# which groups are summarised; a process may fall into several groups.
GROUP_MATCHERS = dict(
    social_autoposter_repo_processes=_is_social_autoposter_repo_process,
    social_autoposter_mcp_servers=_is_social_autoposter_mcp_server,
    sessions_configured_social_autoposter_mcp=_is_configured_with_social_autoposter_mcp,
    dashboard_server=_is_dashboard_server,
    claude_cli=_is_claude_cli,
    browser_harness=_is_browser_harness,
    remote_macos_mcp_servers=_is_remote_macos_mcp_server,
    sessions_configured_remote_macos_mcp=_is_configured_with_remote_macos_mcp,
    twitter_browser_pipeline=_is_twitter_browser_pipeline,
)
339
+
340
+
341
def group_summaries(rows: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate process rows into one summary per GROUP_MATCHERS entry.

    For each group the summary holds the number of matching processes, their
    combined RSS in MB, and up to ten of the largest processes (by RSS) with
    their pid, ppid, RSS and shortened command.
    """

    def _rss_kb(row: dict[str, Any]) -> int:
        return int(row["rss_kb"])

    summaries: dict[str, Any] = {}
    for group_name, predicate in GROUP_MATCHERS.items():
        members = list(filter(predicate, rows))
        group_kb = sum(map(_rss_kb, members))
        heaviest = sorted(members, key=_rss_kb, reverse=True)[:10]
        top_pids = []
        for member in heaviest:
            top_pids.append(
                {
                    "pid": member["pid"],
                    "ppid": member["ppid"],
                    "rss_mb": member["rss_mb"],
                    "cmd": member["cmd"],
                }
            )
        summaries[group_name] = {
            "count": len(members),
            "rss_mb": mb(group_kb),
            "top_pids": top_pids,
        }
    return summaries
361
+
362
+
363
def active_claude_sidecars(by_pid: dict[int, dict[str, Any]], children: dict[int, list[int]]) -> list[dict[str, Any]]:
    """Load every ``/tmp/sa-active-claude/*.json`` sidecar file.

    Returns one dict per file, in sorted path order. A readable sidecar is
    returned as its parsed JSON object with ``path`` added and, when it has
    an integer ``wrapper_pid``, a ``wrapper_tree`` entry holding the
    ``process_tree`` summary for that pid (``None`` if the pid is not in the
    process table). A file that cannot be read or parsed, or whose top-level
    JSON value is not an object, is reported as ``{"path", "error"}`` instead
    of aborting the whole snapshot.
    """
    sidecars: list[dict[str, Any]] = []
    for path in sorted(Path("/tmp/sa-active-claude").glob("*.json")):
        try:
            data = json.loads(path.read_text())
        except Exception as exc:
            sidecars.append({"path": str(path), "error": str(exc)})
            continue
        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. [] or null): .get() below
            # would raise outside the try block and kill the snapshot.
            sidecars.append(
                {
                    "path": str(path),
                    "error": f"expected a JSON object, got {type(data).__name__}",
                }
            )
            continue
        wrapper_pid = data.get("wrapper_pid")
        # bool is a subclass of int; `true` must not be treated as pid 1.
        if isinstance(wrapper_pid, int) and not isinstance(wrapper_pid, bool):
            data["wrapper_tree"] = process_tree(wrapper_pid, by_pid, children)
        data["path"] = str(path)
        sidecars.append(data)
    return sidecars
377
+
378
+
379
+ def _json_file_metadata(path: Path) -> dict[str, Any]:
380
+ try:
381
+ stat = path.stat()
382
+ except OSError:
383
+ return {"path": str(path), "exists": False}
384
+ meta: dict[str, Any] = {
385
+ "path": str(path),
386
+ "size_bytes": stat.st_size,
387
+ "mtime": dt.datetime.fromtimestamp(stat.st_mtime, dt.timezone.utc).astimezone().isoformat(timespec="seconds"),
388
+ }
389
+ try:
390
+ data = json.loads(path.read_text())
391
+ except Exception as exc:
392
+ meta["error"] = str(exc)
393
+ return meta
394
+ for key in ("job_id", "type", "tag", "created_at", "status", "error"):
395
+ if key in data:
396
+ meta[key] = shorten(str(data[key]), 160)
397
+ if isinstance(data.get("created_at"), (int, float)):
398
+ meta["age_sec"] = round(max(0.0, dt.datetime.now().timestamp() - float(data["created_at"])), 1)
399
+ return meta
400
+
401
+
402
def claude_queue_summary() -> dict[str, Any]:
    """Summarise the on-disk claude job queue.

    The queue lives in ``claude-queue`` under ``$S4L_STATE_DIR`` (default
    ``~/.social-autoposter-mcp``). The summary always contains the queue
    path, whether it exists, pending/running/result counts, per-type pending
    counts, metadata for sampled pending files (first five per type, ten
    overall) and for up to ten running files, and ``oldest_age_sec`` - the
    largest ``age_sec`` among the sampled files. ``provider_log`` and
    ``drain_status`` are added when the corresponding files are present and
    readable.
    """
    root = Path(os.environ.get("S4L_STATE_DIR", str(Path.home() / ".social-autoposter-mcp"))) / "claude-queue"
    summary: dict[str, Any] = {
        "path": str(root),
        "exists": root.exists(),
        "pending_total": 0,
        "pending_by_type": {},
        "running_total": 0,
        "result_total": 0,
        "oldest_age_sec": None,
        "running_jobs": [],
        "oldest_pending": [],
    }
    if not summary["exists"]:
        return summary

    ages: list[float] = []
    pending_root = root / "pending"
    if pending_root.exists():
        for qtype_dir in sorted(p for p in pending_root.iterdir() if p.is_dir()):
            files = sorted(qtype_dir.glob("*.json"))
            summary["pending_by_type"][qtype_dir.name] = len(files)
            summary["pending_total"] += len(files)
            # Only the first few files (by name) of each type are inspected.
            for path in files[:5]:
                meta = _json_file_metadata(path)
                if isinstance(meta.get("age_sec"), (int, float)):
                    ages.append(float(meta["age_sec"]))
                if len(summary["oldest_pending"]) < 10:
                    summary["oldest_pending"].append(meta)

    running_dir = root / "running"
    result_dir = root / "result"
    running_files = sorted(running_dir.glob("*.json")) if running_dir.exists() else []
    result_files = sorted(result_dir.glob("*.json")) if result_dir.exists() else []
    summary["running_total"] = len(running_files)
    summary["result_total"] = len(result_files)
    for path in running_files[:10]:
        meta = _json_file_metadata(path)
        if isinstance(meta.get("age_sec"), (int, float)):
            ages.append(float(meta["age_sec"]))
        summary["running_jobs"].append(meta)
    summary["oldest_age_sec"] = max(ages) if ages else None

    provider_log = root / "provider.log"
    if provider_log.exists():
        try:
            stat = provider_log.stat()
            summary["provider_log"] = {
                "path": str(provider_log),
                "size_bytes": stat.st_size,
                "mtime": dt.datetime.fromtimestamp(stat.st_mtime, dt.timezone.utc).astimezone().isoformat(timespec="seconds"),
            }
        except OSError:
            pass

    # The producer's drain latch: consecutive_timeouts>=1 means the scheduled-task
    # worker stopped draining (the definitive phase2b-stall signal). Surfacing it
    # here lets the heartbeat carry it server-side, so a stall is visible centrally
    # without SSHing the box. See claude_job.py _bump_drain_timeout / _clear_drain.
    drain_path = root / "drain-status.json"
    if drain_path.exists():
        try:
            ds = json.loads(drain_path.read_text())
            # A non-object payload (e.g. [] or null) has no fields to report;
            # skip it instead of letting .get() raise AttributeError.
            if isinstance(ds, dict):
                summary["drain_status"] = {
                    "consecutive_timeouts": int(ds.get("consecutive_timeouts", 0) or 0),
                    "last_success_at": ds.get("last_success_at"),
                    "last_timeout_at": ds.get("last_timeout_at"),
                }
        except (OSError, ValueError, TypeError):
            # Best-effort: an unreadable or malformed latch file is omitted.
            pass
    return summary
469
+
470
+
471
def lock_queue_summary(by_pid: dict[int, dict[str, Any]]) -> list[dict[str, Any]]:
    """Describe every ``/tmp/social-autoposter-<name>.lock`` and its wait queue.

    Args:
        by_pid: Process table keyed by pid. Entries are read for ``rss_mb``
            and ``cmd``; a pid that is absent is reported as not alive.

    Returns:
        One dict per lock name, sorted by name. Each carries ``name``,
        ``locked`` and ``queue_depth``. Holder details (``age_sec``,
        ``holder_pid``, ``holder_alive``, ``holder_rss_mb``, ``holder_cmd``,
        ``expires_in_sec``) are added when the lock directory exists and the
        corresponding file can be read. ``queued`` lists up to ten tickets
        when the queue directory can be listed.
    """
    now = dt.datetime.now().timestamp()
    tmp_root = Path("/tmp")
    prefix = "social-autoposter-"

    names: set[str] = set()
    for suffix in (".lock", ".lock.queue"):
        for path in tmp_root.glob(f"{prefix}*{suffix}"):
            if path.is_dir():
                names.add(path.name.removeprefix(prefix).removesuffix(suffix))

    locks: list[dict[str, Any]] = []
    for name in sorted(names):
        lock_dir = tmp_root / f"{prefix}{name}.lock"
        queue_dir = tmp_root / f"{prefix}{name}.lock.queue"
        # Probe once so "locked" and the holder details describe the same state.
        locked = lock_dir.exists()
        item: dict[str, Any] = {"name": name, "locked": locked, "queue_depth": 0}
        if locked:
            try:
                item["age_sec"] = round(max(0.0, now - lock_dir.stat().st_mtime), 1)
            except OSError:
                pass
            try:
                holder_pid = int((lock_dir / "pid").read_text().strip())
            except (OSError, ValueError):
                item["holder_pid"] = None
            else:
                item["holder_pid"] = holder_pid
                item["holder_alive"] = holder_pid in by_pid
                if holder_pid in by_pid:
                    # .get(): a sparse process row must not erase the pid we
                    # already parsed successfully.
                    item["holder_rss_mb"] = by_pid[holder_pid].get("rss_mb")
                    item["holder_cmd"] = by_pid[holder_pid].get("cmd")
            try:
                expires_at = int((lock_dir / "expires_at").read_text().strip())
                item["expires_in_sec"] = expires_at - int(now)
            except (OSError, ValueError):
                pass

        # Lock queues in /tmp come and go while we scan; list the directory
        # directly and treat "vanished" or "not a directory" as "no queue"
        # rather than letting the error abort the whole snapshot.
        try:
            tickets = sorted(p for p in queue_dir.iterdir() if p.is_file())
        except OSError:
            tickets = None
        if tickets is not None:
            item["queue_depth"] = len(tickets)
            queued: list[dict[str, Any]] = []
            for ticket in tickets[:10]:
                entry: dict[str, Any] = {"ticket": ticket.name}
                try:
                    pid = int(ticket.read_text().strip())
                except (OSError, ValueError):
                    pass
                else:
                    entry["pid"] = pid
                    entry["alive"] = pid in by_pid
                    if pid in by_pid:
                        entry["cmd"] = by_pid[pid].get("cmd")
                queued.append(entry)
            item["queued"] = queued
        locks.append(item)
    return locks
524
+
525
+
526
def scheduled_tasks_summary() -> dict[str, Any]:
    """Inventory Claude scheduled tasks visible from this user's home directory.

    Returns:
        A dict with ``skill_files`` (one entry per
        ``~/.claude/scheduled-tasks/*/SKILL.md``), ``registries`` (one entry
        per ``scheduled-tasks.json`` found under Application Support, at most
        50), and ``enabled_total`` / ``disabled_total`` counted over the
        listed tasks (at most 30 per registry). A registry that cannot be
        read, or whose JSON does not have the expected shape, is reported
        with an ``error`` string and no tasks.
    """
    summary: dict[str, Any] = {
        "skill_files": [],
        "registries": [],
        "enabled_total": 0,
        "disabled_total": 0,
    }
    scheduled_root = Path.home() / ".claude" / "scheduled-tasks"
    if scheduled_root.exists():
        for path in sorted(scheduled_root.glob("*/SKILL.md")):
            summary["skill_files"].append({"id": path.parent.name, "path": str(path)})

    # "Claude*": the host app can run with a custom --user-data-dir (per-account
    # dirs like "Claude-mediar"), putting registries outside plain "Claude/".
    # Keep in sync with scripts/schedule_state.py::SCHED_REGISTRY_GLOB.
    app_support = Path.home() / "Library" / "Application Support"
    registries = sorted(
        app_support.glob("Claude*/claude-code-sessions/**/scheduled-tasks.json")
    )
    for registry in registries[:50]:
        reg: dict[str, Any] = {"path": str(registry), "tasks": []}
        summary["registries"].append(reg)
        try:
            data = json.loads(registry.read_text())
        except Exception as exc:
            reg["error"] = str(exc)
            continue

        # The registry is written by another application, so validate its shape
        # before indexing: a non-object document or a non-list task array used
        # to raise here and take the whole snapshot down with it.
        if not isinstance(data, dict):
            reg["error"] = f"unexpected registry shape: {type(data).__name__}"
            continue
        tasks = data.get("scheduledTasks")
        if tasks is None:
            tasks = []
        if not isinstance(tasks, list):
            reg["error"] = f"unexpected scheduledTasks shape: {type(tasks).__name__}"
            continue

        for task in tasks[:30]:
            if not isinstance(task, dict):
                continue
            enabled = bool(task.get("enabled"))
            if enabled:
                summary["enabled_total"] += 1
            else:
                summary["disabled_total"] += 1
            reg["tasks"].append({
                "id": task.get("id"),
                "enabled": enabled,
                "fireAt": task.get("fireAt"),
                "lastRunAt": task.get("lastRunAt"),
                "lastScheduledFor": task.get("lastScheduledFor"),
                "cwd": shorten(str(task.get("cwd", "")), 220),
                "filePath": shorten(str(task.get("filePath", "")), 220),
            })
    return summary
572
+
573
+
574
def queues_summary(by_pid: dict[int, dict[str, Any]]) -> dict[str, Any]:
    """Bundle the three queue views into one mapping.

    Args:
        by_pid: Process table keyed by pid, forwarded to the lock summary.

    Returns:
        A dict with ``claude_queue``, ``social_locks`` and ``scheduled_tasks``.
    """
    claude_queue = claude_queue_summary()
    social_locks = lock_queue_summary(by_pid)
    scheduled_tasks = scheduled_tasks_summary()
    return {
        "claude_queue": claude_queue,
        "social_locks": social_locks,
        "scheduled_tasks": scheduled_tasks,
    }
580
+
581
+
582
def rotate_log(path: Path, max_bytes: int, keep: int = 3) -> None:
    """Rotate ``path`` to ``path.1`` once it has reached ``max_bytes``.

    Existing ``path.1`` .. ``path.<keep-1>`` are shifted up by one first, from
    the highest index down, so nothing is overwritten before it has moved.

    Args:
        path: Log file to rotate.
        max_bytes: Size threshold; a value <= 0 disables rotation.
        keep: Highest numbered suffix produced by the shift.

    Best-effort: any error leaves the files as they are at that point and
    returns silently.
    """
    if max_bytes <= 0:
        return

    def numbered(index: int) -> Path:
        return path.with_name(f"{path.name}.{index}")

    try:
        too_small = (not path.exists()) or path.stat().st_size < max_bytes
        if too_small:
            return
        for index in reversed(range(1, keep)):
            older = numbered(index)
            if older.exists():
                older.replace(numbered(index + 1))
        path.replace(numbered(1))
    except Exception:
        return
596
+
597
+
598
def build_snapshot(top_n: int) -> dict[str, Any]:
    """Assemble the full snapshot record that is appended to the JSONL log.

    Args:
        top_n: Number of processes, ranked by ``rss_kb`` descending, to list
            under ``top_rss``.

    Returns:
        A dict with the timestamp, host identity, reaper status, memory
        figures, process count, the heaviest processes, per-group summaries,
        launchd jobs, active claude sidecars and the queue summaries.
    """
    rows, by_pid, children = parse_ps()

    def resident_kb(row: dict[str, Any]) -> int:
        return int(row["rss_kb"])

    heaviest = sorted(rows, key=resident_kb, reverse=True)[:top_n]
    exported_fields = ("pid", "ppid", "pgid", "cpu_pct", "rss_mb", "cmd")
    local_now = dt.datetime.now(dt.timezone.utc).astimezone()
    return {
        "ts": local_now.isoformat(timespec="seconds"),
        "hostname": socket.gethostname(),
        "repo_dir": str(REPO_DIR),
        "claude_desktop_version": claude_desktop_version(),
        "reaper": reaper_status(),
        "memory": parse_vm_stat(),
        "process_count": len(rows),
        "top_rss": [{field: row[field] for field in exported_fields} for row in heaviest],
        "groups": group_summaries(rows),
        "launchd_jobs": launchd_jobs(by_pid, children),
        "active_claude_sidecars": active_claude_sidecars(by_pid, children),
        "queues": queues_summary(by_pid),
    }
625
+
626
+
627
def build_summary() -> dict[str, Any]:
    """Slim, cheap snapshot for the heartbeat body.

    Skips the heavier sections (launchd jobs, sidecars, lock queues, per-group
    top_pids) so the MCP can compute it inline on every 15-min heartbeat. Just
    the host memory totals, per-group RSS counts, the single biggest process,
    and the claude-queue depth — enough to spot a leaking box centrally.

    Returns:
        A dict with ``ts``, ``hostname``, ``app_version``,
        ``claude_desktop_version``, ``reaper``, ``process_count``, a ``mem``
        section, slim ``groups``, the ``top`` process (or None) and a
        ``claude_queue`` section including the ``stalled`` flag.
    """
    rows, _by_pid, _children = parse_ps()
    mem = parse_vm_stat()
    total = mem.get("total_mb")
    # macOS memory accounting: "available" headroom is what can be handed to a
    # process WITHOUT paging — free + inactive + speculative + purgeable, all of
    # which the OS reclaims on demand. The real footprint is total - available.
    # Do NOT use vm_stat "pages free" as the headline: it is near-zero by design
    # (macOS keeps RAM full of reclaimable cache), so total-minus-free reads ~99%
    # and falsely looks like starvation. That trap caused a wrong OOM call once.
    reclaimable = [
        mem.get(key) for key in ("free_mb", "inactive_mb", "speculative_mb", "purgeable_mb")
    ]
    numeric = [part for part in reclaimable if isinstance(part, (int, float))]
    # No reclaimable field at all means "not measured", not "0 MB available":
    # summing an empty list to 0.0 would report pct_free=0 and a false
    # "critical" health whenever the pressure gauge is also unavailable.
    available = round(sum(numeric), 1) if numeric else None
    have_both = isinstance(total, (int, float)) and available is not None
    used = round(float(total) - float(available), 1) if have_both else None
    # pct_free is kept CONSISTENT with the MB figures above (available / total) so a
    # reader never sees two contradictory percentages. `pressure_pct` is the separate
    # OS pressure gauge from `memory_pressure` (counts evictable file cache as free, so
    # it reads higher) — it is the most robust starvation detector, so `health` is
    # derived from it, falling back to pct_free only when the tool is unavailable.
    pct_free = round(available / total * 100, 1) if have_both and total else None
    pressure_pct = memory_pressure_pct_free()
    basis = pressure_pct if pressure_pct is not None else pct_free
    if basis is None:
        health = "unknown"
    elif basis < 10:
        health = "critical"
    elif basis < 20:
        health = "warn"
    else:
        health = "ok"
    swap_used = swap_used_mb()
    slim_groups = {
        name: {"count": g["count"], "rss_mb": g["rss_mb"]}
        for name, g in group_summaries(rows).items()
    }
    top = sorted(rows, key=lambda row: int(row["rss_kb"]), reverse=True)[:1]
    top_proc = (
        {"pid": top[0]["pid"], "rss_mb": top[0]["rss_mb"], "cmd": top[0]["cmd"]}
        if top
        else None
    )
    cq = claude_queue_summary()
    ds = cq.get("drain_status") or {}
    oldest = cq.get("oldest_age_sec")
    consec = int(ds.get("consecutive_timeouts", 0) or 0)
    # Mirror the MCP's autopilotStalled(): a latched producer timeout, OR a draft
    # job that has sat unclaimed past 180s, means no scheduled-task worker is
    # draining the queue. Carrying this on the heartbeat makes a phase2b stall
    # visible in installation_resource_samples without SSHing the box.
    stalled = bool(consec >= 1 or (isinstance(oldest, (int, float)) and oldest > 180))
    return {
        "ts": dt.datetime.now(dt.timezone.utc).astimezone().isoformat(timespec="seconds"),
        "hostname": socket.gethostname(),
        "app_version": _app_version(),
        "claude_desktop_version": claude_desktop_version(),
        "reaper": reaper_status(),
        "process_count": len(rows),
        "mem": {
            "total_mb": total,
            "used_mb": used,
            "available_mb": available,
            "pct_free": pct_free,
            "pressure_pct": pressure_pct,
            "health": health,
            "wired_mb": mem.get("wired_mb"),
            "compressed_mb": mem.get("compressed_mb"),
            "swap_used_mb": swap_used,
            "swapouts": mem.get("swapouts"),
        },
        "groups": slim_groups,
        "top": top_proc,
        "claude_queue": {
            "pending": cq.get("pending_total", 0),
            "running": cq.get("running_total", 0),
            "oldest_age_sec": oldest,
            "stalled": stalled,
            "consecutive_timeouts": consec,
            "last_success_at": ds.get("last_success_at"),
        },
    }
723
+
724
+
725
+ def _app_version() -> str | None:
726
+ """Plugin version from manifest.json / package.json at the repo root."""
727
+ for name in ("manifest.json", "package.json"):
728
+ try:
729
+ data = json.loads((REPO_DIR / name).read_text())
730
+ except Exception:
731
+ continue
732
+ v = data.get("version")
733
+ if v:
734
+ return str(v).strip() or None
735
+ return None
736
+
737
+
738
def claude_desktop_version() -> str | None:
    """Return the Claude Desktop app version read from its Info.plist, or None.

    The reaper's blind spot (a newer Claude Code changed the session-path
    shape so UUID_RE stopped matching) is version-correlated, so the Desktop
    version is stamped on every heartbeat and snapshot. Info.plist is parsed
    with plistlib instead of shelling out to `defaults`.

    The system-wide install (/Applications) is checked before the per-user
    one (~/Applications). ``CFBundleShortVersionString`` is preferred, with
    ``CFBundleVersion`` as the fallback. Best-effort: never raises for an
    unreadable or malformed plist.
    """
    system_plist = Path("/Applications/Claude.app/Contents/Info.plist")
    user_plist = Path.home() / "Applications" / "Claude.app" / "Contents" / "Info.plist"
    for info_plist in (system_plist, user_plist):
        try:
            if info_plist.exists():
                import plistlib

                with info_plist.open("rb") as handle:
                    bundle = plistlib.load(handle)
                version = bundle.get("CFBundleShortVersionString")
                if not version:
                    version = bundle.get("CFBundleVersion")
                if version:
                    cleaned = str(version).strip()
                    return cleaned if cleaned else None
        except Exception:
            continue
    return None
765
+
766
+
767
def reaper_status() -> dict[str, Any] | None:
    """Last cycle written by reap_stale_claude_sessions.py::write_status(), or None.

    The reaper is a SEPARATE launchd job (com.m13v.social-claude-reaper) whose stderr
    only lands in a local file, so its outcome was invisible centrally. It now drops a
    reaper-status.json each cycle; we carry it on the heartbeat so a stuck/blind reaper
    (e.g. ps_timed_out, or unparsed_worker_procs climbing while it kills nothing) is
    visible in installation_resource_samples. Also surfaces staleness: if the file has
    not been touched recently the reaper itself may be dead.

    Returns:
        A dict of the status fields plus ``age_sec`` (seconds since the file
        was last modified, or None if it could not be stat'ed). None when the
        file is missing, unreadable, not valid JSON, or not a JSON object.
    """
    path = (
        Path(os.environ.get("S4L_STATE_DIR", str(Path.home() / ".social-autoposter-mcp")))
        / "claude-queue"
        / "reaper-status.json"
    )
    passthrough = (
        "mode",
        "claude_killed",
        "macos_mcp_killed",
        "worker_probe_seen",
        "reapable_workers",
        "unparsed_worker_procs",
        "macos_mcp_seen",
        "leaked_groups",
        "ps_timed_out",
        "snapshot_empty",
    )
    try:
        if not path.exists():
            return None
        ds = json.loads(path.read_text())
        # A partially written or foreign file may decode to a non-object;
        # report "no status" instead of raising AttributeError on `.get`.
        if not isinstance(ds, dict):
            return None
        age = None
        try:
            age = round(time.time() - path.stat().st_mtime, 1)
        except OSError:
            pass
        status: dict[str, Any] = {
            "ts": ds.get("ts"),
            "age_sec": age,  # seconds since the reaper last wrote — >120s hints it is dead
        }
        for key in passthrough:
            status[key] = ds.get(key)
        return status
    except (OSError, ValueError, TypeError):
        return None
806
+
807
+
808
+ def _tail_lines(path: Path, n: int, approx_line_bytes: int = 4096) -> list[str]:
809
+ """Return the last `n` lines of a possibly-large file without reading it all.
810
+ Reads a bounded tail window (n * approx_line_bytes) from the end. Best-effort."""
811
+ try:
812
+ size = path.stat().st_size
813
+ want = min(size, n * approx_line_bytes)
814
+ with path.open("rb") as f:
815
+ f.seek(size - want)
816
+ data = f.read()
817
+ text = data.decode("utf-8", "replace")
818
+ lines = text.splitlines()
819
+ # Drop a possibly-truncated first line when we did not start at byte 0.
820
+ if want < size and lines:
821
+ lines = lines[1:]
822
+ return lines[-n:]
823
+ except Exception:
824
+ return []
825
+
826
+
827
def _maybe_leak_alert(output: Path, current: dict[str, Any]) -> None:
    """Raise a leak alert when a watched process group keeps growing.

    Reads the last N snapshot lines back from ``output`` (the JSONL file that
    was just appended to). A group is reported when its ``count`` never
    decreases across those N samples, grows by at least the configured
    minimum from first to last, and ends at or above the floor. A group that
    is missing from a sample counts as 0.

    An alert prints a ``LEAK_ALERT`` marker to stderr, then tries to send a
    Sentry warning (best-effort), and finally touches a cooldown file under
    the claude-queue state directory so that an ongoing leak alerts once per
    cooldown window instead of on every run.

    Tunables (environment): ``S4L_LEAK_ALERT_SAMPLES`` (default 5, minimum
    3), ``S4L_LEAK_ALERT_FLOOR`` (20), ``S4L_LEAK_ALERT_CLIMB_MIN`` (12),
    ``S4L_LEAK_ALERT_COOLDOWN`` seconds (1800).

    Args:
        output: Path of the snapshot JSONL file.
        current: The snapshot that was just written; not read by this function.
    """
    # Watch claude_cli (the runaway worker fan-out) and the claude sessions
    # CONFIGURED with the remote-macos-use MCP. A past double-leak lived
    # entirely in these two groups while remote_macos_mcp_servers (the
    # standalone server procs) stayed at 0, so watching the server group
    # would have been blind.
    watched_groups = ("claude_cli", "sessions_configured_remote_macos_mcp")
    samples = max(3, _env_int("S4L_LEAK_ALERT_SAMPLES", 5))  # consecutive climbs required
    floor = _env_int("S4L_LEAK_ALERT_FLOOR", 20)  # ignore below this count
    climb_min = _env_int("S4L_LEAK_ALERT_CLIMB_MIN", 12)  # min first->last growth
    cooldown_s = _env_int("S4L_LEAK_ALERT_COOLDOWN", 1800)

    history: list[dict[str, Any]] = []
    for raw_line in _tail_lines(output, samples):
        try:
            parsed = json.loads(raw_line)
        except Exception:
            continue
        history.append(parsed)
    if len(history) < samples:
        return
    window = history[-samples:]

    def group_counts(group: str) -> list[int]:
        collected = []
        for snap in window:
            info = (snap.get("groups") or {}).get(group) or {}
            count = info.get("count")
            collected.append(int(count) if isinstance(count, (int, float)) else 0)
        return collected

    leaking: list[str] = []
    for group in watched_groups:
        vals = group_counts(group)
        never_drops = all(earlier <= later for earlier, later in zip(vals, vals[1:]))
        first, last = vals[0], vals[-1]
        if never_drops and (last - first) >= climb_min and last >= floor:
            leaking.append(f"{group} {first}->{last} over {samples} samples")
    if not leaking:
        return

    # Cooldown: one page per window even if the leak persists for hours.
    state = Path(os.environ.get("S4L_STATE_DIR", str(Path.home() / ".social-autoposter-mcp"))) / "claude-queue"
    cooldown = state / "leak-alert.cooldown"
    now = time.time()
    try:
        if cooldown.exists() and (now - cooldown.stat().st_mtime) < cooldown_s:
            return
    except OSError:
        pass

    reason = "; ".join(leaking)
    # Always emit the stderr marker (parsed into the dashboard even without Sentry).
    print(f"LEAK_ALERT {reason}", file=sys.stderr)
    try:
        import sentry_init

        sentry_init.init()
        sentry_init.capture_message(
            f"process-group leak climbing: {reason}",
            level="warning",
            tags={
                "component": "leak_detector",
                "hostname": socket.gethostname(),
                "claude_desktop_version": claude_desktop_version() or "unknown",
                "app_version": _app_version() or "unknown",
            },
        )
        sentry_init.flush(3.0)
    except Exception:
        pass
    try:
        state.mkdir(parents=True, exist_ok=True)
        cooldown.write_text(str(now))
    except Exception:
        pass
916
+
917
+
918
+ def _env_int(name: str, default: int) -> int:
919
+ try:
920
+ return int(os.environ.get(name, default))
921
+ except (TypeError, ValueError):
922
+ return default
923
+
924
+
925
def main() -> int:
    """Command-line entry point.

    With ``--summary``, prints the slim heartbeat summary as compact JSON to
    stdout and exits without touching the log. Otherwise rotates the output
    log if it is over ``--max-bytes``, appends one snapshot line, runs the
    leak check against the log, and prints a one-line status.

    Defaults for ``--output``, ``--top`` and ``--max-bytes`` come from
    ``S4L_MEMORY_SNAPSHOT_LOG``, ``S4L_MEMORY_TOP_N`` and
    ``S4L_MEMORY_MAX_BYTES``.

    Returns:
        Process exit code (0 on success).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--output", default=os.environ.get("S4L_MEMORY_SNAPSHOT_LOG", str(DEFAULT_OUTPUT)))
    # _env_int falls back to the default on a malformed value; a bare int()
    # here raised ValueError and killed the run before any snapshot was taken.
    parser.add_argument("--top", type=int, default=_env_int("S4L_MEMORY_TOP_N", 30))
    parser.add_argument("--max-bytes", type=int, default=_env_int("S4L_MEMORY_MAX_BYTES", 100 * 1024 * 1024))
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Print a slim JSON summary to stdout and exit (no JSONL write). Used by the heartbeat.",
    )
    args = parser.parse_args()

    if args.summary:
        sys.stdout.write(json.dumps(build_summary(), separators=(",", ":")))
        return 0

    output = Path(args.output)
    output.parent.mkdir(parents=True, exist_ok=True)
    rotate_log(output, args.max_bytes)

    snapshot = build_snapshot(max(1, args.top))
    with output.open("a", encoding="utf-8") as fh:
        fh.write(json.dumps(snapshot, sort_keys=True, separators=(",", ":")) + "\n")

    # Proactive leak page: reads the tail of the JSONL we just appended to, so no
    # extra state. Runs after the snapshot line is already on disk.
    _maybe_leak_alert(output, snapshot)

    groups = snapshot.get("groups", {})
    queues = snapshot.get("queues", {})
    claude_queue = queues.get("claude_queue", {}) if isinstance(queues, dict) else {}
    print(
        "memory_snapshot "
        f"ts={snapshot['ts']} "
        f"social_repo_processes_mb={groups.get('social_autoposter_repo_processes', {}).get('rss_mb', 0)} "
        f"saps_mcp_servers={groups.get('social_autoposter_mcp_servers', {}).get('count', 0)} "
        f"saps_mcp_servers_mb={groups.get('social_autoposter_mcp_servers', {}).get('rss_mb', 0)} "
        f"saps_configured_sessions={groups.get('sessions_configured_social_autoposter_mcp', {}).get('count', 0)} "
        f"remote_macos_mcp_servers_mb={groups.get('remote_macos_mcp_servers', {}).get('rss_mb', 0)} "
        f"claude_queue_pending={claude_queue.get('pending_total', 0)} "
        f"claude_queue_running={claude_queue.get('running_total', 0)} "
        f"output={output}"
    )
    return 0
969
+
970
+
971
# Script entry point: exit with main()'s return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())