@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,1563 @@
1
+ #!/usr/bin/env python3
2
+ """JSON wrapper around project_stats.py for the dashboard /api/funnel/stats endpoint.
3
+
4
+ Emits a single JSON object on stdout: { generated_at, days, projects: [ ... ], overall }.
5
+ Keeps project_stats.py untouched (it is chflags uchg-locked).
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import re
11
+ import socket
12
+ import sys
13
+ import time
14
+ import urllib.error
15
+ import urllib.parse
16
+ import urllib.request
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from datetime import datetime, timedelta, timezone
19
+
20
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
21
+
22
+ import project_stats as ps
23
+ from project_slugs import bookings_require_utm as _bookings_require_utm
24
+
25
+
26
# File names that mark a directory as a page: _scan_repo_pages records a URL
# path for every directory that contains at least one of these.
_PAGE_FILENAMES = ("page.tsx", "page.ts", "page.jsx", "page.js", "page.mdx", "page.md")
27
+
28
+
29
+ def _normalize_platform(p):
30
+ """Lowercase + alias 'x' -> 'twitter'. Empty / 'all' / None -> '' (no filter).
31
+
32
+ Matches the same normalization used by /api/style/stats so the
33
+ project final stats table speaks the same vocabulary as the
34
+ engagement-style table when the dashboard's platform pill is set.
35
+ """
36
+ if not p:
37
+ return ""
38
+ v = str(p).strip().lower()
39
+ if v in ("", "all"):
40
+ return ""
41
+ return "twitter" if v == "x" else v
42
+
43
+
44
+ def _platform_sql_clause(platform, table_alias=""):
45
+ """Return an SQL fragment (string, no placeholders) that:
46
+
47
+ 1. Filters to the given platform when one is provided (empty = no filter).
48
+
49
+ Mentions live in the dedicated `mentions` table now (2026-05-23 cutover);
50
+ no posts-level filter needed. Previously this clause excluded placeholder
51
+ `posts` rows where our_content = '(mention - no original post)', which is
52
+ no longer present after migrate_mentions_out_of_posts.py --commit-delete.
53
+
54
+ Folds the 'x' -> 'twitter' alias inside the SQL so reddit/linkedin/twitter
55
+ all just work. Caller is responsible for placement inside the WHERE.
56
+ """
57
+ if not platform:
58
+ return ""
59
+ prefix = (table_alias + ".") if table_alias else ""
60
+ # Safe: platform has already passed the [a-z0-9_]{1,32} regex in the caller.
61
+ return (
62
+ " AND LOWER(CASE WHEN LOWER(" + prefix + "platform)='x' "
63
+ "THEN 'twitter' ELSE " + prefix + "platform END) = '" + platform + "'"
64
+ )
65
+
66
+
67
+ # Synthetic project name for rows in `posts` where project_name IS NULL.
68
+ # Keeps off-config / un-tagged posts visible on the dashboard without
69
+ # polluting real project rows. Chosen to be unambiguous and distinct from
70
+ # any historical 'General' rows the table once had.
71
+ SYNTHETIC_NO_PROJECT_NAME = "(no project)"
72
+
73
+
74
+ def _project_filter_sql(proj_name, table_alias="p"):
75
+ """Return (clause, params) for a per-project WHERE filter.
76
+
77
+ Real projects -> "<alias>.project_name = %s" with (proj_name,).
78
+ Synthetic "(no project)" bucket -> "<alias>.project_name IS NULL" with ().
79
+ Centralizes the NULL-vs-equality choice so every per-project SQL site
80
+ handles the synthetic bucket the same way.
81
+ """
82
+ prefix = (table_alias + ".") if table_alias else ""
83
+ if proj_name == SYNTHETIC_NO_PROJECT_NAME:
84
+ return (prefix + "project_name IS NULL", ())
85
+ return (prefix + "project_name = %s", (proj_name,))
86
+
87
+
88
+ def _bridge_per_project_posthog_keys_from_keychain(config, env):
89
+ import subprocess
90
+ seen = set()
91
+ for proj in config.get("projects", []) or []:
92
+ name_env = ((proj.get("posthog") or {}).get("api_key_env") or "").strip()
93
+ if not name_env or name_env in seen or name_env == "POSTHOG_PERSONAL_API_KEY":
94
+ continue
95
+ seen.add(name_env)
96
+ if env.get(name_env):
97
+ continue
98
+ try:
99
+ v = subprocess.check_output(
100
+ ["security", "find-generic-password", "-s", name_env, "-w"],
101
+ stderr=subprocess.DEVNULL,
102
+ ).decode().strip()
103
+ except subprocess.CalledProcessError:
104
+ continue
105
+ if v:
106
+ env[name_env] = v
107
+
108
+
109
def _scan_repo_pages(repo_path):
    """Collect the URL paths a Next.js app-router repo ships as static files.

    Looks under both `<repo>/src/app` and `<repo>/app` (after `~` expansion)
    and records one URL path for every directory holding a page file.

    Per Next.js conventions, dynamic segments ([slug], [...rest]), private
    folders (_foo) and parallel-route slots (@slot) exclude their whole
    branch; dot-directories and node_modules are not descended into; route
    groups ((group)) are dropped from the URL but still traversed.

    Returns a set of paths ("/" for the app root); empty when `repo_path`
    is falsy or neither app root exists.
    """
    found = set()
    if not repo_path:
        return found
    base = os.path.expanduser(repo_path)
    blocked = ("[", "_", "@")
    for app_root in (os.path.join(base, "src", "app"), os.path.join(base, "app")):
        if not os.path.isdir(app_root):
            continue
        for current, subdirs, files in os.walk(app_root):
            relative = os.path.relpath(current, app_root)
            parts = relative.split(os.sep) if relative != "." else []
            if any(part.startswith(blocked) for part in parts):
                # Already inside an excluded branch: stop descending.
                subdirs[:] = []
                continue
            # Prune in place so os.walk never enters excluded children.
            subdirs[:] = [
                name for name in subdirs
                if not name.startswith(blocked + (".",)) and name not in ("node_modules",)
            ]
            if not any(name in _PAGE_FILENAMES for name in files):
                continue
            visible = [
                part for part in parts
                if not (part.startswith("(") and part.endswith(")"))
            ]
            found.add("/" + "/".join(visible) if visible else "/")
    return found
141
+
142
+
143
+ def _db_created_pages(conn, product_name, days=None):
144
+ """Return {domain: set(paths)} for pages this project published via the SEO
145
+ pipelines (seo_keywords) or GSC-driven page generation (gsc_queries).
146
+
147
+ When `days` is set, restrict to pages whose `completed_at` falls inside the
148
+ window. The seo_keywords / gsc_queries rows get `completed_at` stamped when
149
+ the page is actually generated, so this matches "pages created in the last
150
+ N days" as used by the dashboard's period selector.
151
+ """
152
+ out = {}
153
+ window_sql = ""
154
+ if days is not None:
155
+ window_sql = f" AND completed_at >= NOW() - INTERVAL '{int(days)} days'"
156
+ for sql in (
157
+ "SELECT page_url FROM seo_keywords WHERE product = %s AND page_url IS NOT NULL" + window_sql,
158
+ "SELECT page_url FROM gsc_queries WHERE product = %s AND page_url IS NOT NULL" + window_sql,
159
+ ):
160
+ try:
161
+ cur = conn.execute(sql, (product_name,))
162
+ for row in cur.fetchall():
163
+ url = row[0]
164
+ if not url:
165
+ continue
166
+ try:
167
+ parsed = urllib.parse.urlparse(url)
168
+ except Exception:
169
+ continue
170
+ host = (parsed.netloc or "").lower()
171
+ path = parsed.path or "/"
172
+ while len(path) > 1 and path.endswith("/"):
173
+ path = path[:-1]
174
+ if not host:
175
+ continue
176
+ out.setdefault(host, set()).add(path)
177
+ except Exception as e:
178
+ print(f" _db_created_pages query error: {e}", file=sys.stderr)
179
+ return out
180
+
181
+
182
def _created_paths_for_project(conn, proj, days=None):
    """Return {domain: set(paths)} of pages we created for this project.

    The result is the union of two sources:
      * a filesystem scan of the project's landing-pages repo, whose paths
        are attributed to every domain the project owns (lowercased);
      * URLs logged in seo_keywords / gsc_queries, keyed by their own host.

    When `days` is set the filesystem scan is skipped entirely: static page
    files on disk carry no creation timestamp we can trust, so a
    window-scoped "pages created in the last N days" answer has to come
    from the DB alone.
    """
    merged = {}
    owned_domains = ps.get_project_domains(proj) or []
    if days is None:
        landing = proj.get("landing_pages") or {}
        repo_path = landing.get("repo") if isinstance(landing, dict) else None
        scanned = _scan_repo_pages(repo_path) if repo_path else set()
        for domain in owned_domains:
            merged.setdefault(domain.lower(), set()).update(scanned)
    logged = _db_created_pages(conn, proj.get("name") or "", days=days)
    for host, paths in logged.items():
        merged.setdefault(host, set()).update(paths)
    return merged
204
+
205
+
206
+ def _norm_path(p):
207
+ """Match the frontend `normPath` in bin/server.js so PostHog pathnames
208
+ (`properties.$pathname`) and DB-derived created paths compare cleanly.
209
+ """
210
+ s = str(p or "/")
211
+ if not s.startswith("/"):
212
+ s = "/" + s
213
+ while len(s) > 1 and s.endswith("/"):
214
+ s = s[:-1]
215
+ return s
216
+
217
+
218
+ # HogQL-based PostHog query layer.
219
+ #
220
+ # project_stats.py uses the events LIST endpoint with limit=1000 and no
221
+ # pagination, so any (domain, event) that exceeds 1000 occurrences in the
222
+ # window silently caps at 1000 and misreports the funnel. We swap that out
223
+ # for HogQL aggregate queries (COUNT/GROUP BY), which return the true
224
+ # totals in a single call per query.
225
# Allow-list for hostnames: letters, digits, dot, underscore and hyphen only.
# _ph_batch_counts quotes matching domains directly into the HogQL text, so
# anything else is rejected there and flagged as "unsafe domain".
_SAFE_DOMAIN_RE = re.compile(r"^[A-Za-z0-9._-]+$")
226
+
227
+
228
+ class HogqlError(Exception):
229
+ """Raised when a HogQL query fails after all retries.
230
+
231
+ Caller is expected to surface this as an error on the affected rows
232
+ instead of silently rendering zeros.
233
+ """
234
+
235
+
236
+ _RETRY_BACKOFF_S = (2.0, 5.0, 12.0)
237
+ _RETRY_AFTER_CAP_S = 30.0
238
+
239
+
240
+ def _hogql(api_key, project_id, query, timeout=120, max_attempts=4):
241
+ """Run a HogQL query against /api/projects/{pid}/query/.
242
+
243
+ Retries on 429 (throttled), 5xx, and socket read timeouts. Honors
244
+ `Retry-After` up to `_RETRY_AFTER_CAP_S`; otherwise uses
245
+ `_RETRY_BACKOFF_S`. Raises `HogqlError` on permanent failure so
246
+ callers can mark rows as errored rather than zero.
247
+
248
+ NOTE: the batched-by-$host queries cover many domains in one scan, so a
249
+ single query for a large shared PostHog bucket (e.g. pid 330744 with
250
+ ~18 projects) can run >60s on a cold cache. A socket read timeout
251
+ surfaces as `socket.timeout`/`TimeoutError`, which is a sibling of
252
+ `urllib.error.URLError` (both subclass OSError), so it must be caught
253
+ explicitly; otherwise it escapes this retry loop and the caller marks
254
+ the entire bucket as errored on the very first slow query, rendering
255
+ 'err' for every project sharing that bucket.
256
+ """
257
+ url = f"https://us.posthog.com/api/projects/{project_id}/query/"
258
+ body = json.dumps({"query": {"kind": "HogQLQuery", "query": query}}).encode("utf-8")
259
+ last_err = None
260
+ for attempt in range(max_attempts):
261
+ req = urllib.request.Request(url, data=body, method="POST", headers={
262
+ "Authorization": f"Bearer {api_key}",
263
+ "Content-Type": "application/json",
264
+ })
265
+ try:
266
+ with urllib.request.urlopen(req, timeout=timeout) as resp:
267
+ data = json.loads(resp.read())
268
+ return data.get("results", []) or []
269
+ except urllib.error.HTTPError as e:
270
+ try:
271
+ detail = e.read().decode("utf-8", errors="replace")[:300]
272
+ except Exception:
273
+ detail = ""
274
+ last_err = f"HTTP {e.code}: {detail}"
275
+ retryable = (e.code == 429) or (500 <= e.code < 600)
276
+ if not retryable or attempt == max_attempts - 1:
277
+ print(f" HogQL HTTPError {e.code}: {detail} | query={query[:120]}", file=sys.stderr)
278
+ break
279
+ wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
280
+ try:
281
+ ra = e.headers.get("Retry-After") if e.headers else None
282
+ if ra is not None:
283
+ wait = min(_RETRY_AFTER_CAP_S, max(wait, float(ra)))
284
+ except Exception:
285
+ pass
286
+ print(f" HogQL {e.code} retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s | query={query[:80]}", file=sys.stderr)
287
+ time.sleep(wait)
288
+ continue
289
+ except (socket.timeout, TimeoutError) as e:
290
+ # Read timeout on a heavy batched query. Retryable: a retry
291
+ # often hits a warm cache and returns in time. Caught before
292
+ # URLError because TimeoutError is NOT a URLError subclass.
293
+ last_err = f"read timeout after {timeout}s: {e}"
294
+ if attempt == max_attempts - 1:
295
+ print(f" HogQL timeout (>{timeout}s): {e} | query={query[:120]}", file=sys.stderr)
296
+ break
297
+ wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
298
+ print(f" HogQL timeout retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s | query={query[:80]}", file=sys.stderr)
299
+ time.sleep(wait)
300
+ continue
301
+ except urllib.error.URLError as e:
302
+ # A URLError can also wrap a socket.timeout (e.reason). Treat
303
+ # those as the retryable timeout case above.
304
+ last_err = f"URLError: {e}"
305
+ if attempt == max_attempts - 1:
306
+ print(f" HogQL URLError: {e} | query={query[:120]}", file=sys.stderr)
307
+ break
308
+ wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
309
+ print(f" HogQL URLError retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s: {e}", file=sys.stderr)
310
+ time.sleep(wait)
311
+ continue
312
+ raise HogqlError(last_err or "unknown HogQL failure")
313
+
314
+
315
+ def _empty_domain_stats(domain, error=None):
316
+ """Zero'd per-domain stats. If `error` is set, treat the zeros as
317
+ UNKNOWN (not truly 0) so the dashboard can render an error cell
318
+ instead of silently misreporting."""
319
+ out = {
320
+ "pageviews": 0,
321
+ "cta_clicks": 0,
322
+ "email_signups": 0,
323
+ "schedule_clicks": 0,
324
+ "get_started_clicks": 0,
325
+ "cross_product_clicks": 0,
326
+ "pageview_details": {domain: {
327
+ "total": 0,
328
+ "top_pages": {},
329
+ "top_pages_signups": {},
330
+ "top_pages_schedule": {},
331
+ "top_pages_get_started": {},
332
+ }},
333
+ "cta_details": [],
334
+ }
335
+ if error:
336
+ out["error"] = error
337
+ return out
338
+
339
+
340
# Legacy + canonical event names for the "get started" click, pre-formatted
# as a parenthesized list of quoted names. Fazm fires `download_click`,
# Assrt fires `cta_get_started_clicked`, and new sites fire
# `get_started_click`. To be collapsed back to a single name once both old
# sites migrate to trackGetStartedClick.
# NOTE(review): presumably spliced after `event IN` in a HogQL WHERE clause;
# confirm at the use site.
_GET_STARTED_EVENTS = "('get_started_click', 'download_click', 'cta_get_started_clicked')"
345
+
346
+
347
def _ph_batch_counts(api_key, project_id, domains, after_iso):
    """Fetch per-domain PostHog aggregates for every `domain` in one batched
    pass against a single (api_key, project_id) bucket.

    The previous implementation fired ~10 HogQL queries per domain, which
    fanned out to 100+ concurrent requests and tripped PostHog's rate
    limiter; throttled calls silently returned 0, misreporting every
    project except the one with its own dedicated API key.

    This version groups each aggregate by `properties.$host`, so one query
    covers every domain in the bucket. Returns `{domain: stats_dict}` in
    the same shape the old per-domain function produced. On permanent
    HogQL failure, raises `HogqlError` so the caller can mark rows as
    errored rather than rendering a misleading zero.

    Args:
        api_key: PostHog API key, forwarded unchanged to `_hogql`.
        project_id: PostHog project id, forwarded unchanged to `_hogql`.
        domains: re-iterable collection of host names. Entries that do not
            match `_SAFE_DOMAIN_RE` are never spliced into a query; their
            row stays zeroed and gets `error = "unsafe domain"`.
        after_iso: ISO-style lower bound of the window. The "T" separator
            is replaced by a space before it is handed to `toDateTime`.
            When empty, no query runs and every row keeps its defaults.

    Returns:
        dict keyed by domain. Each value carries `pageviews`, `cta_clicks`,
        `email_signups`, `schedule_clicks`, `get_started_clicks`,
        `cross_product_clicks`, `pageview_details` and `cta_details`.
    """
    result = {d: _empty_domain_stats(d) for d in domains}
    safe_domains = []
    for d in domains:
        if _SAFE_DOMAIN_RE.match(d or ""):
            safe_domains.append(d)
        else:
            print(f" skip unsafe domain: {d!r}", file=sys.stderr)
            result[d]["error"] = "unsafe domain"
    if not safe_domains:
        return result

    after_str = (after_iso or "").replace("T", " ")
    if not after_str:
        return result

    def _quote_hosts(hosts):
        # Only hosts that matched _SAFE_DOMAIN_RE reach this helper; they
        # are spliced verbatim into the HogQL `IN (...)` list.
        return ", ".join(f"'{h}'" for h in hosts)

    in_list = _quote_hosts(safe_domains)

    def _count_by_host(event_clause, distinct_key=None):
        # Pass `distinct_key` (e.g. "properties.email") to dedupe across
        # double-fired events for the same conversion. Used for email
        # signups where both `newsletter_subscribed` (client) and
        # `newsletter_subscribed_server` (server) fire for one submission.
        count_expr = (
            f"count(DISTINCT {distinct_key}) AS c"
            if distinct_key
            else "count() AS c"
        )
        q = (
            f"SELECT properties.$host AS host, {count_expr} FROM events "
            f"WHERE {event_clause} "
            f"AND properties.$host IN ({in_list}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            "GROUP BY host"
        )
        rows = _hogql(api_key, project_id, q)
        return {r[0]: int(r[1]) for r in (rows or []) if r and r[0]}

    def _top_pages_by_host(event_clause, row_cap=5000, distinct_key="distinct_id"):
        # All per-page breakdowns count unique users (distinct_id) rather than
        # raw events. A visitor that views the same /pricing twice or rage-
        # clicks the same CTA still counts as 1. Pass `distinct_key=None` to
        # opt back into raw count() for legacy callers.
        count_expr = (
            f"count(DISTINCT {distinct_key}) AS c"
            if distinct_key
            else "count() AS c"
        )
        q = (
            f"SELECT properties.$host AS host, properties.$pathname AS path, {count_expr} FROM events "
            f"WHERE {event_clause} "
            f"AND properties.$host IN ({in_list}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            f"GROUP BY host, path ORDER BY c DESC LIMIT {int(row_cap)}"
        )
        rows = _hogql(api_key, project_id, q)
        out = {d: {} for d in safe_domains}
        for r in (rows or []):
            host = r[0] if len(r) > 0 else None
            path = r[1] if len(r) > 1 and r[1] else "/"
            cnt = int(r[2]) if len(r) > 2 else 0
            if host in out:
                out[host][path] = cnt
        return out

    # Email signups: client `newsletter_subscribed` is ad-blocker-lossy
    # (~57% capture). Server-side `newsletter_subscribed_server` (added in
    # @m13v/seo-components v0.38) is ground truth. Both are counted, and the
    # caller dedupes so a client + server pair for one submission is one.
    _SIGNUP_CLAUSE = (
        "event IN ('newsletter_subscribed', 'newsletter_subscribed_server')"
    )

    # Visitors, not raw pageviews. Globally consistent with every other
    # column in this batch (cta_clicks, schedule_clicks, get_started_clicks,
    # cross_product_clicks, email_signups all count unique users). A visitor
    # bouncing between /pricing and /docs still counts as 1.
    #
    # PERF/RATE-LIMIT: PostHog throttles the (shared) personal API key
    # org-wide on the /query endpoint (429 "throttled"). The five
    # unique-visitor counts that all dedupe on distinct_id are folded into
    # ONE grouped query (bucketed by event via multiIf) instead of five
    # separate HogQL calls. Email signups stay on their own query because
    # they dedupe on coalesce(email, distinct_id), a different key. This
    # halves the per-bucket request count, which is what was blowing the
    # rate limit when all windows/buckets fired at once.
    def _multi_count_by_host():
        # The trailing multiIf default ('cross') can only be reached by
        # `cross_product_click`, because the WHERE clause below admits no
        # other event outside the explicitly matched ones.
        bucket_expr = (
            "multiIf("
            "event = '$pageview', 'pv', "
            "event = 'cta_click', 'cta', "
            "event = 'schedule_click', 'sched', "
            f"event IN {_GET_STARTED_EVENTS}, 'gs', "
            "'cross')"
        )
        q = (
            f"SELECT properties.$host AS host, {bucket_expr} AS bkt, "
            "count(DISTINCT distinct_id) AS c FROM events "
            "WHERE event IN ('$pageview', 'cta_click', 'schedule_click', "
            "'cross_product_click', 'get_started_click', 'download_click', "
            "'cta_get_started_clicked') "
            f"AND properties.$host IN ({in_list}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            "GROUP BY host, bkt"
        )
        rows = _hogql(api_key, project_id, q)
        out = {"pv": {}, "cta": {}, "sched": {}, "gs": {}, "cross": {}}
        for r in (rows or []):
            host = r[0] if len(r) > 0 else None
            bkt = r[1] if len(r) > 1 else None
            cnt = int(r[2]) if len(r) > 2 else 0
            if host and bkt in out:
                out[bkt][host] = cnt
        return out

    _counts = _multi_count_by_host()
    pv_total = _counts["pv"]
    cta_total = _counts["cta"]
    sched_total = _counts["sched"]
    # Get Started = unique users who took the conversion action, not raw clicks.
    get_started_total = _counts["gs"]
    cross_product_total = _counts["cross"]
    # Kept as its own query: the distinct key (email, falling back to
    # distinct_id) differs from the distinct_id batch above.
    signup_total = _count_by_host(
        _SIGNUP_CLAUSE,
        distinct_key="coalesce(properties.email, distinct_id)",
    )

    # Per-page breakdowns. The big $pageview scan keeps its own large cap;
    # the three low-volume conversion breakdowns (signup/sched/get_started,
    # all distinct_id) fold into one grouped-by-event query for the same
    # rate-limit reason as the counts above.
    top_pv = _top_pages_by_host("event = '$pageview'", row_cap=5000)

    def _multi_top_pages_small():
        bucket_expr = (
            "multiIf("
            "event = 'schedule_click', 'sched', "
            f"event IN {_GET_STARTED_EVENTS}, 'gs', "
            "'signup')"
        )
        q = (
            f"SELECT properties.$host AS host, properties.$pathname AS path, "
            f"{bucket_expr} AS bkt, count(DISTINCT distinct_id) AS c FROM events "
            "WHERE event IN ('schedule_click', 'newsletter_subscribed', "
            "'newsletter_subscribed_server', 'get_started_click', "
            "'download_click', 'cta_get_started_clicked') "
            f"AND properties.$host IN ({in_list}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            "GROUP BY host, path, bkt ORDER BY c DESC LIMIT 1500"
        )
        rows = _hogql(api_key, project_id, q)
        out = {
            "signup": {d: {} for d in safe_domains},
            "sched": {d: {} for d in safe_domains},
            "gs": {d: {} for d in safe_domains},
        }
        for r in (rows or []):
            host = r[0] if len(r) > 0 else None
            path = r[1] if len(r) > 1 and r[1] else "/"
            bkt = r[2] if len(r) > 2 else None
            cnt = int(r[3]) if len(r) > 3 else 0
            if bkt in out and host in out[bkt]:
                out[bkt][host][path] = cnt
        return out

    _tp = _multi_top_pages_small()
    top_signup = _tp["signup"]
    top_sched = _tp["sched"]
    top_get_started = _tp["gs"]

    cta_details_by_host = {d: [] for d in safe_domains}

    def _collect_cta_details(rows, per_host_cap=10):
        # Shared by the `cta_click` and `$autocapture` detail queries, which
        # select the same five columns. Rows arrive newest-first, so each
        # host keeps its `per_host_cap` most recent clicks.
        for r in (rows or []):
            host = r[0] if len(r) > 0 else None
            el_text = r[1] if len(r) > 1 else None
            text = r[2] if len(r) > 2 else None
            section = r[3] if len(r) > 3 else None
            ts = r[4] if len(r) > 4 else None
            bucket = cta_details_by_host.get(host)
            if bucket is None or len(bucket) >= per_host_cap:
                continue
            bucket.append({
                "text": el_text or text or "?",
                "section": section or "?",
                "time": (str(ts)[:16] if ts else "?"),
            })

    if any(v > 0 for v in cta_total.values()):
        cta_detail_q = (
            "SELECT properties.$host AS host, properties.$el_text, properties.text, properties.section, timestamp "
            "FROM events "
            "WHERE event = 'cta_click' "
            f"AND properties.$host IN ({in_list}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            "ORDER BY timestamp DESC LIMIT 200"
        )
        _collect_cta_details(_hogql(api_key, project_id, cta_detail_q))

    # Autocapture fallback: only domains with zero `cta_click` get the
    # "$autocapture clicks whose text contains 'book'" treatment. Batched
    # like everything else so we don't fan out.
    fallback_hosts = [d for d in safe_domains if cta_total.get(d, 0) == 0]
    if fallback_hosts:
        fb_in = _quote_hosts(fallback_hosts)
        ac_total_q = (
            "SELECT properties.$host AS host, count(DISTINCT distinct_id) AS c FROM events "
            "WHERE event = '$autocapture' "
            f"AND properties.$host IN ({fb_in}) "
            f"AND timestamp >= toDateTime('{after_str}') "
            "AND lower(properties.$el_text) LIKE '%book%' "
            "GROUP BY host"
        )
        ac_rows = _hogql(api_key, project_id, ac_total_q)
        ac_total = {r[0]: int(r[1]) for r in (ac_rows or []) if r and r[0]}
        hosts_with_ac = [d for d in fallback_hosts if ac_total.get(d, 0) > 0]
        if hosts_with_ac:
            ac_in = _quote_hosts(hosts_with_ac)
            ac_detail_q = (
                "SELECT properties.$host AS host, properties.$el_text, properties.text, properties.section, timestamp "
                "FROM events "
                "WHERE event = '$autocapture' "
                f"AND properties.$host IN ({ac_in}) "
                f"AND timestamp >= toDateTime('{after_str}') "
                "AND lower(properties.$el_text) LIKE '%book%' "
                "ORDER BY timestamp DESC LIMIT 200"
            )
            _collect_cta_details(_hogql(api_key, project_id, ac_detail_q))
        # Roll autocapture counts into cta_total so the funnel "cta_clicks"
        # column matches the detail list for fallback domains.
        for h, c in ac_total.items():
            cta_total[h] = max(cta_total.get(h, 0), c)

    for d in safe_domains:
        pv = pv_total.get(d, 0)
        result[d] = {
            "pageviews": pv,
            "cta_clicks": cta_total.get(d, 0),
            "email_signups": signup_total.get(d, 0),
            "schedule_clicks": sched_total.get(d, 0),
            "get_started_clicks": get_started_total.get(d, 0),
            "cross_product_clicks": cross_product_total.get(d, 0),
            "pageview_details": {d: {
                "total": pv,
                "top_pages": top_pv.get(d, {}),
                "top_pages_signups": top_signup.get(d, {}),
                "top_pages_schedule": top_sched.get(d, {}),
                "top_pages_get_started": top_get_started.get(d, {}),
            }},
            "cta_details": cta_details_by_host.get(d, []),
        }
    return result
629
+
630
+
631
def _ph_combine(per_domain):
    """Merge several per-domain stats dicts into one project-level dict.

    The six numeric counters are summed (a missing counter counts as 0),
    the `pageview_details` mappings are merged key by key with later
    entries winning, and the `cta_details` lists are concatenated in
    input order.
    """
    counter_keys = (
        "pageviews",
        "cta_clicks",
        "email_signups",
        "schedule_clicks",
        "get_started_clicks",
        "cross_product_clicks",
    )
    combined = dict.fromkeys(counter_keys, 0)
    combined["pageview_details"] = {}
    combined["cta_details"] = []
    for stats in per_domain:
        for key in counter_keys:
            combined[key] += stats.get(key, 0)
        combined["pageview_details"].update(stats.get("pageview_details", {}))
        combined["cta_details"].extend(stats.get("cta_details", []))
    return combined
652
+
653
+
654
def _bookings_shared(bookings_conn, client_slug, days, table="cal_bookings", require_utm=False):
    """Same output shape as ps.get_booking_stats, but reuses a shared psycopg2
    connection instead of opening a fresh one per project.

    `table` is `cal_bookings` (Cal.com) or `calendly_bookings` (Calendly).
    `require_utm` gates `real_bookings` on `utm_source IS NOT NULL` for
    projects whose booking destination is shared with non-marketing inbound
    (set in config.json via `bookings_require_utm`).

    Args:
        bookings_conn: connection exposing `cursor()`; falsy means "no
            bookings DB", which yields None.
        client_slug: value matched against `client_slug`; falsy yields None.
        days: window length in days. Coerced with `int()` before it is
            spliced into the SQL text, so a non-numeric value is reported
            as an error instead of reaching the database.
        table: one of the two allow-listed table names.
        require_utm: see above.

    Returns:
        dict with `total`, `booked`, `cancelled`, `rescheduled`,
        `real_bookings` and `recent` (up to five newest bookings), or None
        when inputs are missing or any error occurs (logged to stderr).
    """
    if not bookings_conn or not client_slug:
        return None
    cur = None
    try:
        if table not in {"cal_bookings", "calendly_bookings"}:
            raise ValueError(f"unsupported booking table: {table}")
        # INTERVAL literals cannot be bound as parameters here, so the day
        # count is forced to an integer before string concatenation.
        window_sql = "INTERVAL '" + str(int(days)) + " days'"
        utm_clause = " AND utm_source IS NOT NULL" if require_utm else ""
        cur = bookings_conn.cursor()
        cur.execute(
            "SELECT COUNT(*), "
            "COUNT(*) FILTER (WHERE status = 'created'), "
            "COUNT(*) FILTER (WHERE status = 'cancelled'), "
            "COUNT(*) FILTER (WHERE status = 'rescheduled'), "
            "COUNT(*) FILTER (WHERE attendee_email NOT ILIKE '%%test%%' "
            "AND attendee_email NOT ILIKE '%%example%%' "
            "AND attendee_email NOT ILIKE '%%+%%verify%%' "
            "AND attendee_name NOT ILIKE '%%test%%' "
            "AND attendee_name NOT ILIKE '%%verification%%' "
            "AND attendee_name NOT ILIKE '%%delete-me%%' "
            "AND attendee_name NOT ILIKE '%%john doe%%'"
            + utm_clause + ") "
            "FROM " + table + " WHERE client_slug = %s "
            "AND created_at >= NOW() - " + window_sql,
            (client_slug,),
        )
        row = cur.fetchone()
        cols = ["total", "booked", "cancelled", "rescheduled", "real_bookings"]
        result = dict(zip(cols, row)) if row else {}

        cur.execute(
            "SELECT attendee_name, attendee_email, status, start_time, created_at "
            "FROM " + table + " WHERE client_slug = %s "
            "AND created_at >= NOW() - " + window_sql + " "
            "ORDER BY created_at DESC LIMIT 5",
            (client_slug,),
        )
        result["recent"] = [
            {"name": r[0], "email": r[1], "status": r[2],
             "start": str(r[3])[:16] if r[3] else "?",
             "created": str(r[4])[:16] if r[4] else "?"}
            for r in cur.fetchall()
        ]
        return result
    except Exception as e:
        print(f" Bookings DB error for {client_slug}: {e}", file=sys.stderr)
        # The connection is shared across projects. After a failed statement
        # psycopg2 refuses further commands until the transaction is rolled
        # back, so reset it here (best effort) instead of letting one bad
        # project poison every later lookup. Skipped when no cursor was
        # opened, i.e. when validation failed before any SQL ran.
        rollback = getattr(bookings_conn, "rollback", None)
        if cur is not None and callable(rollback):
            try:
                rollback()
            except Exception as rb_err:
                print(f" Bookings DB rollback failed for {client_slug}: {rb_err}", file=sys.stderr)
        return None
    finally:
        # Release the cursor on every path, not just on success.
        if cur is not None:
            try:
                cur.close()
            except Exception:
                pass
707
+
708
+
709
def _dm_short_link_stats(conn, name, days):
    """Return the DM short-link click total attributed to project `name`.

    Sums `dm_links.clicks` over links joined to DMs that reference the
    project (either through COALESCE(target_project, project_name) or by
    membership in `target_projects`) and whose last message / discovery
    timestamp falls within the last `days` days.

    Returns 0 for an empty name, for the synthetic "(no project)" bucket,
    and whenever the query fails (the error is logged to stderr).
    """
    if not name:
        return 0
    if name == SYNTHETIC_NO_PROJECT_NAME:
        return 0
    try:
        window_days = str(int(days))
        sql = (
            "SELECT COALESCE(SUM(l.clicks), 0)::int "
            "FROM dm_links l "
            "JOIN dms d ON d.id = l.dm_id "
            "WHERE (COALESCE(d.target_project, d.project_name) = %s "
            " OR %s = ANY(d.target_projects)) "
            "AND COALESCE(d.last_message_at, d.discovered_at) >= NOW() - INTERVAL '" + window_days + " days'"
        )
        first_row = conn.execute(sql, (name, name)).fetchone()
        # A missing row is treated as a zero total.
        first_row = first_row or (0,)
        return int(first_row[0])
    except Exception as err:
        print(f" dm_short_link_stats error for {name}: {err}", file=sys.stderr)
        return 0
733
+
734
+
735
def _dm_booking_count(conn, bookings_conn, name, days):
    """Count cal_bookings within the window whose metadata.utm_content
    (`dm_<id>`) maps to a DM targeting this project.

    The webhook stores the entire Cal.com payload under cal_bookings.metadata,
    and the original UTM lives at metadata.payload.metadata.utm_content. We
    parse the dm_id out of `dm_<n>`, then join against dms.target_project /
    project_name in the main DB to scope by project.

    Args:
        conn: main-DB connection exposing `execute(sql, params)`.
        bookings_conn: bookings-DB connection exposing `cursor()`.
        name: project name; empty or the synthetic "(no project)" name
            yields 0.
        days: window length in days (coerced with `int()`).

    Returns:
        int count, or 0 on missing inputs or on any error (logged to stderr).

    NOTE(review): the final COUNT(*) runs over `dms` rows, so several
    bookings carrying the same `dm_<id>` are counted once. Confirm that is
    the intended meaning of "booking count".
    """
    if not bookings_conn or not name or name == SYNTHETIC_NO_PROJECT_NAME:
        return 0
    try:
        cur = bookings_conn.cursor()
        try:
            cur.execute(
                "SELECT metadata#>>'{payload,metadata,utm_content}' AS utm_content "
                "FROM cal_bookings "
                "WHERE metadata#>>'{payload,metadata,utm_content}' LIKE 'dm_%%' "
                "AND created_at >= NOW() - INTERVAL '" + str(int(days)) + " days' "
                "AND COALESCE(attendee_email, '') NOT ILIKE '%%test%%'"
            )
            utm_rows = cur.fetchall()
        finally:
            # Release the cursor even when the query or fetch raises; the
            # original only closed it on the success path.
            cur.close()
        dm_ids = []
        for (utm,) in utm_rows:
            # `_` is a single-character wildcard in LIKE, so re-check the
            # literal prefix before trusting the value.
            if utm and utm.startswith('dm_'):
                try:
                    dm_ids.append(int(utm.split('_', 1)[1]))
                except (ValueError, IndexError):
                    pass
        if not dm_ids:
            return 0
        cur2 = conn.execute(
            "SELECT COUNT(*)::int FROM dms WHERE id = ANY(%s) "
            "AND COALESCE(target_project, project_name) = %s",
            (dm_ids, name),
        )
        return int((cur2.fetchone() or (0,))[0])
    except Exception as e:
        print(f" dm_booking_count error for {name}: {e}", file=sys.stderr)
        return 0
774
+
775
+
776
def _period_total_engagement(conn, name, days, platform=None):
    """Total engagement *gained during the window* across ALL posts, regardless
    of when each post was created.

    Used to populate the "(total)" bracketed value on the project panel.

    The result is the sum of two branches computed in SQL:
      * posts created inside the window contribute their full current
        posts.* counters (upvotes, comments_count, views);
      * posts created before the window contribute the sum of positive
        day-over-day deltas between consecutive post_views_daily snapshots
        that fall inside the window (LAG per post). A post's first
        in-window snapshot has no predecessor and therefore adds nothing.

    Filters: the project filter comes from `_project_filter_sql(name, "p")`
    and the optional platform filter from `_platform_sql_clause(platform,
    "p")`; both are applied to every branch. Independently of `platform`,
    views exclude moltbook / github / github_issues, while upvotes and
    comments carry no such exclusion.

    post_clicks is computed by a second query: SUM of post_links.clicks for
    posts created in the window, plus COUNT of post_link_clicks rows with
    is_bot = FALSE and ts inside the window for posts created before it.
    Per the original author's note, click rows do not exist before
    2026-05-07 (when is_bot logging started), so older days count as 0.

    Args:
        conn: DB connection exposing `execute(sql, params)`.
        name: project name handed to `_project_filter_sql`.
        days: window length in days (coerced with `int()`).
        platform: optional platform filter handed to `_platform_sql_clause`.

    Returns:
        dict with integer `upvotes`, `comments`, `views`, `post_clicks`.
    """
    # Period total = engagement gained during the last N days, summed from
    # two complementary branches that always together produce a value
    # >= the panel's scoped column:
    #
    # (1) new_posts_branch — posts CREATED in the window. Their full
    # live posts.* counters are credited as in-window gain (all of
    # it was earned during the window since the post didn't exist
    # before). No reddit/moltbook -1 OP self-vote discount here
    # (the scoped column applies that discount, so the un-discounted
    # sum here is guaranteed >= scoped).
    #
    # (2) old_posts_branch — posts created BEFORE the window. Uses the
    # Trends-tab LAG approach over post_views_daily, summing daily
    # gains across snapshots inside the window. NULL values
    # (Reddit posts don't write upvotes/comments to post_views_daily
    # at all) are excluded by the IS NOT NULL FILTER, so old
    # Reddit posts contribute 0 here — that's a known limitation
    # of the snapshot pipeline and matches the Trends chart.
    # Per-metric platform filter matches the SCOPED column's filter so the
    # bracket is always >= scoped for the same metric:
    # upvotes: no platform filter (scoped sums all platforms with reddit/
    # moltbook -1 OP self-vote discount; bracket uses raw values).
    # comments: no platform filter (scoped sums all platforms).
    # views: excludes moltbook/github/github_issues (matches scoped's
    # FILTER clause in _windowed_post_engagement).
    days_sql = "INTERVAL '" + str(int(days)) + " days'"
    views_excl = "LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')"
    # When platform is set, also apply the mention-row exclusion so this
    # function lines up with the /api/style/stats view that the dashboard
    # shows above the Project Final Stats table.
    plat_clause = _platform_sql_clause(platform, "p")
    proj_clause, proj_params = _project_filter_sql(name, "p")
    cur = conn.execute(
        # Branch 1: posts CREATED in the window. Full live posts.* values
        # are credited as in-window gain. No -1 OP discount on upvotes, so
        # bracket >= scoped on reddit/moltbook by exactly #posts_in_window.
        "WITH new_posts AS ("
        "SELECT "
        "COALESCE(SUM(p.upvotes), 0)::bigint AS upvotes, "
        "COALESCE(SUM(p.comments_count), 0)::bigint AS comments, "
        "COALESCE(SUM(p.views) FILTER (WHERE " + views_excl + "), 0)::bigint AS views "
        "FROM posts p "
        "WHERE " + proj_clause + " "
        "AND p.posted_at >= NOW() - " + days_sql + plat_clause +
        "), "
        # Branch 2: posts created BEFORE the window. LAG over snapshots
        # inside the window. Reddit/moltbook post_views_daily rows carry
        # NULL upvotes/comments by design (those stats pipelines only
        # write views), so the IS NOT NULL FILTER drops them: old Reddit
        # upvotes/comments gain is structurally invisible here, matching
        # the Trends chart.
        "old_post_daily AS ("
        "SELECT pvd.post_id, p.platform, "
        "pvd.upvotes, LAG(pvd.upvotes) OVER w AS prev_upvotes, "
        "pvd.comments, LAG(pvd.comments) OVER w AS prev_comments, "
        "pvd.views, LAG(pvd.views) OVER w AS prev_views "
        "FROM post_views_daily pvd "
        "JOIN posts p ON p.id = pvd.post_id "
        "WHERE pvd.day >= CURRENT_DATE - " + days_sql + " "
        "AND " + proj_clause + " "
        "AND p.posted_at < NOW() - " + days_sql + plat_clause + " "
        "WINDOW w AS (PARTITION BY pvd.post_id ORDER BY pvd.day)"
        "), "
        # GREATEST(..., 0) discards negative deltas (counter went down
        # between two snapshots) instead of subtracting them.
        "old_posts AS ("
        "SELECT "
        "COALESCE(SUM(GREATEST(upvotes - prev_upvotes, 0)) "
        "FILTER (WHERE prev_upvotes IS NOT NULL AND upvotes IS NOT NULL), 0)::bigint AS upvotes, "
        "COALESCE(SUM(GREATEST(comments - prev_comments, 0)) "
        "FILTER (WHERE prev_comments IS NOT NULL AND comments IS NOT NULL), 0)::bigint AS comments, "
        "COALESCE(SUM(GREATEST(views - prev_views, 0)) "
        "FILTER (WHERE prev_views IS NOT NULL AND views IS NOT NULL "
        "AND LOWER(platform) NOT IN ('moltbook', 'github', 'github_issues')), 0)::bigint AS views "
        "FROM old_post_daily"
        ") "
        "SELECT "
        "n.upvotes + o.upvotes, "
        "n.comments + o.comments, "
        "n.views + o.views "
        "FROM new_posts n CROSS JOIN old_posts o",
        # proj_clause appears once per branch, so its parameters are
        # supplied twice, in branch order.
        proj_params + proj_params,
    )
    row = cur.fetchone() or (0, 0, 0)
    upvotes_total = int(row[0] or 0)
    comments_total = int(row[1] or 0)
    views_total = int(row[2] or 0)

    # post_clicks bracket = scoped (post_links.clicks SUM for new posts in
    # window) + COUNT of post_link_clicks events on OLD posts during the
    # window. The "new posts" leg matches the scoped column exactly so
    # bracket >= scoped is guaranteed; the "old posts" leg captures
    # click traffic that hit pre-existing posts during the period.
    cur2 = conn.execute(
        "WITH new_clicks AS ("
        "SELECT COALESCE(SUM(pl.total_clicks), 0)::bigint AS clicks "
        "FROM posts p "
        "LEFT JOIN ("
        "SELECT post_id, SUM(clicks)::int AS total_clicks "
        "FROM post_links WHERE post_id IS NOT NULL GROUP BY post_id"
        ") pl ON pl.post_id = p.id "
        "WHERE " + proj_clause + " "
        "AND p.posted_at >= NOW() - " + days_sql + plat_clause +
        "), "
        "old_event_clicks AS ("
        "SELECT COALESCE(COUNT(*), 0)::bigint AS clicks "
        "FROM post_link_clicks plc "
        "JOIN post_links pl ON pl.code = plc.code "
        "JOIN posts p ON p.id = pl.post_id "
        "WHERE plc.ts >= NOW() - " + days_sql + " "
        "AND plc.is_bot = FALSE "
        "AND " + proj_clause + " "
        "AND p.posted_at < NOW() - " + days_sql + plat_clause +
        ") "
        "SELECT n.clicks + o.clicks "
        "FROM new_clicks n CROSS JOIN old_event_clicks o",
        proj_params + proj_params,
    )
    row2 = cur2.fetchone() or (0,)
    post_clicks_total = int(row2[0] or 0)

    return {
        "upvotes": upvotes_total,
        "comments": comments_total,
        "views": views_total,
        "post_clicks": post_clicks_total,
    }
921
+
922
+
923
def _windowed_post_engagement(conn, name, days, platform=None):
    """Sum engagement only for posts *created within the window*.

    project_stats.get_post_stats aggregates engagement over ALL time for the
    project, which is misleading when the window is a day or a week. Here we
    filter by posted_at so upvotes/comments/views/post_clicks match the same
    24h slice as the 'recent' post count.

    When `platform` is set, also folds in the same platform/mention filter
    that /api/style/stats uses so the Project Final Stats and Posts by
    Engagement Style tables agree on the same denominator.

    post_clicks: SUM of post_links.clicks attributable to short links minted
    for posts in this project's window (post_id-keyed; reply-keyed clicks
    excluded so we don't double-count engagement on replies hanging off
    someone else's thread).

    Args:
        conn: DB connection exposing `execute(sql, params)`.
        name: project name handed to `_project_filter_sql`.
        days: window length in days. Coerced with `int()` before being
            spliced into the SQL text, matching `_period_total_engagement`.
        platform: optional platform filter handed to `_platform_sql_clause`.

    Returns:
        dict with integer `upvotes`, `comments`, `views`, `views_posts`
        (number of posts on platforms that count toward views) and
        `post_clicks`.
    """
    # upvotes is NET of the Reddit/Moltbook OP self-upvote (both platforms auto-
    # apply a +1 to every post). Discounting per row before the SUM means the
    # funnel reflects organic engagement, not (posts * 1) + organic. X /
    # LinkedIn / GitHub have no equivalent auto-vote so they pass through.
    # Matches top_performers.SCORE_SQL and bin/server.js upvotes_discounted.
    plat_clause = _platform_sql_clause(platform, "p")
    proj_clause, proj_params = _project_filter_sql(name, "p")
    # The interval cannot be bound as a parameter, so force an integer
    # before concatenating it into the query.
    days_sql = "INTERVAL '" + str(int(days)) + " days'"
    cur = conn.execute(
        "SELECT COALESCE(SUM(CASE WHEN LOWER(p.platform) IN ('reddit', 'moltbook') "
        " THEN GREATEST(0, COALESCE(p.upvotes, 0) - 1) "
        " ELSE COALESCE(p.upvotes, 0) END), 0), "
        "COALESCE(SUM(p.comments_count), 0), "
        "COALESCE(SUM(p.views) FILTER (WHERE LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')), 0), "
        "COUNT(*) FILTER (WHERE LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')), "
        "COALESCE(SUM(pl.total_clicks), 0) "
        "FROM posts p "
        # Clicks are pre-aggregated per post so the join cannot multiply
        # the post rows feeding the other SUMs.
        "LEFT JOIN ("
        " SELECT post_id, SUM(clicks)::int AS total_clicks "
        " FROM post_links WHERE post_id IS NOT NULL GROUP BY post_id"
        ") pl ON pl.post_id = p.id "
        "WHERE " + proj_clause + " AND p.posted_at >= NOW() - " + days_sql
        + plat_clause,
        proj_params,
    )
    row = cur.fetchone() or (0, 0, 0, 0, 0)
    return {
        "upvotes": int(row[0] or 0),
        "comments": int(row[1] or 0),
        "views": int(row[2] or 0),
        "views_posts": int(row[3] or 0),
        "post_clicks": int(row[4] or 0),
    }
972
+
973
+
974
def _seo_pages_count(conn, name, days):
    """Count SEO pages published for project `name` in the last `days` days.

    Adds two counts: rows in `seo_keywords` and rows in `gsc_queries` whose
    `product` equals `name`, whose `completed_at` falls inside the window
    and whose `page_url` is not NULL.

    Args:
        conn: DB connection exposing `execute(sql, params)`.
        name: project name matched against the `product` column.
        days: window length in days. Coerced with `int()` before being
            spliced into the SQL text; a non-numeric value raises
            ValueError/TypeError instead of reaching the database.

    Returns:
        int total (0 when the query yields no row or a NULL).
    """
    days_sql = "INTERVAL '" + str(int(days)) + " days'"
    cur = conn.execute(
        "SELECT "
        "(SELECT COUNT(*) FROM seo_keywords WHERE product = %s "
        " AND completed_at >= NOW() - " + days_sql + " "
        " AND page_url IS NOT NULL) + "
        "(SELECT COUNT(*) FROM gsc_queries WHERE product = %s "
        " AND completed_at >= NOW() - " + days_sql + " "
        " AND page_url IS NOT NULL)",
        (name, name),
    )
    row = cur.fetchone()
    return int((row and row[0]) or 0)
988
+
989
+
990
def _amplitude_signups_24h_from_cache(proj):
    """For days==1, read the precomputed rolling-24h count from the cache
    written by scripts/amplitude_24h_signups.py.

    That script uses our own server-side PostHog `newsletter_subscribed`
    event (real-time, partner_outcome IN ('partner_created','partner_reused'))
    as the primary source, because Amplitude segmentation/export both lag
    several hours behind real time and bucket by calendar day in the
    project's display timezone.

    The `generated_at_utc` stamp is accepted with an explicit offset, with a
    trailing "Z", or without any offset (then taken to be UTC, as the key
    name states). A cache without the stamp is used as-is.

    Returns int (count) or None when:
    - cache file missing / unreadable
    - project not present in cache
    - cache is older than 30 minutes (stale, fall back to live segmentation)
    """
    cache_path = os.path.expanduser(
        "~/social-autoposter/skill/cache/amplitude_24h_signups.json"
    )
    if not os.path.exists(cache_path):
        return None
    try:
        with open(cache_path, encoding="utf-8") as f:
            cache = json.load(f)
        gen = cache.get("generated_at_utc")
        if gen:
            stamp = str(gen)
            # datetime.fromisoformat() only understands a trailing "Z" from
            # Python 3.11 on; normalise it to an explicit UTC offset.
            if stamp.endswith("Z"):
                stamp = stamp[:-1] + "+00:00"
            generated_at = datetime.fromisoformat(stamp)
            # A naive stamp would make the subtraction below raise and the
            # cache would silently never be used; the key says it is UTC.
            if generated_at.tzinfo is None:
                generated_at = generated_at.replace(tzinfo=timezone.utc)
            age_min = (datetime.now(timezone.utc) - generated_at).total_seconds() / 60
            if age_min > 30:
                return None
        for p in cache.get("projects") or []:
            if p.get("name") == proj.get("name"):
                v = p.get("count_24h")
                return int(v) if v is not None else None
    except Exception:
        # Deliberate best effort: any unreadable or malformed cache means
        # "no cached value", and the caller falls back to the live path.
        return None
    return None
1025
+
1026
+
1027
def _amplitude_signups(proj, days, env):
    """Pull attributed end-product signup count from the client's Amplitude.

    For projects with an `amplitude` config block (project_id, api_key_env,
    secret_key_env, signup_event, attribution_filter). Returns total signups
    matching the filter over the last `days`, or None if not configured /
    creds missing / API errors. Errors are non-fatal — they collapse to None
    so the dashboard falls back to the click-based metric. That covers both
    transport failures and a response whose shape cannot be summed.

    Special case: days == 1 reads from the rolling-24h cache populated by
    scripts/amplitude_24h_signups.py, which uses real-time PostHog data
    instead of Amplitude segmentation (which lags hours and buckets by
    calendar day in the project's display timezone). Falls through to the
    segmentation path if the cache is missing or stale.

    Args:
        proj: project config dict; only `amplitude` and `name` are read.
        days: window length in days.
        env: mapping from which the API key / secret are looked up, using
            the names stored in the `amplitude` block.

    Returns:
        int signup total, or None as described above.
    """
    amp = proj.get("amplitude")
    if not amp:
        return None
    if days == 1:
        cached = _amplitude_signups_24h_from_cache(proj)
        if cached is not None:
            return cached
    api_key = env.get(amp.get("api_key_env", ""))
    secret_key = env.get(amp.get("secret_key_env", ""))
    if not api_key or not secret_key:
        return None
    import base64
    end_dt = datetime.now(timezone.utc)
    # Both `start` and `end` are sent as whole dates, so a window of N days
    # starts N-1 days before today.
    start_dt = end_dt - timedelta(days=max(1, days) - 1)
    e = json.dumps({
        "event_type": amp.get("signup_event", "New User Sign Up"),
        "filters": [
            {
                "subprop_type": "event",
                "subprop_key": k,
                "subprop_op": "is",
                "subprop_value": v if isinstance(v, list) else [v],
            }
            for k, v in (amp.get("attribution_filter") or {}).items()
        ],
    })
    qs = urllib.parse.urlencode({
        "e": e,
        "start": start_dt.strftime("%Y%m%d"),
        "end": end_dt.strftime("%Y%m%d"),
        "i": "1",
        "m": "totals",
    })
    auth_b64 = base64.b64encode(f"{api_key}:{secret_key}".encode()).decode()
    req = urllib.request.Request(
        f"https://amplitude.com/api/2/events/segmentation?{qs}",
        headers={"Authorization": f"Basic {auth_b64}"},
    )
    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            data = json.loads(resp.read())
        # Parsing stays inside the try: a payload such as {"data": null} or
        # a non-list series must collapse to None like any other API error,
        # not raise into the dashboard build.
        series = ((data.get("data") or {}).get("series") or [[]])[0]
        return int(sum(int(x or 0) for x in series))
    except Exception as exc:
        print(f" amplitude signups fetch error ({proj.get('name')}): {exc}", file=sys.stderr)
        return None
1088
+
1089
+
1090
def _post_stats_synthetic_null(conn, days):
    """Post stats for the synthetic '(no project)' bucket.

    Mirrors the shape of ps.get_post_stats but selects rows where
    posts.project_name IS NULL. Upvotes on reddit / moltbook posts are
    discounted by one per post (floored at zero) before summing. Only the
    `recent` count is limited to the last `days` days; every other figure
    covers all NULL-project posts.

    ps.get_post_stats lives in the chflags-locked project_stats.py, which is
    why this sibling exists instead of passing a magic project name into it.

    Returns a dict with total, recent, active, removed, total_upvotes,
    total_comments and total_views, or {} when the query yields no row.
    """
    window_days = int(days)
    sql = "".join((
        "SELECT COUNT(*), ",
        f"COUNT(*) FILTER (WHERE posted_at >= NOW() - INTERVAL '{window_days} days'), ",
        "COUNT(*) FILTER (WHERE status = 'active'), ",
        "COUNT(*) FILTER (WHERE status IN ('removed', 'deleted')), ",
        "COALESCE(SUM(CASE WHEN LOWER(platform) IN ('reddit', 'moltbook') ",
        " THEN GREATEST(0, COALESCE(upvotes, 0) - 1) ",
        " ELSE COALESCE(upvotes, 0) END), 0), ",
        "COALESCE(SUM(comments_count), 0), ",
        "COALESCE(SUM(views), 0) ",
        "FROM posts WHERE project_name IS NULL",
    ))
    record = conn.execute(sql).fetchone()
    if not record:
        return {}
    columns = (
        "total",
        "recent",
        "active",
        "removed",
        "total_upvotes",
        "total_comments",
        "total_views",
    )
    return {column: value for column, value in zip(columns, record)}
1116
+
1117
+
1118
def _platform_breakdown_synthetic_null(conn, days):
    """Count recent NULL-project posts per platform.

    NULL-project sibling of ``ps.get_platform_breakdown``: only rows with
    ``project_name IS NULL`` posted within the last ``days`` days are counted.

    Args:
        conn: DB connection exposing ``execute(sql)`` whose result exposes
            ``fetchall()``.
        days: window length in days; coerced with ``int()`` before being
            inlined into the INTERVAL literal.

    Returns:
        Dict mapping platform -> post count, in the order the rows are
        returned (the query orders by count, descending).
    """
    statement = (
        "SELECT platform, COUNT(*) as cnt FROM posts "
        f"WHERE project_name IS NULL AND posted_at >= NOW() - INTERVAL '{int(days)} days' "
        "GROUP BY platform ORDER BY cnt DESC"
    )
    breakdown = {}
    for record in conn.execute(statement).fetchall():
        breakdown[record[0]] = record[1]
    return breakdown
1126
+
1127
+
1128
def build_project_entry(proj, days, api_key, ph_pid, env, ph_results, platform=None):
    """Build the dashboard JSON entry for a single project.

    Merges three inputs:
      * the consolidated ``/api/v1/stats/project-detail`` endpoint (post
        stats, platform breakdown, windowed / period engagement, SEO page
        count, bookings, DM counters and DB-derived created pages);
      * the pre-fetched per-domain PostHog stats in ``ph_results``;
      * ``_amplitude_signups`` for the project's attributed signups.

    Args:
        proj: project config dict. Must contain ``"name"``; may contain
            ``"posthog"`` (``api_key_env`` / ``project_id`` overrides) and
            ``"amplitude"`` blocks.
        days: window length in days (sent to the endpoint as ``int(days)``).
        api_key: default PostHog API key, used when the project has no
            ``api_key_env`` override present in ``env``.
        ph_pid: default PostHog project id.
        env: mapping of environment variables.
        ph_results: dict keyed by ``(ph_key, project_id, domain)`` holding
            per-domain PostHog stats; missing keys fall back to
            ``_empty_domain_stats(domain)``.
        platform: optional platform filter, forwarded as ``""`` when falsy.

    Returns:
        Dict with keys ``name``, ``posts``, ``seo``, ``platforms``,
        ``posthog``, ``bookings``, ``funnel``, ``analytics_error`` and
        ``analytics_suspected_broken``. When PostHog errored, the
        PostHog-derived funnel values are ``None`` rather than 0.
    """
    name = proj["name"]
    # All main-DB + bookings-DB per-project stats come from one consolidated
    # HTTP endpoint (HTTP-only migration 2026-06-01). Booking scoping params
    # (client_slug / table / require_utm) are computed locally from config and
    # forwarded so the endpoint can read the separate bookings DB server-side.
    # The endpoint folds the platform filter into post_stats.recent directly,
    # so the legacy platform-override COUNT is no longer needed here.
    client_slug = ps.get_client_slug(name)
    booking_table = ps.get_booking_table(name)
    require_utm = _bookings_require_utm(name)
    from http_api import api_get
    _detail = (api_get("/api/v1/stats/project-detail", query={
        "project": name,
        "days": int(days),
        "platform": platform or "",
        "client_slug": client_slug or "",
        "booking_table": booking_table or "cal_bookings",
        "require_utm": "1" if require_utm else "0",
    }).get("data") or {})
    post_stats = dict(_detail.get("post_stats") or {})
    platforms = _detail.get("platforms") or {}
    # Merge the payload over zero defaults so a partial `windowed` / `period`
    # object (e.g. one without `post_clicks`) yields zeros instead of a
    # KeyError that would turn the whole project into an error row.
    eng_recent = {
        "upvotes": 0, "comments": 0, "views": 0, "views_posts": 0, "post_clicks": 0,
        **(_detail.get("windowed") or {}),
    }
    eng_period_total = {
        "upvotes": 0, "comments": 0, "views": 0, "post_clicks": 0,
        **(_detail.get("period") or {}),
    }
    seo_pages_recent = int(_detail.get("seo_pages_recent") or 0)

    domains = ps.get_project_domains(proj)
    ph_override = proj.get("posthog", {}) or {}
    ph_key = env.get(ph_override.get("api_key_env", ""), api_key)
    ph_pid_proj = ph_override.get("project_id", ph_pid)
    analytics_error = None
    if domains:
        per_domain = []
        for d in domains:
            stats = ph_results.get((ph_key, ph_pid_proj, d))
            if stats is None:
                stats = _empty_domain_stats(d)
            # Keep the first error seen; it is reported for the whole project.
            if stats.get("error") and not analytics_error:
                analytics_error = stats["error"]
            per_domain.append(stats)
        posthog = _ph_combine(per_domain)
        if analytics_error:
            posthog["error"] = analytics_error
    else:
        posthog = None

    # Window-scoped created paths come from the endpoint's db_created_pages
    # ({host: [paths]}), i.e. pages whose seo_keywords/gsc_queries
    # `completed_at` falls inside `days`. With a window set, the filesystem
    # scan is intentionally skipped (static page files carry no trustworthy
    # creation timestamp), so the DB-derived set is the whole answer —
    # matching _created_paths_for_project with days set. The Top tab → Pages
    # sub-tab filters rows on this set.
    created_by_domain = {
        host: set(paths or ())
        for host, paths in (_detail.get("db_created_pages") or {}).items()
    }
    if posthog is not None:
        for d, detail in (posthog.get("pageview_details") or {}).items():
            paths = created_by_domain.get((d or "").lower(), set())
            detail["created_paths"] = sorted(paths)

    # Capture the domain-wide totals BEFORE pageviews are window-scoped below.
    # The analytics-broken canary is meant to answer "is window.posthog wired
    # up on this site at all?", which requires domain-level signal, not
    # per-new-page.
    domain_wide_pv = int(posthog["pageviews"]) if posthog else 0
    domain_wide_signups = int(posthog["email_signups"]) if posthog else 0
    domain_wide_sched = int(posthog["schedule_clicks"]) if posthog else 0
    domain_wide_get_started = int(posthog["get_started_clicks"]) if posthog else 0

    # Only pageviews get window-scoped to "traffic on pages we generated in
    # this window". Conversion events (newsletter_subscribed, schedule_click,
    # get_started_click) fire on dedicated landing pages (/, /use-case, /ig,
    # etc.), almost never on the freshly-generated /blog/* and /t/* SEO pages
    # we ship each cycle. Scoping those collapsed every project to 0 and made
    # the dashboard's Email Signups / Schedule Clicks / Get Started columns
    # useless, so they stay domain-wide. cta_clicks and real_bookings are not
    # tracked per-page either.
    #
    # Skipped entirely when PostHog is errored: the top_pages maps are empty
    # for errored domains, so scoping would silently collapse pageviews to
    # zero. The funnel values are set to None below so the dashboard renders
    # 'err' instead of a misleading 0.
    if posthog is not None and not analytics_error:
        scoped_pv = 0
        for d, detail in (posthog.get("pageview_details") or {}).items():
            created = {_norm_path(p) for p in created_by_domain.get((d or "").lower(), set())}
            if not created:
                continue
            for path, cnt in (detail.get("top_pages") or {}).items():
                if _norm_path(path) in created:
                    scoped_pv += int(cnt or 0)
        posthog["pageviews"] = scoped_pv

    bookings = _detail.get("bookings")

    # When the PostHog batch failed, the aggregate numbers on `posthog` are
    # all 0 but that doesn't mean there are no events, it means we couldn't
    # read them. Surface null + an error string on the funnel so the
    # dashboard renders 'err' instead of silently claiming "zero pageviews".
    if analytics_error:
        pvs = None
        ctas = None
        email_signups = None
        schedule_clicks = None
        get_started_clicks = None
        cross_product_clicks = None
        ctr = None
        dw_pv_out = None
        dw_signups_out = None
        dw_sched_out = None
        dw_get_started_out = None
        analytics_suspected_broken = False
    else:
        pvs = posthog["pageviews"] if posthog else 0
        ctas = posthog["cta_clicks"] if posthog else 0
        email_signups = posthog["email_signups"] if posthog else 0
        schedule_clicks = posthog["schedule_clicks"] if posthog else 0
        get_started_clicks = posthog["get_started_clicks"] if posthog else 0
        # Cross-product stays domain-wide on purpose: it's a lightweight
        # signal ("how many clicks went to a sibling product from this site")
        # with no per-page top-pages breakdown, so there's nothing to scope.
        cross_product_clicks = posthog.get("cross_product_clicks", 0) if posthog else 0
        # Domain-wide counterparts for the "scoped (domain-wide)" dashboard
        # rendering. The domain_wide_* values are already 0 when there is no
        # PostHog data, so no extra guard is needed here.
        dw_pv_out = domain_wide_pv
        dw_signups_out = domain_wide_signups
        dw_sched_out = domain_wide_sched
        dw_get_started_out = domain_wide_get_started
        ctr = (ctas / pvs * 100) if pvs else None
        # Canary: real traffic but zero tracked conversion events almost
        # always means window.posthog was never wired up on the site (e.g.
        # Fazm newsletter bug where signups worked but nothing fired to
        # PostHog). Use domain-wide totals so the signal isn't diluted by
        # the window-scoped pageview number above.
        analytics_suspected_broken = (domain_wide_pv >= 500) and (
            (domain_wide_signups + domain_wide_sched + domain_wide_get_started) == 0
        )

    real = bookings.get("real_bookings", 0) if bookings else 0
    dm_clicks = int(_detail.get("dm_clicks") or 0)
    dm_bookings = int(_detail.get("dm_bookings") or 0)
    amplitude_signups = _amplitude_signups(proj, days, env)
    # Booking conversion needs both `real` and a readable CTA count; it stays
    # None when PostHog errored (ctas is None) or there were no CTA clicks.
    conv = (real / ctas * 100) if (not analytics_error and ctas) else None

    amplitude_cfg = proj.get("amplitude")

    return {
        "name": name,
        "posts": {
            "total": post_stats.get("total", 0),
            "recent": post_stats.get("recent", 0),
            "active": post_stats.get("active", 0),
            "removed": post_stats.get("removed", 0),
            # Lifetime engagement across ALL posts for this project (kept for context).
            "upvotes": post_stats.get("total_upvotes", 0),
            "comments": post_stats.get("total_comments", 0),
            "views": post_stats.get("total_views", 0),
            # Window-scoped engagement: only posts created in the last `days`.
            "upvotes_recent": eng_recent["upvotes"],
            "comments_recent": eng_recent["comments"],
            # None (not 0) when no post in the window reported a view count.
            "views_recent": eng_recent["views"] if (eng_recent["views_posts"] or 0) > 0 else None,
            # post_clicks_recent: SUM of post_links.clicks for short links
            # minted for posts in this project's window. Pre-2026-05-07 rows
            # may include bot prefetches; post-2026-05-07 rows are humans-only
            # (Twitter card / LinkedIn unfurl / Slack preview filtered at the
            # resolver via post_link_clicks.is_bot). See server.js /api/top.
            "post_clicks_recent": eng_recent["post_clicks"],
            # Period totals: engagement GAINED during the window across ALL
            # posts (regardless of posted_at), mirroring the Trends-tab
            # /api/{views,upvotes,comments,clicks}/per-day SUM. The dashboard
            # renders each as "<scoped> (<period_total>)" in gray brackets.
            # post_clicks_period_total counts post_link_clicks (is_bot=FALSE)
            # in the window joined to this project's posts.
            "upvotes_period_total": eng_period_total["upvotes"],
            "comments_period_total": eng_period_total["comments"],
            "views_period_total": eng_period_total["views"],
            "post_clicks_period_total": eng_period_total["post_clicks"],
        },
        "seo": {"pages_recent": seo_pages_recent},
        "platforms": platforms,
        "posthog": posthog,
        "bookings": bookings,
        "funnel": {
            "pageviews": pvs,
            "cta_clicks": ctas,
            "email_signups": email_signups,
            "schedule_clicks": schedule_clicks,
            "get_started_clicks": get_started_clicks,
            "cross_product_clicks": cross_product_clicks,
            "real_bookings": real,
            "dm_clicks": dm_clicks,
            "dm_bookings": dm_bookings,
            # Attributed signups on the client's product (Amplitude), filtered
            # by the UTM source we forward (config.json projects[].amplitude).
            # null when the project has no `amplitude` block or the fetch
            # fails — dashboard falls back to get_started_clicks.
            "amplitude_signups": amplitude_signups,
            # Filter shape (e.g. {"utm_source": "studyly.io"}) for tooltip;
            # null when the project has no `amplitude` block.
            "amplitude_filter": amplitude_cfg.get("attribution_filter") if amplitude_cfg else None,
            "ctr_pct": ctr,
            "conv_pct": conv,
            # Domain-wide siblings: the dashboard shows each as "<scoped>
            # (<domain>)" so "0 pv for mk0r" doesn't hide 62 real visits
            # that happened to land on older pages.
            "domain_pageviews": dw_pv_out,
            "domain_email_signups": dw_signups_out,
            "domain_schedule_clicks": dw_sched_out,
            "domain_get_started_clicks": dw_get_started_out,
        },
        "analytics_error": analytics_error,
        "analytics_suspected_broken": analytics_suspected_broken,
    }
1347
+
1348
+
1349
def main():
    """CLI entry point: print one JSON document of funnel stats to stdout.

    Flags:
        --days        window length in days (default 1).
        --project     restrict output to one project (case-insensitive).
        --platform    restrict post counters to one platform.
        --posts-only  fast path that emits only the per-project ``posts.*``
                      counters and skips PostHog, bookings, Amplitude and SEO.

    Errors that prevent any output (invalid platform, missing PostHog key in
    the full path) are printed to stdout as ``{"error": ...}`` and the
    process exits with status 1. A failure while building a single project is
    recorded as ``{"name": ..., "error": ...}`` in ``projects`` instead.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--days", type=int, default=1)
    parser.add_argument("--project", help="Filter to a single project name")
    parser.add_argument(
        "--platform",
        default="",
        help=(
            "Filter to a single platform (twitter|reddit|linkedin|github|moltbook). "
            "'x' is folded into 'twitter'. Empty / 'all' = no filter. "
            "Matches the same normalization used by /api/style/stats."
        ),
    )
    parser.add_argument(
        "--posts-only",
        action="store_true",
        help=(
            "Emit ONLY the per-project posts.* engagement counters; skip the "
            "PostHog batch (pageviews/CTAs), the bookings DB, Amplitude, and "
            "SEO page counts. Drops the python runtime from ~30s+ to ~1s. "
            "Used by /api/funnel/stats as a fast overlay path when the "
            "dashboard's platform pill changes — those slow sources are "
            "platform-independent so the all-platform snapshot's values for "
            "them stay correct, and only the engagement columns need to "
            "react to the filter."
        ),
    )
    args = parser.parse_args()

    # Normalize platform early so every downstream call sees the same alias
    # handling; a falsy result means "no filter".
    platform = _normalize_platform(args.platform)
    # Safety: enforce the same pattern /api/funnel/stats accepts so a bad CLI
    # value can't smuggle SQL through _platform_sql_clause. fullmatch is used
    # because `$` with re.match also matches before a trailing newline.
    if platform and not re.fullmatch(r"[a-z0-9_]{1,32}", platform):
        print(json.dumps({"error": f"invalid platform: {args.platform!r}"}), file=sys.stdout)
        sys.exit(1)

    ps.load_env()
    env = os.environ
    config = ps.load_config()

    api_key = env.get("POSTHOG_PERSONAL_API_KEY")
    project_id = env.get("POSTHOG_PROJECT_ID", "330744")

    _bridge_per_project_posthog_keys_from_keychain(config, env)

    # --project filter, shared by the fast path, the full path and the
    # synthetic bucket. An empty / missing value selects everything.
    wanted_project = args.project.lower() if args.project else None

    def _is_selected(project_name):
        return wanted_project is None or wanted_project == project_name.lower()

    # Fast path: --posts-only skips the slow PostHog/Amplitude/bookings work
    # and emits ONLY the per-project posts.* counters. Used as a low-latency
    # overlay on top of the cached all-platform snapshot when the dashboard's
    # platform pill changes (see /api/funnel/stats in bin/server.js). The
    # batched GROUP BY / synthetic queries run server-side at
    # /api/v1/stats/posts-batch (HTTP-only), so this path makes a single HTTP
    # call instead of a per-project loop.
    if args.posts_only:
        from http_api import api_get
        # The dicts below are keyed by project_name, with the synthetic NULL
        # bucket under SYNTHETIC_NO_PROJECT_NAME.
        _batch = (api_get("/api/v1/stats/posts-batch",
                          query={"days": int(args.days), "platform": platform or ""}).get("data") or {})
        lifetime = _batch.get("lifetime") or {}
        windowed = _batch.get("windowed") or {}
        period = _batch.get("period") or {}
        period_clicks = _batch.get("period_clicks") or {}

        # Zero defaults merged under each payload so a partial object yields
        # zeros instead of a KeyError that would abort the whole run.
        windowed_defaults = {"upvotes": 0, "comments": 0, "views": 0, "views_posts": 0, "post_clicks": 0}
        period_defaults = {"upvotes": 0, "comments": 0, "views": 0}

        # Project list: real projects from config + the synthetic NULL bucket.
        proj_list = list(config.get("projects", [])) + [{"name": SYNTHETIC_NO_PROJECT_NAME}]
        out_projects = []
        for proj in proj_list:
            name = proj["name"]
            if not _is_selected(name):
                continue
            life = lifetime.get(name) or {}
            w = {**windowed_defaults, **(windowed.get(name) or {})}
            pe = {**period_defaults, **(period.get(name) or {})}
            out_projects.append({
                "name": name,
                "posts": {
                    "total": int(life.get("total", 0)),
                    "recent": int(life.get("recent", 0)),
                    "active": int(life.get("active", 0)),
                    "removed": int(life.get("removed", 0)),
                    "upvotes": int(life.get("total_upvotes", 0)),
                    "comments": int(life.get("total_comments", 0)),
                    "views": int(life.get("total_views", 0)),
                    "upvotes_recent": w["upvotes"],
                    "comments_recent": w["comments"],
                    # None (not 0) when no post in the window reported views.
                    "views_recent": w["views"] if (w["views_posts"] or 0) > 0 else None,
                    "post_clicks_recent": w["post_clicks"],
                    "upvotes_period_total": pe["upvotes"],
                    "comments_period_total": pe["comments"],
                    "views_period_total": pe["views"],
                    "post_clicks_period_total": int(period_clicks.get(name, 0)),
                },
            })
        print(json.dumps({
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "days": args.days,
            "platform": platform or "all",
            "posts_only": True,
            "projects": out_projects,
        }))
        return

    if not api_key:
        print(json.dumps({"error": "POSTHOG_PERSONAL_API_KEY not set"}), file=sys.stdout)
        sys.exit(1)

    # Per-project main-DB + bookings-DB stats now come from HTTP endpoints
    # (build_project_entry calls /api/v1/stats/project-detail). No direct
    # Postgres connection is opened here anymore (HTTP-only, 2026-06-01).
    selected_projects = [
        proj for proj in config.get("projects", []) if _is_selected(proj["name"])
    ]

    # Synthetic '(no project)' bucket: surfaces posts.project_name IS NULL rows
    # (e.g. IG drafts that landed without a project tag) so the funnel total
    # lines up with /api/style/stats. It has no website/landing_pages/posthog
    # block, so get_project_domains() is expected to return [] for it and it
    # never joins a PostHog bucket below.
    if _is_selected(SYNTHETIC_NO_PROJECT_NAME):
        selected_projects.append({"name": SYNTHETIC_NO_PROJECT_NAME})

    # Group domains by (api_key, project_id) so we issue one batched set of
    # HogQL calls per PostHog bucket instead of one-per-domain. Projects that
    # share a bucket collapse into a single batched fetch; projects with
    # dedicated credentials run in their own bucket concurrently.
    after = (datetime.now(timezone.utc) - timedelta(days=args.days)).strftime("%Y-%m-%dT%H:%M:%S")
    buckets = {}
    for proj in selected_projects:
        domains = ps.get_project_domains(proj)
        if not domains:
            continue
        ph_over = proj.get("posthog", {}) or {}
        ph_key = env.get(ph_over.get("api_key_env", ""), api_key)
        ph_pid_proj = ph_over.get("project_id", project_id)
        buckets.setdefault((ph_key, ph_pid_proj), set()).update(domains)

    # One batched fetch per bucket. When a batch fails after retries, mark
    # every domain in that bucket as errored rather than rendering zeros.
    #
    # Concurrency is capped low (2) on purpose: PostHog's query endpoint
    # enforces a short-window burst limit (429 "throttled", recovery 1-12s),
    # and the personal API key is shared across most buckets. Firing 8
    # buckets at once (each ~10 sequential HogQL queries) created a
    # thundering herd that all hit the limiter together, all backed off
    # together, and re-collided on retry until the 4 attempts were
    # exhausted, marking whole buckets errored ('err' on the dashboard for
    # every project sharing them). Two-at-a-time keeps us under the burst
    # ceiling while the Retry-After-honoring backoff in _hogql absorbs the
    # occasional 429.
    ph_results = {}
    if buckets:
        pool_size = max(1, min(2, len(buckets)))
        with ThreadPoolExecutor(max_workers=pool_size) as ex:
            futs = {
                ex.submit(_ph_batch_counts, k, pid, sorted(ds), after): (k, pid, ds)
                for (k, pid), ds in buckets.items()
            }
            for fut, (k, pid, ds) in futs.items():
                try:
                    per_domain = fut.result()
                    for d, stats in per_domain.items():
                        ph_results[(k, pid, d)] = stats
                except HogqlError as e:
                    msg = f"PostHog unavailable: {e}"
                    print(f" PostHog batch error (pid={pid}): {e}", file=sys.stderr)
                    for d in ds:
                        ph_results[(k, pid, d)] = _empty_domain_stats(d, error=msg)
                except Exception as e:
                    msg = f"PostHog batch error: {e}"
                    print(f" PostHog batch unexpected error (pid={pid}): {e}", file=sys.stderr)
                    for d in ds:
                        ph_results[(k, pid, d)] = _empty_domain_stats(d, error=msg)

    out_projects = []
    for proj in selected_projects:
        name = proj["name"]
        try:
            out_projects.append(build_project_entry(
                proj, args.days, api_key, project_id, env, ph_results,
                platform=platform,
            ))
        except Exception as e:
            # One broken project must not take down the whole report: record
            # the failure in its row and log it like the other error paths.
            print(f" project entry error ({name}): {e}", file=sys.stderr)
            out_projects.append({"name": name, "error": str(e)})

    # `overall.recent` also respects the platform filter so the dashboard's
    # "N project(s)" / total header stays self-consistent with the per-row data.
    from http_api import api_get
    _overall = (api_get("/api/v1/stats/posts-overall",
                        query={"days": int(args.days), "platform": platform or ""}).get("data") or {})
    total_all = int(_overall.get("total") or 0)
    total_recent = int(_overall.get("recent") or 0)

    print(json.dumps({
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "days": args.days,
        "platform": platform or "all",
        "projects": out_projects,
        "overall": {"total": total_all, "recent": total_recent},
    }))
1560
+
1561
+
1562
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()