@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,1563 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""JSON wrapper around project_stats.py for the dashboard /api/funnel/stats endpoint.
|
|
3
|
+
|
|
4
|
+
Emits a single JSON object on stdout: { generated_at, days, projects: [ ... ], overall }.
|
|
5
|
+
Keeps project_stats.py untouched (it is chflags uchg-locked).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import socket
|
|
12
|
+
import sys
|
|
13
|
+
import time
|
|
14
|
+
import urllib.error
|
|
15
|
+
import urllib.parse
|
|
16
|
+
import urllib.request
|
|
17
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
18
|
+
from datetime import datetime, timedelta, timezone
|
|
19
|
+
|
|
20
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
21
|
+
|
|
22
|
+
import project_stats as ps
|
|
23
|
+
from project_slugs import bookings_require_utm as _bookings_require_utm
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# File names that mark a directory as a routable page. `_scan_repo_pages`
# tests each walked directory's file list against this tuple.
_PAGE_FILENAMES = ("page.tsx", "page.ts", "page.jsx", "page.js", "page.mdx", "page.md")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _normalize_platform(p):
    """Canonicalize a platform name for filtering.

    The value is stringified, stripped and lowercased; the alias 'x' maps to
    'twitter'. Any falsy input, a blank string, or 'all' yields '' which
    callers treat as "no platform filter".

    Uses the same normalization as /api/style/stats so the project final
    stats table and the engagement-style table share one vocabulary when the
    dashboard's platform pill is set.
    """
    cleaned = str(p).strip().lower() if p else ""
    if cleaned in ("", "all"):
        return ""
    return {"x": "twitter"}.get(cleaned, cleaned)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _platform_sql_clause(platform, table_alias=""):
    """Return an SQL fragment (string, no placeholders) that filters by platform.

    Args:
        platform: Normalized platform name. A falsy value means "no filter"
            and yields an empty string.
        table_alias: Optional table alias; when given, the column is
            referenced as ``<alias>.platform``.

    Returns:
        ``""`` when no platform is given, otherwise a fragment starting with
        ``" AND "``. The caller is responsible for placing it inside a WHERE
        clause.

    Mentions live in the dedicated `mentions` table now (2026-05-23 cutover),
    so no posts-level filter is needed. Previously this clause also excluded
    placeholder `posts` rows where our_content = '(mention - no original
    post)'; those rows are no longer present after
    migrate_mentions_out_of_posts.py --commit-delete.

    The 'x' -> 'twitter' alias is folded inside the SQL so reddit/linkedin/
    twitter all just work against stored values.
    """
    if not platform:
        return ""
    column = ((table_alias + ".") if table_alias else "") + "platform"
    # Callers are expected to have validated `platform` against
    # [a-z0-9_]{1,32}. Doubling single quotes is defense in depth so the
    # value cannot terminate the string literal if a future caller skips that
    # check; for every valid platform the output is unchanged.
    literal = platform.replace("'", "''")
    return (
        " AND LOWER(CASE WHEN LOWER(" + column + ")='x' "
        "THEN 'twitter' ELSE " + column + " END) = '" + literal + "'"
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Label used for `posts` rows whose project_name IS NULL. It keeps
# off-config / un-tagged posts visible on the dashboard without polluting
# real project rows, and is deliberately distinct from any historical
# 'General' rows the table once had.
SYNTHETIC_NO_PROJECT_NAME = "(no project)"


def _project_filter_sql(proj_name, table_alias="p"):
    """Build the per-project WHERE condition as a (clause, params) pair.

    A real project produces "<alias>.project_name = %s" with params
    (proj_name,). The synthetic "(no project)" bucket produces
    "<alias>.project_name IS NULL" with empty params. An empty `table_alias`
    drops the "<alias>." prefix.

    Keeping the NULL-vs-equality decision in one place means every
    per-project SQL site treats the synthetic bucket identically.
    """
    column = f"{table_alias}.project_name" if table_alias else "project_name"
    if proj_name != SYNTHETIC_NO_PROJECT_NAME:
        return (column + " = %s", (proj_name,))
    return (column + " IS NULL", ())
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _bridge_per_project_posthog_keys_from_keychain(config, env):
    """Fill `env` with per-project PostHog API keys looked up via `security`.

    For every project in ``config["projects"]`` whose ``posthog.api_key_env``
    names an environment variable, and whose variable is not already set to a
    truthy value in `env`, run
    ``security find-generic-password -s <name> -w`` and store the non-empty
    output under that name. "POSTHOG_PERSONAL_API_KEY" is never looked up
    here, and each variable name is tried at most once per call.

    Args:
        config: Parsed config mapping; a missing or null "projects" entry is
            treated as empty.
        env: Mutable mapping updated in place.

    Returns:
        None. This is best effort: a lookup that fails leaves `env` untouched
        for that name.
    """
    import subprocess

    seen = set()
    for proj in config.get("projects", []) or []:
        name_env = ((proj.get("posthog") or {}).get("api_key_env") or "").strip()
        if not name_env or name_env in seen or name_env == "POSTHOG_PERSONAL_API_KEY":
            continue
        seen.add(name_env)
        if env.get(name_env):
            continue
        try:
            v = subprocess.check_output(
                ["security", "find-generic-password", "-s", name_env, "-w"],
                stderr=subprocess.DEVNULL,
            ).decode().strip()
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: the lookup exited non-zero (e.g. no such
            # entry). OSError: the `security` executable is missing or cannot
            # be run on this host. Neither should abort a best-effort bridge.
            continue
        if v:
            env[name_env] = v
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _scan_repo_pages(repo_path):
    """Collect the URL paths of static pages in a Next.js app-router repo.

    Both `<repo>/src/app` and `<repo>/app` are walked when they exist. A
    directory contributes a path when it contains one of `_PAGE_FILENAMES`.

    Per Next.js conventions, directories starting with "[" (dynamic
    segments), "_" (private folders) or "@" (parallel-route slots) are
    excluded together with everything beneath them; dot-directories and
    node_modules are not descended into. Route groups "(name)" are walked
    but contribute no URL segment.

    Returns a set of paths ("/" for the app root); empty when `repo_path` is
    falsy or neither app root exists.
    """
    pages = set()
    if not repo_path:
        return pages
    base = os.path.expanduser(repo_path)
    excluded_prefixes = ("[", "_", "@")
    for app_dir in (os.path.join(base, "src", "app"), os.path.join(base, "app")):
        if not os.path.isdir(app_dir):
            continue
        for current, subdirs, files in os.walk(app_dir):
            relative = os.path.relpath(current, app_dir)
            parts = relative.split(os.sep) if relative != "." else []
            if any(part.startswith(excluded_prefixes) for part in parts):
                # Excluded branch: stop os.walk from descending any further.
                subdirs[:] = []
                continue
            # Prune in place so os.walk never enters excluded children.
            subdirs[:] = [
                name for name in subdirs
                if name not in ("node_modules",)
                and not name.startswith(("[", "_", "@", "."))
            ]
            if not any(name in _PAGE_FILENAMES for name in files):
                continue
            visible = [
                part for part in parts
                if not (part.startswith("(") and part.endswith(")"))
            ]
            pages.add("/" + "/".join(visible) if visible else "/")
    return pages
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _db_created_pages(conn, product_name, days=None):
    """Return {host: set(paths)} for pages logged against `product_name`.

    Reads `page_url` from the SEO pipeline table (seo_keywords) and the
    GSC-driven page generation table (gsc_queries). Each URL is split into a
    lowercased host and a path with trailing slashes removed ("/" for an
    empty path); URLs without a host are ignored.

    When `days` is set, only rows whose `completed_at` lies within the last
    `days` days are considered. Those rows get `completed_at` stamped when
    the page is actually generated, so this matches "pages created in the
    last N days" as used by the dashboard's period selector.

    A failure while querying or reading one table is reported on stderr and
    does not prevent the other table from being read.
    """
    pages_by_host = {}
    recency = (
        "" if days is None
        else f" AND completed_at >= NOW() - INTERVAL '{int(days)} days'"
    )
    statements = tuple(
        f"SELECT page_url FROM {table} WHERE product = %s AND page_url IS NOT NULL" + recency
        for table in ("seo_keywords", "gsc_queries")
    )
    for statement in statements:
        try:
            records = conn.execute(statement, (product_name,)).fetchall()
            for record in records:
                raw_url = record[0]
                if not raw_url:
                    continue
                try:
                    parts = urllib.parse.urlparse(raw_url)
                except Exception:
                    continue
                host = (parts.netloc or "").lower()
                # Drop trailing slashes but keep a bare "/" for the root.
                path = (parts.path or "/").rstrip("/") or "/"
                if host:
                    pages_by_host.setdefault(host, set()).add(path)
        except Exception as e:
            print(f"  _db_created_pages query error: {e}", file=sys.stderr)
    return pages_by_host
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _created_paths_for_project(conn, proj, days=None):
    """Return {domain: set(paths)} of pages we created for this project.

    The result is the union of two sources: a filesystem scan of the
    project's landing-pages repo, attributed to every domain the project
    owns, and the URLs logged in seo_keywords / gsc_queries, keyed by their
    own host.

    When `days` is set the filesystem scan is skipped entirely: static page
    files on disk carry no creation timestamp we can trust, so a
    window-scoped "pages created in the last N days" answer has to come from
    the DB alone.
    """
    merged = {}
    owned_domains = ps.get_project_domains(proj) or []
    if days is None:
        landing = proj.get("landing_pages") or {}
        repo = landing.get("repo") if isinstance(landing, dict) else None
        scanned = _scan_repo_pages(repo) if repo else set()
        for owned in owned_domains:
            merged.setdefault(owned.lower(), set()).update(scanned)
    logged = _db_created_pages(conn, proj.get("name") or "", days=days)
    for host, paths in logged.items():
        merged.setdefault(host, set()).update(paths)
    return merged
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _norm_path(p):
    """Normalize a URL path: one guaranteed leading "/", no trailing "/".

    Mirrors the frontend `normPath` in bin/server.js so PostHog pathnames
    (`properties.$pathname`) and DB-derived created paths compare cleanly.
    A falsy input, or a path made only of slashes, becomes "/".
    """
    text = str(p or "/")
    rooted = text if text.startswith("/") else "/" + text
    return rooted.rstrip("/") or "/"
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# HogQL-based PostHog query layer.
|
|
219
|
+
#
|
|
220
|
+
# project_stats.py uses the events LIST endpoint with limit=1000 and no
|
|
221
|
+
# pagination, so any (domain, event) that exceeds 1000 occurrences in the
|
|
222
|
+
# window silently caps at 1000 and misreports the funnel. We swap that out
|
|
223
|
+
# for HogQL aggregate queries (COUNT/GROUP BY), which return the true
|
|
224
|
+
# totals in a single call per query.
|
|
225
|
+
# Allow-list for domain strings: letters, digits, ".", "_" and "-" only.
# `_ph_batch_counts` flags any domain that fails this match as
# "unsafe domain" and quotes only the passing ones into its HogQL list.
_SAFE_DOMAIN_RE = re.compile(r"^[A-Za-z0-9._-]+$")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class HogqlError(Exception):
    """Signals that a HogQL query could not be completed, retries included.

    Callers should report the failure on the rows it affects rather than
    quietly displaying zeros for them.
    """
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# Seconds `_hogql` sleeps before each retry, indexed by attempt number; the
# last entry is reused once attempts outnumber the entries.
_RETRY_BACKOFF_S = (2.0, 5.0, 12.0)
# Ceiling, in seconds, on the wait `_hogql` uses when the server sends a
# Retry-After header.
_RETRY_AFTER_CAP_S = 30.0
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _hogql(api_key, project_id, query, timeout=120, max_attempts=4):
    """Run a HogQL query against /api/projects/{pid}/query/.

    Args:
        api_key: PostHog API key, sent as a Bearer token.
        project_id: PostHog project id used in the request URL.
        query: HogQL query text.
        timeout: Per-request timeout in seconds passed to `urlopen`.
        max_attempts: Total number of tries, including the first.

    Returns:
        The "results" list from the JSON response ([] when absent or empty).

    Raises:
        HogqlError: on permanent failure, so callers can mark rows as
            errored rather than zero.

    Retries on 429 (throttled), 5xx, socket read timeouts, URL errors,
    connections that are reset or dropped mid-response, and response bodies
    that are not valid JSON. Honors `Retry-After` up to
    `_RETRY_AFTER_CAP_S`; otherwise uses `_RETRY_BACKOFF_S`. Other HTTP
    status codes fail immediately.

    NOTE: the batched-by-$host queries cover many domains in one scan, so a
    single query for a large shared PostHog bucket (e.g. pid 330744 with
    ~18 projects) can run >60s on a cold cache. A socket read timeout
    surfaces as `socket.timeout`/`TimeoutError`, which is a sibling of
    `urllib.error.URLError` (both subclass OSError), so it must be caught
    explicitly; otherwise it escapes this retry loop and the caller marks
    the entire bucket as errored on the very first slow query, rendering
    'err' for every project sharing that bucket. The same reasoning applies
    to `ConnectionError` and `http.client.HTTPException`, which can be raised
    while the response is being received and are not wrapped in `URLError`.
    """
    # Imported at point of use, as this file does for other rarely needed
    # modules; only the exception classes are required.
    import http.client

    url = f"https://us.posthog.com/api/projects/{project_id}/query/"
    body = json.dumps({"query": {"kind": "HogQLQuery", "query": query}}).encode("utf-8")
    last_err = None
    for attempt in range(max_attempts):
        req = urllib.request.Request(url, data=body, method="POST", headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        })
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read())
                return data.get("results", []) or []
        except urllib.error.HTTPError as e:
            try:
                detail = e.read().decode("utf-8", errors="replace")[:300]
            except Exception:
                detail = ""
            last_err = f"HTTP {e.code}: {detail}"
            retryable = (e.code == 429) or (500 <= e.code < 600)
            if not retryable or attempt == max_attempts - 1:
                print(f"  HogQL HTTPError {e.code}: {detail} | query={query[:120]}", file=sys.stderr)
                break
            wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
            try:
                ra = e.headers.get("Retry-After") if e.headers else None
                if ra is not None:
                    # Never wait less than our own backoff, never more than the cap.
                    wait = min(_RETRY_AFTER_CAP_S, max(wait, float(ra)))
            except Exception:
                # Unparseable Retry-After (e.g. an HTTP date): keep the backoff.
                pass
            print(f"  HogQL {e.code} retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s | query={query[:80]}", file=sys.stderr)
            time.sleep(wait)
            continue
        except (socket.timeout, TimeoutError) as e:
            # Read timeout on a heavy batched query. Retryable: a retry
            # often hits a warm cache and returns in time. Caught before
            # URLError because TimeoutError is NOT a URLError subclass.
            last_err = f"read timeout after {timeout}s: {e}"
            if attempt == max_attempts - 1:
                print(f"  HogQL timeout (>{timeout}s): {e} | query={query[:120]}", file=sys.stderr)
                break
            wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
            print(f"  HogQL timeout retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s | query={query[:80]}", file=sys.stderr)
            time.sleep(wait)
            continue
        except urllib.error.URLError as e:
            # A URLError can also wrap a socket.timeout (e.reason). All
            # URLErrors are retried, so that case is covered here as well.
            last_err = f"URLError: {e}"
            if attempt == max_attempts - 1:
                print(f"  HogQL URLError: {e} | query={query[:120]}", file=sys.stderr)
                break
            wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
            print(f"  HogQL URLError retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s: {e}", file=sys.stderr)
            time.sleep(wait)
            continue
        except (ConnectionError, http.client.HTTPException, ValueError) as e:
            # Connection reset / dropped or truncated response, or a body
            # that is not valid JSON (ValueError covers JSONDecodeError).
            # Without this handler these escape the retry loop instead of
            # ending in HogqlError as documented.
            last_err = f"{type(e).__name__}: {e}"
            if attempt == max_attempts - 1:
                print(f"  HogQL transport error: {last_err} | query={query[:120]}", file=sys.stderr)
                break
            wait = _RETRY_BACKOFF_S[min(attempt, len(_RETRY_BACKOFF_S) - 1)]
            print(f"  HogQL transport error retry {attempt + 1}/{max_attempts - 1} in {wait:.1f}s: {last_err}", file=sys.stderr)
            time.sleep(wait)
            continue
    raise HogqlError(last_err or "unknown HogQL failure")
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _empty_domain_stats(domain, error=None):
    """Build an all-zero stats record for one domain.

    Every counter is 0, `pageview_details` holds a single zeroed entry keyed
    by `domain`, and `cta_details` is an empty list. When `error` is truthy
    it is stored under "error", signalling that the zeros mean UNKNOWN (not
    truly 0) so the dashboard can render an error cell instead of silently
    misreporting.
    """
    counter_names = (
        "pageviews",
        "cta_clicks",
        "email_signups",
        "schedule_clicks",
        "get_started_clicks",
        "cross_product_clicks",
    )
    stats = dict.fromkeys(counter_names, 0)
    breakdown = {"total": 0}
    for key in ("top_pages", "top_pages_signups", "top_pages_schedule", "top_pages_get_started"):
        breakdown[key] = {}
    stats["pageview_details"] = {domain: breakdown}
    stats["cta_details"] = []
    if error:
        stats["error"] = error
    return stats
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# Legacy + canonical event names for the "get started" click. Fazm fires
# `download_click`, Assrt fires `cta_get_started_clicked`, new sites fire
# `get_started_click`. Collapse this back to a single name once both old
# sites migrate to trackGetStartedClick.
# NOTE(review): stored as a pre-quoted, parenthesised list, presumably so it
# can be spliced into a HogQL `IN` clause; the usage is outside this view.
_GET_STARTED_EVENTS = "('get_started_click', 'download_click', 'cta_get_started_clicked')"
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _ph_batch_counts(api_key, project_id, domains, after_iso):
|
|
348
|
+
"""Fetch per-domain PostHog aggregates for every `domain` in one batched
|
|
349
|
+
pass against a single (api_key, project_id) bucket.
|
|
350
|
+
|
|
351
|
+
The previous implementation fired ~10 HogQL queries per domain, which
|
|
352
|
+
fanned out to 100+ concurrent requests and tripped PostHog's rate
|
|
353
|
+
limiter; throttled calls silently returned 0, misreporting every
|
|
354
|
+
project except the one with its own dedicated API key.
|
|
355
|
+
|
|
356
|
+
This version groups each aggregate by `properties.$host`, so one query
|
|
357
|
+
covers every domain in the bucket. Returns `{domain: stats_dict}` in
|
|
358
|
+
the same shape the old per-domain function produced. On permanent
|
|
359
|
+
HogQL failure, raises `HogqlError` so the caller can mark rows as
|
|
360
|
+
errored rather than rendering a misleading zero.
|
|
361
|
+
"""
|
|
362
|
+
result = {d: _empty_domain_stats(d) for d in domains}
|
|
363
|
+
safe_domains = []
|
|
364
|
+
for d in domains:
|
|
365
|
+
if _SAFE_DOMAIN_RE.match(d or ""):
|
|
366
|
+
safe_domains.append(d)
|
|
367
|
+
else:
|
|
368
|
+
print(f" skip unsafe domain: {d!r}", file=sys.stderr)
|
|
369
|
+
result[d]["error"] = "unsafe domain"
|
|
370
|
+
if not safe_domains:
|
|
371
|
+
return result
|
|
372
|
+
|
|
373
|
+
after_str = (after_iso or "").replace("T", " ")
|
|
374
|
+
if not after_str:
|
|
375
|
+
return result
|
|
376
|
+
|
|
377
|
+
in_list = ", ".join(f"'{d}'" for d in safe_domains)
|
|
378
|
+
|
|
379
|
+
def _count_by_host(event_clause, distinct_key=None):
|
|
380
|
+
# Pass `distinct_key` (e.g. "properties.email") to dedupe across
|
|
381
|
+
# double-fired events for the same conversion. Used for email
|
|
382
|
+
# signups where both `newsletter_subscribed` (client) and
|
|
383
|
+
# `newsletter_subscribed_server` (server) fire for one submission.
|
|
384
|
+
count_expr = (
|
|
385
|
+
f"count(DISTINCT {distinct_key}) AS c"
|
|
386
|
+
if distinct_key
|
|
387
|
+
else "count() AS c"
|
|
388
|
+
)
|
|
389
|
+
q = (
|
|
390
|
+
f"SELECT properties.$host AS host, {count_expr} FROM events "
|
|
391
|
+
f"WHERE {event_clause} "
|
|
392
|
+
f"AND properties.$host IN ({in_list}) "
|
|
393
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
394
|
+
"GROUP BY host"
|
|
395
|
+
)
|
|
396
|
+
rows = _hogql(api_key, project_id, q)
|
|
397
|
+
return {r[0]: int(r[1]) for r in (rows or []) if r and r[0]}
|
|
398
|
+
|
|
399
|
+
def _top_pages_by_host(event_clause, row_cap=5000, distinct_key="distinct_id"):
|
|
400
|
+
# All per-page breakdowns count unique users (distinct_id) rather than
|
|
401
|
+
# raw events. A visitor that views the same /pricing twice or rage-
|
|
402
|
+
# clicks the same CTA still counts as 1. Pass `distinct_key=None` to
|
|
403
|
+
# opt back into raw count() for legacy callers.
|
|
404
|
+
count_expr = (
|
|
405
|
+
f"count(DISTINCT {distinct_key}) AS c"
|
|
406
|
+
if distinct_key
|
|
407
|
+
else "count() AS c"
|
|
408
|
+
)
|
|
409
|
+
q = (
|
|
410
|
+
f"SELECT properties.$host AS host, properties.$pathname AS path, {count_expr} FROM events "
|
|
411
|
+
f"WHERE {event_clause} "
|
|
412
|
+
f"AND properties.$host IN ({in_list}) "
|
|
413
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
414
|
+
f"GROUP BY host, path ORDER BY c DESC LIMIT {int(row_cap)}"
|
|
415
|
+
)
|
|
416
|
+
rows = _hogql(api_key, project_id, q)
|
|
417
|
+
out = {d: {} for d in safe_domains}
|
|
418
|
+
for r in (rows or []):
|
|
419
|
+
host = r[0] if len(r) > 0 else None
|
|
420
|
+
path = r[1] if len(r) > 1 and r[1] else "/"
|
|
421
|
+
cnt = int(r[2]) if len(r) > 2 else 0
|
|
422
|
+
if host in out:
|
|
423
|
+
out[host][path] = cnt
|
|
424
|
+
return out
|
|
425
|
+
|
|
426
|
+
# Email signups: client `newsletter_subscribed` is ad-blocker-lossy
|
|
427
|
+
# (~57% capture). Server-side `newsletter_subscribed_server` (added in
|
|
428
|
+
# @m13v/seo-components v0.38) is ground truth. Count both with DISTINCT
|
|
429
|
+
# email so a client + server pair for the same submission collapses to one.
|
|
430
|
+
_SIGNUP_CLAUSE = (
|
|
431
|
+
"event IN ('newsletter_subscribed', 'newsletter_subscribed_server')"
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
# Visitors, not raw pageviews. Globally consistent with every other
|
|
435
|
+
# column in this batch (cta_clicks, schedule_clicks, get_started_clicks,
|
|
436
|
+
# cross_product_clicks, email_signups all count unique users). A visitor
|
|
437
|
+
# bouncing between /pricing and /docs still counts as 1.
|
|
438
|
+
#
|
|
439
|
+
# PERF/RATE-LIMIT: PostHog throttles the (shared) personal API key
|
|
440
|
+
# org-wide on the /query endpoint (429 "throttled"). The five
|
|
441
|
+
# unique-visitor counts that all dedupe on distinct_id are folded into
|
|
442
|
+
# ONE grouped query (bucketed by event via multiIf) instead of five
|
|
443
|
+
# separate HogQL calls. Email signups stay on their own query because
|
|
444
|
+
# they dedupe on coalesce(email, distinct_id), a different key. This
|
|
445
|
+
# halves the per-bucket request count, which is what was blowing the
|
|
446
|
+
# rate limit when all windows/buckets fired at once.
|
|
447
|
+
def _multi_count_by_host():
|
|
448
|
+
bucket_expr = (
|
|
449
|
+
"multiIf("
|
|
450
|
+
"event = '$pageview', 'pv', "
|
|
451
|
+
"event = 'cta_click', 'cta', "
|
|
452
|
+
"event = 'schedule_click', 'sched', "
|
|
453
|
+
f"event IN {_GET_STARTED_EVENTS}, 'gs', "
|
|
454
|
+
"'cross')"
|
|
455
|
+
)
|
|
456
|
+
q = (
|
|
457
|
+
f"SELECT properties.$host AS host, {bucket_expr} AS bkt, "
|
|
458
|
+
"count(DISTINCT distinct_id) AS c FROM events "
|
|
459
|
+
"WHERE event IN ('$pageview', 'cta_click', 'schedule_click', "
|
|
460
|
+
"'cross_product_click', 'get_started_click', 'download_click', "
|
|
461
|
+
"'cta_get_started_clicked') "
|
|
462
|
+
f"AND properties.$host IN ({in_list}) "
|
|
463
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
464
|
+
"GROUP BY host, bkt"
|
|
465
|
+
)
|
|
466
|
+
rows = _hogql(api_key, project_id, q)
|
|
467
|
+
out = {"pv": {}, "cta": {}, "sched": {}, "gs": {}, "cross": {}}
|
|
468
|
+
for r in (rows or []):
|
|
469
|
+
host = r[0] if len(r) > 0 else None
|
|
470
|
+
bkt = r[1] if len(r) > 1 else None
|
|
471
|
+
cnt = int(r[2]) if len(r) > 2 else 0
|
|
472
|
+
if host and bkt in out:
|
|
473
|
+
out[bkt][host] = cnt
|
|
474
|
+
return out
|
|
475
|
+
|
|
476
|
+
_counts = _multi_count_by_host()
|
|
477
|
+
pv_total = _counts["pv"]
|
|
478
|
+
cta_total = _counts["cta"]
|
|
479
|
+
sched_total = _counts["sched"]
|
|
480
|
+
# Get Started = unique users who took the conversion action, not raw clicks.
|
|
481
|
+
get_started_total = _counts["gs"]
|
|
482
|
+
cross_product_total = _counts["cross"]
|
|
483
|
+
# Email signups: client `newsletter_subscribed` is ad-blocker-lossy
|
|
484
|
+
# (~57% capture). Server-side `newsletter_subscribed_server` is ground
|
|
485
|
+
# truth. Count both with DISTINCT email so a client + server pair for the
|
|
486
|
+
# same submission collapses to one. Kept as its own query (distinct key
|
|
487
|
+
# differs from the distinct_id batch above).
|
|
488
|
+
signup_total = _count_by_host(
|
|
489
|
+
_SIGNUP_CLAUSE,
|
|
490
|
+
distinct_key="coalesce(properties.email, distinct_id)",
|
|
491
|
+
)
|
|
492
|
+
|
|
493
|
+
# Per-page breakdowns. The big $pageview scan keeps its own large cap;
|
|
494
|
+
# the three low-volume conversion breakdowns (signup/sched/get_started,
|
|
495
|
+
# all distinct_id) fold into one grouped-by-event query for the same
|
|
496
|
+
# rate-limit reason as the counts above.
|
|
497
|
+
top_pv = _top_pages_by_host("event = '$pageview'", row_cap=5000)
|
|
498
|
+
|
|
499
|
+
def _multi_top_pages_small():
|
|
500
|
+
bucket_expr = (
|
|
501
|
+
"multiIf("
|
|
502
|
+
"event = 'schedule_click', 'sched', "
|
|
503
|
+
f"event IN {_GET_STARTED_EVENTS}, 'gs', "
|
|
504
|
+
"'signup')"
|
|
505
|
+
)
|
|
506
|
+
q = (
|
|
507
|
+
f"SELECT properties.$host AS host, properties.$pathname AS path, "
|
|
508
|
+
f"{bucket_expr} AS bkt, count(DISTINCT distinct_id) AS c FROM events "
|
|
509
|
+
"WHERE event IN ('schedule_click', 'newsletter_subscribed', "
|
|
510
|
+
"'newsletter_subscribed_server', 'get_started_click', "
|
|
511
|
+
"'download_click', 'cta_get_started_clicked') "
|
|
512
|
+
f"AND properties.$host IN ({in_list}) "
|
|
513
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
514
|
+
"GROUP BY host, path, bkt ORDER BY c DESC LIMIT 1500"
|
|
515
|
+
)
|
|
516
|
+
rows = _hogql(api_key, project_id, q)
|
|
517
|
+
out = {
|
|
518
|
+
"signup": {d: {} for d in safe_domains},
|
|
519
|
+
"sched": {d: {} for d in safe_domains},
|
|
520
|
+
"gs": {d: {} for d in safe_domains},
|
|
521
|
+
}
|
|
522
|
+
for r in (rows or []):
|
|
523
|
+
host = r[0] if len(r) > 0 else None
|
|
524
|
+
path = r[1] if len(r) > 1 and r[1] else "/"
|
|
525
|
+
bkt = r[2] if len(r) > 2 else None
|
|
526
|
+
cnt = int(r[3]) if len(r) > 3 else 0
|
|
527
|
+
if bkt in out and host in out[bkt]:
|
|
528
|
+
out[bkt][host][path] = cnt
|
|
529
|
+
return out
|
|
530
|
+
|
|
531
|
+
_tp = _multi_top_pages_small()
|
|
532
|
+
top_signup = _tp["signup"]
|
|
533
|
+
top_sched = _tp["sched"]
|
|
534
|
+
top_get_started = _tp["gs"]
|
|
535
|
+
|
|
536
|
+
cta_details_by_host = {d: [] for d in safe_domains}
|
|
537
|
+
if any(v > 0 for v in cta_total.values()):
|
|
538
|
+
cta_detail_q = (
|
|
539
|
+
"SELECT properties.$host AS host, properties.$el_text, properties.text, properties.section, timestamp "
|
|
540
|
+
"FROM events "
|
|
541
|
+
"WHERE event = 'cta_click' "
|
|
542
|
+
f"AND properties.$host IN ({in_list}) "
|
|
543
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
544
|
+
"ORDER BY timestamp DESC LIMIT 200"
|
|
545
|
+
)
|
|
546
|
+
rows = _hogql(api_key, project_id, cta_detail_q)
|
|
547
|
+
for r in (rows or []):
|
|
548
|
+
host = r[0] if len(r) > 0 else None
|
|
549
|
+
el_text = r[1] if len(r) > 1 else None
|
|
550
|
+
text = r[2] if len(r) > 2 else None
|
|
551
|
+
section = r[3] if len(r) > 3 else None
|
|
552
|
+
ts = r[4] if len(r) > 4 else None
|
|
553
|
+
bucket = cta_details_by_host.get(host)
|
|
554
|
+
if bucket is None or len(bucket) >= 10:
|
|
555
|
+
continue
|
|
556
|
+
bucket.append({
|
|
557
|
+
"text": el_text or text or "?",
|
|
558
|
+
"section": section or "?",
|
|
559
|
+
"time": (str(ts)[:16] if ts else "?"),
|
|
560
|
+
})
|
|
561
|
+
|
|
562
|
+
# Autocapture fallback: only domains with zero `cta_click` get the
|
|
563
|
+
# "$autocapture clicks whose text contains 'book'" treatment. Batched
|
|
564
|
+
# like everything else so we don't fan out.
|
|
565
|
+
fallback_hosts = [d for d in safe_domains if cta_total.get(d, 0) == 0]
|
|
566
|
+
if fallback_hosts:
|
|
567
|
+
fb_in = ", ".join(f"'{d}'" for d in fallback_hosts)
|
|
568
|
+
ac_total_q = (
|
|
569
|
+
"SELECT properties.$host AS host, count(DISTINCT distinct_id) AS c FROM events "
|
|
570
|
+
"WHERE event = '$autocapture' "
|
|
571
|
+
f"AND properties.$host IN ({fb_in}) "
|
|
572
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
573
|
+
"AND lower(properties.$el_text) LIKE '%book%' "
|
|
574
|
+
"GROUP BY host"
|
|
575
|
+
)
|
|
576
|
+
ac_rows = _hogql(api_key, project_id, ac_total_q)
|
|
577
|
+
ac_total = {r[0]: int(r[1]) for r in (ac_rows or []) if r and r[0]}
|
|
578
|
+
hosts_with_ac = [d for d in fallback_hosts if ac_total.get(d, 0) > 0]
|
|
579
|
+
if hosts_with_ac:
|
|
580
|
+
ac_in = ", ".join(f"'{d}'" for d in hosts_with_ac)
|
|
581
|
+
ac_detail_q = (
|
|
582
|
+
"SELECT properties.$host AS host, properties.$el_text, properties.text, properties.section, timestamp "
|
|
583
|
+
"FROM events "
|
|
584
|
+
"WHERE event = '$autocapture' "
|
|
585
|
+
f"AND properties.$host IN ({ac_in}) "
|
|
586
|
+
f"AND timestamp >= toDateTime('{after_str}') "
|
|
587
|
+
"AND lower(properties.$el_text) LIKE '%book%' "
|
|
588
|
+
"ORDER BY timestamp DESC LIMIT 200"
|
|
589
|
+
)
|
|
590
|
+
rows = _hogql(api_key, project_id, ac_detail_q)
|
|
591
|
+
for r in (rows or []):
|
|
592
|
+
host = r[0] if len(r) > 0 else None
|
|
593
|
+
el_text = r[1] if len(r) > 1 else None
|
|
594
|
+
text = r[2] if len(r) > 2 else None
|
|
595
|
+
section = r[3] if len(r) > 3 else None
|
|
596
|
+
ts = r[4] if len(r) > 4 else None
|
|
597
|
+
bucket = cta_details_by_host.get(host)
|
|
598
|
+
if bucket is None or len(bucket) >= 10:
|
|
599
|
+
continue
|
|
600
|
+
bucket.append({
|
|
601
|
+
"text": el_text or text or "?",
|
|
602
|
+
"section": section or "?",
|
|
603
|
+
"time": (str(ts)[:16] if ts else "?"),
|
|
604
|
+
})
|
|
605
|
+
# Roll autocapture counts into cta_total so the funnel "cta_clicks"
|
|
606
|
+
# column matches the detail list for fallback domains.
|
|
607
|
+
for h, c in ac_total.items():
|
|
608
|
+
cta_total[h] = max(cta_total.get(h, 0), c)
|
|
609
|
+
|
|
610
|
+
for d in safe_domains:
|
|
611
|
+
pv = pv_total.get(d, 0)
|
|
612
|
+
result[d] = {
|
|
613
|
+
"pageviews": pv,
|
|
614
|
+
"cta_clicks": cta_total.get(d, 0),
|
|
615
|
+
"email_signups": signup_total.get(d, 0),
|
|
616
|
+
"schedule_clicks": sched_total.get(d, 0),
|
|
617
|
+
"get_started_clicks": get_started_total.get(d, 0),
|
|
618
|
+
"cross_product_clicks": cross_product_total.get(d, 0),
|
|
619
|
+
"pageview_details": {d: {
|
|
620
|
+
"total": pv,
|
|
621
|
+
"top_pages": top_pv.get(d, {}),
|
|
622
|
+
"top_pages_signups": top_signup.get(d, {}),
|
|
623
|
+
"top_pages_schedule": top_sched.get(d, {}),
|
|
624
|
+
"top_pages_get_started": top_get_started.get(d, {}),
|
|
625
|
+
}},
|
|
626
|
+
"cta_details": cta_details_by_host.get(d, []),
|
|
627
|
+
}
|
|
628
|
+
return result
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _ph_combine(per_domain):
|
|
632
|
+
out = {
|
|
633
|
+
"pageviews": 0,
|
|
634
|
+
"cta_clicks": 0,
|
|
635
|
+
"email_signups": 0,
|
|
636
|
+
"schedule_clicks": 0,
|
|
637
|
+
"get_started_clicks": 0,
|
|
638
|
+
"cross_product_clicks": 0,
|
|
639
|
+
"pageview_details": {},
|
|
640
|
+
"cta_details": [],
|
|
641
|
+
}
|
|
642
|
+
for s in per_domain:
|
|
643
|
+
out["pageviews"] += s.get("pageviews", 0)
|
|
644
|
+
out["cta_clicks"] += s.get("cta_clicks", 0)
|
|
645
|
+
out["email_signups"] += s.get("email_signups", 0)
|
|
646
|
+
out["schedule_clicks"] += s.get("schedule_clicks", 0)
|
|
647
|
+
out["get_started_clicks"] += s.get("get_started_clicks", 0)
|
|
648
|
+
out["cross_product_clicks"] += s.get("cross_product_clicks", 0)
|
|
649
|
+
out["pageview_details"].update(s.get("pageview_details", {}))
|
|
650
|
+
out["cta_details"].extend(s.get("cta_details", []))
|
|
651
|
+
return out
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _bookings_shared(bookings_conn, client_slug, days, table="cal_bookings", require_utm=False):
|
|
655
|
+
"""Same output shape as ps.get_booking_stats, but reuses a shared psycopg2
|
|
656
|
+
connection instead of opening a fresh one per project.
|
|
657
|
+
`table` is `cal_bookings` (Cal.com) or `calendly_bookings` (Calendly).
|
|
658
|
+
`require_utm` gates `real_bookings` on `utm_source IS NOT NULL` for
|
|
659
|
+
projects whose booking destination is shared with non-marketing inbound
|
|
660
|
+
(set in config.json via `bookings_require_utm`)."""
|
|
661
|
+
if not bookings_conn or not client_slug:
|
|
662
|
+
return None
|
|
663
|
+
try:
|
|
664
|
+
if table not in {"cal_bookings", "calendly_bookings"}:
|
|
665
|
+
raise ValueError(f"unsupported booking table: {table}")
|
|
666
|
+
utm_clause = " AND utm_source IS NOT NULL" if require_utm else ""
|
|
667
|
+
cur = bookings_conn.cursor()
|
|
668
|
+
cur.execute(
|
|
669
|
+
"SELECT COUNT(*), "
|
|
670
|
+
"COUNT(*) FILTER (WHERE status = 'created'), "
|
|
671
|
+
"COUNT(*) FILTER (WHERE status = 'cancelled'), "
|
|
672
|
+
"COUNT(*) FILTER (WHERE status = 'rescheduled'), "
|
|
673
|
+
"COUNT(*) FILTER (WHERE attendee_email NOT ILIKE '%%test%%' "
|
|
674
|
+
"AND attendee_email NOT ILIKE '%%example%%' "
|
|
675
|
+
"AND attendee_email NOT ILIKE '%%+%%verify%%' "
|
|
676
|
+
"AND attendee_name NOT ILIKE '%%test%%' "
|
|
677
|
+
"AND attendee_name NOT ILIKE '%%verification%%' "
|
|
678
|
+
"AND attendee_name NOT ILIKE '%%delete-me%%' "
|
|
679
|
+
"AND attendee_name NOT ILIKE '%%john doe%%'"
|
|
680
|
+
+ utm_clause + ") "
|
|
681
|
+
"FROM " + table + " WHERE client_slug = %s "
|
|
682
|
+
"AND created_at >= NOW() - INTERVAL '" + str(days) + " days'",
|
|
683
|
+
(client_slug,),
|
|
684
|
+
)
|
|
685
|
+
row = cur.fetchone()
|
|
686
|
+
cols = ["total", "booked", "cancelled", "rescheduled", "real_bookings"]
|
|
687
|
+
result = dict(zip(cols, row)) if row else {}
|
|
688
|
+
|
|
689
|
+
cur.execute(
|
|
690
|
+
"SELECT attendee_name, attendee_email, status, start_time, created_at "
|
|
691
|
+
"FROM " + table + " WHERE client_slug = %s "
|
|
692
|
+
"AND created_at >= NOW() - INTERVAL '" + str(days) + " days' "
|
|
693
|
+
"ORDER BY created_at DESC LIMIT 5",
|
|
694
|
+
(client_slug,),
|
|
695
|
+
)
|
|
696
|
+
result["recent"] = [
|
|
697
|
+
{"name": r[0], "email": r[1], "status": r[2],
|
|
698
|
+
"start": str(r[3])[:16] if r[3] else "?",
|
|
699
|
+
"created": str(r[4])[:16] if r[4] else "?"}
|
|
700
|
+
for r in cur.fetchall()
|
|
701
|
+
]
|
|
702
|
+
cur.close()
|
|
703
|
+
return result
|
|
704
|
+
except Exception as e:
|
|
705
|
+
print(f" Bookings DB error for {client_slug}: {e}", file=sys.stderr)
|
|
706
|
+
return None
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
def _dm_short_link_stats(conn, name, days):
|
|
710
|
+
"""Per-project DM short-link click attribution.
|
|
711
|
+
|
|
712
|
+
`dm_clicks`: SUM(dm_links.clicks) JOIN dms d for DMs that reference this
|
|
713
|
+
project (target_project OR membership in target_projects[]) and were last
|
|
714
|
+
touched in the window. Captures every DM click — booking, github, website,
|
|
715
|
+
or kind=other — bumped at the resolver. Multi-link, multi-turn safe.
|
|
716
|
+
"""
|
|
717
|
+
if not name or name == SYNTHETIC_NO_PROJECT_NAME:
|
|
718
|
+
return 0
|
|
719
|
+
try:
|
|
720
|
+
cur = conn.execute(
|
|
721
|
+
"SELECT COALESCE(SUM(l.clicks), 0)::int "
|
|
722
|
+
"FROM dm_links l "
|
|
723
|
+
"JOIN dms d ON d.id = l.dm_id "
|
|
724
|
+
"WHERE (COALESCE(d.target_project, d.project_name) = %s "
|
|
725
|
+
" OR %s = ANY(d.target_projects)) "
|
|
726
|
+
"AND COALESCE(d.last_message_at, d.discovered_at) >= NOW() - INTERVAL '" + str(int(days)) + " days'",
|
|
727
|
+
(name, name),
|
|
728
|
+
)
|
|
729
|
+
return int((cur.fetchone() or (0,))[0])
|
|
730
|
+
except Exception as e:
|
|
731
|
+
print(f" dm_short_link_stats error for {name}: {e}", file=sys.stderr)
|
|
732
|
+
return 0
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def _dm_booking_count(conn, bookings_conn, name, days):
|
|
736
|
+
"""Count cal_bookings within the window whose metadata.utm_content
|
|
737
|
+
(`dm_<id>`) maps to a DM targeting this project.
|
|
738
|
+
|
|
739
|
+
The webhook stores the entire Cal.com payload under cal_bookings.metadata,
|
|
740
|
+
and the original UTM lives at metadata.payload.metadata.utm_content. We
|
|
741
|
+
parse the dm_id out of `dm_<n>`, then join against dms.target_project /
|
|
742
|
+
project_name in the main DB to scope by project.
|
|
743
|
+
"""
|
|
744
|
+
if not bookings_conn or not name or name == SYNTHETIC_NO_PROJECT_NAME:
|
|
745
|
+
return 0
|
|
746
|
+
try:
|
|
747
|
+
cur = bookings_conn.cursor()
|
|
748
|
+
cur.execute(
|
|
749
|
+
"SELECT metadata#>>'{payload,metadata,utm_content}' AS utm_content "
|
|
750
|
+
"FROM cal_bookings "
|
|
751
|
+
"WHERE metadata#>>'{payload,metadata,utm_content}' LIKE 'dm_%%' "
|
|
752
|
+
"AND created_at >= NOW() - INTERVAL '" + str(int(days)) + " days' "
|
|
753
|
+
"AND COALESCE(attendee_email, '') NOT ILIKE '%%test%%'"
|
|
754
|
+
)
|
|
755
|
+
dm_ids = []
|
|
756
|
+
for (utm,) in cur.fetchall():
|
|
757
|
+
if utm and utm.startswith('dm_'):
|
|
758
|
+
try:
|
|
759
|
+
dm_ids.append(int(utm.split('_', 1)[1]))
|
|
760
|
+
except (ValueError, IndexError):
|
|
761
|
+
pass
|
|
762
|
+
cur.close()
|
|
763
|
+
if not dm_ids:
|
|
764
|
+
return 0
|
|
765
|
+
cur2 = conn.execute(
|
|
766
|
+
"SELECT COUNT(*)::int FROM dms WHERE id = ANY(%s) "
|
|
767
|
+
"AND COALESCE(target_project, project_name) = %s",
|
|
768
|
+
(dm_ids, name),
|
|
769
|
+
)
|
|
770
|
+
return int((cur2.fetchone() or (0,))[0])
|
|
771
|
+
except Exception as e:
|
|
772
|
+
print(f" dm_booking_count error for {name}: {e}", file=sys.stderr)
|
|
773
|
+
return 0
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def _period_total_engagement(conn, name, days, platform=None):
    """Engagement *gained during the window* across ALL of a project's
    posts, regardless of when each post was created.

    Feeds the bracketed "(total)" value on the project panel. Each metric
    is the sum of two complementary branches:

      1. New posts (created inside the window): the live `posts.*`
         counters are credited in full, since all of it was earned in the
         window. No reddit/moltbook -1 OP self-vote discount is applied,
         so the sum stays >= the scoped column, which does discount.
      2. Old posts (created before the window): day-over-day gains from
         `post_views_daily` via LAG, restricted to snapshots inside the
         window. Rows with NULL values are dropped by the IS NOT NULL
         filters, so posts whose snapshots carry no upvotes/comments
         contribute 0 here.

    Views exclude moltbook / github / github_issues in both branches;
    upvotes and comments have no platform exclusion. When `platform` is
    given, the clause from `_platform_sql_clause` is appended to every
    branch.

    post_clicks = SUM(post_links.clicks) for new posts + COUNT of
    non-bot `post_link_clicks` events in the window on old posts.

    Returns:
        dict with int `upvotes`, `comments`, `views`, `post_clicks`.
    """
    window = "INTERVAL '" + str(int(days)) + " days'"
    views_excl = "LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')"
    plat_clause = _platform_sql_clause(platform, "p")
    proj_clause, proj_params = _project_filter_sql(name, "p")

    # Row filters shared by the engagement query and the clicks query.
    created_in_window = (
        "WHERE " + proj_clause + " "
        "AND p.posted_at >= NOW() - " + window + plat_clause
    )
    created_before_window = (
        "AND " + proj_clause + " "
        "AND p.posted_at < NOW() - " + window + plat_clause
    )

    # Branch 1: posts created in the window, live counters in full.
    new_posts_cte = (
        "WITH new_posts AS ("
        "SELECT "
        "COALESCE(SUM(p.upvotes), 0)::bigint AS upvotes, "
        "COALESCE(SUM(p.comments_count), 0)::bigint AS comments, "
        "COALESCE(SUM(p.views) FILTER (WHERE " + views_excl + "), 0)::bigint AS views "
        "FROM posts p "
        + created_in_window +
        "), "
    )
    # Branch 2a: per-snapshot values next to the previous snapshot's.
    old_post_daily_cte = (
        "old_post_daily AS ("
        "SELECT pvd.post_id, p.platform, "
        "pvd.upvotes, LAG(pvd.upvotes) OVER w AS prev_upvotes, "
        "pvd.comments, LAG(pvd.comments) OVER w AS prev_comments, "
        "pvd.views, LAG(pvd.views) OVER w AS prev_views "
        "FROM post_views_daily pvd "
        "JOIN posts p ON p.id = pvd.post_id "
        "WHERE pvd.day >= CURRENT_DATE - " + window + " "
        + created_before_window + " "
        "WINDOW w AS (PARTITION BY pvd.post_id ORDER BY pvd.day)"
        "), "
    )
    # Branch 2b: sum the non-negative daily gains.
    old_posts_cte = (
        "old_posts AS ("
        "SELECT "
        "COALESCE(SUM(GREATEST(upvotes - prev_upvotes, 0)) "
        "FILTER (WHERE prev_upvotes IS NOT NULL AND upvotes IS NOT NULL), 0)::bigint AS upvotes, "
        "COALESCE(SUM(GREATEST(comments - prev_comments, 0)) "
        "FILTER (WHERE prev_comments IS NOT NULL AND comments IS NOT NULL), 0)::bigint AS comments, "
        "COALESCE(SUM(GREATEST(views - prev_views, 0)) "
        "FILTER (WHERE prev_views IS NOT NULL AND views IS NOT NULL "
        "AND LOWER(platform) NOT IN ('moltbook', 'github', 'github_issues')), 0)::bigint AS views "
        "FROM old_post_daily"
        ") "
    )
    engagement_sql = (
        new_posts_cte
        + old_post_daily_cte
        + old_posts_cte
        + "SELECT "
        "n.upvotes + o.upvotes, "
        "n.comments + o.comments, "
        "n.views + o.views "
        "FROM new_posts n CROSS JOIN old_posts o"
    )
    # The project filter appears twice per statement, so do its params.
    both_branches = proj_params + proj_params

    totals = conn.execute(engagement_sql, both_branches).fetchone() or (0, 0, 0)
    upvotes_total, comments_total, views_total = (
        int(totals[0] or 0),
        int(totals[1] or 0),
        int(totals[2] or 0),
    )

    # Clicks: the "new posts" leg mirrors the scoped column exactly, so
    # bracket >= scoped holds; the "old posts" leg adds click events that
    # hit pre-existing posts during the window.
    clicks_sql = (
        "WITH new_clicks AS ("
        "SELECT COALESCE(SUM(pl.total_clicks), 0)::bigint AS clicks "
        "FROM posts p "
        "LEFT JOIN ("
        "SELECT post_id, SUM(clicks)::int AS total_clicks "
        "FROM post_links WHERE post_id IS NOT NULL GROUP BY post_id"
        ") pl ON pl.post_id = p.id "
        + created_in_window +
        "), "
        "old_event_clicks AS ("
        "SELECT COALESCE(COUNT(*), 0)::bigint AS clicks "
        "FROM post_link_clicks plc "
        "JOIN post_links pl ON pl.code = plc.code "
        "JOIN posts p ON p.id = pl.post_id "
        "WHERE plc.ts >= NOW() - " + window + " "
        "AND plc.is_bot = FALSE "
        + created_before_window +
        ") "
        "SELECT n.clicks + o.clicks "
        "FROM new_clicks n CROSS JOIN old_event_clicks o"
    )
    click_row = conn.execute(clicks_sql, both_branches).fetchone() or (0,)

    return {
        "upvotes": upvotes_total,
        "comments": comments_total,
        "views": views_total,
        "post_clicks": int(click_row[0] or 0),
    }
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
def _windowed_post_engagement(conn, name, days, platform=None):
    """Sum engagement only for posts *created within the window*.

    project_stats.get_post_stats aggregates over ALL time, which misleads
    when the window is a day or a week. Filtering on `posted_at` makes
    upvotes/comments/views/post_clicks cover the same slice as the
    'recent' post count.

    Args:
        conn: connection whose `.execute(sql, params)` returns a cursor.
        name: project name, turned into a filter by `_project_filter_sql`.
        days: window length in days. Coerced with `int()` before being
            embedded in the SQL text, matching `_period_total_engagement`.
        platform: optional platform; when set, the clause from
            `_platform_sql_clause` is appended so this agrees with the
            /api/style/stats view on the same denominator.

    Returns:
        dict of ints:
          upvotes      NET of the Reddit/Moltbook OP self-upvote: each
                       such post is discounted by 1 (floored at 0) before
                       summing; other platforms pass through.
          comments     SUM(comments_count).
          views        SUM(views), excluding moltbook/github/github_issues.
          views_posts  number of posts counted toward `views`.
          post_clicks  SUM(post_links.clicks) for links keyed by post_id
                       (links without a post_id are not included).
    """
    plat_clause = _platform_sql_clause(platform, "p")
    proj_clause, proj_params = _project_filter_sql(name, "p")
    # Never splice a raw value into SQL: int() guarantees a plain number
    # and keeps this helper consistent with its siblings.
    window = "INTERVAL '" + str(int(days)) + " days'"
    cur = conn.execute(
        "SELECT COALESCE(SUM(CASE WHEN LOWER(p.platform) IN ('reddit', 'moltbook') "
        " THEN GREATEST(0, COALESCE(p.upvotes, 0) - 1) "
        " ELSE COALESCE(p.upvotes, 0) END), 0), "
        "COALESCE(SUM(p.comments_count), 0), "
        "COALESCE(SUM(p.views) FILTER (WHERE LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')), 0), "
        "COUNT(*) FILTER (WHERE LOWER(p.platform) NOT IN ('moltbook', 'github', 'github_issues')), "
        "COALESCE(SUM(pl.total_clicks), 0) "
        "FROM posts p "
        "LEFT JOIN ("
        " SELECT post_id, SUM(clicks)::int AS total_clicks "
        " FROM post_links WHERE post_id IS NOT NULL GROUP BY post_id"
        ") pl ON pl.post_id = p.id "
        "WHERE " + proj_clause + " AND p.posted_at >= NOW() - " + window
        + plat_clause,
        proj_params,
    )
    row = cur.fetchone() or (0, 0, 0, 0, 0)
    return {
        "upvotes": int(row[0] or 0),
        "comments": int(row[1] or 0),
        "views": int(row[2] or 0),
        "views_posts": int(row[3] or 0),
        "post_clicks": int(row[4] or 0),
    }
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
def _seo_pages_count(conn, name, days):
|
|
975
|
+
"""Count SEO pages published in window. seo_keywords.product matches project_name."""
|
|
976
|
+
cur = conn.execute(
|
|
977
|
+
"SELECT "
|
|
978
|
+
"(SELECT COUNT(*) FROM seo_keywords WHERE product = %s "
|
|
979
|
+
" AND completed_at >= NOW() - INTERVAL '" + str(days) + " days' "
|
|
980
|
+
" AND page_url IS NOT NULL) + "
|
|
981
|
+
"(SELECT COUNT(*) FROM gsc_queries WHERE product = %s "
|
|
982
|
+
" AND completed_at >= NOW() - INTERVAL '" + str(days) + " days' "
|
|
983
|
+
" AND page_url IS NOT NULL)",
|
|
984
|
+
(name, name),
|
|
985
|
+
)
|
|
986
|
+
row = cur.fetchone()
|
|
987
|
+
return int((row and row[0]) or 0)
|
|
988
|
+
|
|
989
|
+
|
|
990
|
+
def _amplitude_signups_24h_from_cache(proj):
    """For days==1, read the precomputed rolling-24h count from the cache
    written by scripts/amplitude_24h_signups.py.

    That script uses our own server-side PostHog `newsletter_subscribed`
    event (real-time, partner_outcome IN ('partner_created','partner_reused'))
    as the primary source, because Amplitude segmentation/export both lag
    several hours behind real time and bucket by calendar day in the
    project's display timezone.

    The `generated_at_utc` stamp may be timezone-aware, naive (treated as
    UTC, as the field name states) or carry a trailing `Z`. When the stamp
    is absent the staleness check is skipped.

    Returns int (count) or None when:
    - cache file missing / unreadable / malformed
    - project not present in cache (or its `count_24h` is null)
    - cache is older than 30 minutes (stale, fall back to live segmentation)
    """
    cache_path = os.path.expanduser(
        "~/social-autoposter/skill/cache/amplitude_24h_signups.json"
    )
    try:
        # A missing file raises FileNotFoundError, handled below like any
        # other unreadable-cache condition.
        with open(cache_path, encoding="utf-8") as f:
            cache = json.load(f)
        gen = cache.get("generated_at_utc")
        if gen:
            # datetime.fromisoformat() only accepts a trailing "Z" on newer
            # Python versions; normalise it to an explicit UTC offset.
            if isinstance(gen, str) and gen[-1:] in ("Z", "z"):
                gen = gen[:-1] + "+00:00"
            generated_at = datetime.fromisoformat(gen)
            if generated_at.tzinfo is None:
                # Naive stamp: subtracting it from an aware "now" would raise
                # and make the cache look permanently unusable.
                generated_at = generated_at.replace(tzinfo=timezone.utc)
            age_min = (datetime.now(timezone.utc) - generated_at).total_seconds() / 60
            if age_min > 30:
                return None
        for p in cache.get("projects") or []:
            if p.get("name") == proj.get("name"):
                v = p.get("count_24h")
                return int(v) if v is not None else None
    except Exception:
        # Deliberately best-effort: any problem with the cache means
        # "no cached value" so the caller falls back to the live query.
        return None
    return None
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
def _amplitude_signups(proj, days, env):
    """Pull attributed end-product signup count from the client's Amplitude.

    For projects with an `amplitude` config block (project_id, api_key_env,
    secret_key_env, signup_event, attribution_filter). Returns total signups
    matching the filter over the last `days`, or None if not configured /
    creds missing / API errors / unparseable response. Errors are non-fatal:
    they collapse to None so the dashboard falls back to the click-based
    metric.

    Special case: days == 1 reads from the rolling-24h cache populated by
    scripts/amplitude_24h_signups.py, which uses real-time PostHog data
    instead of Amplitude segmentation (which lags hours and buckets by
    calendar day in the project's display timezone). Falls through to the
    segmentation path if the cache is missing or stale.

    Args:
        proj: Project config dict; only `amplitude` and `name` are read.
        days: Window length in days.
        env: Mapping used to resolve the env-var names given in the
            `amplitude` block to the actual API key / secret.

    Returns:
        int signup count, or None as described above.
    """
    amp = proj.get("amplitude")
    if not amp:
        return None
    if days == 1:
        cached = _amplitude_signups_24h_from_cache(proj)
        if cached is not None:
            return cached
    api_key = env.get(amp.get("api_key_env", ""))
    secret_key = env.get(amp.get("secret_key_env", ""))
    if not api_key or not secret_key:
        return None
    import base64
    end_dt = datetime.now(timezone.utc)
    # start/end are sent as whole dates, so `days` days back means
    # subtracting days - 1 (today counts as the first day).
    start_dt = end_dt - timedelta(days=max(1, days) - 1)
    e = json.dumps({
        "event_type": amp.get("signup_event", "New User Sign Up"),
        "filters": [
            {
                "subprop_type": "event",
                "subprop_key": k,
                "subprop_op": "is",
                "subprop_value": v if isinstance(v, list) else [v],
            }
            for k, v in (amp.get("attribution_filter") or {}).items()
        ],
    })
    qs = urllib.parse.urlencode({
        "e": e,
        "start": start_dt.strftime("%Y%m%d"),
        "end": end_dt.strftime("%Y%m%d"),
        "i": "1",
        "m": "totals",
    })
    auth_b64 = base64.b64encode(f"{api_key}:{secret_key}".encode()).decode()
    req = urllib.request.Request(
        f"https://amplitude.com/api/2/events/segmentation?{qs}",
        headers={"Authorization": f"Basic {auth_b64}"},
    )
    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            data = json.loads(resp.read())
    except Exception as exc:
        print(f" amplitude signups fetch error ({proj.get('name')}): {exc}", file=sys.stderr)
        return None
    # A well-formed HTTP response can still carry an unexpected body (null
    # `data`, a non-object payload, non-numeric series values). Treat that
    # like any other API error instead of letting it escape to the caller.
    try:
        series = (data.get("data", {}).get("series") or [[]])[0]
        return int(sum(int(x or 0) for x in series))
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as exc:
        print(f" amplitude signups parse error ({proj.get('name')}): {exc}", file=sys.stderr)
        return None
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
def _post_stats_synthetic_null(conn, days):
    """Aggregate post stats for rows whose ``project_name`` IS NULL.

    Counterpart of ps.get_post_stats for the synthetic '(no project)'
    bucket: same result keys and the same upvote discount (reddit and
    moltbook posts lose one upvote, floored at zero), but the filter is
    ``project_name IS NULL`` rather than a name match. Only the ``recent``
    count is limited to the last ``days`` days; every other column spans all
    NULL-project posts.

    Returns a dict keyed by total / recent / active / removed /
    total_upvotes / total_comments / total_views, or ``{}`` when the query
    yields no row.
    """
    window_days = int(days)
    query = "".join((
        "SELECT COUNT(*), ",
        f"COUNT(*) FILTER (WHERE posted_at >= NOW() - INTERVAL '{window_days} days'), ",
        "COUNT(*) FILTER (WHERE status = 'active'), ",
        "COUNT(*) FILTER (WHERE status IN ('removed', 'deleted')), ",
        "COALESCE(SUM(CASE WHEN LOWER(platform) IN ('reddit', 'moltbook') ",
        " THEN GREATEST(0, COALESCE(upvotes, 0) - 1) ",
        " ELSE COALESCE(upvotes, 0) END), 0), ",
        "COALESCE(SUM(comments_count), 0), ",
        "COALESCE(SUM(views), 0) ",
        "FROM posts WHERE project_name IS NULL",
    ))
    record = conn.execute(query).fetchone()
    if not record:
        return {}
    keys = (
        "total",
        "recent",
        "active",
        "removed",
        "total_upvotes",
        "total_comments",
        "total_views",
    )
    return {key: value for key, value in zip(keys, record)}
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def _platform_breakdown_synthetic_null(conn, days):
    """Per-platform post counts for NULL-project posts in the last ``days`` days.

    Counterpart of ps.get_platform_breakdown for the synthetic
    '(no project)' bucket. Returns ``{platform: count}`` built from rows the
    query orders by count, largest first.
    """
    window_days = int(days)
    query = (
        "SELECT platform, COUNT(*) as cnt FROM posts "
        f"WHERE project_name IS NULL AND posted_at >= NOW() - INTERVAL '{window_days} days' "
        "GROUP BY platform ORDER BY cnt DESC"
    )
    breakdown = {}
    for record in conn.execute(query).fetchall():
        breakdown[record[0]] = record[1]
    return breakdown
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def build_project_entry(proj, days, api_key, ph_pid, env, ph_results, platform=None):
    """Assemble the dashboard entry (posts / SEO / PostHog / funnel) for one project.

    Args:
        proj: Project config dict; ``name`` is required, ``posthog`` and
            ``amplitude`` blocks are optional.
        days: Window length in days, forwarded to the stats endpoint and to
            the Amplitude lookup.
        api_key: Default PostHog API key, used unless the project's
            ``posthog.api_key_env`` resolves to a value in ``env``.
        ph_pid: Default PostHog project id, used unless the project sets
            ``posthog.project_id``.
        env: Mapping of environment variables.
        ph_results: Pre-fetched PostHog stats keyed by
            ``(api_key, project_id, domain)``.
        platform: Optional platform filter forwarded to the stats endpoint;
            falsy means no filter.

    Returns:
        A dict with ``name``, ``posts``, ``seo``, ``platforms``, ``posthog``,
        ``bookings``, ``funnel``, ``analytics_error`` and
        ``analytics_suspected_broken``. When PostHog errored for any of the
        project's domains, the PostHog-derived funnel values are None so the
        dashboard can render 'err' rather than a misleading 0.
    """
    name = proj["name"]
    # All main-DB + bookings-DB per-project stats come from one consolidated
    # HTTP endpoint (HTTP-only migration 2026-06-01). Booking scoping params
    # (client_slug / table / require_utm) are computed locally from config and
    # forwarded so the endpoint can read the separate bookings DB server-side.
    # The endpoint folds the platform filter into post_stats.recent directly,
    # so the legacy platform-override COUNT is no longer needed here.
    client_slug = ps.get_client_slug(name)
    booking_table = ps.get_booking_table(name)
    require_utm = _bookings_require_utm(name)
    from http_api import api_get
    _detail = (api_get("/api/v1/stats/project-detail", query={
        "project": name,
        "days": int(days),
        "platform": platform or "",
        "client_slug": client_slug or "",
        "booking_table": booking_table or "cal_bookings",
        "require_utm": "1" if require_utm else "0",
    }).get("data") or {})
    post_stats = dict(_detail.get("post_stats") or {})
    platforms = _detail.get("platforms") or {}
    # Overlay the payload on zeroed defaults so a partial response (a key
    # left out by the endpoint) reads as 0 instead of raising KeyError below.
    eng_recent = {
        "upvotes": 0, "comments": 0, "views": 0, "views_posts": 0, "post_clicks": 0,
        **(_detail.get("windowed") or {}),
    }
    eng_period_total = {
        "upvotes": 0, "comments": 0, "views": 0, "post_clicks": 0,
        **(_detail.get("period") or {}),
    }
    seo_pages_recent = int(_detail.get("seo_pages_recent") or 0)

    domains = ps.get_project_domains(proj)
    ph_override = proj.get("posthog", {}) or {}
    ph_key = env.get(ph_override.get("api_key_env", ""), api_key)
    ph_pid_proj = ph_override.get("project_id", ph_pid)
    analytics_error = None
    if domains:
        per_domain = []
        for d in domains:
            stats = ph_results.get((ph_key, ph_pid_proj, d))
            if stats is None:
                stats = _empty_domain_stats(d)
            # Keep the first error seen; one errored domain marks the project.
            if stats.get("error") and not analytics_error:
                analytics_error = stats["error"]
            per_domain.append(stats)
        posthog = _ph_combine(per_domain)
        if analytics_error:
            posthog["error"] = analytics_error
    else:
        posthog = None

    # Window-scoped created paths come from the endpoint's db_created_pages
    # ({host: [paths]}), i.e. pages whose seo_keywords/gsc_queries
    # `completed_at` falls inside `days`. With a window set, the filesystem
    # scan is intentionally skipped (static page files carry no trustworthy
    # creation timestamp), so the DB-derived set is the whole answer —
    # matching _created_paths_for_project with days set. Top tab → Pages
    # sub-tab already filters rows on this set, so it becomes "pages created
    # in the selected period" automatically.
    created_by_domain = {
        host: set(paths) for host, paths in (_detail.get("db_created_pages") or {}).items()
    }
    if posthog is not None:
        for d, detail in (posthog.get("pageview_details") or {}).items():
            paths = created_by_domain.get((d or "").lower(), set())
            detail["created_paths"] = sorted(paths)

    # Preserve the pre-rewrite, domain-wide totals for the analytics-broken
    # canary below — it's meant to answer "is window.posthog wired up on this
    # site at all?", which requires domain-level signal, not per-new-page.
    domain_wide_pv = int(posthog["pageviews"]) if posthog else 0
    domain_wide_signups = int(posthog["email_signups"]) if posthog else 0
    domain_wide_sched = int(posthog["schedule_clicks"]) if posthog else 0
    domain_wide_get_started = int(posthog["get_started_clicks"]) if posthog else 0

    # Only pageviews get window-scoped to "traffic on pages we generated in
    # this window". Conversion events (newsletter_subscribed, schedule_click,
    # get_started_click) fire on dedicated landing pages (/, /use-case, /ig,
    # etc.), almost never on the freshly-generated /blog/* and /t/* SEO pages
    # we ship each cycle. Scoping those collapsed every project to 0 and made
    # the dashboard's Email Signups / Schedule Clicks / Get Started columns
    # useless. Domain-wide is the honest metric for those. cta_clicks and
    # real_bookings are not tracked per-page so they stay domain/project-wide.
    #
    # Skip entirely when PostHog is errored: the top_pages maps are empty
    # for errored domains, so scoping would silently collapse everything to
    # zero. Keep the funnel values as None below so the dashboard renders
    # 'err' instead of a misleading 0.
    if posthog is not None and not analytics_error:
        scoped_pv = 0
        for d, detail in (posthog.get("pageview_details") or {}).items():
            created = {_norm_path(p) for p in created_by_domain.get((d or "").lower(), set())}
            if not created:
                continue
            for path, cnt in (detail.get("top_pages") or {}).items():
                if _norm_path(path) in created:
                    scoped_pv += int(cnt or 0)
        posthog["pageviews"] = scoped_pv

    bookings = _detail.get("bookings")

    # When the PostHog batch failed, the aggregate numbers on `posthog` are
    # all 0 but that doesn't mean there are no events, it means we couldn't
    # read them. Surface null + an error string on the funnel so the
    # dashboard renders 'err' instead of silently claiming "zero pageviews".
    if analytics_error:
        pvs = None
        ctas = None
        email_signups = None
        schedule_clicks = None
        get_started_clicks = None
        cross_product_clicks = None
        ctr = None
        conv = None
        dw_pv_out = None
        dw_signups_out = None
        dw_sched_out = None
        dw_get_started_out = None
        analytics_suspected_broken = False
    else:
        pvs = posthog["pageviews"] if posthog else 0
        ctas = posthog["cta_clicks"] if posthog else 0
        email_signups = (posthog["email_signups"] if posthog else 0)
        schedule_clicks = (posthog["schedule_clicks"] if posthog else 0)
        get_started_clicks = (posthog["get_started_clicks"] if posthog else 0)
        # Cross-product stays domain-wide on purpose: it's a lightweight
        # signal ("how many clicks went to a sibling product from this site")
        # with no per-page top-pages breakdown, so there's nothing to scope.
        cross_product_clicks = (posthog.get("cross_product_clicks", 0) if posthog else 0)
        # Domain-wide counterparts for the "scoped (domain-wide)" dashboard
        # rendering. domain_wide_* were captured before the window-scoping
        # overwrote posthog["pageviews"].
        dw_pv_out = domain_wide_pv if posthog else 0
        dw_signups_out = domain_wide_signups if posthog else 0
        dw_sched_out = domain_wide_sched if posthog else 0
        dw_get_started_out = domain_wide_get_started if posthog else 0
        ctr = (ctas / pvs * 100) if pvs else None
        conv = None  # computed below once `real` is in scope
        # Canary: real traffic but zero tracked conversion events almost
        # always means window.posthog was never wired up on the site (e.g.
        # Fazm newsletter bug where signups worked but nothing fired to
        # PostHog). Use domain-wide totals so the signal isn't diluted by
        # the window-scoped funnel numbers above.
        analytics_suspected_broken = (domain_wide_pv >= 500) and (
            (domain_wide_signups + domain_wide_sched + domain_wide_get_started) == 0
        )

    real = bookings.get("real_bookings", 0) if bookings else 0
    dm_clicks = int(_detail.get("dm_clicks") or 0)
    dm_bookings = int(_detail.get("dm_bookings") or 0)
    amplitude_signups = _amplitude_signups(proj, days, env)
    if not analytics_error:
        conv = (real / ctas * 100) if ctas else None

    amplitude_cfg = proj.get("amplitude")

    return {
        "name": name,
        "posts": {
            "total": post_stats.get("total", 0),
            "recent": post_stats.get("recent", 0),
            "active": post_stats.get("active", 0),
            "removed": post_stats.get("removed", 0),
            # Lifetime engagement across ALL posts for this project (kept for context).
            "upvotes": post_stats.get("total_upvotes", 0),
            "comments": post_stats.get("total_comments", 0),
            "views": post_stats.get("total_views", 0),
            # Window-scoped engagement: only posts created in the last `days`.
            "upvotes_recent": eng_recent["upvotes"],
            "comments_recent": eng_recent["comments"],
            # None (not 0) when no post in the window counts towards views;
            # `or 0` keeps a null views_posts from raising on the comparison.
            "views_recent": eng_recent["views"] if (eng_recent["views_posts"] or 0) > 0 else None,
            # post_clicks_recent: SUM of post_links.clicks for short links
            # minted for posts in this project's window. Pre-2026-05-07 rows
            # may include bot prefetches; post-2026-05-07 rows are humans-only
            # (Twitter card / LinkedIn unfurl / Slack preview filtered at the
            # resolver via post_link_clicks.is_bot). See server.js /api/top.
            "post_clicks_recent": eng_recent["post_clicks"],
            # Period totals: engagement GAINED during the window across ALL
            # posts (regardless of posted_at), mirroring the Trends-tab
            # /api/{views,upvotes,comments,clicks}/per-day SUM. The dashboard
            # renders each as "<scoped> (<period_total>)" in gray brackets.
            # post_clicks_period_total counts post_link_clicks (is_bot=FALSE)
            # in the window joined to this project's posts.
            "upvotes_period_total": eng_period_total["upvotes"],
            "comments_period_total": eng_period_total["comments"],
            "views_period_total": eng_period_total["views"],
            "post_clicks_period_total": eng_period_total["post_clicks"],
        },
        "seo": {"pages_recent": seo_pages_recent},
        "platforms": platforms,
        "posthog": posthog,
        "bookings": bookings,
        "funnel": {
            "pageviews": pvs,
            "cta_clicks": ctas,
            "email_signups": email_signups,
            "schedule_clicks": schedule_clicks,
            "get_started_clicks": get_started_clicks,
            "cross_product_clicks": cross_product_clicks,
            "real_bookings": real,
            "dm_clicks": dm_clicks,
            "dm_bookings": dm_bookings,
            # Attributed signups on the client's product (Amplitude), filtered
            # by the UTM source we forward (config.json projects[].amplitude).
            # null when the project has no `amplitude` block or the fetch
            # fails — dashboard falls back to get_started_clicks.
            "amplitude_signups": amplitude_signups,
            # Filter shape (e.g. {"utm_source": "studyly.io"}) for tooltip;
            # null when the project has no `amplitude` block.
            "amplitude_filter": amplitude_cfg.get("attribution_filter") if amplitude_cfg else None,
            "ctr_pct": ctr,
            "conv_pct": conv,
            # Domain-wide siblings: the dashboard shows each as "<scoped>
            # (<domain>)" so "0 pv for mk0r" doesn't hide 62 real visits
            # that happened to land on older pages.
            "domain_pageviews": dw_pv_out,
            "domain_email_signups": dw_signups_out,
            "domain_schedule_clicks": dw_sched_out,
            "domain_get_started_clicks": dw_get_started_out,
        },
        "analytics_error": analytics_error,
        "analytics_suspected_broken": analytics_suspected_broken,
    }
|
|
1347
|
+
|
|
1348
|
+
|
|
1349
|
+
def main():
    """CLI entry point: print per-project funnel stats as one JSON document.

    Flags: ``--days`` (window, default 1), ``--project`` (case-insensitive
    single-project filter), ``--platform`` (single-platform filter) and
    ``--posts-only`` (fast path that emits only the posts.* counters).

    Output goes to stdout. An invalid platform, or a missing
    POSTHOG_PERSONAL_API_KEY on the full path, prints ``{"error": ...}`` and
    exits with status 1.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--days", type=int, default=1)
    parser.add_argument("--project", help="Filter to a single project name")
    parser.add_argument(
        "--platform",
        default="",
        help=(
            "Filter to a single platform (twitter|reddit|linkedin|github|moltbook). "
            "'x' is folded into 'twitter'. Empty / 'all' = no filter. "
            "Matches the same normalization used by /api/style/stats."
        ),
    )
    parser.add_argument(
        "--posts-only",
        action="store_true",
        help=(
            "Emit ONLY the per-project posts.* engagement counters; skip the "
            "PostHog batch (pageviews/CTAs), the bookings DB, Amplitude, and "
            "SEO page counts. Drops the python runtime from ~30s+ to ~1s. "
            "Used by /api/funnel/stats as a fast overlay path when the "
            "dashboard's platform pill changes — those slow sources are "
            "platform-independent so the all-platform snapshot's values for "
            "them stay correct, and only the engagement columns need to "
            "react to the filter."
        ),
    )
    args = parser.parse_args()

    # Normalize platform early; pass empty string when no filter so build_project_entry
    # can splat it unconditionally without spreading the alias logic everywhere.
    platform = _normalize_platform(args.platform)
    # Safety: enforce the same pattern /api/funnel/stats accepts so a bad CLI
    # value can't smuggle SQL through _platform_sql_clause. fullmatch() is
    # used because `$` with re.match() also matches just before a trailing
    # newline, which would let "twitter\n" through.
    if platform and not re.fullmatch(r"[a-z0-9_]{1,32}", platform):
        print(json.dumps({"error": f"invalid platform: {args.platform!r}"}), file=sys.stdout)
        sys.exit(1)

    ps.load_env()
    env = os.environ
    config = ps.load_config()

    api_key = env.get("POSTHOG_PERSONAL_API_KEY")
    project_id = env.get("POSTHOG_PROJECT_ID", "330744")

    _bridge_per_project_posthog_keys_from_keychain(config, env)

    # Fast path: --posts-only skips the slow PostHog/Amplitude/bookings work
    # and emits ONLY the per-project posts.* counters. Used as a low-latency
    # overlay on top of the cached all-platform snapshot when the dashboard's
    # platform pill changes (see /api/funnel/stats in bin/server.js). Runs
    # in ~1s instead of ~30s because there are no PostHog/Amplitude calls AND
    # the per-project SQL is collapsed into batched GROUP BY queries
    # (the naive per-project loop pays N x ~180ms Postgres round-trip).
    if args.posts_only:
        # posts-only batch: all 8 batched GROUP BY / synthetic queries now run
        # server-side at /api/v1/stats/posts-batch (HTTP-only). The dicts below
        # are keyed by project_name with the synthetic NULL bucket under
        # SYNTHETIC_NO_PROJECT_NAME, matching the prior local computation.
        from http_api import api_get
        _batch = (api_get("/api/v1/stats/posts-batch",
                          query={"days": int(args.days), "platform": platform or ""}).get("data") or {})
        lifetime = _batch.get("lifetime") or {}
        windowed = _batch.get("windowed") or {}
        period = _batch.get("period") or {}
        period_clicks = _batch.get("period_clicks") or {}

        # Project list: real projects from config + the synthetic NULL bucket.
        proj_list = list(config.get("projects", [])) + [{"name": SYNTHETIC_NO_PROJECT_NAME}]
        out_projects = []
        for proj in proj_list:
            name = proj["name"]
            if args.project and args.project.lower() != name.lower():
                continue
            life = lifetime.get(name) or {}
            # Overlay on zeroed defaults so a partial row (missing key) reads
            # as 0 instead of raising KeyError.
            w = {
                "upvotes": 0, "comments": 0, "views": 0, "views_posts": 0, "post_clicks": 0,
                **(windowed.get(name) or {}),
            }
            pe = {"upvotes": 0, "comments": 0, "views": 0, **(period.get(name) or {})}
            out_projects.append({
                "name": name,
                "posts": {
                    # `or 0` so a JSON null does not blow up int().
                    "total": int(life.get("total") or 0),
                    "recent": int(life.get("recent") or 0),
                    "active": int(life.get("active") or 0),
                    "removed": int(life.get("removed") or 0),
                    "upvotes": int(life.get("total_upvotes") or 0),
                    "comments": int(life.get("total_comments") or 0),
                    "views": int(life.get("total_views") or 0),
                    "upvotes_recent": w["upvotes"],
                    "comments_recent": w["comments"],
                    "views_recent": w["views"] if (w["views_posts"] or 0) > 0 else None,
                    "post_clicks_recent": w["post_clicks"],
                    "upvotes_period_total": pe["upvotes"],
                    "comments_period_total": pe["comments"],
                    "views_period_total": pe["views"],
                    "post_clicks_period_total": int(period_clicks.get(name) or 0),
                },
            })
        print(json.dumps({
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "days": args.days,
            "platform": platform or "all",
            "posts_only": True,
            "projects": out_projects,
        }))
        return

    if not api_key:
        print(json.dumps({"error": "POSTHOG_PERSONAL_API_KEY not set"}), file=sys.stdout)
        sys.exit(1)

    # Per-project main-DB + bookings-DB stats now come from HTTP endpoints
    # (build_project_entry calls /api/v1/stats/project-detail). No direct
    # Postgres connection is opened here anymore (HTTP-only, 2026-06-01).
    selected_projects = []
    for proj in config.get("projects", []):
        name = proj["name"]
        if args.project and args.project.lower() != name.lower():
            continue
        selected_projects.append(proj)

    # Synthetic '(no project)' bucket: surfaces posts.project_name IS NULL rows
    # (e.g. IG drafts that landed without a project tag) so the funnel total
    # lines up with /api/style/stats. No website/landing_pages/posthog block,
    # so get_project_domains() returns [] -> PostHog/SEO/booking lookups all
    # become no-ops; per-project SQL helpers route through _project_filter_sql
    # to use `IS NULL` instead of `= name`.
    if not args.project or args.project.lower() == SYNTHETIC_NO_PROJECT_NAME.lower():
        selected_projects.append({"name": SYNTHETIC_NO_PROJECT_NAME})

    # Group domains by (api_key, project_id) so we issue one batched set of
    # HogQL calls per PostHog bucket instead of one-per-domain. Projects that
    # share a bucket collapse into a single batched fetch; projects with
    # dedicated credentials run in their own bucket concurrently.
    after = (datetime.now(timezone.utc) - timedelta(days=args.days)).strftime("%Y-%m-%dT%H:%M:%S")
    buckets = {}
    for proj in selected_projects:
        domains = ps.get_project_domains(proj)
        if not domains:
            continue
        ph_over = proj.get("posthog", {}) or {}
        ph_key = env.get(ph_over.get("api_key_env", ""), api_key)
        ph_pid_proj = ph_over.get("project_id", project_id)
        buckets.setdefault((ph_key, ph_pid_proj), set()).update(domains)

    # One batched fetch per bucket. When a batch fails after retries, mark
    # every domain in that bucket as errored rather than rendering zeros.
    #
    # Concurrency is capped low (2) on purpose: PostHog's query endpoint
    # enforces a short-window burst limit (429 "throttled", recovery 1-12s),
    # and the personal API key is shared across most buckets. Firing 8
    # buckets at once (each ~10 sequential HogQL queries) created a
    # thundering herd that all hit the limiter together, all backed off
    # together, and re-collided on retry until the 4 attempts were
    # exhausted, marking whole buckets errored ('err' on the dashboard for
    # every project sharing them). Two-at-a-time keeps us under the burst
    # ceiling while the Retry-After-honoring backoff in _hogql absorbs the
    # occasional 429.
    ph_results = {}
    if buckets:
        pool_size = max(1, min(2, len(buckets)))
        with ThreadPoolExecutor(max_workers=pool_size) as ex:
            futs = {
                ex.submit(_ph_batch_counts, k, pid, sorted(ds), after): (k, pid, ds)
                for (k, pid), ds in buckets.items()
            }
            for fut, (k, pid, ds) in futs.items():
                try:
                    per_domain = fut.result()
                    for d, stats in per_domain.items():
                        ph_results[(k, pid, d)] = stats
                except HogqlError as e:
                    msg = f"PostHog unavailable: {e}"
                    print(f" PostHog batch error (pid={pid}): {e}", file=sys.stderr)
                    for d in ds:
                        ph_results[(k, pid, d)] = _empty_domain_stats(d, error=msg)
                except Exception as e:
                    msg = f"PostHog batch error: {e}"
                    print(f" PostHog batch unexpected error (pid={pid}): {e}", file=sys.stderr)
                    for d in ds:
                        ph_results[(k, pid, d)] = _empty_domain_stats(d, error=msg)

    out_projects = []
    for proj in selected_projects:
        name = proj["name"]
        try:
            out_projects.append(build_project_entry(
                proj, args.days, api_key, project_id, env, ph_results,
                platform=platform,
            ))
        except Exception as e:
            # One failing project must not take down the whole report; emit
            # an error row for it and keep going.
            out_projects.append({"name": name, "error": str(e)})

    # `overall.recent` also respects the platform filter so the dashboard's
    # "N project(s)" / total header stays self-consistent with the per-row data.
    from http_api import api_get
    _overall = (api_get("/api/v1/stats/posts-overall",
                        query={"days": int(args.days), "platform": platform or ""}).get("data") or {})
    total_all = int(_overall.get("total") or 0)
    total_recent = int(_overall.get("recent") or 0)

    print(json.dumps({
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "days": args.days,
        "platform": platform or "all",
        "projects": out_projects,
        "overall": {"total": total_all, "recent": total_recent},
    }))


if __name__ == "__main__":
    main()
|