@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""harvest_twitter_following.py — cache the list of accounts WE follow on X.
|
|
3
|
+
|
|
4
|
+
The Twitter reply pipeline (score_twitter_candidates.py) drops candidate threads
|
|
5
|
+
whose author is someone we already follow. fxtwitter can't supply that edge — it's
|
|
6
|
+
an unauthenticated public API with no concept of "us" — so the follow relationship
|
|
7
|
+
has to be read from our own logged-in session. This script scrapes
|
|
8
|
+
`x.com/<handle>/following` via the harness Chrome (CDP, port 9555, same browser the
|
|
9
|
+
cycle uses) and uploads the set to /api/v1/followed-accounts.
|
|
10
|
+
|
|
11
|
+
Read-only: ONE navigation + DOM reads + scrolls. No clicks, no posting, no
|
|
12
|
+
/voyager. Runs under the shared "twitter-browser" lock (held by the shell wrapper
|
|
13
|
+
skill/refresh-twitter-following.sh) so it never races a live cycle.
|
|
14
|
+
|
|
15
|
+
Completeness guard: we only upload when the scroll reached the end of the list
|
|
16
|
+
(the deduped set stopped growing for STABLE_PASSES passes). A partial scrape is
|
|
17
|
+
discarded, never uploaded — otherwise the un-scrolled tail would wrongly age out
|
|
18
|
+
of the server's freshness window.
|
|
19
|
+
|
|
20
|
+
Usage:
|
|
21
|
+
python3 scripts/harvest_twitter_following.py # scrape + upload
|
|
22
|
+
python3 scripts/harvest_twitter_following.py --dry-run # scrape + print, no upload
|
|
23
|
+
python3 scripts/harvest_twitter_following.py --out /tmp/following.json
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import sys
|
|
31
|
+
import time
|
|
32
|
+
|
|
33
|
+
# Make the sibling helper modules in this directory (account_resolver,
# http_api) importable no matter which working directory the script runs from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))


def _env_int(name: str, default: int, minimum: int = 1) -> int:
    """Read an integer tuning knob from the environment.

    A missing or blank variable yields ``default``. A non-integer value is
    reported on stderr and replaced by ``default`` instead of crashing the
    script at import time. A value below ``minimum`` is raised to ``minimum``
    so a stray 0 or negative number cannot disable the scroll loop or break
    the upload chunking.
    """
    raw = (os.environ.get(name) or "").strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"[harvest] ignoring non-integer {name}={raw!r}; using {default}",
              file=sys.stderr)
        return default
    if value < minimum:
        print(f"[harvest] {name}={value} is below {minimum}; using {minimum}",
              file=sys.stderr)
        return minimum
    return value


# CDP endpoint of the harness Chrome. A blank override falls back to the
# default rather than producing an empty URL.
CDP_URL = (os.environ.get("TWITTER_CDP_URL") or "").strip() or "http://127.0.0.1:9555"
PLATFORM = "twitter"

# Scroll/scrape tuning (env-overridable for slow boxes / very large lists).
STABLE_PASSES = _env_int("FOLLOW_HARVEST_STABLE_PASSES", 5)
MAX_PASSES = _env_int("FOLLOW_HARVEST_MAX_PASSES", 800)
PAUSE_MS = _env_int("FOLLOW_HARVEST_PAUSE_MS", 900, minimum=0)
UPLOAD_CHUNK = _env_int("FOLLOW_HARVEST_UPLOAD_CHUNK", 1000)

# Each row on the Following tab is a [data-testid="UserCell"]. The profile link
# href is exactly `/<screen_name>`; grab the first anchor matching that shape
# (X handles are 1-15 chars of [A-Za-z0-9_]) that isn't a reserved app route.
# The expression evaluates to a JSON string: a list of {screen_name, name}.
SCRAPE_JS = r"""
(() => {
  const RESERVED = new Set(['home','explore','notifications','messages','i',
    'settings','search','compose','hashtag','intent','login','signup','tos',
    'privacy','about']);
  const cells = Array.from(document.querySelectorAll('[data-testid="UserCell"]'));
  const out = [];
  for (const c of cells) {
    let handle = null;
    for (const a of c.querySelectorAll('a[href^="/"]')) {
      const m = (a.getAttribute('href') || '').match(/^\/([A-Za-z0-9_]{1,15})$/);
      if (m && !RESERVED.has(m[1].toLowerCase())) { handle = m[1]; break; }
    }
    if (!handle) continue;
    let name = null;
    const un = c.querySelector('[data-testid="User-Name"]');
    if (un) {
      // User-Name mashes "Display Name@handle…"; the display name is the text
      // before the first '@'.
      name = ((un.textContent || '').split('@')[0]).trim().slice(0, 120) || null;
    }
    out.push({ screen_name: handle, name });
  }
  return JSON.stringify(out);
})()
"""
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _resolve_handle() -> str:
    """Return our own X handle, normalized to lowercase without '@'.

    The handle comes from ``account_resolver.resolve("twitter")``. Any failure
    of that lookup is reported on stderr and treated as "no handle".

    Whitespace is stripped before the leading '@' so a value such as
    ``" @Name "`` normalizes to ``"name"``. A value that is empty after
    normalization (for example a bare ``"@"``) counts as missing.

    Exits the process with status 1 when no usable handle is configured.
    """
    raw = None
    try:
        import account_resolver

        raw = account_resolver.resolve("twitter")
    except Exception as e:  # best-effort lookup; the refusal below is the real guard
        print(f"[harvest] account_resolver failed ({e})", file=sys.stderr)

    handle = raw.strip().lstrip("@").strip().lower() if isinstance(raw, str) else ""
    if handle:
        return handle

    # No hardcoded fallback: stamping a default handle here silently harvests /
    # attributes under the wrong account. Refuse to run instead so the missing
    # config surfaces. Run connect_x (or `setup_twitter_auth.py resolve-handle`)
    # to persist accounts.twitter.handle.
    print("[harvest] no Twitter handle configured (accounts.twitter.handle / "
          "AUTOPOSTER_TWITTER_HANDLE); refusing to run to avoid wrong-account "
          "attribution.", file=sys.stderr)
    sys.exit(1)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _looks_logged_out(url: str) -> bool:
    """Return True when ``url`` is one of X's login / access-challenge routes.

    Only the URL *path* is inspected, and a route must match a whole path
    prefix (``/login``, ``/i/flow/login``, ``/account/access``). Matching on
    raw substrings would flag any profile whose handle merely starts with
    "login" (e.g. ``https://x.com/loginqueen/following``) as logged out.

    Empty, ``None`` or unparseable input yields False. The URL is expected to
    be absolute (scheme + host), as ``page.url`` is.
    """
    from urllib.parse import urlsplit

    try:
        path = urlsplit((url or "").strip().lower()).path
    except ValueError:
        return False
    # Normalize to exactly one leading slash and no trailing slash.
    path = "/" + path.strip("/")
    routes = ("/login", "/i/flow/login", "/account/access")
    return any(path == route or path.startswith(route + "/") for route in routes)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def scrape_following(handle: str) -> tuple[dict, bool]:
    """Scrape ``x.com/<handle>/following`` through the harness Chrome.

    Returns ``(seen, complete)`` where ``seen`` maps lowercase screen name to
    display name ("" when none was found) and ``complete`` is True only when
    the deduped set stopped growing for STABLE_PASSES consecutive *successful*
    scrape passes. Hitting the MAX_PASSES cap, rendering no rows at all, or
    repeated scrape failures all yield ``complete=False``.

    A pass whose DOM read fails is never counted as a "stable" pass; counting
    it would let a run of failures mark a partial list as complete.

    Raises RuntimeError when the browser exposes no context or the session
    looks logged out. Other Playwright errors (connect, navigation, scroll)
    propagate to the caller.
    """
    from playwright.sync_api import sync_playwright

    # Our own account is compared case-insensitively and without '@'.
    own = (handle or "").strip().lstrip("@").lower()
    seen: dict[str, str] = {}
    complete = False
    stable = 0
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(CDP_URL)
        try:
            contexts = browser.contexts
            if not contexts:
                raise RuntimeError("no browser context on harness Chrome — is it logged in?")
            context = contexts[0]
            # Reuse an existing tab (tab hygiene); fall back to a fresh page.
            page = context.pages[0] if context.pages else context.new_page()

            url = f"https://x.com/{handle}/following"
            page.goto(url, wait_until="domcontentloaded", timeout=45000)
            page.wait_for_timeout(2500)

            if _looks_logged_out(page.url):
                raise RuntimeError(f"session looks logged out (url={page.url})")

            # Wait for at least one row to render before scrolling.
            try:
                page.wait_for_selector('[data-testid="UserCell"]', timeout=20000)
            except Exception:
                # No cells at all — empty list, protected, or a block page. Treat as
                # incomplete so we never upload an empty/partial set.
                print(f"[harvest] no UserCell rendered for @{handle} (url={page.url})",
                      file=sys.stderr)
                return seen, False

            last = 0
            failures = 0
            failure_limit = max(1, STABLE_PASSES)
            for i in range(MAX_PASSES):
                try:
                    raw = page.evaluate(SCRAPE_JS)
                    rows = json.loads(raw) if isinstance(raw, str) else (raw or [])
                except Exception as e:
                    print(f"[harvest] evaluate failed on pass {i} ({e})", file=sys.stderr)
                    rows = None

                if rows is None:
                    # A failed read says nothing about whether the list ended.
                    failures += 1
                    stable = 0
                    if failures >= failure_limit:
                        print(f"[harvest] {failures} consecutive evaluate failures; "
                              f"stopping with an incomplete scrape", file=sys.stderr)
                        break
                else:
                    failures = 0
                    for r in rows:
                        if not isinstance(r, dict):
                            continue
                        sn = (r.get("screen_name") or "").strip().lower()
                        if not sn or sn == own:  # never list ourselves
                            continue
                        if sn not in seen:
                            seen[sn] = r.get("name") or ""

                    if len(seen) == last:
                        stable += 1
                        if stable >= STABLE_PASSES:
                            complete = True
                            break
                    else:
                        stable = 0
                        last = len(seen)

                page.evaluate(
                    "window.scrollBy(0, Math.round(document.documentElement.clientHeight * 0.85));"
                )
                page.wait_for_timeout(PAUSE_MS)
        finally:
            # Disconnect the CDP client without closing the shared Chrome/tab.
            # Done in `finally` so early returns and errors disconnect too.
            try:
                browser.close()
            except Exception:
                pass

    print(
        f"[harvest] @{handle}: collected {len(seen)} followed handles "
        f"(complete={complete}, passes_stable={stable}/{STABLE_PASSES})",
        file=sys.stderr,
    )
    return seen, complete
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def upload(handle: str, seen: dict) -> int:
    """POST the scraped set to /api/v1/followed-accounts in chunks.

    ``seen`` maps screen name to display name. Each request carries at most
    UPLOAD_CHUNK accounts together with the platform, our own handle and
    ``"complete": True``. Returns the number of accounts sent.

    NOTE(review): the return value of ``api_post`` is ignored; presumably it
    raises on a failed request — confirm against http_api.
    """
    from http_api import api_post

    accounts = [{"handle": h, "name": n} for h, n in seen.items()]
    # A zero step makes range() raise and a negative one yields no chunks at
    # all (silently "uploading" nothing), so fall back to the default size.
    step = UPLOAD_CHUNK if UPLOAD_CHUNK > 0 else 1000
    posted = 0
    for start in range(0, len(accounts), step):
        chunk = accounts[start:start + step]
        api_post(
            "/api/v1/followed-accounts",
            {
                "platform": PLATFORM,
                "our_account": handle,
                "accounts": chunk,
                "complete": True,
            },
        )
        posted += len(chunk)
    return posted
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main() -> int:
    """Command-line entry point: scrape the following list, then upload it.

    Returns the process exit code:
      0  uploaded, or ``--dry-run`` finished
      1  no usable handle, or the scrape raised
      2  scrape produced zero handles
      3  scrape was incomplete, so nothing was uploaded
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true",
                        help="Scrape and report but do not upload.")
    parser.add_argument("--out", help="Also write the scraped set to this JSON path.")
    parser.add_argument("--handle", help="Override the resolved posting handle.")
    args = parser.parse_args()

    # Strip whitespace before the '@' so " @name" normalizes to "name".
    handle = (args.handle or _resolve_handle()).strip().lstrip("@").strip().lower()
    if not handle:
        # An empty handle would build https://x.com//following.
        print("[harvest] handle is empty after normalization; refusing to run.",
              file=sys.stderr)
        return 1
    print(f"[harvest] resolving following list for @{handle} via {CDP_URL}",
          file=sys.stderr)

    try:
        seen, complete = scrape_following(handle)
    except Exception as e:
        print(f"[harvest] FAILED: {e}", file=sys.stderr)
        return 1

    if args.out:
        # The dump is written even for partial scrapes; a write failure is
        # reported but does not abort the run.
        try:
            with open(args.out, "w", encoding="utf-8") as fh:
                json.dump({"handle": handle, "complete": complete,
                           "accounts": seen}, fh, indent=2)
            print(f"[harvest] wrote scrape to {args.out}", file=sys.stderr)
        except OSError as e:
            print(f"[harvest] could not write {args.out}: {e}", file=sys.stderr)

    if not seen:
        print("[harvest] scraped 0 handles; nothing to upload.", file=sys.stderr)
        return 2
    if not complete:
        print(
            f"[harvest] scrape INCOMPLETE (hit {MAX_PASSES}-pass cap at "
            f"{len(seen)} handles); NOT uploading, to avoid aging out the "
            f"un-scrolled tail. Re-run will retry.",
            file=sys.stderr,
        )
        return 3
    if args.dry_run:
        print(f"[harvest] dry-run: would upload {len(seen)} handles for @{handle}.")
        return 0

    posted = upload(handle, seen)
    print(f"[harvest] uploaded {posted} followed handles for @{handle}.")
    return 0
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Continuous heartbeat to /api/v1/installations/heartbeat.
#
# Independent of any reply traffic: even when github-engage is quiet, this
# proves the install lane (identity.py + Vercel + Postgres) is round-tripping.
# A gap in installations.last_seen_at on the server is a leading signal of
# Vercel outage / DNS / cert / migration drift before any user-facing
# pipeline notices.
#
# Schedule: every 15 minutes via launchd.
# Logs:     $REPO_DIR/skill/logs/heartbeat.log
#           (REPO_DIR defaults to ~/social-autoposter)
# Exit:     0 when the server answered 200, 1 on any failure.

set -euo pipefail

# SAPS_->S4L_ env mirror (brand rename 2026-07-03): old plists/tasks still
# export SAPS_*; new code reads S4L_*. Only variable NAMES reach eval, and only
# after being checked to be plain identifiers: `env` prints multi-line values
# verbatim, so a value line that happens to start with "SAPS_" would otherwise
# be evaluated as if it were a name.
while IFS='=' read -r _k _; do
  case "$_k" in
    *[!A-Za-z0-9_]*) continue ;;
    SAPS_*)
      _n="S4L_${_k#SAPS_}"
      # Skip when the source is not really set (a look-alike line from a
      # multi-line value) or when the S4L_ name is already set.
      eval "[ -z \"\${$_k+x}\" ] || [ -n \"\${$_n+x}\" ] || export $_n=\"\${$_k}\""
      ;;
  esac
done <<EOF_ENV
$(env | grep '^SAPS_' | cut -d= -f1 | sed 's/$/=/')
EOF_ENV

REPO_DIR="${REPO_DIR:-$HOME/social-autoposter}"
BASE_URL="${AUTOPOSTER_API_BASE:-https://s4l.ai}"
LOG_DIR="$REPO_DIR/skill/logs"
LOG_FILE="$LOG_DIR/heartbeat.log"

mkdir -p "$LOG_DIR"

# ts: current UTC timestamp; log: append one timestamped line to the log file.
ts() { date -u "+%Y-%m-%dT%H:%M:%SZ"; }
log() { printf '[%s] %s\n' "$(ts)" "$1" >> "$LOG_FILE"; }

PYTHON_BIN="${PYTHON_BIN:-/usr/bin/python3}"
HDR=$("$PYTHON_BIN" "$REPO_DIR/scripts/identity.py" header 2>>"$LOG_FILE") || {
  log "FAIL identity.py header non-zero"
  exit 1
}
if [ -z "$HDR" ]; then
  # An empty identity header cannot identify this installation.
  log "FAIL identity.py header empty"
  exit 1
fi

# Attach the S4L autopilot scheduled-task folder state (parity with the .mcpb
# heartbeat) so the server can tell centrally whether the queue-worker tasks are
# running from ~/.s4l-worker or are still mislocated. Best-effort: any failure,
# including output that is not valid JSON, falls back to an empty body so the
# heartbeat itself never depends on it.
BODY='{}'
if ST=$("$PYTHON_BIN" "$REPO_DIR/scripts/scheduled_tasks_snapshot.py" --summary 2>>"$LOG_FILE"); then
  if [ -n "$ST" ]; then
    if printf '%s' "$ST" | "$PYTHON_BIN" -c 'import json, sys; json.load(sys.stdin)' 2>>"$LOG_FILE"; then
      BODY="{\"scheduled_tasks\":$ST}"
    else
      log "WARN scheduled_tasks_snapshot.py output is not valid JSON; sending empty body"
    fi
  fi
fi

# POST so the server can refresh the volatile fields (last_ip, last_seen_at).
# -w appends a marker line carrying the status code and total time, which is
# parsed back out of the response below.
RESP=$(curl -fsS -m 20 \
  -X POST \
  -H "X-Installation: $HDR" \
  -H "content-type: application/json" \
  -d "$BODY" \
  -w "\n__HTTP__%{http_code}__%{time_total}s" \
  "$BASE_URL/api/v1/installations/heartbeat" 2>>"$LOG_FILE") || {
  log "FAIL curl exit=$?"
  exit 1
}

CODE=$(echo "$RESP" | sed -n 's/.*__HTTP__\([0-9]*\)__.*/\1/p')
DUR=$(echo "$RESP" | sed -n 's/.*__HTTP__[0-9]*__\(.*\)/\1/p')

if [ "$CODE" = "200" ]; then
  log "ok 200 ${DUR}"
else
  log "FAIL http=$CODE dur=$DUR"
  exit 1
fi
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
history_context.py -- PROTOTYPE: consent-gated, cwd-scoped pull of prior Claude
|
|
4
|
+
Code conversation context to prefill S4L onboarding fields.
|
|
5
|
+
|
|
6
|
+
Design (agreed 2026-07-02):
|
|
7
|
+
* READ-ONLY. Never mutates Claude session state (no archive, no writes to the
|
|
8
|
+
desktop app's session store).
|
|
9
|
+
* Consent-gated. Requires a one-time opt-in. The gate is checked on every call;
|
|
10
|
+
without it the pull refuses and returns nothing.
|
|
11
|
+
* cwd-scoped. Only reads sessions whose working dir is under the product's own
|
|
12
|
+
repos (local_repo + landing_pages.repo from config.json). This is BOTH the
|
|
13
|
+
relevance filter and the privacy boundary -- other clients' repos are never
|
|
14
|
+
touched.
|
|
15
|
+
* Enrichment only. Output is CANDIDATE fields for the user to confirm, never
|
|
16
|
+
saved silently.
|
|
17
|
+
|
|
18
|
+
Sources (all local, no network, no approval card in the running session's mode):
|
|
19
|
+
* ~/.claude/claude_sessions.db FTS5 index over msg_preview/tool_names/cwd
|
|
20
|
+
* ~/.claude/projects/<esc-cwd>/*.jsonl full transcripts, read only on --expand
|
|
21
|
+
|
|
22
|
+
CLI:
|
|
23
|
+
python3 scripts/history_context.py --project fazm
|
|
24
|
+
python3 scripts/history_context.py --project fazm --terms "icp,positioning,voice"
|
|
25
|
+
python3 scripts/history_context.py --project fazm --expand
|
|
26
|
+
python3 scripts/history_context.py --optin-status
|
|
27
|
+
python3 scripts/history_context.py --set-optin yes
|
|
28
|
+
|
|
29
|
+
Production note: the opt-in flag is stored here in a sidecar (~/.claude/
|
|
30
|
+
s4l_history_optin.json) to avoid mutating the live config.json from a prototype.
|
|
31
|
+
In production this becomes a top-level `history_context_optin` key in config.json
|
|
32
|
+
so it's one global, persisted decision reused across every product onboarding.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import argparse
|
|
38
|
+
import json
|
|
39
|
+
import os
|
|
40
|
+
import re
|
|
41
|
+
import sys
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
|
|
44
|
+
HOME = Path.home()
# NOTE: ~/.claude/claude_sessions.db is NOT a general index -- on this machine it
# covered only 5 claude-meter sessions. The authoritative, complete source is the
# per-cwd transcript tree below, so the pull reads that directly (the same
# card-free file path used to read an archived session).
PROJECTS_DIR = HOME / ".claude" / "projects"
# config.json is resolved relative to this file: it lives one directory above
# the directory that contains this script.
CONFIG_PATH = Path(__file__).resolve().parent.parent / "config.json"
# Sidecar file that stores the persisted opt-in record.
OPTIN_SIDECAR = HOME / ".claude" / "s4l_history_optin.json"

# Cap on how many *.jsonl transcripts (newest first) are read per session dir.
MAX_FILES_PER_DIR = 80
# Number of trailing (most recent) matching snippets kept for each session.
MAX_SNIPPETS_PER_SESSION = 6
# Maximum number of characters kept in each snippet preview.
PREVIEW_LEN = 220
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------- consent gate
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def optin_status() -> dict:
    """Return the persisted one-time opt-in record.

    Returns:
        The JSON object stored in the opt-in sidecar, or
        ``{"allowed": None, "ts": None}`` when the sidecar is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
        ``allowed is None`` means the user has never been asked.
    """
    try:
        record = json.loads(OPTIN_SIDECAR.read_text())
    except (OSError, ValueError):
        # Missing/unreadable file or corrupt JSON: treat it as "never asked"
        # so the consent gate stays closed instead of crashing.
        record = None
    if isinstance(record, dict):
        return record
    # A non-object payload (e.g. `[]` or `true`) would break callers that use
    # .get() on the result, so it is handled like an absent sidecar.
    return {"allowed": None, "ts": None}  # None => never asked
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def set_optin(allowed: bool) -> dict:
    """Persist the opt-in decision to the sidecar file and return the record.

    Args:
        allowed: The user's decision; coerced to ``bool`` before storing.

    Returns:
        The stored record: ``{"allowed": <bool>, "ts": <ISO timestamp>}``.

    Raises:
        OSError: if the sidecar (or its parent directory) cannot be written.
    """
    rec = {"allowed": bool(allowed), "ts": _now_iso()}
    # The parent directory may not exist yet (fresh machine); without this,
    # write_text() fails with FileNotFoundError and the decision is lost.
    OPTIN_SIDECAR.parent.mkdir(parents=True, exist_ok=True)
    OPTIN_SIDECAR.write_text(json.dumps(rec, indent=2))
    return rec
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _now_iso() -> str:
|
|
77
|
+
# Prototype-safe: avoid importing datetime.now semantics into workflow harness.
|
|
78
|
+
import datetime
|
|
79
|
+
|
|
80
|
+
return datetime.datetime.now().replace(microsecond=0).isoformat()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ------------------------------------------------------------- scope resolution
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def load_config() -> dict:
    """Read CONFIG_PATH and return its parsed JSON content.

    Raises whatever ``Path.read_text`` / ``json.loads`` raise when the file is
    missing or is not valid JSON; nothing is caught here.
    """
    raw_config = CONFIG_PATH.read_text()
    return json.loads(raw_config)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def resolve_scope(project_name: str) -> list[str]:
    """Return the repo paths that define this product's cwd scope.

    The project entry is looked up by ``name`` in config.json. Its
    ``local_repo`` and ``landing_pages.repo`` values (when set) are
    ``~``-expanded, stripped of trailing slashes and de-duplicated while
    keeping their original order.

    Raises:
        SystemExit: when no project entry matches *project_name*.
    """
    project = None
    for entry in load_config().get("projects", []):
        if entry.get("name") == project_name:
            project = entry
            break
    if not project:
        raise SystemExit(f"project '{project_name}' not found in {CONFIG_PATH}")

    candidates = []
    local_repo = project.get("local_repo")
    if local_repo:
        candidates.append(os.path.expanduser(local_repo))
    landing = project.get("landing_pages") or {}
    landing_repo = landing.get("repo")
    if landing_repo:
        candidates.append(os.path.expanduser(landing_repo))

    # A dict keeps insertion order, which gives an order-preserving de-dupe.
    unique = {}
    for candidate in candidates:
        trimmed = candidate.rstrip("/")
        if trimmed:
            unique.setdefault(trimmed, None)
    return list(unique)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ------------------------------------------------------------------ the lookup
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _escaped_prefix(path: str) -> str:
|
|
120
|
+
# Claude Code encodes a cwd as its path with every '/' and '.' -> '-'.
|
|
121
|
+
return re.sub(r"[/.]", "-", path)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _text_of(msg: dict) -> str:
|
|
125
|
+
c = (msg or {}).get("content")
|
|
126
|
+
if isinstance(c, str):
|
|
127
|
+
return c
|
|
128
|
+
if isinstance(c, list):
|
|
129
|
+
return "\n".join(
|
|
130
|
+
b.get("text", "")
|
|
131
|
+
for b in c
|
|
132
|
+
if isinstance(b, dict) and b.get("type") == "text"
|
|
133
|
+
)
|
|
134
|
+
return ""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _iter_session_files(scope: list[str]):
    """Yield ``(transcript_path, scope_root)`` pairs for candidate transcripts.

    For each scope root, every directory under PROJECTS_DIR whose name starts
    with the root's escaped form is visited in sorted order, and its newest
    MAX_FILES_PER_DIR ``*.jsonl`` files (by mtime) are yielded. The prefix
    glob can over-match a sibling (…-website vs …-websitex), so callers must
    re-check each file's real cwd field. A transcript is yielded at most once,
    paired with the first scope root whose prefix matched its directory.
    """
    already_yielded = set()
    for scope_root in scope:
        pattern = _escaped_prefix(scope_root) + "*"
        session_dirs = (d for d in sorted(PROJECTS_DIR.glob(pattern)) if d.is_dir())
        for session_dir in session_dirs:
            transcripts = list(session_dir.glob("*.jsonl"))
            # newest first
            transcripts.sort(key=lambda p: p.stat().st_mtime, reverse=True)
            for transcript in transcripts[:MAX_FILES_PER_DIR]:
                if transcript not in already_yielded:
                    already_yielded.add(transcript)
                    yield transcript, scope_root
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _cwd_in_scope(cwd: object, scope: list[str]) -> bool:
    """Return True when *cwd* equals one of the scope roots or lies beneath one."""
    if not isinstance(cwd, str):
        return False
    return any(cwd == root or cwd.startswith(root + "/") for root in scope)


def _scan_transcript(path: Path, scope: list[str], term_list: list[str]) -> list[dict]:
    """Collect snippet candidates from one transcript.

    Returns ``[]`` when the file cannot be read, never declares a cwd, or
    declares a cwd outside *scope*.
    """
    file_cwd = None
    picks: list[dict] = []
    try:
        with path.open(encoding="utf-8", errors="replace") as fh:
            for line_no, line in enumerate(fh):
                try:
                    d = json.loads(line)
                except (ValueError, RecursionError):
                    continue  # not a JSON record: skip this line only
                if not isinstance(d, dict):
                    continue
                if file_cwd is None and d.get("cwd"):
                    file_cwd = d["cwd"]
                    # Stop at once for an out-of-scope transcript: nothing
                    # from it may be returned, so there is no reason to keep
                    # reading it.
                    if not _cwd_in_scope(file_cwd, scope):
                        return []
                m = d.get("message")
                if not isinstance(m, dict):
                    continue
                role = m.get("role")
                if role not in ("user", "assistant"):
                    continue
                try:
                    txt = _text_of(m).strip()
                except TypeError:
                    continue  # malformed text block: skip this message only
                # skip tool plumbing / system-injected blocks, keep real prose
                if not txt or txt.startswith("<") or "tool_result" in txt[:24]:
                    continue
                if term_list and not any(t in txt.lower() for t in term_list):
                    continue
                picks.append(
                    {
                        "role": role,
                        "preview": txt[:PREVIEW_LEN],
                        # Location of the full message, as taken by
                        # expand_span(file_path, line_no).
                        "file_path": str(path),
                        "line_no": line_no,
                    }
                )
    except OSError:
        return []
    if file_cwd is None:
        # No cwd recorded => membership in the scope cannot be verified.
        return []
    return picks


def pull(project_name: str, terms: list[str] | None = None, limit: int = 40) -> dict:
    """Consent-gated, cwd-scoped candidate-context pull. Read-only, filesystem.

    Args:
        project_name: ``name`` of a project entry in config.json.
        terms: Optional substrings. When given, only messages containing at
            least one of them (case-insensitive) are kept.
        limit: Collection stops once this many sessions have produced snippets.

    Returns:
        ``{"ok": False, "reason": ...}`` (plus a detail key) when the user has
        not opted in, the project has no repo scope, or PROJECTS_DIR is
        missing. Otherwise a dict with ``ok``, ``project``, ``scope``,
        ``session_count``, ``snippet_count`` and ``sessions``. ``sessions``
        maps a transcript stem to its last MAX_SNIPPETS_PER_SESSION snippets.
        Each snippet has ``role``, ``preview``, ``file_path`` and ``line_no``
        (0-based line in the transcript).

    Raises:
        SystemExit: from resolve_scope() when the project is not configured.
    """
    status = optin_status()
    if not status.get("allowed"):
        return {
            "ok": False,
            "reason": "not_opted_in",
            "hint": "run --set-optin yes (or ask the user) before pulling history",
        }

    scope = resolve_scope(project_name)
    if not scope:
        return {"ok": False, "reason": "no_repo_scope", "project": project_name}
    if not PROJECTS_DIR.exists():
        return {"ok": False, "reason": "no_transcripts_dir", "dir": str(PROJECTS_DIR)}

    term_list = [t.strip().lower() for t in (terms or []) if t.strip()]
    sessions: dict[str, list] = {}

    # The root paired with each file is deliberately ignored. Files are yielded
    # once, with the FIRST root whose escaped prefix matches, so a repo whose
    # name extends another root's (…/app vs …/app-website) would fail a check
    # against that single root. The cwd is verified against the whole scope.
    for f, _matched_root in _iter_session_files(scope):
        picks = _scan_transcript(f, scope, term_list)
        if not picks:
            continue
        sessions[f.stem] = picks[-MAX_SNIPPETS_PER_SESSION:]  # most recent few
        if len(sessions) >= limit:
            break

    return {
        "ok": True,
        "project": project_name,
        "scope": scope,
        "session_count": len(sessions),
        "snippet_count": sum(len(snips) for snips in sessions.values()),
        "sessions": sessions,
    }
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def expand_span(file_path: str, line_no: int, radius: int = 0) -> str:
    """Read the full message text stored on one line of a .jsonl transcript.

    The file is read only up to the requested line.

    Args:
        file_path: Path of the transcript file.
        line_no: 0-based index of the line holding the message.
        radius: Accepted for interface compatibility; currently unused.

    Returns:
        The message's string content, or its ``text`` blocks joined with
        newlines. ``""`` when the file cannot be read, the line does not
        exist, the line is not a JSON object, or it carries no text content.
    """
    if line_no < 0:
        return ""
    target = None
    try:
        with Path(file_path).open(encoding="utf-8", errors="replace") as fh:
            for i, line in enumerate(fh):
                if i == line_no:
                    target = line
                    break  # nothing after the target line is needed
    except OSError:
        return ""
    if target is None:
        return ""

    try:
        d = json.loads(target)
    except (ValueError, RecursionError):
        return ""
    # Guard the shape: a JSON array/scalar line or a non-dict message has no
    # .get() and must not crash the caller.
    if not isinstance(d, dict):
        return ""
    message = d.get("message")
    content = message.get("content") if isinstance(message, dict) else None
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for b in content:
            if isinstance(b, dict) and b.get("type") == "text":
                text = b.get("text", "")
                parts.append(text if isinstance(text, str) else "")
        return "\n".join(parts)
    return ""
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# ------------------------------------------------------------------------- CLI
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def main() -> None:
    """Command-line entry point.

    ``--set-optin`` and ``--optin-status`` are handled first and print the
    opt-in record. Otherwise ``--project`` is required and the result of
    pull() is printed as indented JSON, cut to its first 8000 characters.
    With ``--expand``, every snippet that carries both ``file_path`` and
    ``line_no`` gains a ``full`` field read via expand_span().
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--project")
    parser.add_argument("--terms", help="comma-separated FTS terms")
    parser.add_argument("--limit", type=int, default=40)
    parser.add_argument("--expand", action="store_true", help="read full matched spans")
    parser.add_argument("--optin-status", action="store_true")
    parser.add_argument("--set-optin", choices=["yes", "no"])
    opts = parser.parse_args()

    if opts.set_optin:
        record = set_optin(opts.set_optin == "yes")
        print(json.dumps(record, indent=2))
        return
    if opts.optin_status:
        print(json.dumps(optin_status(), indent=2))
        return
    if not opts.project:
        parser.error("--project is required for a pull")

    term_arg = None
    if opts.terms:
        term_arg = opts.terms.split(",")
    result = pull(opts.project, term_arg, limit=opts.limit)

    if result.get("ok") and opts.expand:
        snippets = (s for session in result["sessions"].values() for s in session)
        for snippet in snippets:
            path, line_no = snippet.get("file_path"), snippet.get("line_no")
            if path is None or line_no is None:
                continue
            snippet["full"] = expand_span(path, line_no)

    # NOTE(review): slicing the serialized text caps output size but can cut
    # the JSON mid-structure -- confirm no consumer parses this output.
    rendered = json.dumps(result, indent=2)
    print(rendered[:8000])
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|