x-summary 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,154 @@ Summarize your X (Twitter) **Following** (Recent) feed and **For You** suggestio
8
8
  - Google Chrome (Playwright `channel: "chrome"`)
9
9
  - API key(s) for your chosen LLM provider
10
10
 
11
+ ## Sending reports
12
+
13
+ Use a tool such as [run-and-notify](https://github.com/barbieri/run-and-notify) to send the processed output to Slack or Email.
14
+
15
+ The example below is my setup, which notifies me via Email and Slack at 6am/pm (12 hour interval = 720 minutes). It assumes the code was checked out and installed at `$HOME/git/x-summary` and `$HOME/git/run-and-notify`:
16
+
17
+ ### Set up the environment variables
18
+
19
+ Create `$HOME/git/x-summary/tmp/env.sh`
20
+
21
+ ``` bash
22
+ # .env
23
+ export LOG_LEVEL=warn
24
+ # Choose your notification delivery channel
25
+ export SMTP_PASS=some-pass
26
+ export SLACK_BOT_TOKEN=xoxb-...
27
+ # Choose your summarization model provider
28
+ export OPENROUTER_API_KEY=sk-or-v1-...
29
+ export OPENAI_API_KEY=sk-svcacct-...
30
+ ```
31
+
32
+ > **NOTE:** if you plan to use Slack, you need a `SLACK_BOT_TOKEN` with permissions `chat:write` and `im:write`.
33
+
34
+ ### Configure run-and-notify
35
+
36
+ Create `$HOME/git/x-summary/tmp/run-and-notify-config.json`:
37
+
38
+ ``` json
39
+ {
40
+ "timeoutSeconds": 2400,
41
+ "hideCommandIfSuccess": true,
42
+ "propagateExitCode": true,
43
+ "name": "X Summary",
44
+ "stdout": {
45
+ "format": "markdown"
46
+ },
47
+ "stderr": {
48
+ "format": "jsonl"
49
+ },
50
+ "transports": {
51
+ "smtp": {
52
+ "enabled": true,
53
+ "host": "smtp.gmail.com",
54
+ "port": 587,
55
+ "secure": false,
56
+ "from": "YOUR_EMAIL@gmail.com",
57
+ "to": ["YOUR_EMAIL+x-summary@gmail.com"],
58
+ "auth": {
59
+ "user": "YOUR_EMAIL@gmail.com",
60
+ "passEnvVar": "SMTP_PASS"
61
+ }
62
+ },
63
+ "slack": {
64
+ "enabled": true,
65
+ "tokenEnvVar": "SLACK_BOT_TOKEN",
66
+ "defaultChannel": "@YOUR_SLACK_USER"
67
+ }
68
+ }
69
+ }
70
+ ```
71
+
72
+ ### Configure x-summary
73
+
74
+ Create `$HOME/git/x-summary/tmp/x-summary-config.json` (720 minutes time window is 12 hours):
75
+
76
+ ``` json
77
+ {
78
+ "ownerHandle": "YOUR_X_USER",
79
+ "abortOnIncorrectOwnerHandle": true,
80
+ "timeWindowMinutes": 720,
81
+ "statePath": "./tmp/state.json",
82
+ "instructionsPath": "./INSTRUCTIONS.md",
83
+ "monitored": ["SOME_X_USER_TO_MONITOR", "OTHER_X_USER_TO_MONITOR", "gsbarbieri"],
84
+ "timezone": "America/New_York",
85
+ "llm": {
86
+ "provider": "openai",
87
+ "model": "gpt-5.4-mini"
88
+ }
89
+ }
90
+ ```
91
+
92
+ > **NOTE:** `openai/gpt-5.4-mini` is a cheap model that provides good summarization. To use `provider: openai` you need `OPENAI_API_KEY`.
93
+
94
+ Define your summarization prompt instructions or use the provided example:
95
+
96
+ ``` bash
97
+ ln -s INSTRUCTIONS.example.md INSTRUCTIONS.md
98
+ ```
99
+
100
+ > **NOTE:** given this configuration, it will abort if the browser is not properly logged in to the `ownerHandle` user (`abortOnIncorrectOwnerHandle: true`), so you **MUST** run this once **WITHOUT** that flag to allow the login!
101
+
102
+
103
+ ### Create a runner script
104
+
105
+ Create `$HOME/git/x-summary/tmp/x-summary-run-and-notify.sh` and make it executable (`chmod +x`):
106
+
107
+ ``` bash
108
+ #!/bin/bash
109
+
110
+ set -o pipefail
111
+
112
+ source $HOME/git/x-summary/tmp/env.sh
113
+
114
+ cd $HOME/git/run-and-notify
115
+
116
+ node dist/bundle/run-and-notify.mjs \
117
+ --config=$HOME/git/x-summary/tmp/run-and-notify-config.json \
118
+ --cwd=$HOME/git/x-summary -- \
119
+ node dist/bundle/x-summary.mjs $HOME/git/x-summary/tmp/x-summary-config.json
120
+ ```
121
+
122
+ ### Create a systemd service and timer
123
+
124
+ Create `$HOME/.config/systemd/user/x-summary-run-and-notify.service`:
125
+
126
+ ``` ini
127
+ [Unit]
128
+ Description=Run and Notify X.com scrape & summarize
129
+
130
+ [Service]
131
+ Type=oneshot
132
+ ExecStart=%h/git/x-summary/tmp/x-summary-run-and-notify.sh
133
+ ```
134
+
135
+ Then create a timer to trigger it at 6 am/pm at `$HOME/.config/systemd/user/x-summary-run-and-notify.timer`:
136
+
137
+ ``` ini
138
+ [Unit]
139
+ Description=Runs x-summary-run-and-notify at 6am/6pm
140
+
141
+ [Timer]
142
+ OnCalendar=*-*-* 06,18:00:00
143
+ Persistent=true
144
+
145
+ [Install]
146
+ WantedBy=timers.target
147
+ ```
148
+
149
+ And reload the **USER** daemon, then enable the timer. Optionally allow the timer to run even if you're not logged in:
150
+
151
+ ``` bash
152
+ systemctl --user daemon-reload
153
+ systemctl --user enable --now x-summary-run-and-notify.timer
154
+
155
+ # optional: you need this so the timer runs even if the user is NOT logged in
156
+ sudo loginctl enable-linger $USER
157
+ ```
158
+
11
159
  ## Install (npm)
12
160
 
13
161
  Published builds ship minified CLI bundles; runtime libraries (Playwright, Pino, AI SDK, etc.) are installed as npm dependencies.
@@ -22,7 +170,13 @@ cp node_modules/x-summary/INSTRUCTIONS.example.md INSTRUCTIONS.md
22
170
  cp node_modules/x-summary/.env.example .env
23
171
  ```
24
172
 
25
- Edit `config.json`, `INSTRUCTIONS.md`, and `.env` (API keys). Then:
173
+ Edit `config.json`, `INSTRUCTIONS.md`, and `.env` (API keys). Then run the scrape and summarize steps in one go:
174
+
175
+ ```bash
176
+ npx x-summary config.json
177
+ ```
178
+
179
+ Or run the scrape and summarize steps individually:
26
180
 
27
181
  ```bash
28
182
  npx x-summary-scrape config.json
@@ -35,8 +189,7 @@ Global install (optional):
35
189
  npm install -g x-summary
36
190
  npx playwright install chrome
37
191
  # run from a directory with config.json, INSTRUCTIONS.md, and .env
38
- x-summary-scrape config.json
39
- x-summary-summarize config.json
192
+ x-summary config.json
40
193
  ```
41
194
 
42
195
  `playwright install chrome` downloads browser support for the Playwright version bundled as a dependency. Run it once per machine (or after upgrading `x-summary`).
@@ -151,9 +304,11 @@ pnpm run build:cli # esbuild only
151
304
  pnpm run inspect:x config.json --action home-following # dump X DOM for scraper work
152
305
  pnpm run scrape [config.json] # scrape → save state (tsx)
153
306
  pnpm run summarize [config.json] # summarize persisted state (tsx)
154
- pnpm run start [config.json] # scrape then summarize
307
+ pnpm run x-summary [config.json] # scrape then summarize (tsx)
308
+ pnpm run start [config.json] # scrape then summarize (tsx alias to x-summary)
155
309
  pnpm run scrape:bundle [config.json] # minified bundle (after build)
156
310
  pnpm run summarize:bundle [config.json] # minified bundle (after build)
311
+ pnpm run x-summary:bundle [config.json] # minified bundle (after build)
157
312
  pnpm run start:bundle [config.json] # minified bundle (after build)
158
313
  ```
159
314
 
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import{config as ue}from"dotenv";ue({quiet:!0});import{access as to}from"node:fs/promises";import{resolve as le}from"node:path";import{fileURLToPath as eo}from"node:url";import{readFile as Te}from"node:fs/promises";import{resolve as fe}from"node:path";var Q="./tmp/browser-profile";function St(t,e=process.cwd()){let r=t.browserProfilePath??Q;return fe(e,r)}import{readFile as pe}from"node:fs/promises";import{createRequire as de}from"node:module";import{dirname as me,join as vt}from"node:path";import{fileURLToPath as ge}from"node:url";import{Ajv as we}from"ajv";var he=de(import.meta.url),ye=he("ajv-formats"),Pe=vt(me(ge(import.meta.url)),"../../schemas"),X;function be(){if(X)return X;let t=new we({allErrors:!0,strict:!0,validateSchema:!1,removeAdditional:!1});return ye(t),X=t,t}async function Se(t){let e=vt(Pe,t),r=await pe(e,"utf8");return JSON.parse(r)}var xt=new Map;async function xe(t){let e=xt.get(t);if(e)return e;let r=(async()=>{let o=be(),n=await Se(t);return o.compile(n)})();return xt.set(t,r),r}function ve(t){return t?.length?t.map(e=>`${e.instancePath||"/"}: ${e.message??"invalid"}`).join(`
4
- `):"Unknown validation error"}async function E(t,e,r){let o=await xe(t);if(!o(e))throw new Error(`${r} validation failed:
5
- ${ve(o.errors)}`);return e}function Tt(t){return`${JSON.stringify(t,null,2)}
6
- `}function $(t){return JSON.parse(t)}var Le="./tmp/state.json",D=4;async function Lt(t){let e=await Te(t,"utf8"),r=$(e),o=await E("config.schema.json",r,"Config");return{...o,statePath:o.statePath??Le,browserProfilePath:o.browserProfilePath??Q,llm:{...o.llm,...o.llm.temperature?{temperature:o.llm.temperature}:{}},parallelTabs:o.parallelTabs??D}}import _e from"pino";function Re(){return process.env.LOG_LEVEL??"info"}var Ae=_e({level:Re(),base:{app:"x-summary"}});function h(){return Ae.child({module:"scrape"})}function L(t,e){let r=e.err instanceof Error?{message:e.err.message,stack:e.err.stack,name:e.err.name}:{message:String(e.err)};t.error({action:e.action,expected:e.expected,missing:e.missing,href:e.href,err:r},"scrape step failed")}function S(t){return/^repost:\/\/[^@]+@(.+)$/.exec(t)?.[1]??t}function j(t,e){return`repost://${t.replace(/^@/,"")}@${e}`}function Z(t){let e=t.startsWith("http")?t:`https://x.com${t}`;try{let o=new URL(e).pathname.match(/^(\/[^/]+\/status\/\d+)/);return o?`https://x.com${o[1]}`:e}catch{return e}}function _(t){return Z(t).match(/\/status\/(\d+)/)?.[1]??null}async function R(t){let e=t.getByRole("link"),r=await e.count(),o=null;for(let n=0;n<r;n++){let i=await e.nth(n).getAttribute("href");if(!i?.includes("/status/"))continue;let s=Z(i),a=new URL(s).pathname;if(/^\/[^/]+\/status\/\d+$/.test(a))return s;o??=s}return o}function m(t){try{let e=new URL(t);return e.hash="",e.search="",e.toString()}catch{return t}}async function N(t,e){let r=async o=>{let n=t.getByTestId(o).first();if(!await n.count())return e.debug({testId:o},"stat control not found, defaulting to 0"),0;let i=await n.innerText().catch(()=>"0");return ke(i)};return{comments:await r("reply"),reposts:await r("retweet"),likes:await r("like")}}async function W(t){let e=t.getByTestId("User-Name");if(await e.count()){let o=await e.getByRole("link").first().getAttribute("href");if(o){let n=o.replace(/^\//,"").split("/")[0]?.replace(/^@/,"");if(n)return{author:n}}}return{}}async 
function A(t){let r=await t.locator("time").first().getAttribute("datetime").catch(()=>null);return r?{timestamp:r}:{}}function ke(t){let e=t.trim().toUpperCase();if(!e||e==="\u2014")return 0;let r=/^([\d,.]+)\s*([KMB])?/.exec(e);if(!r)return 0;let o=Number.parseFloat(r[1]?.replace(/,/g,"")??"0"),n=r[2];return Math.round(o*(n==="K"?1e3:n==="M"?1e6:n==="B"?1e9:1))}import{promises as Ce}from"node:dns";import{isIP as Rt}from"node:net";var At=10,Ee=3e4,Oe=512e3,Fe=["twitter:description","og:description","description"];async function kt(t,e){let r=await Me(t,e);try{let o=await Ne(r,e?.signal);if(!o)return{url:r};let{title:n,description:i}=Ue(o);return{url:r,...n?{title:n}:{},...i?{description:i}:{}}}catch{return{url:r}}}async function Me(t,e){let r=e?.maxRedirects??At,o=new URL(t);for(let n=0;n<=r;n++){await O(o);let i=await rt(o.toString(),{method:"HEAD",redirect:"manual",...et(e?.signal)}),s=tt(o,i);if(s){await O(s),o=s;continue}if(i.status===405||i.status===501){let a=await rt(o.toString(),{method:"GET",redirect:"manual",...et(e?.signal)}),c=tt(o,a);if(c){await O(c),o=c;continue}}return o.toString()}throw new Error(`Too many redirects resolving ${t}`)}function Ue(t){let e=He(t),r=Be(t);return{...e?{title:e}:{},...r?{description:r}:{}}}function He(t){return/<title[^>]*>([^<]*)<\/title>/i.exec(t)?.[1]?.trim()||void 0}function Be(t){let e=Ie(t);for(let r of Fe){let o=e.get(r);if(o)return o}for(let[r,o]of e)if(r.endsWith(":description")||r==="description")return o}function Ie(t){let e=new Map,r=/<meta\s+[^>]*>/gi;for(let o of t.matchAll(r)){let n=$e(o[0]??""),i=n.name??n.property,s=n.content;i&&s&&e.set(i.toLowerCase(),De(s))}return e}function $e(t){let e={},r=/([a-zA-Z_:.-]+)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/g;for(let o of t.matchAll(r)){let n=o[1]?.toLowerCase(),i=o[3]??o[4]??o[5]??"";(n==="name"||n==="property"||n==="content")&&(e[n]=i)}return e}function De(t){return 
t.replaceAll("&amp;","&").replaceAll("&lt;","<").replaceAll("&gt;",">").replaceAll("&quot;",'"').replaceAll("&#39;","'")}function je(t){if(!t)return!1;let e=t.split(";")[0]?.trim().toLowerCase()??"";return e==="text/html"||e==="application/xhtml+xml"}async function Ne(t,e){let r=new URL(t);for(let o=0;o<=At;o++){await O(r);let n=await rt(r.toString(),{method:"GET",redirect:"manual",headers:{Accept:"text/html,application/xhtml+xml"},...et(e)}),i=tt(r,n);if(i){await O(i),r=i;continue}return await We(r.toString(),n)}throw new Error(`Too many redirects fetching HTML for ${t}`)}async function We(t,e){if(!e.ok)throw new Error(`Failed to fetch HTML for ${t}: HTTP ${e.status}`);if(!je(e.headers.get("content-type")))return null;let r=e.body?.getReader();if(!r)return"";let o=[],n=0;for(;;){let{done:i,value:s}=await r.read();if(i)break;if(s){if(n+=s.length,n>Oe)break;o.push(s)}}return new TextDecoder().decode(Ke(o))}function Ke(t){let e=t.reduce((n,i)=>n+i.length,0),r=new Uint8Array(e),o=0;for(let n of t)r.set(n,o),o+=n.length;return r}function tt(t,e){if(!qe(e.status))return null;let r=e.headers.get("location");return r?new URL(r,t):t}function qe(t){return t>=300&&t<400}async function O(t){if(t.protocol!=="http:"&&t.protocol!=="https:")throw new Error(`Unsafe URL protocol: ${t.protocol}`);let e=Ve(t.hostname);if(ze(e))throw new Error(`Unsafe local URL host: ${t.hostname}`);if(_t(e))throw new Error(`Unsafe private URL host: ${t.hostname}`);if(Rt(e))return;let r=await Ce.lookup(e,{all:!0,verbatim:!0});if(!r.length)throw new Error(`Could not resolve URL host: ${t.hostname}`);for(let{address:o}of r)if(_t(o))throw new Error(`Unsafe private URL host: ${t.hostname}`)}function ze(t){return t==="localhost"||t.endsWith(".localhost")}function Ve(t){let e=t.replace(/\.$/,"").toLowerCase();return e.startsWith("[")&&e.endsWith("]")?e.slice(1,-1):e}function _t(t){let e=Rt(t);return e===4?Ct(t):e===6?Ge(t):!1}function Ct(t){let 
e=t.split(".").map(n=>Number.parseInt(n,10));if(e.length!==4||e.some(n=>!Number.isInteger(n)||n<0||n>255))return!0;let[r=0,o=0]=e;return r===0||r===10||r===127||r===100&&o>=64&&o<=127||r===169&&o===254||r===172&&o>=16&&o<=31||r===192&&o===168||r===198&&(o===18||o===19)||r>=224}function Ge(t){let e=t.toLowerCase();if(e.startsWith("::ffff:")){let r=e.slice(7);return Ct(r)}return e==="::"||e==="::1"||e.startsWith("fc")||e.startsWith("fd")||/^fe[89ab]/.test(e)||e.startsWith("ff")}function et(t){return t?{signal:t}:{}}async function rt(t,e){let r=new AbortController,o=setTimeout(()=>r.abort(),Ee),n=e.signal?AbortSignal.any([e.signal,r.signal]):r.signal;try{return await fetch(t,{...e,signal:n})}finally{clearTimeout(o)}}var ot=class extends Error{constructor(e,r){super(`${e} timed out after ${r}ms`),this.name="ScrapeTimeoutError"}};async function K(t,e,r){let o,n=new Promise((i,s)=>{o=setTimeout(()=>{s(new ot(r,e))},e)});try{return await Promise.race([t,n])}finally{o!==void 0&&clearTimeout(o)}}function f(t){if(t.startsWith("repost://"))return t;try{let e=new URL(t);return e.hash="",e.toString()}catch{return t}}var q=class{postCache=new Map;linkCache=new Map;log;constructor(e){this.log=e}getCached(e){return this.postCache.get(f(e))}remember(e){this.postCache.set(f(e.href),e)}collectAllHrefs(e,r){let o=f(e.href);if(!r.has(o)){r.add(o);for(let n of e.references??[])this.collectAllHrefs(n,r);for(let n of e.thread??[])this.collectAllHrefs(n,r)}}async finalize(e,r,o=!0){let n=f(e.href),i=this.postCache.get(n);if(i)return i;if(r.has(n))return this.log.debug({href:n},"cycle detected; omitting nested content"),e;r.add(n);let s=e.linkUrls?.length?e.linkUrls:Qe(e.body??""),a=s.length?await this.resolveLinksCached(s):void 0,c=await this.finalizeNested(e.references??[],r,!1),l=await this.finalizeNested(e.thread??[],r,!1);r.delete(n);let{references:u,thread:p,links:d,linkUrls:T,...P}=e,bt={...P,...a?.length?{links:a}:{},...c.length?{references:c}:{},...l.length?{thread:l}:{}};return 
o&&this.postCache.set(n,bt),bt}async finalizeNested(e,r,o){let n=[];for(let i of e){let s=f(i.href);if(r.has(s)){this.log.debug({href:s},"cycle detected; skipping reference/thread insert");continue}n.push(await this.finalize(i,r,o))}return n}async resolveLinksCached(e){let r=[];for(let o of e){let n=this.linkCache.get(o);if(n){r.push(n);continue}if(Je(o)){let i={url:o};this.linkCache.set(o,i),r.push(i);continue}try{let i=await K(kt(o),3e4,`external link ${o}`);this.linkCache.set(o,i),r.push(i)}catch(i){this.log.warn({url:o,err:i},"external link resolution failed; keeping url only");let s={url:o};this.linkCache.set(o,s),r.push(s)}}return r}};function Je(t){try{let{pathname:e}=new URL(t);return/\.(mp4|m3u8|webm|mov)(\?|$)/i.test(e)||e.includes("/video/")||e.includes("/amplify_video/")}catch{return!1}}function Qe(t){let e=[],r=/https?:\/\/[^\s)>\]]+/g;for(let o of t.matchAll(r))e.push(o[0].replace(/[.,;:!?)]+$/,""));return e}function Et(t,e,r=Date.now()){if(e)return{cutoffMs:Date.parse(e.timestamp),cutoffTimestamp:e.timestamp};let o=r-t.timeWindowMinutes*60*1e3;return{cutoffMs:o,cutoffTimestamp:new Date(o).toISOString()}}function Ot(t,e,r,o,n){let i={};for(let s of r)F(s,i);for(let s of o)F(s,i);for(let s of Object.values(n))for(let a of s)F(a,i);return{timestamp:t,cutoffTimestamp:e,posts:i,following:r.map(s=>f(S(s.href))),forYouSuggestions:o.map(s=>f(S(s.href))),monitored:Object.fromEntries(Object.entries(n).map(([s,a])=>[s,a.map(c=>f(S(c.href)))]))}}function F(t,e){let r=f(t.href);for(let o of t.references??[])F(o,e);for(let o of t.thread??[])F(o,e);e[r]||(e[r]=Xe(t))}function Xe(t){return{stats:t.stats,...t.author?{author:t.author}:{},...t.timestamp?{timestamp:t.timestamp}:{},...t.body?{body:t.body}:{},...t.links?.length?{links:t.links}:{},...t.thread?.length?{thread:t.thread.map(e=>f(e.href))}:{},...t.references?.length?{references:t.references.map(e=>f(e.href))}:{}}}function Ft(t){let e=new Set,r=o=>{e.add(f(o))};for(let o of t.following)r(o);for(let o of 
t.forYouSuggestions)r(o);for(let o of Object.values(t.monitored))for(let n of o)r(n);for(let o of Object.keys(t.posts))r(o);for(let o of Object.values(t.posts)){for(let n of o.references??[])r(n);for(let n of o.thread??[])r(n)}return e}async function k(){let t=Math.floor(Math.random()*500);await new Promise(e=>setTimeout(e,500+t))}async function y(t,e,r){e.debug({label:r},"waiting for UI to settle"),await t.waitForLoadState("networkidle",{timeout:15e3}).catch(()=>{}),await Mt(t)}async function w(t,e,r){e.debug({label:r},"waiting after DOM action"),await Mt(t)}async function M(t,e,r="post conversation"){e.debug({label:r},"waiting for conversation timeline");let o=t.getByLabel("Timeline: Conversation",{exact:!0});await o.waitFor({state:"visible",timeout:2e4}),await o.locator('article[data-testid="tweet"]').first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),await k()}async function Mt(t){let e=t.locator('[aria-busy="true"]');await e.count()>0&&await e.first().waitFor({state:"hidden",timeout:1e4}).catch(()=>{}),await k()}async function nt(t,e,r,o,n){e.info({action:o},"interaction"),await r.click(n),await y(t,e,o)}function U(t){return{href:f(t),stats:{comments:0,reposts:0,likes:0}}}async function it(t){let e=t.locator('[data-testid="tweetText"]'),r=await e.count();for(let o=0;o<r;o++){let n=e.nth(o);if(await n.evaluate(c=>!!c.closest('div[role="link"]')))continue;let s=(await n.innerText()).trim();if(!s)continue;let a=await n.evaluate(c=>{let l=[];for(let u of c.querySelectorAll("a[href]"))l.push({text:(u.textContent??"").trim(),href:u.getAttribute("href")??""});return l});return st(s,a)}}function st(t,e){let r=t;for(let{text:o,href:n}of e){let i=Ze(n);!o||r.includes(`](${i})`)||(r=r.replace(o,`[${o}](${i})`))}return r}function Ze(t){return t.startsWith("http")?t:t.startsWith("/")?`https://x.com${t}`:t}function tr(t,e){let r=null,o=!1,n=[],i=a=>{if(!o){o=!0,r=a;for(let c of n)c(a);n.length=0}},s=async a=>{let 
c=a.url();if(!(!c.includes("TweetDetail")||!c.includes(e)))try{i(await a.text())}catch{}};return t.on("response",s),{waitFor:(a=15e3)=>r?Promise.resolve(r):new Promise(c=>{let l=setTimeout(()=>{t.off("response",s),c(r)},a);n.push(u=>{clearTimeout(l),c(u)})}),detach:()=>{t.off("response",s)}}}async function Ut(t,e,r){let o=_(e);if(!o)return null;let n=tr(t,o),i=m(e);try{return m(t.url())!==i?await t.goto(e,{waitUntil:"domcontentloaded"}):(r.debug({focalId:o},"reloading conversation to capture TweetDetail"),await t.reload({waitUntil:"domcontentloaded"})),await M(t,r),await n.waitFor(15e3)}finally{n.detach()}}function Ht(t,e){let r;try{r=JSON.parse(t)}catch{return null}let o=er(r),n=o.get(e);return n?z(n,o,{includeThread:!0,includeQuotes:!0,allowSyntheticRepost:!0}):null}function er(t){let e=new Map,r=o=>{if(!o||typeof o!="object")return;if(Array.isArray(o)){for(let a of o)r(a);return}let n=o,i=n.legacy?.id_str,s=n.core?.user_results?.result?.core?.screen_name;i&&s&&e.set(i,n);for(let a of Object.values(o))r(a)};return r(t),e}function Bt(t,e){let r=ur(e.created_at);return{stats:cr(e),...t?{author:t}:{},...r?{timestamp:r}:{}}}function rr(t,e,r,o,n){let i=t.retweeted_status_result?.result;if(!i)throw new Error("bare retweet missing retweeted_status_result");return{href:j(r,o),...Bt(r,n),references:[z(i,e,{includeThread:!1,includeQuotes:!0,allowSyntheticRepost:!1})]}}function or(t,e,r){let o=t.quoted_status_result?.result;return!r.includeQuotes||!o?[]:[z(o,e,{includeThread:!1,includeQuotes:!1,allowSyntheticRepost:!1})]}function z(t,e,r){let o=t.legacy;if(!o?.id_str)throw new Error("tweet node missing id_str");let n=t.core?.user_results?.result?.core?.screen_name??"",i=lr(n,o.id_str);if(r.allowSyntheticRepost&&ir(t))return rr(t,e,n,i,o);let s=sr(t),a=ar(t),c=or(t,e,r),l=r.includeThread?nr(t,e):[];return{href:i,...Bt(n,o),...s?{body:s}:{},...a.length?{linkUrls:a}:{},...c.length?{references:c}:{},...l.length?{thread:l}:{}}}function nr(t,e){let r=[],o=new 
Set,n=t;for(;n?.legacy?.in_reply_to_status_id_str;){let i=n.legacy.in_reply_to_status_id_str;if(o.has(i))break;o.add(i);let s=e.get(i);if(!s)break;r.unshift(z(s,e,{includeThread:!1,includeQuotes:!1,allowSyntheticRepost:!1})),n=s}return r}function ir(t){if(!t.retweeted_status_result?.result||t.legacy?.is_quote_status)return!1;let r=at(t).trim();return r?/^RT @\w+:/i.test(r):!0}function at(t){return t.note_tweet?.note_tweet_results?.result?.text??t.legacy?.full_text??""}function sr(t){let e=at(t).trim();if(!e)return;let r=[];for(let o of t.legacy?.entities?.urls??[])o.expanded_url&&r.push({text:o.display_url??o.url??o.expanded_url,href:o.expanded_url});for(let o of It(t))o.expanded_url&&o.display_url&&r.push({text:o.display_url,href:o.expanded_url});return st(e,r)}function It(t){return t.legacy?.extended_entities?.media??t.legacy?.entities?.media??[]}function ar(t){let e=new Set,r=o=>{if(!(!o||o.startsWith("blob:")))try{let n=new URL(o);pr(n)&&e.add(n.toString())}catch{o.startsWith("/")&&e.add(new URL(o,"https://x.com").toString())}};for(let o of t.legacy?.entities?.urls??[])r(o.expanded_url);for(let o of It(t)){r(o.expanded_url),r(o.media_url_https);for(let n of o.video_info?.variants??[])n.content_type?.startsWith("video/")&&r(n.url)}for(let o of t.card?.legacy?.binding_values??[]){let n=o.value?.string_value;(o.key?.includes("url")||n?.startsWith("http"))&&r(n)}for(let o of fr(at(t)))r(o);return[...e]}function cr(t){return{comments:t.reply_count??0,reposts:t.retweet_count??0,likes:t.favorite_count??0}}function lr(t,e){return m(`https://x.com/${t}/status/${e}`)}function ur(t){if(!t)return;let e=Date.parse(t);return Number.isNaN(e)?void 0:new Date(e).toISOString()}function fr(t){let e=t.replace(/\s+/g,""),r=[],o=/https?:\/\/[^\s]+|(?:https?:\/\/)?(?:x\.com|twitter\.com)\/[^\s]+/gi;for(let n of e.matchAll(o)){let i=n[0].replace(/[.,;:!?)…]+$/,"");i.startsWith("http")||(i=`https://${i}`),r.push(i)}return r}function pr(t){return 
t.protocol==="http:"||t.protocol==="https:"}var mr="Timeline: Conversation",V=class{pool;processor;log;inFlight=new Map;constructor(e,r,o){this.pool=e,this.processor=r,this.log=o}async scrapeMany(e){return Promise.all(e.map(r=>this.scrape(r)))}async scrape(e,r){let o=f(e),n=this.processor.getCached(o);if(n)return n;let i=this.inFlight.get(o);if(i)return r?(this.log.warn({href:o},"nested scrape skipped; same href already in flight (would deadlock)"),U(e)):(this.log.debug({href:o},"awaiting in-flight post detail scrape"),i);let s=this.runScrape(e,r);this.inFlight.set(o,s);try{return await s}finally{this.inFlight.delete(o)}}scrapeLinked(e,r,o){return this.scrape(r,{page:e,returnHref:o})}async runScrape(e,r){let o=f(e),n=r?()=>this.parseOnPage(r.page,e,r.returnHref):()=>this.pool.run(a=>this.parseOnPage(a,e)),i;try{i=await K(n(),6e4,`post detail ${o}`)}catch(a){return this.failPost(e,a)}let s=await this.processor.finalize(i,new Set);return this.processor.remember(s),s}failPost(e,r){L(this.log,{action:"scrapePostDetail",expected:"TweetDetail GraphQL or conversation timeline",href:e,err:r});let o=U(e);return this.processor.remember(o),o}async parseOnPage(e,r,o){let n=o?m(o):void 0,i=_(r);try{if(i){let s=await Ut(e,r,this.log);if(s){let a=Ht(s,i);if(a)return this.log.debug({href:r,source:"TweetDetail"},"parsed post from API"),a}}return this.log.warn({href:r},"TweetDetail unavailable; falling back to DOM"),await gr(e,r,this.log)}finally{n&&m(e.url())!==n&&(await e.goto(n,{waitUntil:"domcontentloaded"}),await M(e,this.log,"restore focal conversation"))}}};async function gr(t,e,r){let o=m(e);m(t.url())!==o&&(await t.goto(e,{waitUntil:"domcontentloaded"}),await M(t,r));let n=$t(t);if(await n.first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),await n.count()===0)return r.warn({href:o},"no conversation articles; keeping href-only stub"),U(o);let s=await wr(n,o);for(let d=0;d<=s;d++)await Pr(t,n.nth(d),r);let a=[];for(let d=0;d<s;d++)a.push(await hr(n.nth(d),r));let 
c=$t(t).nth(s),l=await br(c),u=await it(c);if(l&&!u){let d=c.locator('article[data-testid="tweet"]'),T=await R(d.last());return{href:j(l,o),stats:await N(c,r),...await W(c),...await A(c),references:T?[U(m(T))]:[]}}let p=await Dt(c,u);return{href:o,stats:await N(c,r),...await W(c),...await A(c),...u?{body:u}:{},...p.length?{linkUrls:p}:{},...a.length?{thread:a}:{}}}async function wr(t,e){let r=_(e),o=await t.count();for(let n=0;n<o;n++){let i=await R(t.nth(n));if(i&&_(i)===r)return n}return 0}async function hr(t,e){let r=await R(t);if(!r)throw new Error("thread article missing status href");let o=await it(t),n=await Dt(t,o);return{href:m(r),stats:await N(t,e),...await W(t),...await A(t),...o?{body:o}:{},...n.length?{linkUrls:n}:{}}}function yr(t){return t.getByLabel(mr,{exact:!0})}function $t(t){return yr(t).locator('article[data-testid="tweet"]')}async function Pr(t,e,r){let o=e.getByRole("button",{name:/^Show more$/i});for(;await o.isVisible().catch(()=>!1);)r.info({action:"expand show more"},"interaction"),await o.click(),await w(t,r,"expand show more");let n=e.getByRole("button",{name:/^Show \d+ posts?$/i});for(;await n.isVisible().catch(()=>!1);)r.info({action:"expand thread posts"},"interaction"),await n.click(),await w(t,r,"expand thread posts")}async function br(t){let e=t.getByTestId("socialContext");if(!await e.count())return null;let r=t.locator('a[href^="/"]').filter({has:e}).first();if(!await r.count())return null;let o=await r.getAttribute("href");return!o||o.includes("/status/")?null:o.replace(/^\//,"").split("/")[0]?.replace(/^@/,"")??null}async function Dt(t,e){let r=[],o=new Set,n=c=>{if(!c||c.startsWith("blob:"))return;let l=Sr(c);!l||o.has(l)||(o.add(l),r.push(l))},i=t.getByTestId("card.wrapper");if(await i.count()){let c=i.locator('a[role="link"]');await c.count()&&n(await c.first().getAttribute("href",{timeout:3e3}).catch(()=>null))}let s=t.locator('[data-testid="tweetPhoto"] img[src*="twimg.com"]'),a=await s.count();for(let c=0;c<a;c++)n(await 
s.nth(c).getAttribute("src"));if(e)for(let c of xr(e))n(c);return r}function Sr(t){try{return t.startsWith("http")?new URL(t).toString():t.startsWith("/")?new URL(t,"https://x.com").toString():null}catch{return null}}function xr(t){let e=[],r=/\]\((https?:\/\/[^)]+)\)|https?:\/\/[^\s)>\]]+/g;for(let o of t.matchAll(r)){let n=(o[1]??o[0]).replace(/[.,;:!?)]+$/,"");e.push(n)}return e}var G=class t{pages=[];available=[];waiters=[];log;constructor(e){this.log=e}static async create(e,r,o){let n=new t(o),i=Math.max(1,r);for(let s=0;s<i;s++){let a=await e.newPage();n.pages.push(a),n.available.push(a)}return o.info({parallelTabs:i},"detail tab pool ready"),n}async run(e){let r=await this.acquire();try{return await e(r)}finally{this.release(r)}}async close(){await Promise.all(this.pages.map(e=>e.close().catch(()=>{}))),this.pages.length=0,this.available.length=0,this.log.debug("detail tab pool closed")}async acquire(){let e=this.available.pop();return e||new Promise(r=>{this.waiters.push(r)})}release(e){let r=this.waiters.shift();if(r){r(e);return}this.available.push(e)}};var ct="Timeline: Your Home Timeline";function Y(t){return t.getByLabel(ct).locator('article[data-testid="tweet"]')}async function jt(t){return await t.locator('xpath=ancestor::*[@data-testid="placementTracking"][1]').count()>0}function vr(t,e){return e==="home"?t.getByLabel(ct):t.locator('[data-testid="primaryColumn"]')}async function Tr(t,e){let r=await vr(t,e).boundingBox().catch(()=>null);r&&await t.mouse.move(r.x+r.width/2,r.y+Math.min(r.height*.45,520))}async function Nt(t,e,r){return await Tr(t,r),await t.mouse.wheel(0,e),t.evaluate(`((delta, feedKind, label) => {
3
+ import{config as de}from"dotenv";de({quiet:!0});import{access as eo}from"node:fs/promises";import{resolve as pe}from"node:path";import{fileURLToPath as ro}from"node:url";import{readFile as Re}from"node:fs/promises";import{resolve as me}from"node:path";var X="./tmp/browser-profile";function xt(t,e=process.cwd()){let r=t.browserProfilePath??X;return me(e,r)}import{readFile as ge}from"node:fs/promises";import{createRequire as we}from"node:module";import{dirname as he,join as Tt}from"node:path";import{fileURLToPath as ye}from"node:url";import{Ajv as Pe}from"ajv";var be=we(import.meta.url),Se=be("ajv-formats"),xe=Tt(he(ye(import.meta.url)),"../../schemas"),Z;function ve(){if(Z)return Z;let t=new Pe({allErrors:!0,strict:!0,validateSchema:!1,removeAdditional:!1});return Se(t),Z=t,t}async function Te(t){let e=Tt(xe,t),r=await ge(e,"utf8");return JSON.parse(r)}var vt=new Map;async function Le(t){let e=vt.get(t);if(e)return e;let r=(async()=>{let o=ve(),n=await Te(t);return o.compile(n)})();return vt.set(t,r),r}function _e(t){return t?.length?t.map(e=>`${e.instancePath||"/"}: ${e.message??"invalid"}`).join(`
4
+ `):"Unknown validation error"}async function O(t,e,r){let o=await Le(t);if(!o(e))throw new Error(`${r} validation failed:
5
+ ${_e(o.errors)}`);return e}function Lt(t){return`${JSON.stringify(t,null,2)}
6
+ `}function D(t){return JSON.parse(t)}var Ae="./tmp/state.json",N=4;async function _t(t){let e=await Re(t,"utf8"),r=D(e),o=await O("config.schema.json",r,"Config");return{...o,statePath:o.statePath??Ae,browserProfilePath:o.browserProfilePath??X,llm:{...o.llm,...o.llm.temperature?{temperature:o.llm.temperature}:{}},parallelTabs:o.parallelTabs??N}}import Rt from"pino";function ke(){return process.env.LOG_LEVEL??"info"}var tt=Rt({level:ke(),base:{app:"x-summary"}},Rt.destination(2));function P(){return tt.child({module:"scrape"})}function L(t,e){let r=e.err instanceof Error?{message:e.err.message,stack:e.err.stack,name:e.err.name}:{message:String(e.err)};t.error({action:e.action,expected:e.expected,missing:e.missing,href:e.href,err:r},"scrape step failed")}function x(t){return/^repost:\/\/[^@]+@(.+)$/.exec(t)?.[1]??t}function j(t,e){return`repost://${t.replace(/^@/,"")}@${e}`}function et(t){let e=t.startsWith("http")?t:`https://x.com${t}`;try{let o=new URL(e).pathname.match(/^(\/[^/]+\/status\/\d+)/);return o?`https://x.com${o[1]}`:e}catch{return e}}function _(t){return et(t).match(/\/status\/(\d+)/)?.[1]??null}async function R(t){let e=t.getByRole("link"),r=await e.count(),o=null;for(let n=0;n<r;n++){let i=await e.nth(n).getAttribute("href");if(!i?.includes("/status/"))continue;let s=et(i),a=new URL(s).pathname;if(/^\/[^/]+\/status\/\d+$/.test(a))return s;o??=s}return o}function g(t){try{let e=new URL(t);return e.hash="",e.search="",e.toString()}catch{return t}}async function W(t,e){let r=async o=>{let n=t.getByTestId(o).first();if(!await n.count())return e.debug({testId:o},"stat control not found, defaulting to 0"),0;let i=await n.innerText().catch(()=>"0");return Ce(i)};return{comments:await r("reply"),reposts:await r("retweet"),likes:await r("like")}}async function K(t){let e=t.getByTestId("User-Name");if(await e.count()){let o=await e.getByRole("link").first().getAttribute("href");if(o){let 
n=o.replace(/^\//,"").split("/")[0]?.replace(/^@/,"");if(n)return{author:n}}}return{}}async function A(t){let r=await t.locator("time").first().getAttribute("datetime").catch(()=>null);return r?{timestamp:r}:{}}function Ce(t){let e=t.trim().toUpperCase();if(!e||e==="\u2014")return 0;let r=/^([\d,.]+)\s*([KMB])?/.exec(e);if(!r)return 0;let o=Number.parseFloat(r[1]?.replace(/,/g,"")??"0"),n=r[2];return Math.round(o*(n==="K"?1e3:n==="M"?1e6:n==="B"?1e9:1))}import{promises as Ee}from"node:dns";import{isIP as kt}from"node:net";var Ct=10,Oe=3e4,Me=512e3,Ue=["twitter:description","og:description","description"];async function Et(t,e){let r=await Fe(t,e);try{let o=await We(r,e?.signal);if(!o)return{url:r};let{title:n,description:i}=Be(o);return{url:r,...n?{title:n}:{},...i?{description:i}:{}}}catch{return{url:r}}}async function Fe(t,e){let r=e?.maxRedirects??Ct,o=new URL(t);for(let n=0;n<=r;n++){await M(o);let i=await nt(o.toString(),{method:"HEAD",redirect:"manual",...ot(e?.signal)}),s=rt(o,i);if(s){await M(s),o=s;continue}if(i.status===405||i.status===501){let a=await nt(o.toString(),{method:"GET",redirect:"manual",...ot(e?.signal)}),c=rt(o,a);if(c){await M(c),o=c;continue}}return o.toString()}throw new Error(`Too many redirects resolving ${t}`)}function Be(t){let e=He(t),r=Ie(t);return{...e?{title:e}:{},...r?{description:r}:{}}}function He(t){return/<title[^>]*>([^<]*)<\/title>/i.exec(t)?.[1]?.trim()||void 0}function Ie(t){let e=$e(t);for(let r of Ue){let o=e.get(r);if(o)return o}for(let[r,o]of e)if(r.endsWith(":description")||r==="description")return o}function $e(t){let e=new Map,r=/<meta\s+[^>]*>/gi;for(let o of t.matchAll(r)){let n=De(o[0]??""),i=n.name??n.property,s=n.content;i&&s&&e.set(i.toLowerCase(),Ne(s))}return e}function De(t){let e={},r=/([a-zA-Z_:.-]+)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/g;for(let o of t.matchAll(r)){let n=o[1]?.toLowerCase(),i=o[3]??o[4]??o[5]??"";(n==="name"||n==="property"||n==="content")&&(e[n]=i)}return e}function Ne(t){return 
t.replaceAll("&amp;","&").replaceAll("&lt;","<").replaceAll("&gt;",">").replaceAll("&quot;",'"').replaceAll("&#39;","'")}function je(t){if(!t)return!1;let e=t.split(";")[0]?.trim().toLowerCase()??"";return e==="text/html"||e==="application/xhtml+xml"}async function We(t,e){let r=new URL(t);for(let o=0;o<=Ct;o++){await M(r);let n=await nt(r.toString(),{method:"GET",redirect:"manual",headers:{Accept:"text/html,application/xhtml+xml"},...ot(e)}),i=rt(r,n);if(i){await M(i),r=i;continue}return await Ke(r.toString(),n)}throw new Error(`Too many redirects fetching HTML for ${t}`)}async function Ke(t,e){if(!e.ok)throw new Error(`Failed to fetch HTML for ${t}: HTTP ${e.status}`);if(!je(e.headers.get("content-type")))return null;let r=e.body?.getReader();if(!r)return"";let o=[],n=0;for(;;){let{done:i,value:s}=await r.read();if(i)break;if(s){if(n+=s.length,n>Me)break;o.push(s)}}return new TextDecoder().decode(qe(o))}function qe(t){let e=t.reduce((n,i)=>n+i.length,0),r=new Uint8Array(e),o=0;for(let n of t)r.set(n,o),o+=n.length;return r}function rt(t,e){if(!ze(e.status))return null;let r=e.headers.get("location");return r?new URL(r,t):t}function ze(t){return t>=300&&t<400}async function M(t){if(t.protocol!=="http:"&&t.protocol!=="https:")throw new Error(`Unsafe URL protocol: ${t.protocol}`);let e=Ve(t.hostname);if(Ge(e))throw new Error(`Unsafe local URL host: ${t.hostname}`);if(At(e))throw new Error(`Unsafe private URL host: ${t.hostname}`);if(kt(e))return;let r=await Ee.lookup(e,{all:!0,verbatim:!0});if(!r.length)throw new Error(`Could not resolve URL host: ${t.hostname}`);for(let{address:o}of r)if(At(o))throw new Error(`Unsafe private URL host: ${t.hostname}`)}function Ge(t){return t==="localhost"||t.endsWith(".localhost")}function Ve(t){let e=t.replace(/\.$/,"").toLowerCase();return e.startsWith("[")&&e.endsWith("]")?e.slice(1,-1):e}function At(t){let e=kt(t);return e===4?Ot(t):e===6?Ye(t):!1}function Ot(t){let 
e=t.split(".").map(n=>Number.parseInt(n,10));if(e.length!==4||e.some(n=>!Number.isInteger(n)||n<0||n>255))return!0;let[r=0,o=0]=e;return r===0||r===10||r===127||r===100&&o>=64&&o<=127||r===169&&o===254||r===172&&o>=16&&o<=31||r===192&&o===168||r===198&&(o===18||o===19)||r>=224}function Ye(t){let e=t.toLowerCase();if(e.startsWith("::ffff:")){let r=e.slice(7);return Ot(r)}return e==="::"||e==="::1"||e.startsWith("fc")||e.startsWith("fd")||/^fe[89ab]/.test(e)||e.startsWith("ff")}function ot(t){return t?{signal:t}:{}}async function nt(t,e){let r=new AbortController,o=setTimeout(()=>r.abort(),Oe),n=e.signal?AbortSignal.any([e.signal,r.signal]):r.signal;try{return await fetch(t,{...e,signal:n})}finally{clearTimeout(o)}}var it=class extends Error{constructor(e,r){super(`${e} timed out after ${r}ms`),this.name="ScrapeTimeoutError"}};async function q(t,e,r){let o,n=new Promise((i,s)=>{o=setTimeout(()=>{s(new it(r,e))},e)});try{return await Promise.race([t,n])}finally{o!==void 0&&clearTimeout(o)}}function f(t){if(t.startsWith("repost://"))return t;try{let e=new URL(t);return e.hash="",e.toString()}catch{return t}}var z=class{postCache=new Map;linkCache=new Map;log;constructor(e){this.log=e}getCached(e){return this.postCache.get(f(e))}remember(e){this.postCache.set(f(e.href),e)}collectAllHrefs(e,r){let o=f(e.href);if(!r.has(o)){r.add(o);for(let n of e.references??[])this.collectAllHrefs(n,r);for(let n of e.thread??[])this.collectAllHrefs(n,r)}}async finalize(e,r,o=!0){let n=f(e.href),i=this.postCache.get(n);if(i)return i;if(r.has(n))return this.log.debug({href:n},"cycle detected; omitting nested content"),e;r.add(n);let s=e.linkUrls?.length?e.linkUrls:Xe(e.body??""),a=s.length?await this.resolveLinksCached(s):void 0,c=await this.finalizeNested(e.references??[],r,!1),l=await this.finalizeNested(e.thread??[],r,!1);r.delete(n);let{references:u,thread:p,links:d,linkUrls:m,...h}=e,$={...h,...a?.length?{links:a}:{},...c.length?{references:c}:{},...l.length?{thread:l}:{}};return 
o&&this.postCache.set(n,$),$}async finalizeNested(e,r,o){let n=[];for(let i of e){let s=f(i.href);if(r.has(s)){this.log.debug({href:s},"cycle detected; skipping reference/thread insert");continue}n.push(await this.finalize(i,r,o))}return n}async resolveLinksCached(e){let r=[],o=new Set;for(let n of e){let i=Mt(n);if(!i){this.log.warn({url:n},"invalid external link skipped");continue}if(o.has(i))continue;o.add(i);let s=this.linkCache.get(i);if(s){r.push(s);continue}if(Qe(i)){let a={url:i};this.linkCache.set(i,a),r.push(a);continue}try{let a=await q(Et(i),3e4,`external link ${i}`),c=Mt(a.url);if(!c){this.log.warn({url:i,resolved:a},"resolved external link is invalid; skipping");continue}let l={...a,url:c};this.linkCache.set(i,l),r.push(l)}catch(a){this.log.warn({url:i,err:a},"external link resolution failed; keeping url only");let c={url:i};this.linkCache.set(i,c),r.push(c)}}return r}};function Mt(t){if(t.startsWith("blob:"))return null;try{let e=new URL(t);return e.protocol!=="http:"&&e.protocol!=="https:"?null:e.toString()}catch{return null}}function Qe(t){try{let{pathname:e}=new URL(t);return/\.(mp4|m3u8|webm|mov)(\?|$)/i.test(e)||e.includes("/video/")||e.includes("/amplify_video/")}catch{return!1}}function Xe(t){let e=[],r=/https?:\/\/[^\s)>\]]+/g;for(let o of t.matchAll(r))e.push(o[0].replace(/[.,;:!?)]+$/,""));return e}function Ut(t,e,r=Date.now()){if(e)return{cutoffMs:Date.parse(e.timestamp),cutoffTimestamp:e.timestamp};let o=r-t.timeWindowMinutes*60*1e3;return{cutoffMs:o,cutoffTimestamp:new Date(o).toISOString()}}function Ft(t,e,r,o,n){let i={};for(let s of r)U(s,i);for(let s of o)U(s,i);for(let s of Object.values(n))for(let a of s)U(a,i);return{timestamp:t,cutoffTimestamp:e,posts:i,following:r.map(s=>f(x(s.href))),forYouSuggestions:o.map(s=>f(x(s.href))),monitored:Object.fromEntries(Object.entries(n).map(([s,a])=>[s,a.map(c=>f(x(c.href)))]))}}function U(t,e){let r=f(t.href);for(let o of t.references??[])U(o,e);for(let o of 
t.thread??[])U(o,e);e[r]||(e[r]=Ze(t))}function Ze(t){return{stats:t.stats,...t.author?{author:t.author}:{},...t.timestamp?{timestamp:t.timestamp}:{},...t.body?{body:t.body}:{},...t.links?.length?{links:t.links}:{},...t.thread?.length?{thread:t.thread.map(e=>f(e.href))}:{},...t.references?.length?{references:t.references.map(e=>f(e.href))}:{}}}function Bt(t){let e=new Set,r=o=>{e.add(f(o))};for(let o of t.following)r(o);for(let o of t.forYouSuggestions)r(o);for(let o of Object.values(t.monitored))for(let n of o)r(n);for(let o of Object.keys(t.posts))r(o);for(let o of Object.values(t.posts)){for(let n of o.references??[])r(n);for(let n of o.thread??[])r(n)}return e}async function k(){let t=Math.floor(Math.random()*500);await new Promise(e=>setTimeout(e,500+t))}async function b(t,e,r){e.debug({label:r},"waiting for UI to settle"),await t.waitForLoadState("networkidle",{timeout:15e3}).catch(()=>{}),await Ht(t)}async function y(t,e,r){e.debug({label:r},"waiting after DOM action"),await Ht(t)}async function F(t,e,r="post conversation"){e.debug({label:r},"waiting for conversation timeline");let o=t.getByLabel("Timeline: Conversation",{exact:!0});await o.waitFor({state:"visible",timeout:2e4}),await o.locator('article[data-testid="tweet"]').first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),await k()}async function Ht(t){let e=t.locator('[aria-busy="true"]');await e.count()>0&&await e.first().waitFor({state:"hidden",timeout:1e4}).catch(()=>{}),await k()}async function st(t,e,r,o,n){e.info({action:o},"interaction"),await r.click(n),await b(t,e,o)}function B(t){return{href:f(t),stats:{comments:0,reposts:0,likes:0}}}async function at(t){let e=t.locator('[data-testid="tweetText"]'),r=await e.count();for(let o=0;o<r;o++){let n=e.nth(o);if(await n.evaluate(c=>!!c.closest('div[role="link"]')))continue;let s=(await n.innerText()).trim();if(!s)continue;let a=await n.evaluate(c=>{let l=[];for(let u of 
c.querySelectorAll("a[href]"))l.push({text:(u.textContent??"").trim(),href:u.getAttribute("href")??""});return l});return ct(s,a)}}function ct(t,e){let r=t;for(let{text:o,href:n}of e){let i=tr(n);!o||r.includes(`](${i})`)||(r=r.replace(o,`[${o}](${i})`))}return r}function tr(t){return t.startsWith("http")?t:t.startsWith("/")?`https://x.com${t}`:t}function er(t,e){let r=null,o=!1,n=[],i=a=>{if(!o){o=!0,r=a;for(let c of n)c(a);n.length=0}},s=async a=>{let c=a.url();if(!(!c.includes("TweetDetail")||!c.includes(e)))try{i(await a.text())}catch{}};return t.on("response",s),{waitFor:(a=15e3)=>r?Promise.resolve(r):new Promise(c=>{let l=setTimeout(()=>{t.off("response",s),c(r)},a);n.push(u=>{clearTimeout(l),c(u)})}),detach:()=>{t.off("response",s)}}}async function It(t,e,r){let o=_(e);if(!o)return null;let n=er(t,o),i=g(e);try{return g(t.url())!==i?await t.goto(e,{waitUntil:"domcontentloaded"}):(r.debug({focalId:o},"reloading conversation to capture TweetDetail"),await t.reload({waitUntil:"domcontentloaded"})),await F(t,r),await n.waitFor(15e3)}finally{n.detach()}}function $t(t,e){let r;try{r=JSON.parse(t)}catch{return null}let o=rr(r),n=o.get(e);return n?G(n,o,{includeThread:!0,includeQuotes:!0,allowSyntheticRepost:!0}):null}function rr(t){let e=new Map,r=o=>{if(!o||typeof o!="object")return;if(Array.isArray(o)){for(let a of o)r(a);return}let n=o,i=n.legacy?.id_str,s=n.core?.user_results?.result?.core?.screen_name;i&&s&&e.set(i,n);for(let a of Object.values(o))r(a)};return r(t),e}function Dt(t,e){let r=fr(e.created_at);return{stats:lr(e),...t?{author:t}:{},...r?{timestamp:r}:{}}}function or(t,e,r,o,n){let i=t.retweeted_status_result?.result;if(!i)throw new Error("bare retweet missing retweeted_status_result");return{href:j(r,o),...Dt(r,n),references:[G(i,e,{includeThread:!1,includeQuotes:!0,allowSyntheticRepost:!1})]}}function nr(t,e,r){let 
o=t.quoted_status_result?.result;return!r.includeQuotes||!o?[]:[G(o,e,{includeThread:!1,includeQuotes:!1,allowSyntheticRepost:!1})]}function G(t,e,r){let o=t.legacy;if(!o?.id_str)throw new Error("tweet node missing id_str");let n=t.core?.user_results?.result?.core?.screen_name??"",i=ur(n,o.id_str);if(r.allowSyntheticRepost&&sr(t))return or(t,e,n,i,o);let s=ar(t),a=cr(t),c=nr(t,e,r),l=r.includeThread?ir(t,e):[];return{href:i,...Dt(n,o),...s?{body:s}:{},...a.length?{linkUrls:a}:{},...c.length?{references:c}:{},...l.length?{thread:l}:{}}}function ir(t,e){let r=[],o=new Set,n=t;for(;n?.legacy?.in_reply_to_status_id_str;){let i=n.legacy.in_reply_to_status_id_str;if(o.has(i))break;o.add(i);let s=e.get(i);if(!s)break;r.unshift(G(s,e,{includeThread:!1,includeQuotes:!1,allowSyntheticRepost:!1})),n=s}return r}function sr(t){if(!t.retweeted_status_result?.result||t.legacy?.is_quote_status)return!1;let r=lt(t).trim();return r?/^RT @\w+:/i.test(r):!0}function lt(t){return t.note_tweet?.note_tweet_results?.result?.text??t.legacy?.full_text??""}function ar(t){let e=lt(t).trim();if(!e)return;let r=[];for(let o of t.legacy?.entities?.urls??[])o.expanded_url&&r.push({text:o.display_url??o.url??o.expanded_url,href:o.expanded_url});for(let o of Nt(t))o.expanded_url&&o.display_url&&r.push({text:o.display_url,href:o.expanded_url});return ct(e,r)}function Nt(t){return t.legacy?.extended_entities?.media??t.legacy?.entities?.media??[]}function cr(t){let e=new Set,r=o=>{if(!(!o||o.startsWith("blob:")))try{let n=new URL(o);dr(n)&&e.add(n.toString())}catch{o.startsWith("/")&&e.add(new URL(o,"https://x.com").toString())}};for(let o of t.legacy?.entities?.urls??[])r(o.expanded_url);for(let o of Nt(t)){r(o.expanded_url),r(o.media_url_https);for(let n of o.video_info?.variants??[])n.content_type?.startsWith("video/")&&r(n.url)}for(let o of t.card?.legacy?.binding_values??[]){let n=o.value?.string_value;(o.key?.includes("url")||n?.startsWith("http"))&&r(n)}for(let o of 
pr(lt(t)))r(o);return[...e]}function lr(t){return{comments:t.reply_count??0,reposts:t.retweet_count??0,likes:t.favorite_count??0}}function ur(t,e){return g(`https://x.com/${t}/status/${e}`)}function fr(t){if(!t)return;let e=Date.parse(t);return Number.isNaN(e)?void 0:new Date(e).toISOString()}function pr(t){let e=t.replace(/\s+/g,""),r=[],o=/https?:\/\/[^\s]+|(?:https?:\/\/)?(?:x\.com|twitter\.com)\/[^\s]+/gi;for(let n of e.matchAll(o)){let i=n[0].replace(/[.,;:!?)…]+$/,"");i.startsWith("http")||(i=`https://${i}`),r.push(i)}return r}function dr(t){return t.protocol==="http:"||t.protocol==="https:"}var gr="Timeline: Conversation",V=class{pool;processor;log;inFlight=new Map;constructor(e,r,o){this.pool=e,this.processor=r,this.log=o}async scrapeMany(e){return Promise.all(e.map(r=>this.scrape(r)))}async scrape(e,r){let o=f(e),n=this.processor.getCached(o);if(n)return n;let i=this.inFlight.get(o);if(i)return r?(this.log.warn({href:o},"nested scrape skipped; same href already in flight (would deadlock)"),B(e)):(this.log.debug({href:o},"awaiting in-flight post detail scrape"),i);let s=this.runScrape(e,r);this.inFlight.set(o,s);try{return await s}finally{this.inFlight.delete(o)}}scrapeLinked(e,r,o){return this.scrape(r,{page:e,returnHref:o})}async runScrape(e,r){let o=f(e),n=r?()=>this.parseOnPage(r.page,e,r.returnHref):()=>this.pool.run(a=>this.parseOnPage(a,e)),i;try{i=await q(n(),6e4,`post detail ${o}`)}catch(a){return this.failPost(e,a)}let s=await this.processor.finalize(i,new Set);return this.processor.remember(s),s}failPost(e,r){L(this.log,{action:"scrapePostDetail",expected:"TweetDetail GraphQL or conversation timeline",href:e,err:r});let o=B(e);return this.processor.remember(o),o}async parseOnPage(e,r,o){let n=o?g(o):void 0,i=_(r);try{if(i){let s=await It(e,r,this.log);if(s){let a=$t(s,i);if(a)return this.log.debug({href:r,source:"TweetDetail"},"parsed post from API"),a}}return this.log.warn({href:r},"TweetDetail unavailable; falling back to DOM"),await 
wr(e,r,this.log)}finally{n&&g(e.url())!==n&&(await e.goto(n,{waitUntil:"domcontentloaded"}),await F(e,this.log,"restore focal conversation"))}}};async function wr(t,e,r){let o=g(e);g(t.url())!==o&&(await t.goto(e,{waitUntil:"domcontentloaded"}),await F(t,r));let n=jt(t);if(await n.first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),await n.count()===0)return r.warn({href:o},"no conversation articles; keeping href-only stub"),B(o);let s=await hr(n,o);for(let d=0;d<=s;d++)await br(t,n.nth(d),r);let a=[];for(let d=0;d<s;d++)a.push(await yr(n.nth(d),r));let c=jt(t).nth(s),l=await Sr(c),u=await at(c);if(l&&!u){let d=c.locator('article[data-testid="tweet"]'),m=await R(d.last());return{href:j(l,o),stats:await W(c,r),...await K(c),...await A(c),references:m?[B(g(m))]:[]}}let p=await Wt(c,u);return{href:o,stats:await W(c,r),...await K(c),...await A(c),...u?{body:u}:{},...p.length?{linkUrls:p}:{},...a.length?{thread:a}:{}}}async function hr(t,e){let r=_(e),o=await t.count();for(let n=0;n<o;n++){let i=await R(t.nth(n));if(i&&_(i)===r)return n}return 0}async function yr(t,e){let r=await R(t);if(!r)throw new Error("thread article missing status href");let o=await at(t),n=await Wt(t,o);return{href:g(r),stats:await W(t,e),...await K(t),...await A(t),...o?{body:o}:{},...n.length?{linkUrls:n}:{}}}function Pr(t){return t.getByLabel(gr,{exact:!0})}function jt(t){return Pr(t).locator('article[data-testid="tweet"]')}async function br(t,e,r){let o=e.getByRole("button",{name:/^Show more$/i});for(;await o.isVisible().catch(()=>!1);)r.info({action:"expand show more"},"interaction"),await o.click(),await y(t,r,"expand show more");let n=e.getByRole("button",{name:/^Show \d+ posts?$/i});for(;await n.isVisible().catch(()=>!1);)r.info({action:"expand thread posts"},"interaction"),await n.click(),await y(t,r,"expand thread posts")}async function Sr(t){let e=t.getByTestId("socialContext");if(!await e.count())return null;let r=t.locator('a[href^="/"]').filter({has:e}).first();if(!await 
r.count())return null;let o=await r.getAttribute("href");return!o||o.includes("/status/")?null:o.replace(/^\//,"").split("/")[0]?.replace(/^@/,"")??null}async function Wt(t,e){let r=[],o=new Set,n=c=>{if(!c||c.startsWith("blob:"))return;let l=xr(c);!l||o.has(l)||(o.add(l),r.push(l))},i=t.getByTestId("card.wrapper");if(await i.count()){let c=i.locator('a[role="link"]');await c.count()&&n(await c.first().getAttribute("href",{timeout:3e3}).catch(()=>null))}let s=t.locator('[data-testid="tweetPhoto"] img[src*="twimg.com"]'),a=await s.count();for(let c=0;c<a;c++)n(await s.nth(c).getAttribute("src"));if(e)for(let c of vr(e))n(c);return r}function xr(t){try{return t.startsWith("http")?new URL(t).toString():t.startsWith("/")?new URL(t,"https://x.com").toString():null}catch{return null}}function vr(t){let e=[],r=/\]\((https?:\/\/[^)]+)\)|https?:\/\/[^\s)>\]]+/g;for(let o of t.matchAll(r)){let n=(o[1]??o[0]).replace(/[.,;:!?)]+$/,"");e.push(n)}return e}var Y=class t{pages=[];available=[];waiters=[];log;constructor(e){this.log=e}static async create(e,r,o){let n=new t(o),i=Math.max(1,r);for(let s=0;s<i;s++){let a=await e.newPage();n.pages.push(a),n.available.push(a)}return o.info({parallelTabs:i},"detail tab pool ready"),n}async run(e){let r=await this.acquire();try{return await e(r)}finally{this.release(r)}}async close(){await Promise.all(this.pages.map(e=>e.close().catch(()=>{}))),this.pages.length=0,this.available.length=0,this.log.debug("detail tab pool closed")}async acquire(){let e=this.available.pop();return e||new Promise(r=>{this.waiters.push(r)})}release(e){let r=this.waiters.shift();if(r){r(e);return}this.available.push(e)}};var ut="Timeline: Your Home Timeline";function J(t){return t.getByLabel(ut).locator('article[data-testid="tweet"]')}async function Kt(t){return await t.locator('xpath=ancestor::*[@data-testid="placementTracking"][1]').count()>0}function Tr(t,e){return e==="home"?t.getByLabel(ut):t.locator('[data-testid="primaryColumn"]')}async function 
Lr(t,e){let r=await Tr(t,e).boundingBox().catch(()=>null);r&&await t.mouse.move(r.x+r.width/2,r.y+Math.min(r.height*.45,520))}async function qt(t,e,r){return await Lr(t,r),await t.mouse.wheel(0,e),t.evaluate(`((delta, feedKind, label) => {
7
7
  const tryScroll = (el) => {
8
8
  if (!el) {
9
9
  return false;
@@ -36,7 +36,7 @@ ${ve(o.errors)}`);return e}function Tt(t){return`${JSON.stringify(t,null,2)}
36
36
  const before = window.scrollY;
37
37
  window.scrollBy(0, delta);
38
38
  return window.scrollY > before;
39
- })(${e}, ${JSON.stringify(r)}, ${JSON.stringify(ct)})`)}async function Wt(t,e,r,o){let n=await Nt(t,1800,o);return await k(),await w(t,e,"timeline scroll"),(o==="home"?await Y(t).count():await t.locator('article[data-testid="tweet"]').count())>r?!0:n}async function lt(t,e,r,o){await e.scrollIntoViewIfNeeded().catch(()=>{}),await e.evaluate(s=>{s.scrollIntoView({block:"end",inline:"nearest"})}),await k();let n=await e.boundingBox().catch(()=>null),i=n?Math.ceil(n.height)+480:1200;await Nt(t,i,o),await k(),await w(t,r,"scroll past post")}var ut="Following",Kt="For you",x="Recent";async function qt(t,e){let r=h(),{cutoffMs:o,cutoffTimestamp:n}=Et(e.config,e.previousState),i=Br(e.previousState),s=new q(r),a=await G.create(t.context(),e.config.parallelTabs??D,r),c=new V(a,s,r);r.info({timeWindowMinutes:e.config.timeWindowMinutes,cutoffTimestamp:n,incremental:!!e.previousState,parallelTabs:e.config.parallelTabs??D},"starting scrape");try{let u=await Lr(t,{cutoffMs:o,stopHrefs:i,processor:s,detailScraper:c},r),p=new Set;for(let P of u)p.add(f(S(P.href))),s.collectAllHrefs(P,p);r.info({count:u.length,unique:p.size},"following feed complete");let d=await _r(t,{cutoffMs:o,stopHrefs:i,skipHrefs:p,processor:s,detailScraper:c},r),T={};for(let P of e.config.monitored)r.info({handle:P},"scraping monitored profile"),T[P]=await Rr(t,P,{cutoffMs:o,stopHrefs:i,processor:s,detailScraper:c},r);return Ot(new Date().toISOString(),n,u,d,T)}finally{await a.close()}}async function Lr(t,e,r){return r.info({tab:ut,sort:x},"scraping following"),await t.goto("https://x.com/home",{waitUntil:"domcontentloaded"}),await y(t,r,"home"),await kr(t,r),await Cr(t,r),await t.keyboard.press("Escape"),await w(t,r,"close following sort menu"),await Y(t).first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),pt(t,e,r,"following","home")}async function _r(t,e,r){return r.info({tab:Kt},"scraping for-you suggestions"),await Ar(t,Kt,r),pt(t,e,r,"forYouSuggestions","home")}async function Rr(t,e,r,o){let 
n=e.replace(/^@/,"");return o.info({handle:n},"navigating to profile"),await t.goto(`https://x.com/${n}`,{waitUntil:"domcontentloaded"}),await y(t,o,`profile:${n}`),pt(t,r,o,`monitored:${n}`,"profile")}async function Ar(t,e,r){await t.goto("https://x.com/home",{waitUntil:"domcontentloaded"}),await y(t,r,"home");let o=ft(t,e);if(!await o.isVisible().catch(()=>!1))throw L(r,{action:"selectHomeTab",expected:`tab "${e}" visible`,missing:"home tab",err:new Error(`Tab not found: ${e}`)}),new Error(`Home tab not found: ${e}`);await nt(t,r,o,`select tab ${e}`)}function ft(t,e){return t.locator('[data-testid="ScrollSnap-List"]').getByRole("tab",{name:e,exact:!0})}async function kr(t,e){let r=ft(t,ut);await r.getAttribute("aria-selected")!=="true"&&(e.info({action:"select Following tab"},"interaction"),await r.click(),await w(t,e,"select Following tab"))}function J(t){return t.getByRole("menu").filter({has:t.getByRole("menuitem",{name:x,exact:!0})})}async function Cr(t,e){if(await Er(t,e),await Or(t,x)){e.info({sort:x},"following sort already selected"),await t.keyboard.press("Escape");return}let r=J(t).getByRole("menuitem",{name:x,exact:!0});if(!await r.isVisible().catch(()=>!1)){e.warn({sort:x},"could not find Following sort menuitem; continuing"),await t.keyboard.press("Escape");return}await nt(t,e,r,`select following sort ${x}`,{force:!0})}async function Er(t,e){let r=ft(t,ut);await r.waitFor({state:"visible",timeout:15e3}),!await J(t).isVisible().catch(()=>!1)&&(e.info({action:"Following tab (open sort menu)"},"interaction"),await r.click(),await w(t,e,"Following tab (open sort menu)"),await J(t).waitFor({state:"visible",timeout:1e4}).catch(()=>{}))}async function Or(t,e){let r=J(t).getByRole("menuitem",{name:e,exact:!0});return await r.count()?await r.locator(":scope > div").nth(1).locator("svg").count()>0:!1}async function Fr(t,e,r,o,n,i,s,a){if(await jt(r))return"continue";let c=await R(r);if(!c)return"continue";let 
l=f(c);if(o.has(l))return"continue";if(o.add(l),e.skipHrefs?.has(l))return a.debug({href:l,feed:i},"skipping post already collected from Following"),await lt(t,r,a,s),"advanced";let u=(await A(r)).timestamp,p=f(S(l));return e.stopHrefs.has(p)||Hr(u,e.cutoffMs)?"stop":(a.debug({href:l,feed:i},"scraping post detail"),n.push(await e.detailScraper.scrape(c)),await lt(t,r,a,s),"advanced")}async function Mr(t,e,r,o){let n=await zt(t,e).count();return await Wt(t,o,n,r)?"moved":"stalled"}async function Ur(t,e,r,o,n,i,s,a){let c=zt(t,r),l=await c.count();for(let u=0;u<l;u++){let p=await Fr(t,e,c.nth(u),o,n,i,s,a);if(p!=="continue")return p}return"continue"}async function pt(t,e,r,o,n){let i=[],s=new Set,a=0,c=n;for(let l=0;l<600;l++){let u=await Ur(t,e,n,s,i,o,c,r);if(u==="stop")return r.info({feed:o,timelineItems:i.length,reason:"stop condition"},"timeline walk ended"),i;if(u==="advanced"){a=0;continue}if(await Mr(t,n,c,r)==="stalled"){if(a++,a>=4){r.info({feed:o,timelineItems:i.length,reason:"stalled scroll"},"timeline walk ended");break}}else a=0}return r.info({feed:o,timelineItems:i.length,reason:"iteration limit"},"timeline walk ended"),i}function zt(t,e){return e==="home"?Y(t):t.locator('article[data-testid="tweet"]')}function Hr(t,e){let r=t?Date.parse(t):Number.NaN;return!Number.isNaN(r)&&r<e}function Br(t){return t?Ft(t):new Set}import{mkdir as Dr}from"node:fs/promises";import{chromium as Jt}from"playwright";async function b(t){let r=(await t.cookies()).filter(i=>/x\.com|twitter\.com/i.test(i.domain)),o=r.find(i=>i.name==="auth_token"&&i.value.length>0),n=r.find(i=>i.name==="ct0"&&i.value.length>0);return!!(o&&n)}var H="https://x.com/i/flow/login";function v(t,e){let r=["A separate Chrome window opens (no Playwright remote debugging).","Use X username/email and password \u2014 not Google Sign-In."];return t==="owner-mismatch"?[...r,`Sign in as @${e} (or switch to that account).`,"Quit Chrome completely when the correct account is active (close the browser, not just 
a tab)."].join(" "):[...r,"Complete onboarding until you reach the home timeline, then quit Chrome completely."].join(" ")}import{chromium as $r}from"playwright";var Ir=`
39
+ })(${e}, ${JSON.stringify(r)}, ${JSON.stringify(ut)})`)}async function zt(t,e,r,o){let n=await qt(t,1800,o);return await k(),await y(t,e,"timeline scroll"),(o==="home"?await J(t).count():await t.locator('article[data-testid="tweet"]').count())>r?!0:n}async function ft(t,e,r,o){await e.scrollIntoViewIfNeeded().catch(()=>{}),await e.evaluate(s=>{s.scrollIntoView({block:"end",inline:"nearest"})}),await k();let n=await e.boundingBox().catch(()=>null),i=n?Math.ceil(n.height)+480:1200;await qt(t,i,o),await k(),await y(t,r,"scroll past post")}var pt="Following",Gt="For you",v="Recent";async function Vt(t,e){let r=P(),{cutoffMs:o,cutoffTimestamp:n}=Ut(e.config,e.previousState),i=Ir(e.previousState),s=new z(r),a=await Y.create(t.context(),e.config.parallelTabs??N,r),c=new V(a,s,r);r.info({timeWindowMinutes:e.config.timeWindowMinutes,cutoffTimestamp:n,incremental:!!e.previousState,parallelTabs:e.config.parallelTabs??N},"starting scrape");try{let u=await _r(t,{cutoffMs:o,stopHrefs:i,processor:s,detailScraper:c},r),p=new Set;for(let h of u)p.add(f(x(h.href))),s.collectAllHrefs(h,p);r.info({count:u.length,unique:p.size},"following feed complete");let d=await Rr(t,{cutoffMs:o,stopHrefs:i,skipHrefs:p,processor:s,detailScraper:c},r),m={};for(let h of e.config.monitored)r.info({handle:h},"scraping monitored profile"),m[h]=await Ar(t,h,{cutoffMs:o,stopHrefs:i,processor:s,detailScraper:c},r);return Ft(new Date().toISOString(),n,u,d,m)}finally{await a.close()}}async function _r(t,e,r){return r.info({tab:pt,sort:v},"scraping following"),await t.goto("https://x.com/home",{waitUntil:"domcontentloaded"}),await b(t,r,"home"),await Cr(t,r),await Er(t,r),await t.keyboard.press("Escape"),await y(t,r,"close following sort menu"),await J(t).first().waitFor({state:"visible",timeout:2e4}).catch(()=>{}),mt(t,e,r,"following","home")}async function Rr(t,e,r){return r.info({tab:Gt},"scraping for-you suggestions"),await kr(t,Gt,r),mt(t,e,r,"forYouSuggestions","home")}async function Ar(t,e,r,o){let 
n=e.replace(/^@/,"");return o.info({handle:n},"navigating to profile"),await t.goto(`https://x.com/${n}`,{waitUntil:"domcontentloaded"}),await b(t,o,`profile:${n}`),mt(t,r,o,`monitored:${n}`,"profile")}async function kr(t,e,r){await t.goto("https://x.com/home",{waitUntil:"domcontentloaded"}),await b(t,r,"home");let o=dt(t,e);if(!await o.isVisible().catch(()=>!1))throw L(r,{action:"selectHomeTab",expected:`tab "${e}" visible`,missing:"home tab",err:new Error(`Tab not found: ${e}`)}),new Error(`Home tab not found: ${e}`);await st(t,r,o,`select tab ${e}`)}function dt(t,e){return t.locator('[data-testid="ScrollSnap-List"]').getByRole("tab",{name:e,exact:!0})}async function Cr(t,e){let r=dt(t,pt);await r.getAttribute("aria-selected")!=="true"&&(e.info({action:"select Following tab"},"interaction"),await r.click(),await y(t,e,"select Following tab"))}function Q(t){return t.getByRole("menu").filter({has:t.getByRole("menuitem",{name:v,exact:!0})})}async function Er(t,e){if(await Or(t,e),await Mr(t,v)){e.info({sort:v},"following sort already selected"),await t.keyboard.press("Escape");return}let r=Q(t).getByRole("menuitem",{name:v,exact:!0});if(!await r.isVisible().catch(()=>!1)){e.warn({sort:v},"could not find Following sort menuitem; continuing"),await t.keyboard.press("Escape");return}await st(t,e,r,`select following sort ${v}`,{force:!0})}async function Or(t,e){let r=dt(t,pt);await r.waitFor({state:"visible",timeout:15e3}),!await Q(t).isVisible().catch(()=>!1)&&(e.info({action:"Following tab (open sort menu)"},"interaction"),await r.click(),await y(t,e,"Following tab (open sort menu)"),await Q(t).waitFor({state:"visible",timeout:1e4}).catch(()=>{}))}async function Mr(t,e){let r=Q(t).getByRole("menuitem",{name:e,exact:!0});return await r.count()?await r.locator(":scope > div").nth(1).locator("svg").count()>0:!1}async function Ur(t,e,r,o,n,i,s,a){if(await Kt(r))return"continue";let c=await R(r);if(!c)return"continue";let 
l=f(c);if(o.has(l))return"continue";if(o.add(l),e.skipHrefs?.has(l))return a.debug({href:l,feed:i},"skipping post already collected from Following"),await ft(t,r,a,s),"advanced";let u=(await A(r)).timestamp,p=f(x(l));return e.stopHrefs.has(p)||Hr(u,e.cutoffMs)?"stop":(a.debug({href:l,feed:i},"scraping post detail"),n.push(await e.detailScraper.scrape(c)),await ft(t,r,a,s),"advanced")}async function Fr(t,e,r,o){let n=await Yt(t,e).count();return await zt(t,o,n,r)?"moved":"stalled"}async function Br(t,e,r,o,n,i,s,a){let c=Yt(t,r),l=await c.count();for(let u=0;u<l;u++){let p=await Ur(t,e,c.nth(u),o,n,i,s,a);if(p!=="continue")return p}return"continue"}async function mt(t,e,r,o,n){let i=[],s=new Set,a=0,c=n;for(let l=0;l<600;l++){let u=await Br(t,e,n,s,i,o,c,r);if(u==="stop")return r.info({feed:o,timelineItems:i.length,reason:"stop condition"},"timeline walk ended"),i;if(u==="advanced"){a=0;continue}if(await Fr(t,n,c,r)==="stalled"){if(a++,a>=4){r.info({feed:o,timelineItems:i.length,reason:"stalled scroll"},"timeline walk ended");break}}else a=0}return r.info({feed:o,timelineItems:i.length,reason:"iteration limit"},"timeline walk ended"),i}function Yt(t,e){return e==="home"?J(t):t.locator('article[data-testid="tweet"]')}function Hr(t,e){let r=t?Date.parse(t):Number.NaN;return!Number.isNaN(r)&&r<e}function Ir(t){return t?Bt(t):new Set}import{mkdir as Nr}from"node:fs/promises";import{chromium as Zt}from"playwright";async function S(t){let r=(await t.cookies()).filter(i=>/x\.com|twitter\.com/i.test(i.domain)),o=r.find(i=>i.name==="auth_token"&&i.value.length>0),n=r.find(i=>i.name==="ct0"&&i.value.length>0);return!!(o&&n)}var H="https://x.com/i/flow/login";function T(t,e){let r=["A separate Chrome window opens (no Playwright remote debugging).","Use X username/email and password \u2014 not Google Sign-In."];return t==="owner-mismatch"?[...r,`Sign in as @${e} (or switch to that account).`,"Quit Chrome completely when the correct account is active (close the browser, not just 
a tab)."].join(" "):[...r,"Complete onboarding until you reach the home timeline, then quit Chrome completely."].join(" ")}import{chromium as Dr}from"playwright";var $r=`
40
40
  (() => {
41
41
  Object.defineProperty(navigator, "webdriver", {
42
42
  get: () => undefined,
@@ -46,5 +46,5 @@ ${ve(o.errors)}`);return e}function Tt(t){return`${JSON.stringify(t,null,2)}
46
46
  window.chrome = { runtime: {} };
47
47
  }
48
48
  })();
49
- `;function dt(t){return{headless:t,channel:"chrome",locale:"en-US",ignoreDefaultArgs:["--use-mock-keychain"],acceptDownloads:!1,serviceWorkers:"allow",chromiumSandbox:!0}}function Vt(){let e=dt(!1);return{...e,ignoreDefaultArgs:["--remote-debugging-pipe",...e.ignoreDefaultArgs]}}async function mt(t){await t.addInitScript(Ir)}async function gt(t,e,r,o){let n=v(r,o),i=r==="owner-mismatch"?"change user":"sign in with email/password";e.warn({profilePath:t,reason:r,expectedOwner:o,guidance:n,loginUrl:H},"opening manual login window \u2014 %s for owner %s at %s, then quit Chrome when done",i,o,H);let s;try{s=await $r.launchPersistentContext(t,Vt())}catch(a){let c=a instanceof Error?a.message:String(a);if(/process_singleton|singleton|user data dir|profile/i.test(c)){if(c.includes("browser has been closed")){e.info({profilePath:t},"login browser closed; profile saved to disk");return}throw e.error({profilePath:t,err:a},"cannot open login window \u2014 profile locked at %s. Close other Chrome windows using this profile. Error: %s",t,a),new Error(`Cannot open login window \u2014 profile locked at ${t}. 
Close other Chrome windows using this profile.`)}throw a}throw await s.close(),new Error("Unexpected: launching a browser window without remote debugging pipes should fail")}var wt="https://x.com/home",jr=2e3,Nr=3e4,B=class extends Error{constructor(e){super(e),this.name="OwnerSessionError"}},g=null,C=null,Gt=new WeakSet;async function Qt(t,e=h()){let r=St(t),o=t.browserCdpEndpoint?.trim()||r;return g&&C===o?(e.debug({sessionKey:o},"reusing in-process browser session"),await g.page.bringToFront().catch(()=>{}),g):(g&&(e.info({sessionKey:C},"closing previous browser before new session"),await I(g,e)),g=t.browserCdpEndpoint?.trim()?await Wr(t.browserCdpEndpoint.trim(),r,e):await Xt(r,e,t.ownerHandle,t.headless),C=o,g)}async function Wr(t,e,r){r.info({endpoint:t},"attaching to Chrome over CDP");let o=await Jt.connectOverCDP(t),n=o.contexts()[0];if(!n)throw new Error(`No browser context at ${t}. Start Chrome with remote debugging (see README).`);await mt(n),Zt(n);let i=n.pages()[0]??await n.newPage();return ht(i),await i.goto(await b(n)?wt:H,{waitUntil:"domcontentloaded"}),await y(i,r,"cdp attach"),{context:n,page:i,profilePath:e,cdpAttached:!0,cdpBrowser:o}}async function Xt(t,e,r,o){await Dr(t,{recursive:!0});let n=await Yt(t,e,o);if(await b(n.context)||(e.info("no auth cookies in scrape session; starting manual login window"),await I(n,e),await gt(t,e,"login",r),n=await Yt(t,e,o)),!await b(n.context))throw new B(`Login did not persist to ${t} (missing auth_token/ct0). ${v("login",r)}`);return n}async function Yt(t,e,r){e.info({profilePath:t},"opening scrape Chrome profile (Playwright-controlled)");let o;try{o=await Jt.launchPersistentContext(t,dt(r))}catch(s){let a=s instanceof Error?s.message:String(s);throw/process_singleton|singleton|user data dir|profile/i.test(a)?new Error(`Chrome profile is locked at ${t}. 
Close any other Chrome window using this profile.`):s}await mt(o),Zt(o);let n=o.pages()[0]??await o.newPage();ht(n),await n.goto(wt,{waitUntil:"domcontentloaded"}),await y(n,e,"scrape session launch");let i=await b(o);return e.info({profilePath:t,hasAuth:i},"scrape Chrome session ready"),{context:o,page:n,profilePath:t,cdpAttached:!1}}function Zt(t){t.on("page",e=>{ht(e)})}function ht(t){if(Gt.has(t))return;Gt.add(t);let e=h().child({source:"browser"}),r=[{match:/^The resource \S+ was preloaded using link preload but not used within a few seconds/i,note:"Preload warning; ignore",level:"debug"},{match:/^Banner not shown/i,note:"Banner not shown; ignore",level:"debug"},{match:/GSI_LOGGER|FedCM/i,note:"Google Sign-In noise; use X email/password login instead",level:"error"}],o={assert:"fatal",clear:"debug",count:"debug",dir:"debug",dirxml:"debug",endGroup:"debug",error:"error",warning:"warn",info:"info",debug:"debug",log:"debug",profile:"trace",profileEnd:"trace",startGroup:"debug",startGroupCollapsed:"debug",table:"debug",time:"debug",timeEnd:"debug",trace:"trace"};t.on("console",n=>{let i=n.type(),s=n.text(),a={type:i,text:s,location:n.location()};for(let{match:c,note:l,level:u}of r)if(c.test(s)){e[u]({...a,note:l},"browser console: %s",s);return}e[o[i]](a,"browser console: %s",s)}),t.on("pageerror",n=>{e.error({err:n.message,stack:n.stack},"browser page error")}),t.on("response",n=>{let i=n.url();i.includes("onboarding/task.json")&&n.status()>=400&&e.warn({url:i,status:n.status(),hint:"X onboarding API failed \u2014 finish login in the manual login window"},"x api response")})}async function yt(t,e){let r=e.log??h(),o=te(e.ownerHandle);if(await ee(t.page,o))return r.info({ownerHandle:o},"owner session verified"),t;let n=await re(t.page),i=!n;if(e.abortOnIncorrectOwnerHandle){let s=n?`Login required for @${o}. ${v("login",o)}`:`Active session does not match ownerHandle @${o}. 
${v("owner-mismatch",o)}`;throw L(r,{action:"ensureOwnerSession",expected:`logged in as @${o}`,missing:n?"auth_token and ct0 cookies":`profile for @${o}`,err:new B(s)}),new B(s)}if(!t.cdpAttached){let s=n?"login":"owner-mismatch";return r.warn({expectedOwner:o,reason:s},"opening manual login window to fix session"),Kr(t,e,s)}return i&&await qr(t,o,r),t}async function Kr(t,e,r){let o=te(e.ownerHandle);await I(t,e.log),g=null,C=null,await gt(t.profilePath,e.log,r,o);let n=await Xt(t.profilePath,e.log,e.ownerHandle,e.headless);return g=n,C=t.profilePath,yt(n,e)}async function qr(t,e,r){let{page:o}=t;r.warn({expectedOwner:e,guidance:v("owner-mismatch",e)},"wrong account on CDP browser \u2014 switch to the configured owner in that Chrome window"),await o.bringToFront();let n=Date.now(),i=n;for(;;){if(await ee(o,e)){r.info({ownerHandle:e},"owner session verified after waiting"),await o.goto(wt,{waitUntil:"domcontentloaded"}),await y(o,r,"post-login home");return}let s=Date.now();s-i>=Nr&&(r.info({expectedOwner:e,waitedMs:s-n,hasAuthCookies:await b(o.context())},"still waiting for correct owner on scrape session"),i=s),await o.waitForTimeout(jr)}}function te(t){return t.replace(/^@/,"").toLowerCase()}async function ee(t,e){return!await b(t.context())||await re(t)?!1:!!(await t.getByRole("link",{name:new RegExp(`@${e}`,"i")}).first().isVisible().catch(()=>!1)||await t.getByRole("button",{name:new RegExp(`@${e}|account menu`,"i")}).first().isVisible().catch(()=>!1)||(await t.getByTestId("AppTabBar_Profile_Link").getAttribute("href").catch(()=>null))?.toLowerCase().includes(`/${e}`))}async function re(t){if(await b(t.context()))return!1;let e=t.url();return/\/login|\/flow\/login/i.test(e)||await t.getByRole("button",{name:/^(log in|sign in)$/i}).first().isVisible().catch(()=>!1)||await t.getByRole("link",{name:/^(log in|sign in)$/i}).first().isVisible().catch(()=>!1),!0}async function I(t,e=h()){t.cdpAttached&&t.cdpBrowser?(e.info("detaching from CDP (leaving your Chrome 
running)"),await t.cdpBrowser.close()):(e.info({profilePath:t.profilePath},"closing browser; persisting profile to disk"),await t.context.close()),g===t&&(g=null,C=null)}var zr="./config.json";function oe(t){let e=zr,r;for(let o=2;o<t.length;o++){let n=t[o];if(n!==void 0){if(n==="--abort-on-incorrect-ownerHandle"){r=!0;continue}if(n.startsWith("-"))throw new Error(`Unknown option: ${n}`);e=n}}return{configPath:e,...r!==void 0?{abortOnIncorrectOwnerHandle:r}:{}}}function ne(t,e){return t.abortOnIncorrectOwnerHandle??e??!1}import{access as ie,mkdir as Vr,readFile as Gr,rename as Yr,unlink as Jr,writeFile as Qr}from"node:fs/promises";import{dirname as Xr}from"node:path";async function se(t){try{let e=await Gr(t,"utf8"),r=$(e);return await E("state.schema.json",r,"State")}catch(e){if(Pt(e))return null;throw e}}async function ae(t,e){await E("state.schema.json",e,"State"),await Vr(Xr(t),{recursive:!0}),await Zr(t),await Qr(t,Tt(e),"utf8")}async function Zr(t){let e=`${t}.bkp`;try{await ie(t)}catch(r){if(Pt(r))return;throw r}try{await ie(e),await Jr(e)}catch(r){if(!Pt(r))throw r}await Yr(t,e)}function Pt(t){return typeof t=="object"&&t!==null&&"code"in t&&t.code==="ENOENT"}async function ro(t){let e=oe(t),r=le(e.configPath);await oo(r,"Config file");let o=await Lt(r),n=h(),i=ne(e,o.abortOnIncorrectOwnerHandle),s=await se(o.statePath),a=await Qt(o,n);try{a=await yt(a,{ownerHandle:o.ownerHandle,headless:o.headless,abortOnIncorrectOwnerHandle:i,log:n});let c=await qt(a.page,{config:o,previousState:s});return await ae(o.statePath,c),n.info({statePath:o.statePath,following:c.following.length,forYouSuggestions:c.forYouSuggestions.length,monitored:Object.fromEntries(Object.entries(c.monitored).map(([l,u])=>[l,u.length]))},"scrape complete; state saved"),c}finally{await I(a,n)}}async function oo(t,e){try{await to(t)}catch{throw new Error(`${e} not found: ${t}`)}}async function no(){await ro(process.argv)}var ce=process.argv[1],io=ce!==void 
0&&le(ce)===eo(import.meta.url);io&&no().catch(t=>{let e=t instanceof Error?t.message:String(t);console.error(e),process.exitCode=1});export{ro as runScrape};
49
+ `;function gt(t){return{headless:t,channel:"chrome",locale:"en-US",ignoreDefaultArgs:["--use-mock-keychain"],acceptDownloads:!1,serviceWorkers:"allow",chromiumSandbox:!0}}function Jt(){let e=gt(!1);return{...e,ignoreDefaultArgs:["--remote-debugging-pipe",...e.ignoreDefaultArgs]}}async function wt(t){await t.addInitScript($r)}async function ht(t,e,r,o){let n=T(r,o),i=r==="owner-mismatch"?"change user":"sign in with email/password";e.warn({profilePath:t,reason:r,expectedOwner:o,guidance:n,loginUrl:H},"opening manual login window \u2014 %s for owner %s at %s, then quit Chrome when done",i,o,H);let s;try{s=await Dr.launchPersistentContext(t,Jt())}catch(a){let c=a instanceof Error?a.message:String(a);if(/process_singleton|singleton|user data dir|profile/i.test(c)){if(c.includes("browser has been closed")){e.info({profilePath:t},"login browser closed; profile saved to disk");return}throw e.error({profilePath:t,err:a},"cannot open login window \u2014 profile locked at %s. Close other Chrome windows using this profile. Error: %s",t,a),new Error(`Cannot open login window \u2014 profile locked at ${t}. 
Close other Chrome windows using this profile.`)}throw a}throw await s.close(),new Error("Unexpected: launching a browser window without remote debugging pipes should fail")}var yt="https://x.com/home",jr=2e3,Wr=3e4,I=class extends Error{constructor(e){super(e),this.name="OwnerSessionError"}},w=null,C=null,Qt=new WeakSet;async function te(t,e=P()){let r=xt(t),o=t.browserCdpEndpoint?.trim()||r;return w&&C===o?(e.debug({sessionKey:o},"reusing in-process browser session"),await w.page.bringToFront().catch(()=>{}),w):(w&&(e.info({sessionKey:C},"closing previous browser before new session"),await E(w,e)),w=t.browserCdpEndpoint?.trim()?await Kr(t.browserCdpEndpoint.trim(),r,e):await ee(r,e,t.ownerHandle,t.headless),C=o,w)}async function Kr(t,e,r){r.info({endpoint:t},"attaching to Chrome over CDP");let o=await Zt.connectOverCDP(t),n=o.contexts()[0];if(!n)throw new Error(`No browser context at ${t}. Start Chrome with remote debugging (see README).`);await wt(n),re(n);let i=n.pages()[0]??await n.newPage();return Pt(i),await i.goto(await S(n)?yt:H,{waitUntil:"domcontentloaded"}),await b(i,r,"cdp attach"),{context:n,page:i,profilePath:e,cdpAttached:!0,cdpBrowser:o}}async function ee(t,e,r,o){await Nr(t,{recursive:!0});let n=await Xt(t,e,o);if(await S(n.context)||(e.info("no auth cookies in scrape session; starting manual login window"),await E(n,e),await ht(t,e,"login",r),n=await Xt(t,e,o)),!await S(n.context))throw new I(`Login did not persist to ${t} (missing auth_token/ct0). ${T("login",r)}`);return n}async function Xt(t,e,r){e.info({profilePath:t},"opening scrape Chrome profile (Playwright-controlled)");let o=gt(r),n;try{n=await Zt.launchPersistentContext(t,o)}catch(a){let c=a instanceof Error?a.message:String(a);throw/process_singleton|singleton|user data dir|profile/i.test(c)?(e.error({err:a,profilePath:t,options:o},"Failed to launch persistent session: %s",a),new Error(`Chrome profile is locked at ${t}. 
Close any other Chrome window using this profile.`)):a}await wt(n),re(n);let i=n.pages()[0]??await n.newPage();Pt(i),await i.goto(yt,{waitUntil:"domcontentloaded"}),await b(i,e,"scrape session launch");let s=await S(n);return e.info({profilePath:t,hasAuth:s},"scrape Chrome session ready"),{context:n,page:i,profilePath:t,cdpAttached:!1}}function re(t){t.on("page",e=>{Pt(e)})}function Pt(t){if(Qt.has(t))return;Qt.add(t);let e=P().child({source:"browser"}),r=[{match:/^The resource \S+ was preloaded using link preload but not used within a few seconds/i,note:"Preload warning; ignore",level:"debug"},{match:/^Banner not shown/i,note:"Banner not shown; ignore",level:"debug"},{match:/GSI_LOGGER|FedCM/i,note:"Google Sign-In noise; use X email/password login instead",level:"error"},{match:/Failed to load resource: the server responded with a status of 503/i,matchUrl:/[/][/]ads-api[.]x/i,note:"Failed to load resource advertisement resource",level:"debug"}],o={assert:"fatal",clear:"debug",count:"debug",dir:"debug",dirxml:"debug",endGroup:"debug",error:"error",warning:"warn",info:"info",debug:"debug",log:"debug",profile:"trace",profileEnd:"trace",startGroup:"debug",startGroupCollapsed:"debug",table:"debug",time:"debug",timeEnd:"debug",trace:"trace",verbose:"debug"};t.on("console",n=>{let i=n.type(),s=n.text(),a=n.location(),c={type:i,text:s,location:a};for(let{match:l,matchUrl:u,note:p,level:d}of r)if(l.test(s)&&(u?.test(a.url)??!0)){e[d]({...c,note:p},"browser console: %s",s);return}e[o[i]](c,"browser console: %s",s)}),t.on("pageerror",n=>{e.error({err:n.message,stack:n.stack},"browser page error")}),t.on("response",n=>{let i=n.url();i.includes("onboarding/task.json")&&n.status()>=400&&e.warn({url:i,status:n.status(),hint:"X onboarding API failed \u2014 finish login in the manual login window"},"x api response")})}async function bt(t,e){let r=e.log??P(),o=oe(e.ownerHandle);if(await ne(t.page,o))return r.info({ownerHandle:o},"owner session verified"),t;let n=await 
ie(t.page),i=!n;if(e.abortOnIncorrectOwnerHandle){let s=n?`Login required for @${o}. ${T("login",o)}`:`Active session does not match ownerHandle @${o}. ${T("owner-mismatch",o)}`;throw L(r,{action:"ensureOwnerSession",expected:`logged in as @${o}`,missing:n?"auth_token and ct0 cookies":`profile for @${o}`,err:new I(s)}),new I(s)}if(!t.cdpAttached){let s=n?"login":"owner-mismatch";return r.warn({expectedOwner:o,reason:s},"opening manual login window to fix session"),qr(t,e,s)}return i&&await zr(t,o,r),t}async function qr(t,e,r){let o=oe(e.ownerHandle);await E(t,e.log),w=null,C=null,await ht(t.profilePath,e.log,r,o);let n=await ee(t.profilePath,e.log,e.ownerHandle,e.headless);return w=n,C=t.profilePath,bt(n,e)}async function zr(t,e,r){let{page:o}=t;r.warn({expectedOwner:e,guidance:T("owner-mismatch",e)},"wrong account on CDP browser \u2014 switch to the configured owner in that Chrome window"),await o.bringToFront();let n=Date.now(),i=n;for(;;){if(await ne(o,e)){r.info({ownerHandle:e},"owner session verified after waiting"),await o.goto(yt,{waitUntil:"domcontentloaded"}),await b(o,r,"post-login home");return}let s=Date.now();s-i>=Wr&&(r.info({expectedOwner:e,waitedMs:s-n,hasAuthCookies:await S(o.context())},"still waiting for correct owner on scrape session"),i=s),await o.waitForTimeout(jr)}}function oe(t){return t.replace(/^@/,"").toLowerCase()}async function ne(t,e){return!await S(t.context())||await ie(t)?!1:!!(await t.getByRole("link",{name:new RegExp(`@${e}`,"i")}).first().isVisible().catch(()=>!1)||await t.getByRole("button",{name:new RegExp(`@${e}|account menu`,"i")}).first().isVisible().catch(()=>!1)||(await t.getByTestId("AppTabBar_Profile_Link").getAttribute("href").catch(()=>null))?.toLowerCase().includes(`/${e}`))}async function ie(t){if(await S(t.context()))return!1;let e=t.url();return/\/login|\/flow\/login/i.test(e)||await t.getByRole("button",{name:/^(log in|sign in)$/i}).first().isVisible().catch(()=>!1)||await t.getByRole("link",{name:/^(log in|sign 
in)$/i}).first().isVisible().catch(()=>!1),!0}async function E(t,e=P()){t.cdpAttached&&t.cdpBrowser?(e.info("detaching from CDP (leaving your Chrome running)"),await t.cdpBrowser.close()):(e.info({profilePath:t.profilePath},"closing browser; persisting profile to disk"),await t.context.close()),w===t&&(w=null,C=null)}var Gr="./config.json";function se(t){let e=Gr,r;for(let o=2;o<t.length;o++){let n=t[o];if(n!==void 0){if(n==="--abort-on-incorrect-ownerHandle"){r=!0;continue}if(n.startsWith("-"))throw new Error(`Unknown option: ${n}`);e=n}}return{configPath:e,...r!==void 0?{abortOnIncorrectOwnerHandle:r}:{}}}function ae(t,e){return t.abortOnIncorrectOwnerHandle??e??!1}import{access as ce,mkdir as Vr,readFile as Yr,rename as Jr,unlink as Qr,writeFile as Xr}from"node:fs/promises";import{dirname as Zr}from"node:path";async function le(t){try{let e=await Yr(t,"utf8"),r=D(e);return await O("state.schema.json",r,"State")}catch(e){if(St(e))return null;throw e}}async function ue(t,e){await O("state.schema.json",e,"State"),await Vr(Zr(t),{recursive:!0}),await to(t),await Xr(t,Lt(e),"utf8")}async function to(t){let e=`${t}.bkp`;try{await ce(t)}catch(r){if(St(r))return;throw r}try{await ce(e),await Qr(e)}catch(r){if(!St(r))throw r}await Jr(t,e)}function St(t){return typeof t=="object"&&t!==null&&"code"in t&&t.code==="ENOENT"}async function oo(t){let e=Date.now(),r=()=>(Date.now()-e)/1e3,o=se(t),n=pe(o.configPath);await no(n,"Config file");let i=await _t(n),s=P(),a=ae(o,i.abortOnIncorrectOwnerHandle),c=await le(i.statePath),l=null,u=!1,p=null,d=()=>{u||(u=!0,process.exitCode=1,s.warn({elapsedInSeconds:r()},"SIGTERM received; stopping scrape early"),l||process.exit(1),p=E(l,s).catch(m=>{s.error({err:m,elapsedInSeconds:r()},"failed to close browser after SIGTERM: %s",m)}).finally(()=>{process.exit(1)}))};process.once("SIGTERM",d);try{l=await te(i,s),l=await bt(l,{ownerHandle:i.ownerHandle,headless:i.headless,abortOnIncorrectOwnerHandle:a,log:s});let m=await 
Vt(l.page,{config:i,previousState:c});if(u)throw new Error("Scrape stopped early after SIGTERM");return await ue(i.statePath,m),s.info({statePath:i.statePath,following:m.following.length,forYouSuggestions:m.forYouSuggestions.length,elapsedInSeconds:r(),monitored:Object.fromEntries(Object.entries(m.monitored).map(([h,$])=>[h,$.length]))},"scrape complete; state saved"),{state:m,config:i}}finally{process.off("SIGTERM",d),p?await p:l&&await E(l,s)}}async function no(t,e){try{await eo(t)}catch{throw new Error(`${e} not found: ${t}`)}}async function io(){await oo(process.argv)}var fe=process.argv[1],so=fe!==void 0&&pe(fe)===ro(import.meta.url)&&!0;so&&io().catch(t=>{tt.fatal({err:t},"scrape failed: %s",t),process.exit(1)});export{oo as runScrape};
50
50
  //# sourceMappingURL=scrape.mjs.map