headless-twitter 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/EXAMPLES.md ADDED
@@ -0,0 +1,47 @@
1
+ # Examples
2
+
3
+ ## First time setup
4
+
5
+ ```bash
6
+ npm install -g headless-twitter
7
+ npx playwright install chromium
8
+ headless-twitter twitter timeline '' 0 --setup
9
+ # Log in to Twitter in the browser window that opens
10
+ # The browser closes automatically once login is detected — session saved, no re-auth needed
11
+ ```
12
+
13
+ ## Read timeline
14
+
15
+ ```bash
16
+ headless-twitter twitter timeline '' 20
17
+ ```
18
+
19
+ ## Search tweets
20
+
21
+ ```bash
22
+ headless-twitter twitter search "golang concurrency" 15
23
+ ```
24
+
25
+ ## Get user's tweets
26
+
27
+ ```bash
28
+ headless-twitter twitter user "@gvanrossum" 10
29
+ ```
30
+
31
+ ## JSON output (for piping)
32
+
33
+ ```bash
34
+ headless-twitter twitter timeline '' 10 --json | jq '.tweets[0]'
35
+ ```
36
+
37
+ ## Save to file
38
+
39
+ ```bash
40
+ headless-twitter twitter timeline '' 50 --json > tweets.json
41
+ ```
42
+
43
+ ## Use custom session
44
+
45
+ ```bash
46
+ HT_SESSION_DIR=~/.twitter-work headless-twitter twitter timeline '' 10
47
+ ```
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 om-ashish-soni
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,195 @@
1
+ # headless-twitter
2
+
3
+ **Headless Twitter/X reader. Zero mutations. Raw output.**
4
+
5
+ Uses Playwright to intercept Twitter's GraphQL XHR responses directly. Strictly read-only. Two modes:
6
+
7
+ - **`--use-chrome`** (recommended): Connects to your logged-in Chrome via CDP. Zero login friction. Auto-launches Chrome if needed.
8
+ - **`--setup`**: One-time Playwright session login. Headless after setup.
9
+
10
+ ## Install
11
+
12
+ ### Via npm (recommended)
13
+
14
+ ```bash
15
+ npm install -g headless-twitter
16
+ npx playwright install chromium
17
+ ```
18
+
19
+ Then:
20
+
21
+ ```bash
22
+ headless-twitter twitter timeline '' 20 --use-chrome
23
+ ```
24
+
25
+ ### From source
26
+
27
+ ```bash
28
+ git clone https://github.com/om-ashish-soni/headless-twitter.git
29
+ cd headless-twitter
30
+ npm install
31
+ npx playwright install chromium
32
+ node index.js twitter timeline '' 20 --use-chrome
33
+ ```
34
+
35
+ ## Quick Start (Chrome CDP — recommended)
36
+
37
+ ```bash
38
+ node index.js twitter timeline '' 20 --use-chrome
39
+ ```
40
+
41
+ That's it. On first run:
42
+ 1. Copies your Chrome profile to `~/.config/google-chrome-debug/`
43
+ 2. Launches Chrome with CDP debug port (9222)
44
+ 3. Connects, opens a tab, reads tweets, closes the tab
45
+ 4. Chrome stays running — subsequent runs connect instantly
46
+
47
+ No manual login. No setup step. Uses your existing Twitter session from Chrome.
48
+
49
+ ### Custom CDP port
50
+
51
+ ```bash
52
+ node index.js twitter timeline '' 20 --use-chrome --cdp-url http://localhost:9333
53
+ ```
54
+
55
+ ## Alternative: Playwright Session Setup
56
+
57
+ If you prefer not to use your Chrome profile:
58
+
59
+ ```bash
60
+ node index.js twitter timeline '' 0 --setup
61
+ ```
62
+
63
+ Opens a visible Chrome window. Log into Twitter manually. Session saved to `~/.headless-twitter/sessions/twitter/`. All future runs are headless.
64
+
65
+ ## Usage
66
+
67
+ ```bash
68
+ # Home timeline (top 20 tweets)
69
+ node index.js twitter timeline '' 20 --use-chrome
70
+
71
+ # Following feed
72
+ node index.js twitter following '' 10 --use-chrome
73
+
74
+ # Search
75
+ node index.js twitter search "rust cli tools" 15 --use-chrome
76
+
77
+ # User tweets
78
+ node index.js twitter user "@karpathy" 10 --use-chrome
79
+
80
+ # JSON output (for piping)
81
+ node index.js twitter timeline '' 20 --use-chrome --json
82
+
83
+ # Debug mode (show XHR endpoints + extraction details)
84
+ node index.js twitter timeline '' 20 --use-chrome --debug
85
+ ```
86
+
87
+ ## Architecture
88
+
89
+ ```
90
+ 4-layer read-only enforcement:
91
+
92
+ Layer 1 — Network: block all mutating requests (POST/PUT/DELETE/PATCH)
93
+ Layer 2 — DOM: freeze click/submit/input events via JS injection
94
+ Layer 3 — Code: no page.click(), page.fill(), page.submit() calls
95
+ Layer 4 — Session: persistent browser state (cookies, localStorage)
96
+ ```
97
+
98
+ ## What it does
99
+
100
+ - Reads your Twitter timeline, Following feed, search results, or any user's tweets
101
+ - Uses Playwright persistent sessions (login once, reuse forever)
102
+ - Intercepts Twitter's GraphQL XHR responses directly (no DOM scraping)
103
+ - Strictly read-only: all POST/PUT/DELETE/PATCH requests blocked at network level
104
+
105
+ ## What it NEVER does
106
+
107
+ - Click any button, link, or interactive element
108
+ - Submit any form
109
+ - Like, retweet, follow, or bookmark
110
+ - Send any POST/PUT/DELETE/PATCH request
111
+ - Type into any input field
112
+ - Modify any state on Twitter
113
+
114
+ ## Options
115
+
116
+ | Flag | Description |
117
+ |------|-------------|
118
+ | `--use-chrome` | Connect to Chrome via CDP (auto-launches if needed) |
119
+ | `--cdp-url URL` | CDP endpoint (default: `http://localhost:9222`) |
120
+ | `--setup` | Interactive login for Playwright session |
121
+ | `--json` | Output as JSON instead of TUI table |
122
+ | `--debug` | Show XHR endpoints and extraction details |
123
+ | `--help` | Show help message |
124
+
125
+ ## Environment Variables
126
+
127
+ | Var | Default | Description |
128
+ |-----|---------|-------------|
129
+ | `HT_SESSION_DIR` | `~/.headless-twitter/sessions/twitter/` | Browser session path (Playwright mode only) |
130
+
131
+ ## Output
132
+
133
+ ### TUI (default)
134
+
135
+ ```
136
+ ====================================================================================================
137
+ Twitter Feed
138
+ ====================================================================================================
139
+
140
+ [1] @author
141
+
142
+ Tweet text wrapped at 110 chars per line
143
+ continued...
144
+
145
+ 📍 https://x.com/i/web/status/...
146
+ ❤️ 2,580 | 🔄 320 | 💬 85
147
+ 📅 Fri Apr 10 17:49:10 +0000 2026
148
+ ----------------------------------------------------------------------------------------------------
149
+ ```
150
+
151
+ ### JSON (`--json`)
152
+
153
+ ```json
154
+ {
155
+ "source": "twitter",
156
+ "mode": "timeline",
157
+ "count": 10,
158
+ "tweets": [
159
+ {
160
+ "id": "...",
161
+ "text": "...",
162
+ "author": "@karpathy",
163
+ "likes": 2580,
164
+ "retweets": 320,
165
+ "replies": 85,
166
+ "time": "...",
167
+ "url": "..."
168
+ }
169
+ ]
170
+ }
171
+ ```
172
+
173
+ ## Claude Code Integration
174
+
175
+ Use it as a Claude Code skill. Claude can then call these functions directly:
176
+
177
+ ```javascript
178
+ search_tweets(query="golang", limit=15)
179
+ fetch_tweets_timeline(limit=20)
180
+ fetch_user_tweets(username="@gvanrossum", limit=10)
181
+ fetch_following(limit=20)
182
+ ```
183
+
184
+ Example: "Fetch latest tweets about Rust and summarize discussions"
185
+
186
+ Claude automatically invokes the functions and analyzes results.
187
+
188
+ ## Requirements
189
+
190
+ - Node.js >= 18.0.0
191
+ - Playwright + Chromium
192
+
193
+ ## License
194
+
195
+ MIT
package/index.js ADDED
@@ -0,0 +1,525 @@
1
+ #!/usr/bin/env node
2
+
3
+ // ─────────────────────────────────────────────────────────────────────────────
4
+ // headless-twitter — Headless Twitter/X reader. Zero mutations. Raw output.
5
+ // https://github.com/om-ashish-soni/headless-twitter
6
+ // ─────────────────────────────────────────────────────────────────────────────
7
+
8
+ const { chromium } = require('playwright');
9
+ const path = require('path');
10
+ const os = require('os');
11
+ const fs = require('fs');
12
+ const { execSync, spawn } = require('child_process');
13
+
14
+ // ─── Help Message ───────────────────────────────────────────────────────────
15
+
16
/**
 * Print the command-line usage guide.
 *
 * The guide is written with a single console.log call; the text starts and
 * ends with an empty line.
 */
function showHelp() {
  const helpLines = [
    ``,
    `headless-twitter — Headless Twitter/X reader. Zero mutations. Raw output.`,
    ``,
    `USAGE:`,
    `node index.js [SOURCE] [MODE] [QUERY] [LIMIT] [OPTIONS]`,
    ``,
    `ARGUMENTS:`,
    `SOURCE twitter (default)`,
    `MODE timeline | following | search | user (default: timeline)`,
    `QUERY search term or username (required for search/user modes)`,
    `LIMIT number of tweets (default: 20, max: 100)`,
    ``,
    `OPTIONS:`,
    `--setup login to Twitter (interactive, one-time only)`,
    `--use-chrome connect to running Chrome via CDP (no login needed)`,
    `--cdp-url URL CDP endpoint (default: http://localhost:9222)`,
    `--json output as JSON instead of formatted table`,
    `--debug show XHR responses and extraction details (troubleshooting)`,
    `--help show this message`,
    ``,
    `EXAMPLES:`,
    `node index.js twitter timeline '' 20`,
    `node index.js twitter search "rust cli" 15`,
    `node index.js twitter user "@gvanrossum" 10`,
    `node index.js twitter following '' 20 --json`,
    `node index.js twitter timeline '' 0 --setup`,
    ``,
    `SETUP (one-time):`,
    `node index.js twitter timeline '' 0 --setup`,
    ``,
    `What happens:`,
    `1. Visible Chrome window opens`,
    `2. Twitter login page shown (may redirect)`,
    `3. You log in with credentials + 2FA`,
    `4. Once logged in → tweets appear`,
    `5. Browser closes automatically`,
    `6. Session saved to ~/.headless-twitter/sessions/twitter/`,
    ``,
    `SESSION:`,
    `Reused for all future runs (Chrome runs in background, no browser window).`,
    `Set HT_SESSION_DIR to use alternate session.`,
    ``,
    `CHROME CDP (--use-chrome):`,
    `Skip login entirely — connect to your already-logged-in Chrome.`,
    ``,
    `1. Close Chrome if running`,
    `2. Relaunch with: google-chrome --remote-debugging-port=9222`,
    `(or: chrome --remote-debugging-port=9222 on Mac)`,
    `3. Run: node index.js twitter timeline '' 20 --use-chrome`,
    ``,
    `No session dir needed. No setup. Uses your existing cookies.`,
    ``,
    `READ-ONLY:`,
    `• No mutations allowed (blocks POST/PUT/DELETE/PATCH)`,
    `• Can't interact with page (no clicks, no typing, no forms)`,
    `• Reads Twitter's GraphQL responses directly`,
    ``,
  ];

  console.log(helpLines.join('\n'));
}
75
+
76
+ // ─── CLI Args ───────────────────────────────────────────────────────────────
77
+
78
if (process.argv.includes('--help') || process.argv.includes('-h')) {
  showHelp();
  process.exit(0);
}

// Positional arguments are whatever is left once option flags (anything that
// starts with "--") and the value following --cdp-url are removed. This lets
// flags appear anywhere on the command line instead of being mistaken for
// SOURCE/MODE/QUERY/LIMIT. An empty string ('') is still a valid positional.
const POSITIONAL_ARGS = process.argv.slice(2).filter((arg, idx, all) => {
  if (arg.startsWith('--')) return false;
  return all[idx - 1] !== '--cdp-url';
});

const SOURCE = POSITIONAL_ARGS[0] || 'twitter';
const MODE = POSITIONAL_ARGS[1] || 'timeline'; // timeline | following | search | user
const QUERY = POSITIONAL_ARGS[2] || '';
// Only a plain base-10 integer is accepted; anything else ("20abc", "0x10")
// becomes NaN and is rejected by the validation below.
const LIMIT_ARG = (POSITIONAL_ARGS[3] || '20').trim();
const LIMIT = /^[+-]?\d+$/.test(LIMIT_ARG) ? Number.parseInt(LIMIT_ARG, 10) : NaN;
const SETUP = process.argv.includes('--setup');
const USE_CHROME = process.argv.includes('--use-chrome');
const CDP_URL = (() => {
  const idx = process.argv.indexOf('--cdp-url');
  return idx !== -1 && process.argv[idx + 1] ? process.argv[idx + 1] : 'http://localhost:9222';
})();
const JSON_OUT = process.argv.includes('--json');
const DEBUG = process.argv.includes('--debug');

// ─── Input Validation ────────────────────────────────────────────────────────

const VALID_MODES = ['timeline', 'following', 'search', 'user'];
if (!VALID_MODES.includes(MODE)) {
  console.error(`Error: Invalid mode '${MODE}'`);
  console.error(`Valid modes: ${VALID_MODES.join(', ')}`);
  process.exit(1);
}

if (Number.isNaN(LIMIT) || LIMIT < 0) {
  console.error(`Error: LIMIT must be a non-negative number, got '${LIMIT_ARG}'`);
  process.exit(1);
}

// LIMIT 0 means "fetch nothing", which only makes sense for the login run.
if (LIMIT === 0 && !SETUP) {
  console.error(`Error: LIMIT 0 only allowed with --setup flag`);
  process.exit(1);
}

if (LIMIT > 100) {
  console.error(`Error: LIMIT max is 100, got ${LIMIT}`);
  process.exit(1);
}

if ((MODE === 'search' || MODE === 'user') && !QUERY) {
  console.error(`Error: MODE '${MODE}' requires QUERY argument`);
  console.error(`Usage: node index.js twitter ${MODE} "<query>" [LIMIT]`);
  process.exit(1);
}

// ─── Paths ──────────────────────────────────────────────────────────────────

// Playwright profile directory; HT_SESSION_DIR overrides the per-source default.
const SESSION_DIR = process.env.HT_SESSION_DIR
  || path.join(os.homedir(), '.headless-twitter', 'sessions', SOURCE);
130
+
131
+ // ─── Read-Only Guards ───────────────────────────────────────────────────────
132
+
133
/**
 * Install the read-only guards on a Playwright page.
 *
 * Two layers are installed:
 *   1. a catch-all route that aborts POST/PUT/DELETE/PATCH requests and lets
 *      every other request through;
 *   2. an init script that swaps listeners for interaction events with no-ops
 *      and disables programmatic form submission.
 *
 * @param {object} page - Playwright page; must provide route() and addInitScript().
 * @returns {Promise<void>}
 */
async function installReadOnlyGuards(page) {
  const MUTATING_METHODS = new Set(['POST', 'PUT', 'DELETE', 'PATCH']);

  // Block mutating requests at the network level.
  await page.route('**/*', async (route, request) => {
    try {
      if (MUTATING_METHODS.has(request.method().toUpperCase())) {
        await route.abort('blockedbyclient');
      } else {
        await route.continue();
      }
    } catch (_) {
      // abort()/continue() reject when the page or context is closed while the
      // request is still in flight. The request is dead either way, so the
      // rejection is absorbed here instead of surfacing as an unhandled
      // rejection after the output has already been written.
    }
  });

  // Freeze interactive elements. The callback runs inside the page, so it
  // must stay self-contained (no references to Node-side variables).
  await page.addInitScript(() => {
    const BLOCKED = ['click', 'submit', 'input', 'change', 'keydown', 'keypress', 'keyup'];
    const orig = EventTarget.prototype.addEventListener;
    EventTarget.prototype.addEventListener = function(type, listener, opts) {
      if (BLOCKED.includes(type)) {
        return orig.call(this, type, () => {}, opts);
      }
      return orig.call(this, type, listener, opts);
    };
    HTMLFormElement.prototype.submit = () => {};
    HTMLFormElement.prototype.requestSubmit = () => {};
  });
}
158
+
159
+ // ─── XHR Interception ───────────────────────────────────────────────────────
160
+
161
/**
 * Start collecting the parsed body of every JSON response the page receives.
 *
 * The returned array is live: it keeps growing as further responses arrive.
 * Responses whose body cannot be parsed as JSON are skipped. In debug mode
 * each distinct URL is logged once and a summary is printed after two seconds.
 *
 * @param {object} page - Playwright page; must provide on('response', fn).
 * @returns {Promise<object[]>} The live list of captured JSON payloads.
 */
async function interceptXHR(page) {
  const bodies = [];
  const seenUrls = new Set();

  const onResponse = async (res) => {
    const type = res.headers()['content-type'] || '';
    if (type.includes('application/json')) {
      const address = res.url();
      const firstSighting = !seenUrls.has(address);
      if (firstSighting) {
        seenUrls.add(address);
        if (DEBUG) {
          console.error(`[DEBUG] XHR: ${address.substring(0, 100)}`);
        }
      }

      let parsed;
      try {
        parsed = await res.json();
      } catch (_) {
        // Body was not valid JSON (or is no longer available): skip it.
        return;
      }
      bodies.push(parsed);
    }
  };

  page.on('response', onResponse);

  if (DEBUG) {
    const printSummary = () => {
      console.error(`[DEBUG] Total XHR endpoints: ${seenUrls.size}`);
      console.error(`[DEBUG] Captured responses: ${bodies.length}`);
    };
    setTimeout(printSummary, 2000);
  }

  return bodies;
}
192
+
193
+ // ─── Tweet Extraction ───────────────────────────────────────────────────────
194
+
195
/**
 * Collect tweets from one captured GraphQL payload.
 *
 * The payload is walked recursively (at most 20 levels deep). Every object
 * tagged `__typename: 'Tweet'` that has a `legacy` section yields one entry;
 * `TweetWithVisibilityResults` wrappers are unwrapped first. Tweets are
 * de-duplicated by id within this single call.
 *
 * @param {*} data - Parsed JSON body of a response.
 * @returns {{id: string, text: string, author: string, likes: number,
 *   retweets: number, replies: number, time: string, url: string}[]}
 */
function extractTweetsFromGraphQL(data) {
  const tweets = [];
  const seen = new Set();
  let tweetNodeCount = 0;
  let tweetWithVisCount = 0;

  const walk = (obj, depth = 0) => {
    if (!obj || typeof obj !== 'object' || depth > 20) return;

    // Handle TweetWithVisibilityResults wrapper: the tweet sits under `.tweet`
    // and the author info may live on the wrapper rather than on the tweet.
    if (obj.__typename === 'TweetWithVisibilityResults' && obj.tweet) {
      tweetWithVisCount++;
      walk({ ...obj.tweet, core: obj.core || obj.tweet.core }, depth + 1);
      return;
    }

    if (obj.__typename === 'Tweet' && obj.legacy) {
      tweetNodeCount++;
      const t = obj.legacy;
      if (seen.has(t.id_str)) return;
      seen.add(t.id_str);

      // The screen name is looked up in both locations seen in payloads.
      const userResult = obj.core?.user_results?.result;
      const author = userResult?.legacy?.screen_name
        || userResult?.core?.screen_name
        || 'unknown';

      // NOTE(review): long-form tweets appear to carry their untruncated body
      // under note_tweet.note_tweet_results.result.text while legacy.full_text
      // is cut short — confirm against a live payload. The final '' fallback
      // guarantees `text` is always a string for the formatters.
      const longText = obj.note_tweet?.note_tweet_results?.result?.text;

      tweets.push({
        id: t.id_str,
        text: longText || t.full_text || '',
        author,
        likes: t.favorite_count || 0,
        retweets: t.retweet_count || 0,
        replies: t.reply_count || 0,
        time: t.created_at,
        url: `https://x.com/i/web/status/${t.id_str}`,
      });
    }

    // Keep descending: nested tweets (e.g. quoted ones) are collected too.
    Object.values(obj).forEach((v) => walk(v, depth + 1));
  };
  walk(data);

  if (DEBUG) {
    console.error(`[DEBUG] Extract: found ${tweetWithVisCount} TweetWithVisibilityResults, ${tweetNodeCount} Tweet nodes, ${tweets.length} complete tweets`);
  }

  return tweets;
}
248
+
249
+ // ─── Scroll ─────────────────────────────────────────────────────────────────
250
+
251
/**
 * Scroll the page down in steps so that more content gets loaded.
 *
 * Each step scrolls by two viewport heights and then waits `delayMs`.
 *
 * @param {object} page - Playwright page; must provide evaluate() and waitForTimeout().
 * @param {{maxScrolls?: number, delayMs?: number}} [options] - Step count
 *   (default 5) and pause after each step in milliseconds (default 1500).
 * @returns {Promise<void>}
 */
async function autoScroll(page, { maxScrolls = 5, delayMs = 1500 } = {}) {
  const scrollTwoViewports = () => window.scrollBy(0, window.innerHeight * 2);

  let remaining = maxScrolls;
  while (remaining > 0) {
    await page.evaluate(scrollTwoViewports);
    await page.waitForTimeout(delayMs);
    remaining -= 1;
  }
}
257
+
258
+ // ─── Output Formatters ──────────────────────────────────────────────────────
259
+
260
/**
 * Render tweets as a plain-text report for the terminal.
 *
 * @param {object[]} tweets - Entries with id/text/author/likes/retweets/replies/time/url.
 * @param {number} [limit=20] - Maximum number of tweets to render.
 * @param {string} [mode='timeline'] - timeline | following | search | user; selects
 *   the title and the hints printed when there is nothing to show.
 * @param {string} [query=''] - Search term or user name shown in the title.
 * @returns {string} The report, lines joined with "\n".
 */
function formatTweetsForTUI(tweets, limit = 20, mode = 'timeline', query = '') {
  // Missing or non-numeric counters are shown as 0 instead of throwing.
  const formatCount = (value) => (Number(value) || 0).toLocaleString();

  const lines = [];
  lines.push('\n' + '═'.repeat(120));

  // Header with context
  let title = '📊 Twitter Feed';
  if (mode === 'search') title = `🔍 Search: "${query}"`;
  if (mode === 'user') title = `👤 User: ${query}`;
  if (mode === 'following') title = '👥 Following';

  lines.push(` ${title}`);
  lines.push('═'.repeat(120) + '\n');

  const displayed = tweets.slice(0, limit);

  if (displayed.length === 0) {
    lines.push(' (no tweets found)');
    lines.push('\n Tips:');
    if (mode === 'search') {
      lines.push(' • Try a different search term');
      lines.push(' • Check spelling and filters');
    } else if (mode === 'user') {
      lines.push(' • User may not exist or have no tweets');
      lines.push(' • Try @username format');
    } else if (mode === 'timeline' || mode === 'following') {
      // Both feeds depend on the logged-in session, so the same hints apply;
      // previously "following" printed the "Tips:" header with nothing below.
      lines.push(' • Session may be expired, try: npm run setup');
      lines.push(' • Or wait a moment and retry');
    }
    lines.push('');
  } else {
    displayed.forEach((tweet, idx) => {
      lines.push(`[${idx + 1}] @${tweet.author}`);
      lines.push('');

      // Wrap at 110 characters. The `u` flag makes `.` consume whole code
      // points so an emoji is never split across two lines. A tweet without
      // text is rendered as a single empty line.
      const body = String(tweet.text ?? '');
      const wrapped = body.split('\n').flatMap((line) => line.match(/.{1,110}/gu) || [line]);
      wrapped.forEach((line) => {
        lines.push(` ${line}`);
      });
      lines.push('');

      lines.push(` 📍 ${tweet.url}`);
      lines.push(` ❤️ ${formatCount(tweet.likes)} | 🔄 ${formatCount(tweet.retweets)} | 💬 ${formatCount(tweet.replies)}`);
      lines.push(` 📅 ${tweet.time}`);
      lines.push('─'.repeat(120));
    });
  }

  return lines.join('\n');
}
312
+
313
+ // ─── Chrome CDP Auto-Launch ─────────────────────────────────────────────────
314
+
315
+ const CHROME_DEBUG_DIR = path.join(os.homedir(), '.config', 'google-chrome-debug');
316
+ const CHROME_DEFAULT_DIR = path.join(os.homedir(), '.config', 'google-chrome');
317
+
318
/**
 * Find a Chrome/Chromium executable on PATH.
 *
 * Candidates are probed in a fixed order with `which`; the first one that
 * resolves wins.
 *
 * @returns {string|null} The command name to launch, or null when none is found.
 */
function findChromeBinary() {
  const isOnPath = (name) => {
    try {
      execSync(`which ${name}`, { stdio: 'ignore' });
      return true;
    } catch (_) {
      // `which` exits non-zero when the command is missing.
      return false;
    }
  };

  const match = ['google-chrome', 'google-chrome-stable', 'chromium-browser', 'chromium']
    .find(isOnPath);
  return match === undefined ? null : match;
}
328
+
329
/**
 * Check whether a Chrome DevTools endpoint answers at `url`.
 *
 * Probes `<url>/json/version` with curl (2 second limit). The URL comes from
 * the command line, so it is validated and shell-quoted before it is placed in
 * the command. Only http(s) endpoints are probed, and an HTTP error status
 * counts as "not reachable".
 *
 * @param {string} url - CDP base URL, e.g. http://localhost:9222.
 * @returns {boolean} true when the probe succeeds, false otherwise (including
 *   when the URL is malformed or curl is unavailable).
 */
function isCdpReachable(url) {
  let endpoint;
  try {
    const parsed = new URL(`${String(url).replace(/\/+$/, '')}/json/version`);
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;
    endpoint = parsed.href;
  } catch (_) {
    // Not a valid absolute URL: nothing to probe.
    return false;
  }

  // POSIX single-quote escaping: inside '...' nothing is expanded by the shell.
  const quoted = `'${endpoint.replace(/'/g, `'\\''`)}'`;
  try {
    execSync(`curl -sf --max-time 2 ${quoted}`, { stdio: 'ignore' });
    return true;
  } catch (_) {
    return false;
  }
}
337
+
338
/**
 * Make sure the dedicated CDP profile directory contains a `Default` profile.
 *
 * When it does not exist yet, the `Default` profile (and, if possible, the
 * `Local State` file) is copied from the regular Chrome directory.
 *
 * @returns {boolean} true when the debug profile is available, false when
 *   there is no regular Chrome profile to copy from.
 */
function ensureDebugProfile() {
  const debugProfile = path.join(CHROME_DEBUG_DIR, 'Default');
  if (fs.existsSync(debugProfile)) {
    return true;
  }

  const sourceProfile = path.join(CHROME_DEFAULT_DIR, 'Default');
  const haveSource = fs.existsSync(sourceProfile);
  if (haveSource) {
    console.error('First run: copying Chrome profile for CDP mode...');
    fs.mkdirSync(CHROME_DEBUG_DIR, { recursive: true });
    execSync(`cp -r "${CHROME_DEFAULT_DIR}/Default" "${CHROME_DEBUG_DIR}/Default"`);
    try {
      execSync(`cp "${CHROME_DEFAULT_DIR}/Local State" "${CHROME_DEBUG_DIR}/Local State"`);
    } catch (_) {
      // Copying "Local State" is best-effort; a failure is ignored.
    }
    console.error('Profile copied.');
    return true;
  }

  console.error('Error: No Chrome profile found to copy');
  console.error(`Expected: ${CHROME_DEFAULT_DIR}/Default`);
  return false;
}
354
+
355
/**
 * Make sure a Chrome instance with a DevTools (CDP) endpoint is running.
 *
 * Returns immediately when the endpoint already answers. Otherwise a Chrome
 * binary is located, the debug profile is prepared, Chrome is started detached
 * on the port taken from `cdpUrl` (9222 when the URL has none) and the
 * endpoint is polled for up to 15 seconds.
 *
 * Exits the process with code 1 when no browser binary or profile is
 * available, or when the endpoint never comes up.
 *
 * @param {string} cdpUrl - CDP base URL, e.g. http://localhost:9222.
 * @returns {Promise<void>}
 */
async function autoLaunchChrome(cdpUrl) {
  if (isCdpReachable(cdpUrl)) {
    if (DEBUG) console.error('[DEBUG] CDP already reachable');
    return;
  }

  const port = new URL(cdpUrl).port || '9222';
  const bin = findChromeBinary();
  if (!bin) {
    console.error('Error: Chrome/Chromium not found in PATH');
    process.exit(1);
  }

  if (!ensureDebugProfile()) process.exit(1);

  // Stop only a stale instance that is using the debug profile. The debug
  // Chrome runs from its own --user-data-dir, so the user's regular browser
  // can keep running and must not be force-killed along with it.
  // The directory is regex-escaped (pkill -f takes a pattern) and then
  // single-quoted for the shell.
  const profilePattern = `--user-data-dir=${CHROME_DEBUG_DIR}`
    .replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const quotedPattern = `'${profilePattern.replace(/'/g, `'\\''`)}'`;
  try {
    execSync(`pkill -9 -f -- ${quotedPattern} 2>/dev/null`);
  } catch (_) {
    // pkill exits non-zero when nothing matched; that is the normal case.
  }
  // Remove stale lock
  try { fs.unlinkSync(path.join(CHROME_DEBUG_DIR, 'SingletonLock')); } catch (_) {}

  console.error(`Launching Chrome with CDP on port ${port}...`);
  const child = spawn(bin, [
    `--remote-debugging-port=${port}`,
    // NOTE(review): '*' lets any origin open the DevTools websocket; consider
    // dropping this flag if the Playwright connection works without it.
    '--remote-allow-origins=*',
    `--user-data-dir=${CHROME_DEBUG_DIR}`,
    '--no-first-run',
    '--restore-last-session',
  ], { stdio: 'ignore', detached: true });
  // Detach so Chrome keeps running after this process exits.
  child.unref();

  // Wait for CDP to come up (max 15s)
  for (let i = 0; i < 30; i++) {
    await new Promise((r) => setTimeout(r, 500));
    if (isCdpReachable(cdpUrl)) {
      if (DEBUG) console.error('[DEBUG] Chrome CDP ready');
      return;
    }
  }

  console.error('Error: Chrome launched but CDP not responding after 15s');
  process.exit(1);
}
397
+
398
+ // ─── Main ──────────────────────────────────────────────────────────────────
399
+
400
/**
 * Program entry point: open a page, load the requested feed, collect tweets
 * from the captured JSON responses and print them.
 *
 * Two browser back-ends exist:
 *   - CDP mode (--use-chrome): attach to a running Chrome and open a new tab;
 *   - default: a Playwright persistent context stored in SESSION_DIR.
 *
 * With --setup (default back-end only) the browser is visible and the
 * read-only guards are installed only after the login was detected: the
 * guards block POST requests and neutralise input listeners, which would make
 * logging in impossible.
 *
 * The page/context is always closed, also on failure. Exits with code 1 on
 * any error.
 *
 * @returns {Promise<void>}
 */
async function run() {
  // --setup has no meaning in CDP mode.
  const interactiveSetup = SETUP && !USE_CHROME;
  let browser = null; // only set for CDP mode
  let context = null;
  let page = null;
  let failed = false;

  try {
    if (USE_CHROME) {
      // ─── CDP: auto-launch + connect to Chrome ───
      await autoLaunchChrome(CDP_URL);
      if (DEBUG) console.error(`[DEBUG] Connecting to Chrome via CDP: ${CDP_URL}`);
      browser = await chromium.connectOverCDP(CDP_URL);
      context = browser.contexts()[0];
      if (!context) {
        console.error('Error: No browser context found in Chrome');
        process.exit(1);
      }
      page = await context.newPage();
      if (DEBUG) console.error(`[DEBUG] Connected to Chrome, opened new tab`);
    } else {
      // ─── Default: Playwright persistent context ───
      const defaultProfile = path.join(SESSION_DIR, 'Default');
      if (!SETUP && !fs.existsSync(defaultProfile)) {
        console.error(`Error: Twitter session not found`);
        console.error('');
        console.error('Options:');
        console.error(' 1. First time setup: npm run setup');
        console.error(' 2. Use existing Chrome: node index.js twitter timeline \'\' 20 --use-chrome');
        process.exit(1);
      }

      context = await chromium.launchPersistentContext(SESSION_DIR, {
        // Visible only for the interactive login; headless for every later run,
        // as documented.
        headless: !SETUP,
        args: [
          '--no-sandbox',
          '--disable-blink-features=AutomationControlled',
          '--disable-extensions',
        ],
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
      });
      page = await context.newPage();
    }

    if (!interactiveSetup) {
      await installReadOnlyGuards(page);
    }
    const captured = await interceptXHR(page);

    // A leading "@" is not part of the profile path, and the handle is encoded
    // so that it cannot steer the navigation to another path.
    const handle = encodeURIComponent(QUERY.replace(/^@+/, ''));
    const urls = {
      timeline: 'https://twitter.com/home',
      search: `https://twitter.com/search?q=${encodeURIComponent(QUERY)}&f=live`,
      user: `https://twitter.com/${handle}`,
      following: 'https://twitter.com/following',
    };

    const gotoUrl = urls[MODE] || urls.timeline;
    if (DEBUG) console.error(`[DEBUG] Navigating to: ${gotoUrl}`);

    await page.goto(gotoUrl, { waitUntil: 'networkidle', timeout: 30000 });
    if (DEBUG) console.error(`[DEBUG] Page loaded`);

    // Setup: wait for user login (not applicable in CDP mode)
    if (interactiveSetup) {
      if (DEBUG) console.error(`[DEBUG] Waiting for login... (keep browser window open)`);
      try {
        await page.waitForURL('**/home', { timeout: 5 * 60 * 1000 });
        if (DEBUG) console.error(`[DEBUG] Login detected!`);
      } catch (e) {
        console.error(`[DEBUG] Setup timeout - no login detected after 5 minutes`);
        throw e;
      }
      // From here on the session is only read, so the guards go back on.
      await installReadOnlyGuards(page);
    }

    await autoScroll(page, { maxScrolls: 3, delayMs: 1500 });
    if (DEBUG) console.error(`[DEBUG] Scrolling complete`);

    // Give late responses a moment to arrive before extracting.
    await page.waitForTimeout(2000);
    if (DEBUG) console.error(`[DEBUG] Wait complete, before extraction`);

    if (DEBUG) {
      const pageTitle = await page.title();
      const tweetElements = await page.evaluate(() => {
        return document.querySelectorAll('[data-testid="tweet"]').length;
      });
      console.error(`[DEBUG] Page title: ${pageTitle}`);
      console.error(`[DEBUG] Tweet elements in DOM: ${tweetElements}`);
    }

    // The extractor de-duplicates within one response only; the same tweet can
    // show up in several responses, so ids are tracked across all of them.
    const tweets = [];
    const seenIds = new Set();
    for (const data of captured) {
      for (const tweet of extractTweetsFromGraphQL(data)) {
        if (seenIds.has(tweet.id)) continue;
        seenIds.add(tweet.id);
        tweets.push(tweet);
      }
      if (tweets.length >= LIMIT) break;
    }

    // Output
    const result = tweets.slice(0, LIMIT);
    if (JSON_OUT) {
      process.stdout.write(JSON.stringify({
        source: SOURCE,
        mode: MODE,
        query: QUERY,
        count: result.length,
        tweets: result,
      }));
    } else {
      process.stdout.write(formatTweetsForTUI(result, LIMIT, MODE, QUERY));
    }
  } catch (e) {
    process.stderr.write(`Error: ${e.message}\n`);
    failed = true;
  } finally {
    // Cleanup: CDP mode closes only the tab and then drops the connection,
    // default mode closes the whole context.
    try {
      if (USE_CHROME) {
        if (page) await page.close();
        if (browser) await browser.close();
      } else if (context) {
        await context.close();
      }
    } catch (_) {
      // Best-effort: a failure while closing must not mask the real outcome.
    }
  }

  if (failed) process.exit(1);

  if (interactiveSetup) {
    console.error('\n✅ Setup complete! Session saved.');
    console.error('Now run: node index.js twitter timeline \'\' 20');
    console.error('Browser closed. It will run in background on future commands.');
  }
}
524
+
525
+ run();
package/mcp-server.js ADDED
@@ -0,0 +1,243 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * headless-twitter MCP Server
5
+ * Exposes tweet fetching functions to Claude
6
+ */
7
+
8
+ const { chromium } = require('playwright');
9
+ const path = require('path');
10
+ const os = require('os');
11
+
12
+ const SESSION_DIR = process.env.HT_SESSION_DIR
13
+ || path.join(os.homedir(), '.headless-twitter', 'sessions', 'twitter');
14
+
15
+ // ─── Core Functions ─────────────────────────────────────────────────────────
16
+
17
/**
 * Install the read-only guards on a Playwright page.
 *
 * A catch-all route aborts POST/PUT/DELETE/PATCH requests and lets everything
 * else through; an init script replaces listeners for interaction events with
 * no-ops and disables programmatic form submission.
 *
 * @param {object} page - Playwright page; must provide route() and addInitScript().
 * @returns {Promise<void>}
 */
async function installReadOnlyGuards(page) {
  const MUTATING_METHODS = new Set(['POST', 'PUT', 'DELETE', 'PATCH']);

  await page.route('**/*', async (route, request) => {
    try {
      if (MUTATING_METHODS.has(request.method().toUpperCase())) {
        await route.abort('blockedbyclient');
      } else {
        await route.continue();
      }
    } catch (_) {
      // abort()/continue() reject when the page or context is closed while the
      // request is still pending; absorbing that here prevents an unhandled
      // rejection from taking the process down.
    }
  });

  // Runs inside the page: must not reference anything from the Node side.
  await page.addInitScript(() => {
    const BLOCKED = ['click', 'submit', 'input', 'change', 'keydown', 'keypress', 'keyup'];
    const orig = EventTarget.prototype.addEventListener;
    EventTarget.prototype.addEventListener = function(type, listener, opts) {
      if (BLOCKED.includes(type)) return orig.call(this, type, () => {}, opts);
      return orig.call(this, type, listener, opts);
    };
    HTMLFormElement.prototype.submit = () => {};
    HTMLFormElement.prototype.requestSubmit = () => {};
  });
}
38
+
39
/**
 * Start collecting the parsed body of every JSON response the page receives.
 *
 * The returned array is live and keeps growing while the page is in use.
 * Responses whose body cannot be parsed are skipped.
 *
 * @param {object} page - Playwright page; must provide on('response', fn).
 * @returns {Promise<object[]>} The live list of captured JSON payloads.
 */
async function interceptXHR(page) {
  const bodies = [];

  const collect = async (res) => {
    const headerValue = res.headers()['content-type'];
    const isJson = (headerValue || '').includes('application/json');
    if (!isJson) return;

    let payload;
    try {
      payload = await res.json();
    } catch (_) {
      // Not parseable as JSON: ignore this response.
      return;
    }
    bodies.push(payload);
  };

  page.on('response', collect);
  return bodies;
}
51
+
52
/**
 * Collect tweets from one captured GraphQL payload.
 *
 * The payload is walked recursively (at most 20 levels deep). Every object
 * tagged `__typename: 'Tweet'` that has a `legacy` section yields one entry;
 * `TweetWithVisibilityResults` wrappers are unwrapped first. Tweets are
 * de-duplicated by id within this single call.
 *
 * @param {*} data - Parsed JSON body of a response.
 * @returns {{id: string, text: string, author: string, likes: number,
 *   retweets: number, replies: number, time: string, url: string}[]}
 */
function extractTweetsFromGraphQL(data) {
  const tweets = [];
  const seen = new Set();

  const walk = (obj, depth = 0) => {
    if (!obj || typeof obj !== 'object' || depth > 20) return;

    // Wrapper: the tweet sits under `.tweet`; author info may be on the wrapper.
    if (obj.__typename === 'TweetWithVisibilityResults' && obj.tweet) {
      walk({ ...obj.tweet, core: obj.core || obj.tweet.core }, depth + 1);
      return;
    }

    if (obj.__typename === 'Tweet' && obj.legacy) {
      const t = obj.legacy;
      if (seen.has(t.id_str)) return;
      seen.add(t.id_str);

      // The screen name is looked up under both `legacy` and `core`, matching
      // the CLI extractor, so newer payload shapes do not end up as "unknown".
      const userResult = obj.core?.user_results?.result;
      const author = userResult?.legacy?.screen_name
        || userResult?.core?.screen_name
        || 'unknown';

      // NOTE(review): long-form tweets appear to carry their untruncated body
      // under note_tweet.note_tweet_results.result.text — confirm against a
      // live payload. The '' fallback keeps `text` a string in every case.
      const longText = obj.note_tweet?.note_tweet_results?.result?.text;

      tweets.push({
        id: t.id_str,
        text: longText || t.full_text || '',
        author,
        likes: t.favorite_count || 0,
        retweets: t.retweet_count || 0,
        replies: t.reply_count || 0,
        time: t.created_at,
        url: `https://x.com/i/web/status/${t.id_str}`,
      });
    }

    // Keep descending: nested tweets (e.g. quoted ones) are collected too.
    Object.values(obj).forEach((v) => walk(v, depth + 1));
  };
  walk(data);
  return tweets;
}
93
+
94
/**
 * Scroll the page down step by step so that more content gets loaded.
 *
 * Every step scrolls by two viewport heights and then pauses for `delayMs`.
 *
 * @param {object} page - Playwright page; must provide evaluate() and waitForTimeout().
 * @param {{maxScrolls?: number, delayMs?: number}} [options] - Step count
 *   (default 3) and pause after each step in milliseconds (default 1500).
 * @returns {Promise<void>}
 */
async function autoScroll(page, { maxScrolls = 3, delayMs = 1500 } = {}) {
  const jumpDown = () => window.scrollBy(0, window.innerHeight * 2);

  let step = 0;
  while (step < maxScrolls) {
    await page.evaluate(jumpDown);
    await page.waitForTimeout(delayMs);
    step += 1;
  }
}
100
+
101
/**
 * Open the saved Playwright session headlessly, load one feed and return the
 * tweets found in the captured JSON responses.
 *
 * The browser context is always closed, also when loading fails.
 *
 * @param {string} mode - timeline | search | user | following (anything else
 *   falls back to the timeline).
 * @param {string} query - Search term or user handle; ignored by the feeds
 *   that need none. A leading "@" on a handle is accepted.
 * @param {number} limit - Maximum number of tweets to return.
 * @returns {Promise<object[]>} At most `limit` tweets, unique by id.
 * @throws {Error} "Failed to fetch tweets: …" with the original error as `cause`.
 */
async function fetchTweets(mode, query, limit) {
  let context = null;
  try {
    context = await chromium.launchPersistentContext(SESSION_DIR, {
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
      ],
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    });

    const page = await context.newPage();
    await installReadOnlyGuards(page);
    const captured = await interceptXHR(page);

    // A leading "@" is not part of the profile path, and the handle is encoded
    // so that it cannot steer the navigation to another path.
    const handle = encodeURIComponent(String(query ?? '').replace(/^@+/, ''));
    const urls = {
      timeline: 'https://twitter.com/home',
      search: `https://twitter.com/search?q=${encodeURIComponent(query)}&f=live`,
      user: `https://twitter.com/${handle}`,
      following: 'https://twitter.com/following',
    };

    await page.goto(urls[mode] || urls.timeline, { waitUntil: 'networkidle', timeout: 30000 });
    await autoScroll(page);
    // Give late responses a moment to arrive before extracting.
    await page.waitForTimeout(2000);

    // The extractor de-duplicates within one response only; the same tweet can
    // show up in several responses, so ids are tracked across all of them.
    const tweets = [];
    const seenIds = new Set();
    for (const data of captured) {
      for (const tweet of extractTweetsFromGraphQL(data)) {
        if (seenIds.has(tweet.id)) continue;
        seenIds.add(tweet.id);
        tweets.push(tweet);
      }
      if (tweets.length >= limit) break;
    }

    return tweets.slice(0, limit);
  } catch (error) {
    throw new Error(`Failed to fetch tweets: ${error.message}`, { cause: error });
  } finally {
    if (context) {
      try {
        await context.close();
      } catch (_) {
        // Best-effort: a failure while closing must not mask the real outcome.
      }
    }
  }
}
140
+
141
+ // ─── MCP Tools ──────────────────────────────────────────────────────────────
142
+
143
// Tool catalogue exposed to the client: one entry per fetch operation, each
// with a JSON-schema description of its input. All tools take an optional
// `limit`; only the default differs, so that property is built by a helper.
const tools = (() => {
  const limitOf = (defaultCount) => ({
    type: 'number',
    description: `Number of tweets to fetch (default: ${defaultCount}, max: 100)`,
    default: defaultCount,
  });

  const timelineTool = {
    name: 'fetch_tweets_timeline',
    description: 'Fetch tweets from your home timeline',
    inputSchema: {
      type: 'object',
      properties: { limit: limitOf(20) },
    },
  };

  const searchTool = {
    name: 'search_tweets',
    description: 'Search for tweets by query',
    inputSchema: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: 'Search query (e.g., "golang", "ai agents")',
        },
        limit: limitOf(15),
      },
      required: ['query'],
    },
  };

  const userTool = {
    name: 'fetch_user_tweets',
    description: 'Fetch tweets from a specific user',
    inputSchema: {
      type: 'object',
      properties: {
        username: {
          type: 'string',
          description: 'Twitter username (with or without @)',
        },
        limit: limitOf(10),
      },
      required: ['username'],
    },
  };

  const followingTool = {
    name: 'fetch_following',
    description: 'Fetch tweets from accounts you follow',
    inputSchema: {
      type: 'object',
      properties: { limit: limitOf(20) },
    },
  };

  return [timelineTool, searchTool, userTool, followingTool];
})();
211
+
212
+ // ─── Tool Handlers ──────────────────────────────────────────────────────────
213
+
214
/**
 * Dispatch a tool invocation to the matching fetch operation.
 *
 * `limit` falls back to the default declared in the tool's schema when it is
 * missing or not a number >= 1; fractional values are rounded down and the
 * result is capped at 100.
 *
 * @param {string} name - Tool name, one of the entries in `tools`.
 * @param {object} [input] - Tool arguments (`query`, `username`, `limit`).
 * @returns {Promise<object[]>} The fetched tweets.
 * @throws {Error} When a required argument is missing or the tool is unknown.
 */
async function handleToolCall(name, input) {
  const args = input ?? {};

  // Per-tool defaults, mirroring the `default` values in the tool schemas.
  const defaultLimits = new Map([
    ['fetch_tweets_timeline', 20],
    ['search_tweets', 15],
    ['fetch_user_tweets', 10],
    ['fetch_following', 20],
  ]);
  const requested = Number(args.limit);
  const limit = Number.isFinite(requested) && requested >= 1
    ? Math.min(Math.floor(requested), 100)
    : defaultLimits.get(name) ?? 20;

  switch (name) {
    case 'fetch_tweets_timeline':
      return await fetchTweets('timeline', '', limit);

    case 'search_tweets':
      if (!args.query) throw new Error('query is required');
      return await fetchTweets('search', args.query, limit);

    case 'fetch_user_tweets': {
      if (!args.username) throw new Error('username is required');
      // Normalise to the "@name" form regardless of how the caller wrote it.
      const username = args.username.startsWith('@') ? args.username : `@${args.username}`;
      return await fetchTweets('user', username, limit);
    }

    case 'fetch_following':
      return await fetchTweets('following', '', limit);

    default:
      throw new Error(`Unknown tool: ${name}`);
  }
}
237
+
238
+ // ─── Export for Claude Code ────────────────────────────────────────────────
239
+
240
+ module.exports = {
241
+ tools,
242
+ handleToolCall,
243
+ };
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "headless-twitter",
3
+ "version": "1.1.0",
4
+ "description": "Headless Twitter/X reader. Playwright XHR interception. Strictly read-only. Zero mutations.",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "headless-twitter": "./index.js"
8
+ },
9
+ "files": [
10
+ "index.js",
11
+ "mcp-server.js",
12
+ "README.md",
13
+ "EXAMPLES.md",
14
+ "LICENSE"
15
+ ],
16
+ "scripts": {
17
+ "setup": "node index.js twitter timeline '' 0 --setup",
18
+ "chrome": "node index.js twitter timeline '' 20 --use-chrome"
19
+ },
20
+ "keywords": [
21
+ "twitter",
22
+ "x",
23
+ "headless",
24
+ "playwright",
25
+ "read-only",
26
+ "scraper",
27
+ "feed-reader",
28
+ "xhr"
29
+ ],
30
+ "author": "om-ashish-soni",
31
+ "license": "MIT",
32
+ "dependencies": {
33
+ "playwright": "^1.49.0"
34
+ },
35
+ "engines": {
36
+ "node": ">=18.0.0"
37
+ }
38
+ }