spectrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
+ const https = require('https')
2
+ const crypto = require('crypto')
3
+
/**
 * X (Twitter) platform adapter.
 * Methods: Cookie API (GraphQL) with OAuth 1.0a fallback.
 */
class XAdapter {
  /**
   * Execute an action on X.
   * @param {string} action - post, like, retweet, delete
   * @param {object} params - { account, text, mediaIds, tweetId, _cookies }
   * @param {object} ctx - { auth, browse }
   * @returns {Promise<object>} Action result; for `post` this is { tweetId, url }
   * @throws {Error} For unsupported or unimplemented actions, missing auth, or API errors
   */
  async execute(action, params, ctx) {
    switch (action) {
      case 'post':
        return this._post(params, ctx)
      case 'like':
        return this._like(params, ctx)
      case 'retweet':
        return this._retweet(params, ctx)
      case 'delete':
        return this._delete(params, ctx)
      default:
        throw new Error(`Unsupported X action: ${action}`)
    }
  }

  /**
   * Post a tweet using whichever credentials are available.
   *
   * `params._cookies` (when present) or the result of `ctx.auth.getCookies('x', account)`
   * is inspected by shape: an array is treated as browser cookies and goes to the
   * GraphQL endpoint, an object carrying an `oauth` property goes to the OAuth 1.0a
   * endpoint. Anything else is reported as missing auth.
   * NOTE(review): the exact shape returned by the auth store is not visible here;
   * both shapes are accepted because this adapter reads both — confirm against the store.
   *
   * @param {object} params - { text, account, _cookies }
   * @param {object} ctx - { auth }
   * @returns {Promise<{tweetId: (string|undefined), url: (string|null)}>}
   * @throws {Error} When no usable credentials are found
   */
  async _post(params, ctx) {
    const { text, account, _cookies } = params

    // Prefer credentials handed in by the caller; otherwise look them up.
    const creds = _cookies || await ctx.auth.getCookies('x', account)

    // Cookie API (GraphQL) needs a cookie list.
    if (Array.isArray(creds)) {
      return this._graphqlPost(text, creds)
    }

    // OAuth 1.0a if configured. Checking the shape (instead of mere truthiness)
    // keeps OAuth credentials from being fed to the cookie path, where
    // `cookies.find` would throw a TypeError.
    if (creds?.oauth) {
      return this._oauthPost(text, creds.oauth)
    }

    throw new Error(`No auth available for X account ${account}. Run: spectrawl login x --account ${account}`)
  }

  /**
   * Create a tweet through the web client's GraphQL CreateTweet mutation.
   * @param {string} text - Tweet text
   * @param {Array<{name: string, value: string}>} cookies - Session cookies; must include `ct0`
   * @returns {Promise<{tweetId: (string|undefined), url: (string|null)}>}
   * @throws {Error} When the `ct0` CSRF cookie is missing or the API reports errors
   */
  async _graphqlPost(text, cookies) {
    // The ct0 cookie value is echoed back in the X-Csrf-Token header.
    const csrfToken = cookies.find(c => c.name === 'ct0')?.value
    if (!csrfToken) throw new Error('Missing ct0 CSRF token in X cookies')

    const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ')

    // The same query id appears in the URL path and in the request body.
    const queryId = 'bDE2rBtZb3uyrczSZ_pI9g'

    const body = JSON.stringify({
      variables: {
        tweet_text: text,
        dark_request: false,
        media: { media_entities: [], possibly_sensitive: false },
        semantic_annotation_ids: []
      },
      features: {
        communities_web_enable_tweet_community_results_fetch: true,
        c9s_tweet_anatomy_moderator_badge_enabled: true,
        tweetypie_unmention_optimization_enabled: true,
        responsive_web_edit_tweet_api_enabled: true,
        graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
        view_counts_everywhere_api_enabled: true,
        longform_notetweets_consumption_enabled: true,
        responsive_web_twitter_article_tweet_consumption_enabled: true,
        tweet_awards_web_tipping_enabled: false,
        creator_subscriptions_quote_tweet_preview_enabled: false,
        longform_notetweets_rich_text_read_enabled: true,
        longform_notetweets_inline_media_enabled: true,
        articles_preview_enabled: true,
        rweb_video_timestamps_enabled: true,
        rweb_tipjar_consumption_enabled: true,
        responsive_web_graphql_exclude_directive_enabled: true,
        verified_phone_label_enabled: false,
        freedom_of_speech_not_reach_fetch_enabled: true,
        standardized_nudges_misinfo: true,
        tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
        responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
        responsive_web_graphql_timeline_navigation_enabled: true,
        responsive_web_enhance_cards_enabled: false
      },
      queryId
    })

    const data = await postJson(
      `https://x.com/i/api/graphql/${queryId}/CreateTweet`,
      body,
      {
        'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
        'Content-Type': 'application/json',
        'Cookie': cookieStr,
        'X-Csrf-Token': csrfToken,
        'X-Twitter-Auth-Type': 'OAuth2Session',
        'X-Twitter-Active-User': 'yes'
      }
    )

    if (data.errors) {
      throw new Error(`X API error: ${data.errors[0]?.message || JSON.stringify(data.errors)}`)
    }

    const tweetId = data.data?.create_tweet?.tweet_results?.result?.rest_id
    return { tweetId, url: tweetId ? `https://x.com/i/status/${tweetId}` : null }
  }

  /**
   * Create a tweet through the v2 API using OAuth 1.0a user credentials.
   * @param {string} text - Tweet text
   * @param {object} oauth - { consumerKey, consumerSecret, accessToken, accessTokenSecret }
   * @returns {Promise<{tweetId: string, url: string}>}
   * @throws {Error} When the response carries no tweet id (error payloads included)
   */
  async _oauthPost(text, oauth) {
    // OAuth 1.0a — for accounts with API keys
    const { consumerKey, consumerSecret, accessToken, accessTokenSecret } = oauth

    const url = 'https://api.x.com/2/tweets'
    const body = JSON.stringify({ text })

    const authHeader = generateOAuthHeader('POST', url, {}, {
      consumerKey, consumerSecret, accessToken, accessTokenSecret
    })

    const data = await postJson(url, body, {
      'Authorization': authHeader,
      'Content-Type': 'application/json'
    })

    // Without an id the tweet was not created; surface the API's explanation
    // instead of reporting success with a ".../status/undefined" URL.
    const tweetId = data.data?.id
    if (!tweetId) {
      const reason = data.errors?.[0]?.message ||
        data.errors?.[0]?.detail ||
        data.detail ||
        data.title ||
        JSON.stringify(data)
      throw new Error(`X API error: ${reason}`)
    }

    return { tweetId, url: `https://x.com/i/status/${tweetId}` }
  }

  /** Like a tweet. Not implemented yet; always throws. */
  async _like(params, ctx) {
    // TODO: implement like via GraphQL
    throw new Error('X like not yet implemented')
  }

  /** Retweet a tweet. Not implemented yet; always throws. */
  async _retweet(params, ctx) {
    // TODO: implement retweet via GraphQL
    throw new Error('X retweet not yet implemented')
  }

  /** Delete a tweet. Not implemented yet; always throws. */
  async _delete(params, ctx) {
    // TODO: implement delete via GraphQL
    throw new Error('X delete not yet implemented')
  }
}
144
+
/**
 * Build an OAuth 1.0a `Authorization` header value signed with HMAC-SHA1.
 *
 * The signature base string is assembled from the upper-cased method, the URL
 * without its query string, and the sorted, percent-encoded parameter list.
 * Query parameters found in `url` are signed together with `params` and the
 * oauth_* protocol parameters; only the oauth_* parameters are emitted in the header.
 *
 * @param {string} method - HTTP method of the request being signed
 * @param {string} url - Absolute request URL; may carry a query string
 * @param {object} params - Extra request parameters to include in the signature
 * @param {object} creds - { consumerKey, consumerSecret, accessToken, accessTokenSecret }
 * @returns {string} Header value of the form `OAuth key="value", ...`
 */
function generateOAuthHeader(method, url, params, creds) {
  const oauthParams = {
    oauth_consumer_key: creds.consumerKey,
    oauth_nonce: crypto.randomBytes(16).toString('hex'),
    oauth_signature_method: 'HMAC-SHA1',
    oauth_timestamp: Math.floor(Date.now() / 1000).toString(),
    oauth_token: creds.accessToken,
    oauth_version: '1.0'
  }

  // The base string URI excludes query and fragment; the query pairs are
  // signed as ordinary parameters instead.
  const target = new URL(url)
  const baseUrl = `${target.origin}${target.pathname}`

  // Encode first, then sort by name and by value, so repeated names order deterministically.
  const byNameThenValue = ([nameA, valueA], [nameB, valueB]) => {
    if (nameA !== nameB) return nameA < nameB ? -1 : 1
    if (valueA !== valueB) return valueA < valueB ? -1 : 1
    return 0
  }
  const signedPairs = [
    ...target.searchParams.entries(),
    ...Object.entries({ ...params, ...oauthParams })
  ].map(([name, value]) => [encodeRFC3986(name), encodeRFC3986(value)])
  signedPairs.sort(byNameThenValue)

  const paramStr = signedPairs.map(([name, value]) => `${name}=${value}`).join('&')
  const baseStr = `${method.toUpperCase()}&${encodeRFC3986(baseUrl)}&${encodeRFC3986(paramStr)}`
  const signingKey = `${encodeRFC3986(creds.consumerSecret)}&${encodeRFC3986(creds.accessTokenSecret)}`

  oauthParams.oauth_signature = crypto
    .createHmac('sha1', signingKey)
    .update(baseStr)
    .digest('base64')

  const header = Object.keys(oauthParams)
    .sort()
    .map(k => `${encodeRFC3986(k)}="${encodeRFC3986(oauthParams[k])}"`)
    .join(', ')

  return `OAuth ${header}`
}
173
+
/**
 * Percent-encode a value the way OAuth 1.0a expects: `encodeURIComponent`
 * plus the five characters it leaves untouched (! ' ( ) *).
 * @param {*} str - Value to encode (converted to a string by encodeURIComponent)
 * @returns {string} Encoded string with upper-case hex escapes
 */
function encodeRFC3986(str) {
  const toPercentHex = ch => '%' + ch.charCodeAt(0).toString(16).toUpperCase()
  const encoded = encodeURIComponent(str)
  return encoded.replace(/[!'()*]/g, toPercentHex)
}
177
+
/**
 * POST a pre-serialized body over HTTPS and parse the response as JSON.
 *
 * The promise resolves with the parsed body regardless of the HTTP status code;
 * callers inspect the payload for API-level errors.
 *
 * @param {string} url - Absolute https URL (an explicit port is honoured)
 * @param {string} body - Request body, already serialized
 * @param {object} headers - Request headers; Content-Length is added here
 * @returns {Promise<*>} Parsed JSON response
 * @throws {Error} Rejects on network errors, on a 15s timeout, or when the
 *   response body is not valid JSON
 */
function postJson(url, body, headers) {
  return new Promise((resolve, reject) => {
    const urlObj = new URL(url)
    const opts = {
      hostname: urlObj.hostname,
      // URL.port is '' when the URL uses the scheme's default port.
      port: urlObj.port || 443,
      path: urlObj.pathname + urlObj.search,
      method: 'POST',
      headers: { ...headers, 'Content-Length': Buffer.byteLength(body) }
    }
    const req = https.request(opts, res => {
      // Collect raw bytes and decode once: decoding chunk by chunk corrupts
      // multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = []
      res.on('data', chunk => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)))
      res.on('error', reject)
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString('utf8')
        try {
          resolve(JSON.parse(raw))
        } catch (e) {
          reject(new Error(`Invalid response (HTTP ${res.statusCode}): ${raw.slice(0, 200)}`))
        }
      })
    })
    req.on('error', reject)
    req.setTimeout(15000, () => { req.destroy(); reject(new Error('X API timeout')) })
    req.write(body)
    req.end()
  })
}
201
+
202
+ module.exports = { XAdapter }
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Form filler — handles platform-specific input quirks.
3
+ * Solves: contentEditable divs, shadow DOMs, React controlled inputs.
4
+ */
5
+
/**
 * Replace the contents of a contentEditable element (X compose box, Notion, etc.).
 * Playwright's regular .fill() doesn't work on these, so the text is inserted
 * in-page through document.execCommand.
 * @param {object} page - Playwright page
 * @param {string} selector - CSS selector of the editable element
 * @param {string} text - Text that replaces the current contents
 * @returns {Promise<void>}
 */
async function fillContentEditable(page, selector, text) {
  // Runs inside the page: focus the element, select everything, then insert
  // the new text over the selection. Does nothing when the selector has no match.
  const replaceContents = (args) => {
    const target = document.querySelector(args.selector)
    if (!target) return
    target.focus()
    document.execCommand('selectAll', false, null)
    document.execCommand('insertText', false, args.text)
  }

  await page.click(selector)
  await page.waitForTimeout(200)
  await page.evaluate(replaceContents, { selector, text })
}
24
+
/**
 * Fill a React controlled input or textarea.
 * React ignores plain .value changes, so the field is cleared through the
 * native value setter plus an `input` event, then typed character by character.
 * @param {object} page - Playwright page
 * @param {string} selector - CSS selector of the input or textarea
 * @param {string} text - Text to type
 * @returns {Promise<void>}
 */
async function fillReactInput(page, selector, text) {
  await page.click(selector)
  await page.waitForTimeout(100)

  // Clear existing value
  await page.evaluate((selector) => {
    const el = document.querySelector(selector)
    if (!el) return

    // The native setter must come from the element's own interface: the
    // HTMLInputElement setter cannot be invoked on a textarea.
    let proto = null
    if (el instanceof window.HTMLTextAreaElement) {
      proto = window.HTMLTextAreaElement.prototype
    } else if (el instanceof window.HTMLInputElement) {
      proto = window.HTMLInputElement.prototype
    }
    const nativeValueSetter = proto && Object.getOwnPropertyDescriptor(proto, 'value')?.set

    if (nativeValueSetter) {
      nativeValueSetter.call(el, '')
    } else {
      // Neither an input nor a textarea: fall back to a plain assignment.
      el.value = ''
    }
    el.dispatchEvent(new Event('input', { bubbles: true }))
  }, selector)

  // Type character by character (most reliable for React).
  // keyboard.type is used rather than keyboard.press because press only accepts
  // key names it knows, so characters outside that set would reject the call.
  for (const char of text) {
    await page.keyboard.type(char)
    await page.waitForTimeout(10 + Math.random() * 30)
  }
}
51
+
/**
 * Fill an input that lives inside a shadow DOM host.
 * The host and inner selectors are chained with Playwright's `>>` syntax and
 * handed to page.fill.
 * @param {object} page - Playwright page
 * @param {string} hostSelector - Selector of the shadow host element
 * @param {string} inputSelector - Selector of the input inside the host
 * @param {string} text - Value to fill in
 * @returns {Promise<void>}
 */
async function fillShadowInput(page, hostSelector, inputSelector, text) {
  const piercingSelector = `${hostSelector} >> ${inputSelector}`
  await page.fill(piercingSelector, text)
}
60
+
/**
 * Smart fill — detects the kind of input behind `selector` and uses the
 * matching fill strategy.
 * @param {object} page - Playwright page
 * @param {string} selector - CSS selector of the target element
 * @param {string} text - Text to enter
 * @param {object} [opts] - { hostSelector, inputSelector, delay }; the first two
 *   apply to shadow DOM hosts, `delay` (ms per key, default 20) to the
 *   click-and-type fallback
 * @returns {Promise<*>} Whatever the chosen strategy returns; undefined for the fallback
 */
async function smartFill(page, selector, text, opts = {}) {
  const inputType = await page.evaluate((selector) => {
    const el = document.querySelector(selector)
    if (!el) return 'not_found'

    // An explicit contenteditable="false" is a non-empty string, so a bare
    // truthiness test on the attribute would wrongly treat it as editable.
    const editableAttr = el.getAttribute('contenteditable')
    const attrEnablesEditing = typeof editableAttr === 'string' &&
      editableAttr.trim().toLowerCase() !== 'false'
    if (el.contentEditable === 'true' || attrEnablesEditing) return 'contentEditable'
    if (el.shadowRoot) return 'shadow'

    // Check if React-controlled (has __reactFiber or __reactInternalInstance)
    const keys = Object.keys(el)
    if (keys.some(k => k.startsWith('__react'))) return 'react'

    return 'standard'
  }, selector)

  switch (inputType) {
    case 'contentEditable':
      return fillContentEditable(page, selector, text)
    case 'react':
      return fillReactInput(page, selector, text)
    case 'shadow':
      return fillShadowInput(page, opts.hostSelector || selector, opts.inputSelector || 'input', text)
    case 'standard':
      return page.fill(selector, text)
    default:
      // Last resort: click and type. `??` keeps an explicit delay of 0.
      await page.click(selector)
      await page.keyboard.type(text, { delay: opts.delay ?? 20 })
  }
}
93
+
94
+ module.exports = { fillContentEditable, fillReactInput, fillShadowInput, smartFill }
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Act engine — authenticated actions on platforms.
3
+ * Delegates to platform-specific adapters.
4
+ * Includes rate limiting, deduplication, and dead letter queue.
5
+ */
6
+
7
+ const crypto = require('crypto')
8
+ const { XAdapter } = require('./adapters/x')
9
+ const { RedditAdapter } = require('./adapters/reddit')
10
+ const { DevtoAdapter } = require('./adapters/devto')
11
+ const { HashnodeAdapter } = require('./adapters/hashnode')
12
+ const { LinkedInAdapter } = require('./adapters/linkedin')
13
+ const { IHAdapter } = require('./adapters/ih')
14
+ const { RateLimiter } = require('./rate-limiter')
15
+
// Registry of platform adapters, keyed by every platform name and alias that
// callers may pass in. Each key gets its own adapter instance.
const adapters = Object.fromEntries([
  ['x', new XAdapter()],
  ['twitter', new XAdapter()],
  ['reddit', new RedditAdapter()],
  ['devto', new DevtoAdapter()],
  ['dev.to', new DevtoAdapter()],
  ['hashnode', new HashnodeAdapter()],
  ['linkedin', new LinkedInAdapter()],
  ['ih', new IHAdapter()],
  ['indiehackers', new IHAdapter()]
])
27
+
/**
 * Runs authenticated actions against platform adapters, applying auth lookup,
 * rate limiting and duplicate-content checks before delegating.
 */
class ActEngine {
  /**
   * @param {object} config - Engine configuration; reads `cache.path` and `rateLimit`
   * @param {object} auth - Auth store exposing `getCookies(platform, account)`
   * @param {object} browse - Browse engine handed through to adapters
   */
  constructor(config, auth, browse) {
    this.config = config
    this.auth = auth
    this.browse = browse

    // Tolerate a missing config object; every setting below has a fallback.
    const settings = config || {}
    this.rateLimiter = new RateLimiter({
      dbPath: settings.cache?.path?.replace('cache.db', 'ratelimit.db') || './data/ratelimit.db',
      limits: settings.rateLimit || {}
    })
  }

  /**
   * Execute an action on a platform.
   * @param {string} platform - Platform name
   * @param {string} action - Action name (post, comment, like, etc.)
   * @param {object} params - Action parameters; never modified by this method
   * @returns {Promise<object>} `{ success: true, ...adapterResult }` on success,
   *   otherwise `{ success: false, error, detail, suggestion }` (plus `retryAfter`
   *   when rate limited)
   */
  async execute(platform, action, params = {}) {
    const adapter = this._getAdapter(platform)
    if (!adapter) {
      return {
        success: false,
        error: 'unsupported_platform',
        detail: `No adapter for platform "${platform}". Supported: ${Object.keys(adapters).join(', ') || 'none yet'}`,
        suggestion: 'Platform adapters are being added. Check back soon.'
      }
    }

    // Work on a copy so the credentials injected below never end up on the
    // caller's object.
    const actionParams = { ...params }

    // Get auth for this platform/account
    const account = actionParams.account
    if (account) {
      const cookies = await this.auth.getCookies(platform, account)
      if (!cookies) {
        return {
          success: false,
          error: 'auth_missing',
          detail: `No auth found for ${platform}/${account}.`,
          suggestion: `Run: spectrawl login ${platform} --account ${account}`
        }
      }
      actionParams._cookies = cookies
    }

    // Check rate limits
    const rateCheck = this.rateLimiter.check(platform, action, actionParams)
    if (!rateCheck.allowed) {
      return {
        success: false,
        error: 'rate_limited',
        detail: rateCheck.reason,
        retryAfter: rateCheck.retryAfter,
        suggestion: `Wait ${rateCheck.retryAfter}s or adjust limits in spectrawl.json`
      }
    }

    // Check deduplication (same content posted in last 24h)
    const contentHash = this._contentHash(platform, action, actionParams)

    if (contentHash && this.rateLimiter.isDuplicate(platform, contentHash)) {
      return {
        success: false,
        error: 'duplicate',
        detail: `Same content already posted to ${platform} in the last 24h`,
        suggestion: 'Change the content or wait 24h'
      }
    }

    try {
      const result = await adapter.execute(action, actionParams, {
        auth: this.auth,
        browse: this.browse
      })

      // Log success
      this.rateLimiter.log(platform, action, {
        account, contentHash, status: 'success'
      })

      return { success: true, ...result }
    } catch (err) {
      // Log failure
      this.rateLimiter.log(platform, action, {
        account, contentHash, status: 'failed',
        error: err.message, retryCount: actionParams._retryCount || 0
      })

      return {
        success: false,
        error: categorizeError(err),
        detail: err.message,
        suggestion: getSuggestion(err, platform, account)
      }
    }
  }

  /**
   * Fingerprint the content of an action for duplicate detection.
   * @returns {string|null} MD5 hex digest, or null when there is no text, title or body
   */
  _contentHash(platform, action, { text, title, body }) {
    if (!text && !title && !body) return null

    // Without a body the hashed string keeps its earlier shape, so hashes
    // already stored in the log remain comparable. The body is appended when
    // present so that posts differing only in body no longer collide.
    let material = `${platform}:${action}:${text || ''}${title || ''}`
    if (body) material += `\n${body}`

    return crypto.createHash('md5').update(material).digest('hex')
  }

  /**
   * Look up the adapter registered under `platform`.
   * Only own keys of the registry count, so names such as "constructor" do not
   * resolve to inherited Object.prototype members.
   * @returns {object|null}
   */
  _getAdapter(platform) {
    if (!Object.prototype.hasOwnProperty.call(adapters, platform)) return null
    return adapters[platform] || null
  }

  /**
   * Register a platform adapter.
   * @param {string} platform - Name callers will use for this platform
   * @param {object} adapter - Object exposing `execute(action, params, ctx)`
   */
  static registerAdapter(platform, adapter) {
    adapters[platform] = adapter
  }
}
136
+
/**
 * Map an error to one of the engine's coarse error categories by looking for
 * keywords in its message.
 * @param {*} err - Thrown value; normally an Error, but anything is tolerated
 * @returns {string} auth_expired | captcha_required | rate_limited |
 *   fingerprint_blocked | timeout | unknown
 */
function categorizeError(err) {
  // Thrown values are not guaranteed to be Error instances; fall back to the
  // value itself so a missing `.message` cannot crash the error path.
  const msg = String(err?.message ?? err ?? '').toLowerCase()

  if (msg.includes('cookie') || msg.includes('auth') || msg.includes('login')) return 'auth_expired'
  if (msg.includes('captcha')) return 'captcha_required'
  // Match "rate" as a word ("rate limit", "rate-limited", "ratelimit",
  // "rate_limited") rather than as a substring of words like "generate".
  if (/\brate(?:\b|[\s_-]?limit)/.test(msg) || msg.includes('429')) return 'rate_limited'
  if (msg.includes('fingerprint') || msg.includes('blocked')) return 'fingerprint_blocked'
  if (msg.includes('timeout')) return 'timeout'
  return 'unknown'
}
146
+
/**
 * Produce a user-facing hint for an error, based on its category.
 * @param {*} err - Error passed through to categorizeError
 * @param {string} platform - Platform the action targeted
 * @param {string} [account] - Account name, added to the login hint when given
 * @returns {string} Suggested next step
 */
function getSuggestion(err, platform, account) {
  switch (categorizeError(err)) {
    case 'auth_expired': {
      const accountFlag = account ? ` --account ${account}` : ''
      return `Run: spectrawl login ${platform}${accountFlag}`
    }
    case 'captcha_required':
      return `Manual intervention needed. Run: spectrawl login ${platform} --manual`
    case 'rate_limited':
      return `Wait and retry. Check rate limits in spectrawl.json`
    case 'fingerprint_blocked':
      return `Try with stealth mode: spectrawl browse --stealth`
    case 'timeout':
      return `Network issue. Check proxy settings.`
    default:
      return 'Check logs for details.'
  }
}
158
+
159
+ module.exports = { ActEngine }
@@ -0,0 +1,143 @@
1
+ const Database = require('better-sqlite3')
2
+ const path = require('path')
3
+ const fs = require('fs')
4
+
/**
 * Rate limiter for platform actions.
 * Keeps an action history in SQLite and uses it to enforce per-platform
 * limits, detect duplicate content, and list failed actions for retry.
 */
class RateLimiter {
  /**
   * @param {object} [config] - { dbPath, limits }; `limits` maps a platform
   *   name to { postsPerHour, minDelayMs }
   */
  constructor(config = {}) {
    const dbFile = config.dbPath || './data/ratelimit.db'
    const dbDir = path.dirname(dbFile)
    fs.mkdirSync(dbDir, { recursive: true })

    this.db = new Database(dbFile)
    this.db.pragma('journal_mode = WAL')
    this.limits = config.limits || {}

    this._init()
  }

  /** Create the action_log table and its indexes when they do not exist yet. */
  _init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS action_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        platform TEXT NOT NULL,
        account TEXT,
        action TEXT NOT NULL,
        content_hash TEXT,
        status TEXT DEFAULT 'success',
        error TEXT,
        retry_count INTEGER DEFAULT 0,
        created_at INTEGER NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_action_log_platform
        ON action_log(platform, created_at);

      CREATE INDEX IF NOT EXISTS idx_action_log_hash
        ON action_log(content_hash, created_at);
    `)
  }

  /**
   * Check if an action is allowed under rate limits.
   * Platforms without configured limits are always allowed. The hourly quota
   * is evaluated before the minimum delay.
   * @returns {{ allowed: boolean, reason?: string, retryAfter?: number }}
   */
  check(platform, action, params = {}) {
    const rules = this.limits[platform]
    if (!rules) return { allowed: true }

    const nowSec = Math.floor(Date.now() / 1000)

    return this._hourlyQuotaVerdict(rules, platform, action, nowSec) ||
      this._minDelayVerdict(rules, platform, nowSec) ||
      { allowed: true }
  }

  /**
   * Apply the `postsPerHour` rule to successful actions of the same kind.
   * @returns {object|null} A blocking verdict, or null when the rule passes or is unset
   */
  _hourlyQuotaVerdict(rules, platform, action, nowSec) {
    if (!rules.postsPerHour) return null

    const windowStart = nowSec - 3600
    const usage = this.db.prepare(
      'SELECT COUNT(*) as cnt FROM action_log WHERE platform = ? AND action = ? AND created_at > ? AND status = ?'
    ).get(platform, action, windowStart, 'success')

    if (!(usage.cnt >= rules.postsPerHour)) return null

    // The quota frees up when the oldest action in the window turns an hour old.
    const earliest = this.db.prepare(
      'SELECT created_at FROM action_log WHERE platform = ? AND action = ? AND created_at > ? AND status = ? ORDER BY created_at ASC LIMIT 1'
    ).get(platform, action, windowStart, 'success')

    let retryAfter = 3600
    if (earliest) {
      retryAfter = earliest.created_at + 3600 - nowSec
    }

    return {
      allowed: false,
      reason: `Rate limit: max ${rules.postsPerHour} ${action}s per hour on ${platform}`,
      retryAfter
    }
  }

  /**
   * Apply the `minDelayMs` rule against the latest successful action on the platform.
   * @returns {object|null} A blocking verdict, or null when the rule passes or is unset
   */
  _minDelayVerdict(rules, platform, nowSec) {
    if (!rules.minDelayMs) return null

    const latest = this.db.prepare(
      'SELECT created_at FROM action_log WHERE platform = ? AND status = ? ORDER BY created_at DESC LIMIT 1'
    ).get(platform, 'success')
    if (!latest) return null

    // Timestamps are stored in whole seconds; convert to ms for the comparison.
    const elapsedMs = (nowSec - latest.created_at) * 1000
    if (elapsedMs < rules.minDelayMs) {
      const waitSec = Math.ceil((rules.minDelayMs - elapsedMs) / 1000)
      return {
        allowed: false,
        reason: `Min delay: wait ${waitSec}s between actions on ${platform}`,
        retryAfter: waitSec
      }
    }

    return null
  }

  /**
   * Check if this action is a duplicate (same content hash logged as a success
   * on the platform within the window).
   * @param {string} platform
   * @param {string} contentHash
   * @param {number} [windowSeconds=86400]
   * @returns {boolean}
   */
  isDuplicate(platform, contentHash, windowSeconds = 86400) {
    const since = Math.floor(Date.now() / 1000) - windowSeconds
    const match = this.db.prepare(
      'SELECT id FROM action_log WHERE platform = ? AND content_hash = ? AND created_at > ? AND status = ?'
    ).get(platform, contentHash, since, 'success')

    return Boolean(match)
  }

  /**
   * Log an action (success or failure).
   * @param {string} platform
   * @param {string} action
   * @param {object} [params] - { account, contentHash, status, error, retryCount }
   */
  log(platform, action, params = {}) {
    const createdAt = Math.floor(Date.now() / 1000)
    const row = [
      platform,
      params.account || null,
      action,
      params.contentHash || null,
      params.status || 'success',
      params.error || null,
      params.retryCount || 0,
      createdAt
    ]

    this.db.prepare(`
      INSERT INTO action_log (platform, account, action, content_hash, status, error, retry_count, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(...row)
  }

  /**
   * Get failed actions for retry (dead letter queue), oldest first.
   * @param {number} [maxRetries=3] - Only rows with fewer retries are returned
   * @returns {object[]} Matching action_log rows
   */
  getFailedActions(maxRetries = 3) {
    const query = this.db.prepare(
      'SELECT * FROM action_log WHERE status = ? AND retry_count < ? ORDER BY created_at ASC'
    )
    return query.all('failed', maxRetries)
  }

  /** Close the underlying database handle. */
  close() {
    this.db.close()
  }
}
142
+
143
+ module.exports = { RateLimiter }