spectrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +179 -0
- package/index.d.ts +90 -0
- package/package.json +53 -0
- package/src/act/adapters/devto.js +103 -0
- package/src/act/adapters/hashnode.js +89 -0
- package/src/act/adapters/ih.js +251 -0
- package/src/act/adapters/linkedin.js +106 -0
- package/src/act/adapters/reddit.js +160 -0
- package/src/act/adapters/x.js +202 -0
- package/src/act/form-filler.js +94 -0
- package/src/act/index.js +159 -0
- package/src/act/rate-limiter.js +143 -0
- package/src/auth/index.js +132 -0
- package/src/auth/refresh.js +111 -0
- package/src/browse/camoufox.js +164 -0
- package/src/browse/index.js +278 -0
- package/src/browse/install-stealth.js +188 -0
- package/src/cache.js +82 -0
- package/src/cli.js +160 -0
- package/src/config.js +65 -0
- package/src/events.js +57 -0
- package/src/index.js +108 -0
- package/src/mcp.js +195 -0
- package/src/search/engines/brave.js +62 -0
- package/src/search/engines/ddg.js +192 -0
- package/src/search/engines/google-cse.js +50 -0
- package/src/search/engines/jina.js +76 -0
- package/src/search/engines/searxng.js +69 -0
- package/src/search/engines/serper.js +64 -0
- package/src/search/index.js +104 -0
- package/src/search/scraper.js +170 -0
- package/src/search/summarizer.js +156 -0
- package/src/server.js +111 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
const https = require('https')
|
|
2
|
+
const crypto = require('crypto')
|
|
3
|
+
|
|
4
|
+
/**
 * X (Twitter) platform adapter.
 *
 * Posting goes through the cookie-authenticated GraphQL endpoint when browser
 * session cookies are supplied, and through the OAuth 1.0a REST endpoint when
 * OAuth credentials are available instead.
 */
class XAdapter {
  /**
   * Execute an action on X.
   * @param {string} action - post, like, retweet, delete
   * @param {object} params - { account, text, mediaIds, tweetId, _cookies }
   * @param {object} ctx - { auth, browse }
   * @returns {Promise<object>} Whatever the selected handler resolves to.
   * @throws {Error} If the action name is not supported.
   */
  async execute(action, params, ctx) {
    switch (action) {
      case 'post':
        return this._post(params, ctx)
      case 'like':
        return this._like(params, ctx)
      case 'retweet':
        return this._retweet(params, ctx)
      case 'delete':
        return this._delete(params, ctx)
      default:
        throw new Error(`Unsupported X action: ${action}`)
    }
  }

  /**
   * Publish a tweet with whichever credentials are available.
   * @param {object} params - Reads text, account and _cookies.
   * @param {object} ctx - Reads ctx.auth.getCookies(platform, account).
   * @returns {Promise<{tweetId: (string|undefined), url: (string|null)}>}
   * @throws {Error} If neither cookies nor OAuth credentials can be found.
   */
  async _post(params, ctx) {
    const { text, account, _cookies } = params

    // Cookie API (GraphQL) first. Only an array can be a cookie jar:
    // _graphqlPost calls .find()/.map() on it, so anything else would crash.
    if (Array.isArray(_cookies)) {
      return this._graphqlPost(text, _cookies)
    }

    // The stored credential record may have been passed in place of cookies.
    // NOTE(review): assumes that record has the same { oauth } shape that
    // ctx.auth.getCookies() is read with below - confirm against the auth store.
    if (_cookies?.oauth) {
      return this._oauthPost(text, _cookies.oauth)
    }

    // Try OAuth 1.0a if configured
    const oauthCreds = await ctx.auth.getCookies('x', account)
    if (oauthCreds?.oauth) {
      return this._oauthPost(text, oauthCreds.oauth)
    }

    throw new Error(`No auth available for X account ${account}. Run: spectrawl login x --account ${account}`)
  }

  /**
   * Create a tweet through the web client's GraphQL CreateTweet mutation.
   * The request always carries an empty media list, so params.mediaIds is not
   * used by this path.
   * @param {string} text - Tweet text.
   * @param {Array<{name: string, value: string}>} cookies - Session cookies; must include ct0.
   * @returns {Promise<{tweetId: (string|undefined), url: (string|null)}>}
   * @throws {Error} If ct0 is missing or the response contains errors.
   */
  async _graphqlPost(text, cookies) {
    // The ct0 cookie value is sent back as the X-Csrf-Token header.
    const csrfToken = cookies.find(c => c.name === 'ct0')?.value
    if (!csrfToken) throw new Error('Missing ct0 CSRF token in X cookies')

    const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ')

    // The same query id appears in the URL path and in the request body.
    const queryId = 'bDE2rBtZb3uyrczSZ_pI9g'

    const body = JSON.stringify({
      variables: {
        tweet_text: text,
        dark_request: false,
        media: { media_entities: [], possibly_sensitive: false },
        semantic_annotation_ids: []
      },
      features: {
        communities_web_enable_tweet_community_results_fetch: true,
        c9s_tweet_anatomy_moderator_badge_enabled: true,
        tweetypie_unmention_optimization_enabled: true,
        responsive_web_edit_tweet_api_enabled: true,
        graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
        view_counts_everywhere_api_enabled: true,
        longform_notetweets_consumption_enabled: true,
        responsive_web_twitter_article_tweet_consumption_enabled: true,
        tweet_awards_web_tipping_enabled: false,
        creator_subscriptions_quote_tweet_preview_enabled: false,
        longform_notetweets_rich_text_read_enabled: true,
        longform_notetweets_inline_media_enabled: true,
        articles_preview_enabled: true,
        rweb_video_timestamps_enabled: true,
        rweb_tipjar_consumption_enabled: true,
        responsive_web_graphql_exclude_directive_enabled: true,
        verified_phone_label_enabled: false,
        freedom_of_speech_not_reach_fetch_enabled: true,
        standardized_nudges_misinfo: true,
        tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
        responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
        responsive_web_graphql_timeline_navigation_enabled: true,
        responsive_web_enhance_cards_enabled: false
      },
      queryId
    })

    const data = await postJson(
      `https://x.com/i/api/graphql/${queryId}/CreateTweet`,
      body,
      {
        'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
        'Content-Type': 'application/json',
        'Cookie': cookieStr,
        'X-Csrf-Token': csrfToken,
        'X-Twitter-Auth-Type': 'OAuth2Session',
        'X-Twitter-Active-User': 'yes'
      }
    )

    if (data.errors) {
      throw new Error(`X API error: ${data.errors[0]?.message || JSON.stringify(data.errors)}`)
    }

    const tweetId = data.data?.create_tweet?.tweet_results?.result?.rest_id
    return { tweetId, url: tweetId ? `https://x.com/i/status/${tweetId}` : null }
  }

  /**
   * Create a tweet through the REST endpoint, signed with OAuth 1.0a.
   * @param {string} text - Tweet text.
   * @param {object} oauth - { consumerKey, consumerSecret, accessToken, accessTokenSecret }
   * @returns {Promise<{tweetId: string, url: string}>}
   * @throws {Error} If the response does not contain the id of a created tweet.
   */
  async _oauthPost(text, oauth) {
    // OAuth 1.0a - for accounts with API keys
    const { consumerKey, consumerSecret, accessToken, accessTokenSecret } = oauth

    const url = 'https://api.x.com/2/tweets'
    const body = JSON.stringify({ text })

    // The JSON body is not part of the signature, hence the empty params object.
    const authHeader = generateOAuthHeader('POST', url, {}, {
      consumerKey, consumerSecret, accessToken, accessTokenSecret
    })

    const data = await postJson(url, body, {
      'Authorization': authHeader,
      'Content-Type': 'application/json'
    })

    // Without this check a rejected request would be reported as a success
    // with the URL https://x.com/i/status/undefined.
    const tweetId = data.data?.id
    if (!tweetId) {
      const reason = data.errors?.[0]?.message || data.detail || data.title || JSON.stringify(data)
      throw new Error(`X API error: ${reason}`)
    }

    return { tweetId, url: `https://x.com/i/status/${tweetId}` }
  }

  /** Like a tweet. Not implemented yet; always throws. */
  async _like(params, ctx) {
    // TODO: implement like via GraphQL
    throw new Error('X like not yet implemented')
  }

  /** Retweet a tweet. Not implemented yet; always throws. */
  async _retweet(params, ctx) {
    // TODO: implement retweet via GraphQL
    throw new Error('X retweet not yet implemented')
  }

  /** Delete a tweet. Not implemented yet; always throws. */
  async _delete(params, ctx) {
    // TODO: implement delete via GraphQL
    throw new Error('X delete not yet implemented')
  }
}
|
|
144
|
+
|
|
145
|
+
/**
 * Build an OAuth 1.0a `Authorization` header value signed with HMAC-SHA1.
 *
 * @param {string} method - HTTP method; upper-cased before signing.
 * @param {string} url - Request URL used in the signature base string.
 * @param {object} params - Extra request parameters to include in the
 *   signature. They are signed but not emitted in the header.
 * @param {object} creds - { consumerKey, consumerSecret, accessToken, accessTokenSecret }
 * @returns {string} Header value of the form `OAuth key="value", ...`.
 */
function generateOAuthHeader(method, url, params, creds) {
  const oauthParams = {
    oauth_consumer_key: creds.consumerKey,
    oauth_nonce: crypto.randomBytes(16).toString('hex'),
    oauth_signature_method: 'HMAC-SHA1',
    oauth_timestamp: Math.floor(Date.now() / 1000).toString(),
    oauth_token: creds.accessToken,
    oauth_version: '1.0'
  }

  // RFC 5849 section 3.4.1.3.2: percent-encode names and values first, then
  // sort the encoded pairs. oauth_* entries win over same-named request params.
  const paramStr = Object.entries({ ...params, ...oauthParams })
    .map(([key, value]) => [encodeRFC3986(key), encodeRFC3986(value)])
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    .map(([key, value]) => `${key}=${value}`)
    .join('&')

  // RFC 5849 section 3.4.1.1: the method goes into the base string in uppercase.
  const baseStr = `${String(method).toUpperCase()}&${encodeRFC3986(url)}&${encodeRFC3986(paramStr)}`
  const signingKey = `${encodeRFC3986(creds.consumerSecret)}&${encodeRFC3986(creds.accessTokenSecret)}`

  oauthParams.oauth_signature = crypto
    .createHmac('sha1', signingKey)
    .update(baseStr)
    .digest('base64')

  const header = Object.keys(oauthParams)
    .sort()
    .map(k => `${encodeRFC3986(k)}="${encodeRFC3986(oauthParams[k])}"`)
    .join(', ')

  return `OAuth ${header}`
}
|
|
173
|
+
|
|
174
|
+
/**
 * Percent-encode a value like encodeURIComponent, additionally escaping the
 * characters ! ' ( ) * which encodeURIComponent leaves untouched.
 * @param {*} str - Value to encode (stringified by encodeURIComponent).
 * @returns {string} The encoded text, with upper-case hex digits for the extra escapes.
 */
function encodeRFC3986(str) {
  const toPercentHex = (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  const encoded = encodeURIComponent(str)
  return encoded.replace(/[!'()*]/g, toPercentHex)
}
|
|
177
|
+
|
|
178
|
+
/**
 * POST a pre-serialized body over HTTPS and parse the response as JSON.
 *
 * The HTTP status code is not inspected: any response whose body parses as
 * JSON resolves, so callers must check the payload for errors themselves.
 *
 * @param {string} url - Absolute https URL.
 * @param {string} body - Request body, already serialized.
 * @param {object} headers - Request headers; Content-Length is added here.
 * @returns {Promise<*>} The parsed response body.
 * @throws {Error} Rejects on network errors, on a 15 second timeout, or when
 *   the response body is not valid JSON.
 */
function postJson(url, body, headers) {
  return new Promise((resolve, reject) => {
    const urlObj = new URL(url)
    const opts = {
      hostname: urlObj.hostname,
      // URL.port is '' for the scheme default; pass undefined so https uses 443.
      port: urlObj.port || undefined,
      path: urlObj.pathname + urlObj.search,
      method: 'POST',
      headers: { ...headers, 'Content-Length': Buffer.byteLength(body) }
    }
    const req = https.request(opts, res => {
      // Collect raw buffers and decode once: decoding chunk by chunk corrupts
      // a multi-byte UTF-8 character that straddles a chunk boundary.
      const chunks = []
      res.on('data', chunk => chunks.push(chunk))
      res.on('error', reject)
      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8')
        try { resolve(JSON.parse(data)) }
        catch (e) { reject(new Error(`Invalid response: ${data.slice(0, 200)}`)) }
      })
    })
    req.on('error', reject)
    // reject() runs synchronously here, so the timeout message wins over the
    // error that destroy() triggers afterwards.
    req.setTimeout(15000, () => { req.destroy(); reject(new Error('X API timeout')) })
    req.write(body)
    req.end()
  })
}
|
|
201
|
+
|
|
202
|
+
module.exports = { XAdapter }
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Form filler — handles platform-specific input quirks.
|
|
3
|
+
* Solves: contentEditable divs, shadow DOMs, React controlled inputs.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Replace the contents of a contentEditable element (X compose box, Notion, etc.).
 * Playwright's regular .fill() does not work on these, so the text is inserted
 * through document.execCommand inside the page.
 *
 * @param {object} page - Playwright-style page (click, waitForTimeout, evaluate).
 * @param {string} selector - CSS selector; also handed to document.querySelector.
 * @param {string} text - Text that replaces the current contents.
 * @returns {Promise<void>}
 */
async function fillContentEditable(page, selector, text) {
  // Executed in the page context, so it must not close over anything here.
  const replaceContents = (args) => {
    const target = document.querySelector(args.selector)
    if (!target) return
    target.focus()
    // Select whatever is there so that insertText overwrites it.
    document.execCommand('selectAll', false, null)
    document.execCommand('insertText', false, args.text)
  }

  await page.click(selector)
  await page.waitForTimeout(200)
  await page.evaluate(replaceContents, { selector, text })
}
|
|
24
|
+
|
|
25
|
+
/**
 * Fill a React controlled input or textarea.
 * React ignores plain .value assignments, so the field is cleared through the
 * native value setter plus an `input` event and then typed into key by key.
 *
 * @param {object} page - Playwright-style page (click, waitForTimeout, evaluate, keyboard).
 * @param {string} selector - CSS selector; also handed to document.querySelector.
 * @param {string} text - Text to type.
 * @returns {Promise<void>}
 */
async function fillReactInput(page, selector, text) {
  await page.click(selector)
  await page.waitForTimeout(100)

  // Clear existing value
  await page.evaluate((selector) => {
    const el = document.querySelector(selector)
    if (!el) return
    // The native setter has to come from the element's own prototype: calling
    // the HTMLInputElement setter on a <textarea> throws.
    const proto = el instanceof window.HTMLTextAreaElement
      ? window.HTMLTextAreaElement.prototype
      : window.HTMLInputElement.prototype
    const nativeValueSetter = Object.getOwnPropertyDescriptor(proto, 'value').set
    nativeValueSetter.call(el, '')
    el.dispatchEvent(new Event('input', { bubbles: true }))
  }, selector)

  // Type character by character (most reliable for React).
  // keyboard.type() is used rather than keyboard.press(): per the Playwright
  // docs it falls back to a plain input event for characters that are not on
  // a US keyboard (accents, emoji, CJK), which press() is not expected to
  // accept.
  for (const char of text) {
    await page.keyboard.type(char)
    // Small random pause between keystrokes.
    await page.waitForTimeout(10 + Math.random() * 30)
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Fill an input that lives inside a shadow DOM.
 * The host and inner selectors are chained with Playwright's `>>` syntax and
 * the combined selector is passed to page.fill().
 *
 * @param {object} page - Playwright-style page exposing fill().
 * @param {string} hostSelector - Selector of the shadow host.
 * @param {string} inputSelector - Selector of the input inside the host.
 * @param {string} text - Value to fill in.
 * @returns {Promise<void>}
 */
async function fillShadowInput(page, hostSelector, inputSelector, text) {
  const chained = ''.concat(hostSelector, ' >> ', inputSelector)
  await page.fill(chained, text)
}
|
|
60
|
+
|
|
61
|
+
/**
 * Smart fill - detects the kind of input behind `selector` and delegates to
 * the matching fill strategy.
 *
 * @param {object} page - Playwright-style page (evaluate, fill, click, keyboard).
 * @param {string} selector - CSS selector; also handed to document.querySelector.
 * @param {string} text - Text to enter.
 * @param {object} [opts] - Optional settings.
 * @param {string} [opts.hostSelector] - Shadow host selector (defaults to `selector`).
 * @param {string} [opts.inputSelector] - Selector inside the shadow root (defaults to 'input').
 * @param {number} [opts.delay] - Per-key delay in ms for the fallback path (defaults to 20).
 * @returns {Promise<void>}
 */
async function smartFill(page, selector, text, opts = {}) {
  const inputType = await page.evaluate((selector) => {
    const el = document.querySelector(selector)
    if (!el) return 'not_found'

    // contenteditable="false" is an explicit opt-out, so a bare truthiness
    // check on the attribute would misclassify it as editable.
    const attr = el.getAttribute('contenteditable')
    const attrEnablesEditing = attr !== null && attr.trim().toLowerCase() !== 'false'
    if (el.contentEditable === 'true' || attrEnablesEditing) return 'contentEditable'
    if (el.shadowRoot) return 'shadow'

    // Check if React-controlled (has __reactFiber or __reactInternalInstance)
    const keys = Object.keys(el)
    if (keys.some(k => k.startsWith('__react'))) return 'react'

    return 'standard'
  }, selector)

  switch (inputType) {
    case 'contentEditable':
      return fillContentEditable(page, selector, text)
    case 'react':
      return fillReactInput(page, selector, text)
    case 'shadow':
      return fillShadowInput(page, opts.hostSelector || selector, opts.inputSelector || 'input', text)
    case 'standard':
      return page.fill(selector, text)
    default:
      // Last resort (including 'not_found'): click and type.
      await page.click(selector)
      // ?? rather than || so that an explicit delay of 0 is honoured.
      await page.keyboard.type(text, { delay: opts.delay ?? 20 })
  }
}
|
|
93
|
+
|
|
94
|
+
module.exports = { fillContentEditable, fillReactInput, fillShadowInput, smartFill }
|
package/src/act/index.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Act engine — authenticated actions on platforms.
|
|
3
|
+
* Delegates to platform-specific adapters.
|
|
4
|
+
* Includes rate limiting, deduplication, and dead letter queue.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const crypto = require('crypto')
|
|
8
|
+
const { XAdapter } = require('./adapters/x')
|
|
9
|
+
const { RedditAdapter } = require('./adapters/reddit')
|
|
10
|
+
const { DevtoAdapter } = require('./adapters/devto')
|
|
11
|
+
const { HashnodeAdapter } = require('./adapters/hashnode')
|
|
12
|
+
const { LinkedInAdapter } = require('./adapters/linkedin')
|
|
13
|
+
const { IHAdapter } = require('./adapters/ih')
|
|
14
|
+
const { RateLimiter } = require('./rate-limiter')
|
|
15
|
+
|
|
16
|
+
// Platform name -> adapter instance. Alias names (twitter, dev.to,
// indiehackers) each get their own instance of the same adapter class.
const adapters = Object.fromEntries(
  [
    ['x', XAdapter],
    ['twitter', XAdapter],
    ['reddit', RedditAdapter],
    ['devto', DevtoAdapter],
    ['dev.to', DevtoAdapter],
    ['hashnode', HashnodeAdapter],
    ['linkedin', LinkedInAdapter],
    ['ih', IHAdapter],
    ['indiehackers', IHAdapter]
  ].map(([platform, Adapter]) => [platform, new Adapter()])
)
|
|
27
|
+
|
|
28
|
+
/**
 * Runs authenticated actions on a platform through its adapter, applying
 * auth lookup, rate limiting and duplicate-content detection first and
 * logging every attempt to the rate limiter.
 */
class ActEngine {
  /**
   * @param {object} config - Reads config.cache.path and config.rateLimit.
   * @param {object} auth - Auth store exposing getCookies(platform, account).
   * @param {object} browse - Browse engine handed through to adapters.
   */
  constructor(config, auth, browse) {
    this.config = config
    this.auth = auth
    this.browse = browse
    this.rateLimiter = new RateLimiter({
      // NOTE(review): when cache.path does not contain 'cache.db' the replace
      // is a no-op and the rate limiter opens the cache path itself - confirm
      // that this is acceptable.
      dbPath: config.cache?.path?.replace('cache.db', 'ratelimit.db') || './data/ratelimit.db',
      limits: config.rateLimit || {}
    })
  }

  /**
   * Execute an action on a platform.
   * @param {string} platform - Platform name
   * @param {string} action - Action name (post, comment, like, etc.)
   * @param {object} params - Action parameters. Not modified by this method.
   * @returns {Promise<object>} `{ success: true, ...adapterResult }` or
   *   `{ success: false, error, detail, suggestion }` (plus `retryAfter` when
   *   rate limited). Adapter failures are reported this way, not thrown.
   */
  async execute(platform, action, params = {}) {
    const adapter = this._getAdapter(platform)
    if (!adapter) {
      return {
        success: false,
        error: 'unsupported_platform',
        detail: `No adapter for platform "${platform}". Supported: ${Object.keys(adapters).join(', ') || 'none yet'}`,
        suggestion: 'Platform adapters are being added. Check back soon.'
      }
    }

    // Work on a shallow copy so the caller's object never picks up _cookies
    // (credentials) as a side effect of calling execute().
    const actionParams = { ...params }

    // Get auth for this platform/account
    const account = actionParams.account
    if (account) {
      const cookies = await this.auth.getCookies(platform, account)
      if (!cookies) {
        return {
          success: false,
          error: 'auth_missing',
          detail: `No auth found for ${platform}/${account}.`,
          suggestion: `Run: spectrawl login ${platform} --account ${account}`
        }
      }
      actionParams._cookies = cookies
    }

    // Check rate limits
    const rateCheck = this.rateLimiter.check(platform, action, actionParams)
    if (!rateCheck.allowed) {
      return {
        success: false,
        error: 'rate_limited',
        detail: rateCheck.reason,
        retryAfter: rateCheck.retryAfter,
        suggestion: `Wait ${rateCheck.retryAfter}s or adjust limits in spectrawl.json`
      }
    }

    // Check deduplication (same content posted in last 24h).
    // The body is part of the hashed text: without it every body-only action
    // on a platform would share one hash and be reported as a duplicate.
    // With an empty body the hash input is the same as text + title alone.
    const text = actionParams.text || ''
    const title = actionParams.title || ''
    const body = actionParams.body || ''
    const contentHash = text || title || body
      ? crypto.createHash('md5').update(`${platform}:${action}:${text}${title}${body}`).digest('hex')
      : null

    if (contentHash && this.rateLimiter.isDuplicate(platform, contentHash)) {
      return {
        success: false,
        error: 'duplicate',
        detail: `Same content already posted to ${platform} in the last 24h`,
        suggestion: 'Change the content or wait 24h'
      }
    }

    try {
      const result = await adapter.execute(action, actionParams, {
        auth: this.auth,
        browse: this.browse
      })

      // Log success
      this.rateLimiter.log(platform, action, {
        account, contentHash, status: 'success'
      })

      return { success: true, ...result }
    } catch (err) {
      // Log failure
      this.rateLimiter.log(platform, action, {
        account, contentHash, status: 'failed',
        error: err.message, retryCount: actionParams._retryCount || 0
      })

      return {
        success: false,
        error: categorizeError(err),
        detail: err.message,
        suggestion: getSuggestion(err, platform, account)
      }
    }
  }

  /**
   * Look up the adapter registered for a platform name.
   * @param {string} platform - Platform name or alias.
   * @returns {object|null} The adapter, or null when none is registered.
   */
  _getAdapter(platform) {
    return adapters[platform] || null
  }

  /**
   * Register a platform adapter.
   * Replaces any adapter already registered under the same name.
   * @param {string} platform - Name to register the adapter under.
   * @param {object} adapter - Object exposing execute(action, params, ctx).
   */
  static registerAdapter(platform, adapter) {
    adapters[platform] = adapter
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Map an error to a coarse category by looking for keywords in its message.
 * Checks run in a fixed order and the first match wins.
 *
 * @param {*} err - Usually an Error; values without a `message` (a thrown
 *   string, null, ...) are stringified instead of causing a TypeError.
 * @returns {string} One of auth_expired, captcha_required, rate_limited,
 *   fingerprint_blocked, timeout or unknown.
 */
function categorizeError(err) {
  const msg = String(err?.message ?? err ?? '').toLowerCase()
  if (msg.includes('cookie') || msg.includes('auth') || msg.includes('login')) return 'auth_expired'
  if (msg.includes('captcha')) return 'captcha_required'
  if (msg.includes('rate') || msg.includes('429')) return 'rate_limited'
  if (msg.includes('fingerprint') || msg.includes('blocked')) return 'fingerprint_blocked'
  if (msg.includes('timeout')) return 'timeout'
  return 'unknown'
}
|
|
146
|
+
|
|
147
|
+
/**
 * Turn an error into a short, actionable hint for the user.
 * The error is classified with categorizeError() and the hint is chosen from
 * the resulting category.
 *
 * @param {*} err - Error passed on to categorizeError().
 * @param {string} platform - Platform name used in the suggested command.
 * @param {string} [account] - Account name; adds `--account` to the login hint.
 * @returns {string} The suggestion text.
 */
function getSuggestion(err, platform, account) {
  switch (categorizeError(err)) {
    case 'auth_expired': {
      const accountFlag = account ? ` --account ${account}` : ''
      return `Run: spectrawl login ${platform}${accountFlag}`
    }
    case 'captcha_required':
      return `Manual intervention needed. Run: spectrawl login ${platform} --manual`
    case 'rate_limited':
      return `Wait and retry. Check rate limits in spectrawl.json`
    case 'fingerprint_blocked':
      return `Try with stealth mode: spectrawl browse --stealth`
    case 'timeout':
      return `Network issue. Check proxy settings.`
    default:
      return 'Check logs for details.'
  }
}
|
|
158
|
+
|
|
159
|
+
module.exports = { ActEngine }
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
const Database = require('better-sqlite3')
|
|
2
|
+
const path = require('path')
|
|
3
|
+
const fs = require('fs')
|
|
4
|
+
|
|
5
|
+
/**
 * Action ledger backed by a SQLite database.
 * Every attempted action is recorded in the `action_log` table; that history
 * is then used to enforce per-platform limits, detect repeated content and
 * list failed actions that may be retried.
 */
class RateLimiter {
  /**
   * @param {object} [config]
   * @param {string} [config.dbPath] - Database file (default './data/ratelimit.db').
   * @param {object} [config.limits] - Per-platform limits keyed by platform
   *   name; check() reads `postsPerHour` and `minDelayMs` from each entry.
   */
  constructor(config = {}) {
    const location = config.dbPath || './data/ratelimit.db'
    // Make sure the parent directory exists before opening the database.
    fs.mkdirSync(path.dirname(location), { recursive: true })

    this.db = new Database(location)
    this.db.pragma('journal_mode = WAL')
    this.limits = config.limits || {}

    this._init()
  }

  /** Create the action_log table and its two indexes if they do not exist. */
  _init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS action_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        platform TEXT NOT NULL,
        account TEXT,
        action TEXT NOT NULL,
        content_hash TEXT,
        status TEXT DEFAULT 'success',
        error TEXT,
        retry_count INTEGER DEFAULT 0,
        created_at INTEGER NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_action_log_platform
        ON action_log(platform, created_at);

      CREATE INDEX IF NOT EXISTS idx_action_log_hash
        ON action_log(content_hash, created_at);
    `)
  }

  /**
   * Check if an action is allowed under rate limits.
   * Only rows logged with status 'success' count. The hourly cap is per
   * platform and action; the minimum delay is per platform across all
   * actions. `params` is accepted but not consulted.
   * @returns {{ allowed: boolean, reason?: string, retryAfter?: number }}
   *   retryAfter is expressed in seconds.
   */
  check(platform, action, params = {}) {
    const rules = this.limits[platform]
    if (!rules) return { allowed: true }

    // Timestamps are stored as whole seconds since the epoch.
    const nowSec = Math.floor(Date.now() / 1000)
    const windowStart = nowSec - 3600

    // Hourly cap
    if (rules.postsPerHour) {
      const windowArgs = [platform, action, windowStart, 'success']
      const { cnt } = this.db.prepare(
        'SELECT COUNT(*) as cnt FROM action_log WHERE platform = ? AND action = ? AND created_at > ? AND status = ?'
      ).get(...windowArgs)

      if (cnt >= rules.postsPerHour) {
        // A slot frees up once the oldest action in the window is an hour old.
        const earliest = this.db.prepare(
          'SELECT created_at FROM action_log WHERE platform = ? AND action = ? AND created_at > ? AND status = ? ORDER BY created_at ASC LIMIT 1'
        ).get(...windowArgs)

        let retryAfter = 3600
        if (earliest) retryAfter = earliest.created_at + 3600 - nowSec

        return {
          allowed: false,
          reason: `Rate limit: max ${rules.postsPerHour} ${action}s per hour on ${platform}`,
          retryAfter
        }
      }
    }

    // Minimum spacing between consecutive successful actions
    if (rules.minDelayMs) {
      const latest = this.db.prepare(
        'SELECT created_at FROM action_log WHERE platform = ? AND status = ? ORDER BY created_at DESC LIMIT 1'
      ).get(platform, 'success')

      const elapsedMs = latest ? (nowSec - latest.created_at) * 1000 : null
      if (latest && elapsedMs < rules.minDelayMs) {
        const waitSec = Math.ceil((rules.minDelayMs - elapsedMs) / 1000)
        return {
          allowed: false,
          reason: `Min delay: wait ${waitSec}s between actions on ${platform}`,
          retryAfter: waitSec
        }
      }
    }

    return { allowed: true }
  }

  /**
   * Check if this action is a duplicate (same content recently posted).
   * @param {string} platform - Platform name.
   * @param {string} contentHash - Hash identifying the content.
   * @param {number} [windowSeconds=86400] - How far back to look.
   * @returns {boolean} True when a successful action with this hash was
   *   logged for the platform inside the window.
   */
  isDuplicate(platform, contentHash, windowSeconds = 86400) {
    const since = Math.floor(Date.now() / 1000) - windowSeconds
    const match = this.db.prepare(
      'SELECT id FROM action_log WHERE platform = ? AND content_hash = ? AND created_at > ? AND status = ?'
    ).get(platform, contentHash, since, 'success')

    return Boolean(match)
  }

  /**
   * Log an action (success or failure).
   * @param {string} platform - Platform name.
   * @param {string} action - Action name.
   * @param {object} [params] - { account, contentHash, status, error, retryCount };
   *   status defaults to 'success', retryCount to 0, the rest to NULL.
   */
  log(platform, action, params = {}) {
    const row = [
      platform,
      params.account || null,
      action,
      params.contentHash || null,
      params.status || 'success',
      params.error || null,
      params.retryCount || 0,
      Math.floor(Date.now() / 1000)
    ]

    this.db.prepare(`
      INSERT INTO action_log (platform, account, action, content_hash, status, error, retry_count, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(...row)
  }

  /**
   * Get failed actions for retry (dead letter queue).
   * @param {number} [maxRetries=3] - Only rows with retry_count below this are returned.
   * @returns {object[]} Matching action_log rows, oldest first.
   */
  getFailedActions(maxRetries = 3) {
    const query = this.db.prepare(
      'SELECT * FROM action_log WHERE status = ? AND retry_count < ? ORDER BY created_at ASC'
    )
    return query.all('failed', maxRetries)
  }

  /** Close the underlying database handle. */
  close() {
    this.db.close()
  }
}
|
|
142
|
+
|
|
143
|
+
module.exports = { RateLimiter }
|