spectrawl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +179 -0
- package/index.d.ts +90 -0
- package/package.json +53 -0
- package/src/act/adapters/devto.js +103 -0
- package/src/act/adapters/hashnode.js +89 -0
- package/src/act/adapters/ih.js +251 -0
- package/src/act/adapters/linkedin.js +106 -0
- package/src/act/adapters/reddit.js +160 -0
- package/src/act/adapters/x.js +202 -0
- package/src/act/form-filler.js +94 -0
- package/src/act/index.js +159 -0
- package/src/act/rate-limiter.js +143 -0
- package/src/auth/index.js +132 -0
- package/src/auth/refresh.js +111 -0
- package/src/browse/camoufox.js +164 -0
- package/src/browse/index.js +278 -0
- package/src/browse/install-stealth.js +188 -0
- package/src/cache.js +82 -0
- package/src/cli.js +160 -0
- package/src/config.js +65 -0
- package/src/events.js +57 -0
- package/src/index.js +108 -0
- package/src/mcp.js +195 -0
- package/src/search/engines/brave.js +62 -0
- package/src/search/engines/ddg.js +192 -0
- package/src/search/engines/google-cse.js +50 -0
- package/src/search/engines/jina.js +76 -0
- package/src/search/engines/searxng.js +69 -0
- package/src/search/engines/serper.js +64 -0
- package/src/search/index.js +104 -0
- package/src/search/scraper.js +170 -0
- package/src/search/summarizer.js +156 -0
- package/src/server.js +111 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Camoufox binary installer for Spectrawl.
|
|
5
|
+
* Downloads prebuilt anti-detect Firefox from Camoufox releases.
|
|
6
|
+
*
|
|
7
|
+
* Usage: npx spectrawl install-stealth
|
|
8
|
+
*
|
|
9
|
+
* Same model as `npx playwright install chromium` — downloads the
|
|
10
|
+
* right binary for your OS/arch and stores it locally.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const https = require('https')
|
|
14
|
+
const http = require('http')
|
|
15
|
+
const fs = require('fs')
|
|
16
|
+
const path = require('path')
|
|
17
|
+
const { execSync } = require('child_process')
|
|
18
|
+
const os = require('os')
|
|
19
|
+
|
|
20
|
+
const CAMOUFOX_VERSION = 'v135.0.1-beta.24'
|
|
21
|
+
const INSTALL_DIR = path.join(os.homedir(), '.spectrawl', 'browsers', 'camoufox')
|
|
22
|
+
|
|
23
|
+
/**
 * Pick the Camoufox release asset that matches the current OS and CPU
 * architecture, as reported by `os.platform()` and `os.arch()`.
 *
 * @returns {string} File name of the release zip to download.
 * @throws {Error} When no asset is mapped for this platform/architecture pair.
 */
function getPlatformAsset() {
  const platform = os.platform()
  const arch = os.arch()

  if (platform === 'linux') {
    if (arch === 'x64') return 'camoufox-135.0.1-beta.24-lin.x86_64.zip'
    if (arch === 'arm64') return 'camoufox-135.0.1-beta.24-lin.arm64.zip'
    if (arch === 'ia32') return 'camoufox-135.0.1-beta.24-lin.i686.zip'
  }
  if (platform === 'darwin') {
    // NOTE(review): one archive is returned for every macOS architecture —
    // confirm this asset name against the upstream release listing.
    return 'camoufox-135.0.1-beta.24-mac.universal.zip'
  }
  // Windows not yet supported in Camoufox latest
  // The message lists every Linux architecture handled above, including ia32.
  throw new Error(`Unsupported platform: ${platform}-${arch}. Camoufox supports Linux (x64/arm64/ia32) and macOS.`)
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the GitHub release download URL for one Camoufox asset of the
 * pinned CAMOUFOX_VERSION.
 *
 * @param {string} assetName - Release asset file name.
 * @returns {string} Absolute download URL.
 */
function downloadUrl(assetName) {
  const releaseRoot = 'https://github.com/daijro/camoufox/releases/download/'
  return releaseRoot.concat(CAMOUFOX_VERSION, '/', assetName)
}
|
|
42
|
+
|
|
43
|
+
/**
 * Issue a GET request and follow HTTP redirects until a 200 response arrives.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {number} [maxRedirects=10] - Redirect hops still allowed; guards
 *   against redirect loops.
 * @returns {Promise<import('http').IncomingMessage>} The 200 response, with
 *   its body not yet consumed.
 * @throws {Error} (as a rejection) on a non-200 final status, an invalid
 *   redirect target, too many redirects, or a request error.
 */
function followRedirects(url, maxRedirects = 10) {
  return new Promise((resolve, reject) => {
    const proto = url.startsWith('https') ? https : http
    proto.get(url, { headers: { 'User-Agent': 'spectrawl' } }, (res) => {
      const { statusCode, headers } = res

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        res.resume()
        if (maxRedirects <= 0) {
          reject(new Error('Download failed: too many redirects'))
          return
        }
        let nextUrl
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, url).toString()
        } catch (err) {
          reject(new Error(`Download failed: invalid redirect location "${headers.location}"`))
          return
        }
        followRedirects(nextUrl, maxRedirects - 1).then(resolve, reject)
        return
      }

      if (statusCode !== 200) {
        // Nobody will read this body; drain it before failing.
        res.resume()
        reject(new Error(`Download failed: HTTP ${statusCode}`))
        return
      }

      resolve(res)
    }).on('error', reject)
  })
}
|
|
59
|
+
|
|
60
|
+
/**
 * Download `url` to the file `dest`, printing progress in 10% steps when the
 * server reports a content length.
 *
 * The promise resolves only after the write stream has emitted 'finish', so
 * the file is complete on disk when the caller continues. On failure the
 * partial file is removed.
 *
 * @param {string} url - URL to download (redirects are followed).
 * @param {string} dest - Destination file path; its directory is created.
 * @returns {Promise<void>}
 */
async function download(url, dest) {
  fs.mkdirSync(path.dirname(dest), { recursive: true })

  console.log(`Downloading Camoufox ${CAMOUFOX_VERSION}...`)
  console.log(` From: ${url}`)
  console.log(` To: ${dest}`)

  const res = await followRedirects(url)
  const total = parseInt(res.headers['content-length'], 10) || 0
  let downloaded = 0
  let lastPercent = 0

  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(dest)
    let failed = false

    // Tear both streams down once and delete the incomplete file.
    const fail = (err) => {
      if (failed) return
      failed = true
      res.destroy()
      file.destroy()
      fs.unlink(dest, () => reject(err))
    }

    // Progress reporting only; the actual copy is done by pipe() below.
    res.on('data', (chunk) => {
      downloaded += chunk.length
      if (total) {
        const percent = Math.floor((downloaded / total) * 100)
        if (percent >= lastPercent + 10) {
          process.stdout.write(` ${percent}%`)
          if (percent < 100) process.stdout.write(' ')
          lastPercent = percent
        }
      }
    })
    res.on('error', fail)
    // A connection dropped mid-body closes the response without completing it.
    res.on('close', () => {
      if (!res.complete) fail(new Error('Download failed: connection closed before completion'))
    })

    file.on('error', fail)
    file.on('finish', () => {
      if (failed) return
      console.log('\n Download complete.')
      resolve()
    })

    // pipe() applies backpressure and ends the file when the response ends.
    res.pipe(file)
  })
}
|
|
97
|
+
|
|
98
|
+
/**
 * Download and unpack the pinned Camoufox release into INSTALL_DIR.
 *
 * Does nothing when the `.version` marker in INSTALL_DIR already matches
 * CAMOUFOX_VERSION. Extraction uses `unzip` and falls back to Python's
 * `zipfile` module when `unzip` fails.
 *
 * @returns {Promise<{path: string, binary: (string|null), version: string}>}
 *   Install directory, located browser binary (null when it was not found at
 *   one of the expected paths), and the installed version.
 * @throws {Error} If the platform is unsupported, the download fails, or both
 *   extraction methods fail.
 */
async function install() {
  // Required locally: only execSync is pulled in at file level.
  const { execFileSync } = require('child_process')

  const asset = getPlatformAsset()
  const url = downloadUrl(asset)
  const zipPath = path.join(INSTALL_DIR, asset)
  const extractDir = INSTALL_DIR

  // Check if already installed
  const markerFile = path.join(INSTALL_DIR, '.version')
  if (fs.existsSync(markerFile)) {
    const installed = fs.readFileSync(markerFile, 'utf8').trim()
    if (installed === CAMOUFOX_VERSION) {
      console.log(`Camoufox ${CAMOUFOX_VERSION} already installed at ${INSTALL_DIR}`)
      // Same shape as the fresh-install result below.
      return { path: INSTALL_DIR, binary: getCamoufoxPath(), version: CAMOUFOX_VERSION }
    }
  }

  // Download
  await download(url, zipPath)

  // Extract
  console.log(' Extracting...')
  fs.mkdirSync(extractDir, { recursive: true })

  try {
    try {
      // Argument arrays avoid shell parsing, so paths containing quotes or
      // other shell metacharacters are passed through untouched.
      execFileSync('unzip', ['-o', zipPath, '-d', extractDir], { stdio: 'pipe' })
    } catch (e) {
      // Try with built-in tools on systems without unzip
      execFileSync(
        'python3',
        ['-c', 'import sys, zipfile; zipfile.ZipFile(sys.argv[1]).extractall(sys.argv[2])', zipPath, extractDir],
        { stdio: 'pipe' }
      )
    }
  } finally {
    // Clean up zip, also when extraction failed
    fs.unlinkSync(zipPath)
  }

  // Find the binary
  const binaryName = os.platform() === 'darwin' ? 'camoufox' : 'camoufox-bin'
  const possiblePaths = [
    path.join(extractDir, 'camoufox', binaryName),
    path.join(extractDir, binaryName),
  ]

  let binaryPath = null
  for (const p of possiblePaths) {
    if (fs.existsSync(p)) {
      binaryPath = p
      fs.chmodSync(p, 0o755)
      break
    }
  }

  // Write version marker
  fs.writeFileSync(markerFile, CAMOUFOX_VERSION)

  console.log(`\n✅ Camoufox ${CAMOUFOX_VERSION} installed.`)
  console.log(` Binary: ${binaryPath || 'in ' + extractDir}`)
  console.log(` Spectrawl will use it automatically for stealth browsing.`)

  return { path: extractDir, binary: binaryPath, version: CAMOUFOX_VERSION }
}
|
|
156
|
+
|
|
157
|
+
/**
 * Locate the installed Camoufox executable.
 *
 * Checks INSTALL_DIR/camoufox/<binary> first, then INSTALL_DIR/<binary>,
 * where <binary> is `camoufox` on macOS and `camoufox-bin` elsewhere.
 *
 * @returns {string|null} Path of the first candidate that exists, or null.
 */
function getCamoufoxPath() {
  const executable = os.platform() === 'darwin' ? 'camoufox' : 'camoufox-bin'
  const candidates = [
    path.join(INSTALL_DIR, 'camoufox', executable),
    path.join(INSTALL_DIR, executable),
  ]
  const found = candidates.find((candidate) => fs.existsSync(candidate))
  return found || null
}
|
|
172
|
+
|
|
173
|
+
/**
 * Report whether a Camoufox binary can be found on disk.
 *
 * @returns {boolean} True when getCamoufoxPath() yields a path, false when it
 *   yields null.
 */
function isInstalled() {
  const located = getCamoufoxPath()
  return !(located === null)
}
|
|
179
|
+
|
|
180
|
+
// Run as CLI: when this file is executed directly (not required as a module),
// run the installer; a rejected install is reported on stderr and the process
// exits with status 1.
if (require.main === module) {
  install().catch(err => {
    console.error('❌ Installation failed:', err.message)
    process.exit(1)
  })
}
|
|
187
|
+
|
|
188
|
+
module.exports = { install, getCamoufoxPath, isInstalled, INSTALL_DIR }
|
package/src/cache.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
const Database = require('better-sqlite3')
|
|
2
|
+
const path = require('path')
|
|
3
|
+
const fs = require('fs')
|
|
4
|
+
const crypto = require('crypto')
|
|
5
|
+
|
|
6
|
+
/**
 * SQLite-backed key/value cache with a per-type time-to-live.
 *
 * Entries are keyed by a SHA-256 hash of `type:key` and store JSON-serialized
 * values together with their creation time and TTL (both in seconds).
 */
class Cache {
  /**
   * @param {object} [config]
   * @param {string} [config.path='./data/cache.db'] - Database file; its
   *   directory is created if missing.
   * @param {number} [config.searchTtl=3600] - TTL in seconds for "search".
   * @param {number} [config.scrapeTtl=86400] - TTL in seconds for "scrape".
   * @param {number} [config.screenshotTtl=3600] - TTL in seconds for "screenshot".
   */
  constructor(config = {}) {
    const dbPath = config.path || './data/cache.db'
    fs.mkdirSync(path.dirname(dbPath), { recursive: true })

    this.db = new Database(dbPath)
    this.db.pragma('journal_mode = WAL')
    // `??` keeps an explicit 0; `||` used to replace it with the default.
    // An entry stored with TTL 0 never satisfies the freshness check in get().
    this.ttls = {
      search: config.searchTtl ?? 3600,
      scrape: config.scrapeTtl ?? 86400,
      screenshot: config.screenshotTtl ?? 3600
    }

    this._init()
  }

  /** Create the table, prepare the reusable statements, and purge expired rows. */
  _init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS cache (
        key TEXT PRIMARY KEY,
        type TEXT NOT NULL,
        value TEXT NOT NULL,
        created_at INTEGER NOT NULL,
        ttl INTEGER NOT NULL
      )
    `)

    // Prepared once and reused instead of being re-prepared on every call.
    this._stmts = {
      get: this.db.prepare('SELECT value FROM cache WHERE key = ? AND created_at + ttl > ?'),
      set: this.db.prepare(`
        INSERT OR REPLACE INTO cache (key, type, value, created_at, ttl)
        VALUES (?, ?, ?, ?, ?)
      `),
      invalidate: this.db.prepare('DELETE FROM cache WHERE key = ?'),
      clearType: this.db.prepare('DELETE FROM cache WHERE type = ?'),
      clearAll: this.db.prepare('DELETE FROM cache')
    }

    // Clean expired entries on startup
    this.db.prepare('DELETE FROM cache WHERE created_at + ttl < ?').run(now())
  }

  /**
   * Look up a cached value.
   * @param {string} type - Entry type (part of the hashed key).
   * @param {string} key - Caller-supplied key.
   * @returns {*} The parsed value, or null when missing or expired.
   */
  get(type, key) {
    const row = this._stmts.get.get(this._hash(type, key), now())
    return row ? JSON.parse(row.value) : null
  }

  /**
   * Store a value, replacing any existing entry for the same type/key.
   * Types without a configured TTL fall back to 3600 seconds.
   * @param {string} type
   * @param {string} key
   * @param {*} value - Must be JSON-serializable.
   */
  set(type, key, value) {
    const ttl = this.ttls[type] ?? 3600
    this._stmts.set.run(this._hash(type, key), type, JSON.stringify(value), now(), ttl)
  }

  /** Remove the entry for one type/key pair. */
  invalidate(type, key) {
    this._stmts.invalidate.run(this._hash(type, key))
  }

  /**
   * Remove all entries of `type`, or every entry when `type` is omitted.
   * @param {string} [type]
   */
  clear(type) {
    if (type) {
      this._stmts.clearType.run(type)
    } else {
      this._stmts.clearAll.run()
    }
  }

  /** Close the underlying database handle. */
  close() {
    this.db.close()
  }

  /** @returns {string} Hex SHA-256 digest of `type:key`, used as the row key. */
  _hash(type, key) {
    return crypto.createHash('sha256').update(`${type}:${key}`).digest('hex')
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Current Unix time.
 * @returns {number} Whole seconds since the epoch, rounded down.
 */
function now() {
  const millis = Date.now()
  const seconds = millis / 1000
  return Math.floor(seconds)
}
|
|
81
|
+
|
|
82
|
+
module.exports = { Cache }
|
package/src/cli.js
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { Spectrawl } = require('./index')
|
|
4
|
+
const { DEFAULTS } = require('./config')
|
|
5
|
+
const fs = require('fs')
|
|
6
|
+
const path = require('path')
|
|
7
|
+
|
|
8
|
+
const args = process.argv.slice(2)
|
|
9
|
+
const command = args[0]
|
|
10
|
+
|
|
11
|
+
/**
 * Dispatch the first CLI argument to its command handler.
 * Unknown or missing commands show the help text.
 *
 * @returns {Promise<*>} Whatever the selected handler returns.
 */
async function main() {
  // Handlers are wrapped in thunks so nothing runs until a command matches.
  const routes = new Map([
    ['init', () => init()],
    ['search', () => search(args.slice(1).join(' '))],
    ['status', () => status()],
    ['serve', () => serve()],
    ['mcp', () => mcp()],
    ['install-stealth', () => installStealth()],
    ['version', () => { console.log('spectrawl v0.1.0') }],
  ])

  const handler = routes.get(command)
  if (handler) return handler()
  return help()
}
|
|
32
|
+
|
|
33
|
+
/**
 * Write a starter `spectrawl.json` into the current working directory,
 * seeded from selected DEFAULTS. Leaves an existing file untouched.
 */
function init() {
  const target = path.join(process.cwd(), 'spectrawl.json')

  if (!fs.existsSync(target)) {
    const { port, search: searchDefaults, cache: cacheDefaults, concurrency } = DEFAULTS
    const starter = {
      port,
      search: {
        cascade: searchDefaults.cascade,
        scrapeTop: searchDefaults.scrapeTop
      },
      cache: {
        path: cacheDefaults.path,
        searchTtl: cacheDefaults.searchTtl,
        scrapeTtl: cacheDefaults.scrapeTtl
      },
      concurrency
    }
    const serialized = JSON.stringify(starter, null, 2)
    fs.writeFileSync(target, serialized)
    console.log('Created spectrawl.json')
    return
  }

  console.log('spectrawl.json already exists')
}
|
|
57
|
+
|
|
58
|
+
/**
 * Run a web search and print the results to stdout.
 *
 * Exits the process with status 1 when no query is given. The Spectrawl
 * instance is always closed, even when the search throws.
 *
 * @param {string} query - Search terms (CLI arguments joined by spaces).
 * @returns {Promise<void>}
 */
async function search(query) {
  if (!query) {
    console.error('Usage: spectrawl search "your query"')
    process.exit(1)
  }

  const web = new Spectrawl()
  try {
    console.log(`Searching: "${query}"...\n`)
    const results = await web.search(query, { summarize: false })

    if (results.answer) {
      console.log('Answer:', results.answer, '\n')
    }

    for (const source of results.sources) {
      console.log(` ${source.title}`)
      console.log(` ${source.url}`)
      // A source without a snippet used to print the literal text "undefined".
      console.log(` ${(source.snippet ?? '').slice(0, 150)}`)
      console.log()
    }

    console.log(`${results.sources.length} results${results.cached ? ' (cached)' : ''}`)
  } finally {
    await web.close()
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Print one line per configured account showing its auth health.
 * The Spectrawl instance is closed on every path.
 *
 * @returns {Promise<void>}
 */
async function status() {
  // Map an account status to the icon shown in front of its line.
  const iconFor = (state) => {
    if (state === 'valid') return '✅'
    if (state === 'expiring') return '⚠️'
    return '❌'
  }

  const web = new Spectrawl()
  try {
    const accounts = await web.status()

    if (accounts.length === 0) {
      console.log('No accounts configured. Run: spectrawl login <platform> --account @handle')
      return
    }

    for (const acc of accounts) {
      let line = `${iconFor(acc.status)} ${acc.platform}/${acc.account} — ${acc.status}`
      if (acc.expiresAt) {
        line += ` (expires ${acc.expiresAt})`
      }
      console.log(line)
    }
  } finally {
    await web.close()
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Start the HTTP server by loading `./server`; the module is required purely
 * for its side effects.
 *
 * NOTE(review): the help text advertises `serve [--port N]`, but no flag is
 * read or forwarded here — confirm whether `./server` parses the port itself.
 */
async function serve() {
  // Start the HTTP server
  require('./server')
}
|
|
111
|
+
|
|
112
|
+
async function mcp() {
|
|
113
|
+
// Start as MCP server (stdio transport)
|
|
114
|
+
const { MCPServer } = require('./mcp')
|
|
115
|
+
const server = new MCPServer()
|
|
116
|
+
server.start()
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
async function installStealth() {
|
|
120
|
+
const { install, isInstalled } = require('./browse/install-stealth')
|
|
121
|
+
if (isInstalled()) {
|
|
122
|
+
const { getCamoufoxPath } = require('./browse/install-stealth')
|
|
123
|
+
console.log(`Camoufox already installed at ${getCamoufoxPath()}`)
|
|
124
|
+
console.log('Spectrawl will use it automatically.')
|
|
125
|
+
return
|
|
126
|
+
}
|
|
127
|
+
await install()
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
 * Print the CLI usage text (command list and examples) to stdout.
 * Used for unknown or missing commands.
 */
function help() {
  console.log(`
🌐 Spectrawl — The unified web layer for AI agents.

Commands:
init Create spectrawl.json config
search "query" Search the web
status Check auth health for all accounts
serve [--port N] Start HTTP server
mcp Start MCP server (stdio)
install-stealth Download Camoufox anti-detect browser
version Show version

Examples:
spectrawl init
spectrawl search "best dental clinics in seoul"
spectrawl status
spectrawl serve --port 3900
spectrawl mcp
`)
}
|
|
151
|
+
|
|
152
|
+
/**
 * Read the value that follows a command-line flag in `args`.
 *
 * @param {string} flag - Flag to look for, e.g. `--port`.
 * @returns {string|null} The argument right after the flag, or null when the
 *   flag is absent or is the last argument (previously `undefined` in the
 *   latter case).
 */
function getFlag(flag) {
  const idx = args.indexOf(flag)
  if (idx === -1) return null

  const value = args[idx + 1]
  return value === undefined ? null : value
}
|
|
156
|
+
|
|
157
|
+
// Entry point: run the selected command; any rejection is reported on stderr
// (message only) and the process exits with status 1.
main().catch(err => {
  console.error('Error:', err.message)
  process.exit(1)
})
|
package/src/config.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
const fs = require('fs')
|
|
2
|
+
const path = require('path')
|
|
3
|
+
|
|
4
|
+
// Built-in configuration. loadConfig() deep-merges the user's spectrawl.json
// over these values, so any key may be overridden individually.
const DEFAULTS = {
  port: 3900,
  search: {
    // Engine names, in list order — presumably the order in which they are tried; verify in the search module
    cascade: ['searxng', 'ddg', 'brave', 'serper'],
    scrapeTop: 3,
    searxng: { url: 'http://localhost:8888' },
    llm: null // { provider, model, apiKey }
  },
  browse: {
    defaultEngine: 'playwright',
    proxy: null, // { type, host, port, username, password }
    humanlike: {
      // presumably milliseconds — TODO confirm against the browse engine
      minDelay: 500,
      maxDelay: 2000,
      scrollBehavior: true
    }
  },
  auth: {
    refreshInterval: '4h',
    cookieStore: './data/cookies.db'
  },
  cache: {
    path: './data/cache.db',
    // TTLs are in seconds
    searchTtl: 3600, // 1 hour
    scrapeTtl: 86400, // 24 hours
    screenshotTtl: 3600 // 1 hour
  },
  // Per-platform posting limits
  rateLimit: {
    x: { postsPerHour: 5, minDelayMs: 30000 },
    reddit: { postsPerHour: 3, minDelayMs: 600000 }
  },
  concurrency: 3
}
|
|
37
|
+
|
|
38
|
+
/**
 * Load the user configuration and merge it over DEFAULTS.
 *
 * A missing file, invalid JSON, or a top-level value that is not a JSON
 * object all fall back to the defaults; the latter two also print a warning.
 *
 * @param {string} [configPath] - Config file path; defaults to
 *   `spectrawl.json` in the current working directory.
 * @returns {object} The merged configuration.
 */
function loadConfig(configPath) {
  const filePath = configPath || path.join(process.cwd(), 'spectrawl.json')

  let userConfig = {}
  if (fs.existsSync(filePath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'))
      // Only a plain object can be merged: `null` would make deepMerge throw,
      // and strings or arrays would inject numeric index keys.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        userConfig = parsed
      } else {
        console.warn(`Warning: Ignoring ${filePath}: expected a JSON object at the top level`)
      }
    } catch (e) {
      console.warn(`Warning: Could not parse ${filePath}:`, e.message)
    }
  }

  return deepMerge(DEFAULTS, userConfig)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Recursively merge `source` over `target` without mutating either.
 *
 * Plain objects are merged key by key; every other value (arrays, null,
 * primitives) from `source` replaces the target value outright.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overrides.
 * @returns {object} A new object holding the merged result.
 */
function deepMerge(target, source) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value)

  const result = { ...target }
  for (const key of Object.keys(source)) {
    // Assigning to "__proto__" would swap the result's prototype instead of
    // creating a key (JSON.parse produces it as an own property).
    if (key === '__proto__') continue

    const incoming = source[key]
    if (isPlainObject(incoming)) {
      // Merge into the existing value only when it is a plain object too;
      // spreading a string or array here would leak index keys.
      const base = isPlainObject(target[key]) ? target[key] : {}
      result[key] = deepMerge(base, incoming)
    } else {
      result[key] = incoming
    }
  }
  return result
}
|
|
64
|
+
|
|
65
|
+
module.exports = { loadConfig, DEFAULTS }
|
package/src/events.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Event system for Spectrawl.
|
|
3
|
+
* Proactive notifications to agents about auth state changes.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Minimal synchronous event emitter.
 *
 * Handlers registered under '*' receive every event as `{ event, ...data }`.
 * An exception thrown by a handler is logged as a warning and does not stop
 * the remaining handlers.
 */
class EventEmitter {
  constructor() {
    // Null-prototype map: event names such as "toString" or "constructor"
    // must not resolve to members inherited from Object.prototype.
    this._handlers = Object.create(null)
  }

  /**
   * Register a handler.
   * @param {string} event - Event name, or '*' for all events.
   * @param {function} handler
   * @returns {EventEmitter} this, for chaining.
   */
  on(event, handler) {
    if (!this._handlers[event]) this._handlers[event] = []
    this._handlers[event].push(handler)
    return this
  }

  /**
   * Remove a previously registered handler.
   * @param {string} event
   * @param {function} handler
   * @returns {EventEmitter} this, for chaining — also when nothing was
   *   registered for `event`.
   */
  off(event, handler) {
    const registered = this._handlers[event]
    if (registered) {
      this._handlers[event] = registered.filter(h => h !== handler)
    }
    return this
  }

  /**
   * Call every handler of `event` with `data`, then every wildcard handler.
   * @param {string} event
   * @param {object} [data]
   */
  emit(event, data) {
    // Iterate over copies so handlers added during dispatch only take effect
    // from the next emit.
    const handlers = [...(this._handlers[event] || [])]
    for (const handler of handlers) {
      try {
        handler(data)
      } catch (err) {
        console.warn(`Event handler error for ${event}:`, err.message)
      }
    }

    // Also emit to wildcard handlers
    const wildcards = [...(this._handlers['*'] || [])]
    for (const handler of wildcards) {
      try {
        handler({ event, ...data })
      } catch (err) {
        console.warn(`Wildcard handler error:`, err.message)
      }
    }
  }
}
|
|
44
|
+
|
|
45
|
+
// Standard events: the event-name strings used with EventEmitter#on / #emit.
// Consumers should reference these constants rather than the raw strings.
const EVENTS = {
  COOKIE_EXPIRING: 'cookie_expiring',
  COOKIE_EXPIRED: 'cookie_expired',
  AUTH_FAILED: 'auth_failed',
  AUTH_REFRESHED: 'auth_refreshed',
  RATE_LIMITED: 'rate_limited',
  ACTION_FAILED: 'action_failed',
  ACTION_SUCCESS: 'action_success',
  HEALTH_CHECK: 'health_check'
}
|
|
56
|
+
|
|
57
|
+
module.exports = { EventEmitter, EVENTS }
|
package/src/index.js
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spectrawl — The unified web layer for AI agents.
|
|
3
|
+
* Search, browse, authenticate, act.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const { SearchEngine } = require('./search')
|
|
7
|
+
const { BrowseEngine } = require('./browse')
|
|
8
|
+
const { AuthManager } = require('./auth')
|
|
9
|
+
const { ActEngine } = require('./act')
|
|
10
|
+
const { Cache } = require('./cache')
|
|
11
|
+
const { EventEmitter, EVENTS } = require('./events')
|
|
12
|
+
const { CookieRefresher } = require('./auth/refresh')
|
|
13
|
+
const { loadConfig } = require('./config')
|
|
14
|
+
|
|
15
|
+
/**
 * Facade that wires the search, browse, auth, and act engines together with
 * a shared cache and event emitter.
 */
class Spectrawl {
  /**
   * @param {string} [configPath] - Config file path, passed to loadConfig().
   */
  constructor(configPath) {
    this.config = loadConfig(configPath)
    this.events = new EventEmitter()
    this.cache = new Cache(this.config.cache)
    this.searchEngine = new SearchEngine(this.config.search, this.cache)
    this.browseEngine = new BrowseEngine(this.config.browse, this.cache)
    this.auth = new AuthManager(this.config.auth)
    this.actEngine = new ActEngine(this.config, this.auth, this.browseEngine)
    this.refresher = new CookieRefresher(this.auth, this.events, this.config.auth)
  }

  /**
   * Search the web using free API cascade.
   * @param {string} query - Search query
   * @param {object} opts - { summarize, scrapeTop, engines }
   * @returns {Promise<{answer?, sources[], cached}>}
   */
  async search(query, opts = {}) {
    return this.searchEngine.search(query, opts)
  }

  /**
   * Browse a URL with stealth and optional auth.
   * When `opts.auth` is set, the matching cookies are fetched and handed to
   * the browse engine as `_cookies`; the caller's `opts` is not modified.
   * @param {string} url - URL to browse
   * @param {object} opts - { auth, screenshot, extract, stealth }
   * @returns {Promise<{content, html, screenshot?, cookies?}>}
   */
  async browse(url, opts = {}) {
    if (!opts.auth) {
      return this.browseEngine.browse(url, opts)
    }
    const cookies = await this.auth.getCookies(opts.auth)
    // Pass a copy: writing `_cookies` onto the caller's object would leak
    // them into any later use of that same options object.
    return this.browseEngine.browse(url, { ...opts, _cookies: cookies })
  }

  /**
   * Perform an authenticated action on a platform and emit
   * ACTION_SUCCESS or ACTION_FAILED with the result.
   * @param {string} platform - Platform name (x, reddit, devto, etc.)
   * @param {string} action - Action name (post, comment, like, etc.)
   * @param {object} params - Action parameters
   * @returns {Promise<{success, data?, error?}>}
   */
  async act(platform, action, params = {}) {
    const result = await this.actEngine.execute(platform, action, params)

    if (result.success) {
      this.events.emit(EVENTS.ACTION_SUCCESS, { platform, action, ...result })
    } else {
      this.events.emit(EVENTS.ACTION_FAILED, { platform, action, ...result })
    }

    return result
  }

  /**
   * Register event handler.
   * Events: cookie_expiring, cookie_expired, auth_failed, auth_refreshed,
   * rate_limited, action_failed, action_success, health_check
   * @param {string} event - Event name
   * @param {function} handler - Event handler
   * @returns {Spectrawl} this, for chaining.
   */
  on(event, handler) {
    this.events.on(event, handler)
    return this
  }

  /**
   * Start the cookie refresh cron.
   * Call this when running as a server to auto-monitor auth health.
   */
  startRefreshCron() {
    this.refresher.start()
  }

  /**
   * Get health status of all authenticated sessions.
   * @returns {Promise<Array<{platform, account, status, expires?}>>}
   */
  async status() {
    return this.auth.getStatus()
  }

  /**
   * Shut down gracefully: stop the refresher, close the browse engine, and
   * close the cache. The cache is closed even when the browse engine fails
   * to close; that failure is still propagated.
   */
  async close() {
    this.refresher.stop()
    try {
      await this.browseEngine.close()
    } finally {
      await this.cache.close()
    }
  }
}
|
|
107
|
+
|
|
108
|
+
module.exports = { Spectrawl, EVENTS }
|