spectrawl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Camoufox binary installer for Spectrawl.
5
+ * Downloads prebuilt anti-detect Firefox from Camoufox releases.
6
+ *
7
+ * Usage: npx spectrawl install-stealth
8
+ *
9
+ * Same model as `npx playwright install chromium` — downloads the
10
+ * right binary for your OS/arch and stores it locally.
11
+ */
12
+
13
+ const https = require('https')
14
+ const http = require('http')
15
+ const fs = require('fs')
16
+ const path = require('path')
17
+ const { execSync } = require('child_process')
18
+ const os = require('os')
19
+
20
+ const CAMOUFOX_VERSION = 'v135.0.1-beta.24'
21
+ const INSTALL_DIR = path.join(os.homedir(), '.spectrawl', 'browsers', 'camoufox')
22
+
23
/**
 * Resolve the Camoufox release asset file name for the current OS and CPU.
 *
 * The version segment is derived from CAMOUFOX_VERSION (minus its leading
 * "v") so that bumping the constant cannot leave the asset name pointing at
 * a different release than the download URL.
 *
 * @returns {string} Asset file name, e.g. `camoufox-135.0.1-beta.24-lin.x86_64.zip`
 * @throws {Error} When no asset is known for this platform/arch pair.
 */
function getPlatformAsset() {
  const platform = os.platform()
  const arch = os.arch()
  const release = CAMOUFOX_VERSION.replace(/^v/, '')

  if (platform === 'linux') {
    // Node arch name -> arch suffix used in the release asset names
    const linuxArch = { x64: 'x86_64', arm64: 'arm64', ia32: 'i686' }
    if (Object.prototype.hasOwnProperty.call(linuxArch, arch)) {
      return `camoufox-${release}-lin.${linuxArch[arch]}.zip`
    }
  }
  if (platform === 'darwin') {
    return `camoufox-${release}-mac.universal.zip`
  }
  // Windows not yet supported in Camoufox latest
  throw new Error(`Unsupported platform: ${platform}-${arch}. Camoufox supports Linux (x64/arm64/ia32) and macOS.`)
}
38
+
39
/**
 * Build the GitHub release download URL for an asset of the pinned version.
 * @param {string} assetName - Release asset file name.
 * @returns {string} Absolute HTTPS URL of the asset.
 */
function downloadUrl(assetName) {
  const releaseBase = 'https://github.com/daijro/camoufox/releases/download'
  return [releaseBase, CAMOUFOX_VERSION, assetName].join('/')
}
42
+
43
/**
 * Issue a GET request and follow HTTP redirects until a 200 response arrives.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {number} [maxRedirects=10] - Redirect hops allowed before giving up.
 * @returns {Promise<import('http').IncomingMessage>} The 200 response, not yet consumed.
 *   Rejects on a non-200 final status, too many redirects, an unparsable
 *   redirect target, or a request error.
 */
function followRedirects(url, maxRedirects = 10) {
  return new Promise((resolve, reject) => {
    const proto = url.startsWith('https') ? https : http
    proto.get(url, { headers: { 'User-Agent': 'spectrawl' } }, (res) => {
      const { statusCode, headers } = res

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Discard the redirect body so the socket is released.
        res.resume()
        if (maxRedirects <= 0) {
          reject(new Error('Download failed: Too many redirects'))
          return
        }
        let next
        try {
          // Location may be relative; resolve it against the current URL.
          next = new URL(headers.location, url).toString()
        } catch (err) {
          reject(new Error(`Download failed: invalid redirect location "${headers.location}"`))
          return
        }
        followRedirects(next, maxRedirects - 1).then(resolve, reject)
        return
      }

      if (statusCode !== 200) {
        res.resume()
        reject(new Error(`Download failed: HTTP ${statusCode}`))
        return
      }
      resolve(res)
    }).on('error', reject)
  })
}
59
+
60
/**
 * Download `url` to `dest`, printing coarse (10% step) progress to stdout.
 *
 * The promise resolves only after the write stream has emitted 'finish', so
 * the caller can read the file immediately. On any failure the partially
 * written file is removed before the promise rejects.
 *
 * @param {string} url - Source URL (redirects are followed).
 * @param {string} dest - Destination file path; parent directories are created.
 * @returns {Promise<void>}
 */
async function download(url, dest) {
  fs.mkdirSync(path.dirname(dest), { recursive: true })

  console.log(`Downloading Camoufox ${CAMOUFOX_VERSION}...`)
  console.log(` From: ${url}`)
  console.log(` To: ${dest}`)

  const res = await followRedirects(url)
  // 0 means "size unknown": progress output and the size check are skipped.
  const total = parseInt(res.headers['content-length'], 10) || 0
  let downloaded = 0
  let lastPercent = 0

  const file = fs.createWriteStream(dest)

  return new Promise((resolve, reject) => {
    const fail = (err) => {
      res.destroy()
      file.destroy()
      // Best effort: do not leave a truncated archive behind.
      fs.unlink(dest, () => reject(err))
    }

    res.on('data', (chunk) => {
      downloaded += chunk.length
      if (total) {
        const percent = Math.floor((downloaded / total) * 100)
        if (percent >= lastPercent + 10) {
          process.stdout.write(` ${percent}%`)
          if (percent < 100) process.stdout.write(' ')
          lastPercent = percent
        }
      }
    })
    res.on('error', fail)
    file.on('error', fail)
    file.on('finish', () => {
      if (total && downloaded !== total) {
        fail(new Error(`Download failed: received ${downloaded} of ${total} bytes`))
        return
      }
      console.log('\n Download complete.')
      resolve()
    })

    // pipe() honours backpressure and ends `file` once the response ends.
    res.pipe(file)
  })
}
97
+
98
/**
 * Download and unpack the pinned Camoufox release into INSTALL_DIR.
 *
 * Skips all work when the `.version` marker already matches
 * CAMOUFOX_VERSION. The downloaded archive is removed afterwards, including
 * when extraction fails.
 *
 * @returns {Promise<{path: string, binary: (string|null), version: string}>}
 *   Install directory, the executable path if one of the known locations
 *   exists (otherwise null), and the installed version.
 */
async function install() {
  // execFileSync passes arguments without going through a shell, so paths
  // containing quotes or spaces cannot break (or inject into) the command.
  const { execFileSync } = require('child_process')

  const asset = getPlatformAsset()
  const url = downloadUrl(asset)
  const zipPath = path.join(INSTALL_DIR, asset)
  const extractDir = INSTALL_DIR

  // Check if already installed
  const markerFile = path.join(INSTALL_DIR, '.version')
  if (fs.existsSync(markerFile)) {
    const installed = fs.readFileSync(markerFile, 'utf8').trim()
    if (installed === CAMOUFOX_VERSION) {
      console.log(`Camoufox ${CAMOUFOX_VERSION} already installed at ${INSTALL_DIR}`)
      // Same shape as the fresh-install result below.
      return { path: INSTALL_DIR, binary: getCamoufoxPath(), version: CAMOUFOX_VERSION }
    }
  }

  // Download
  await download(url, zipPath)

  // Extract
  console.log(' Extracting...')
  fs.mkdirSync(extractDir, { recursive: true })

  try {
    try {
      execFileSync('unzip', ['-o', zipPath, '-d', extractDir], { stdio: 'pipe' })
    } catch (e) {
      // Fall back to Python's zipfile on systems without unzip. Paths are
      // passed as argv rather than spliced into the Python source.
      execFileSync(
        'python3',
        ['-c', 'import sys, zipfile; zipfile.ZipFile(sys.argv[1]).extractall(sys.argv[2])', zipPath, extractDir],
        { stdio: 'pipe' }
      )
    }
  } catch (err) {
    try {
      fs.unlinkSync(zipPath)
    } catch (cleanupErr) {
      // Ignore: the extraction error below is the one worth reporting.
    }
    throw err
  }

  // Clean up zip
  fs.unlinkSync(zipPath)

  // Find the binary
  const binaryName = os.platform() === 'darwin' ? 'camoufox' : 'camoufox-bin'
  const possiblePaths = [
    path.join(extractDir, 'camoufox', binaryName),
    path.join(extractDir, binaryName),
  ]

  const binaryPath = possiblePaths.find((p) => fs.existsSync(p)) || null
  if (binaryPath) fs.chmodSync(binaryPath, 0o755)

  // Write version marker
  fs.writeFileSync(markerFile, CAMOUFOX_VERSION)

  console.log(`\n✅ Camoufox ${CAMOUFOX_VERSION} installed.`)
  console.log(` Binary: ${binaryPath || 'in ' + extractDir}`)
  console.log(` Spectrawl will use it automatically for stealth browsing.`)

  return { path: extractDir, binary: binaryPath, version: CAMOUFOX_VERSION }
}
156
+
157
/**
 * Locate the installed Camoufox executable.
 *
 * Checks `<INSTALL_DIR>/camoufox/<exe>` first, then `<INSTALL_DIR>/<exe>`,
 * where `<exe>` is `camoufox` on macOS and `camoufox-bin` elsewhere.
 *
 * @returns {string|null} First candidate that exists on disk, or null.
 */
function getCamoufoxPath() {
  const exe = os.platform() === 'darwin' ? 'camoufox' : 'camoufox-bin'
  const candidates = [
    path.join(INSTALL_DIR, 'camoufox', exe),
    path.join(INSTALL_DIR, exe),
  ]

  const found = candidates.find((candidate) => fs.existsSync(candidate))
  return found === undefined ? null : found
}
172
+
173
/**
 * Report whether a Camoufox executable can be found on disk.
 * @returns {boolean} true when getCamoufoxPath() yields a path.
 */
function isInstalled() {
  const located = getCamoufoxPath()
  return located !== null
}
179
+
180
// Executed directly (not loaded via require): run the installer and exit
// with status 1 if it fails.
if (require.main === module) {
  const reportFailure = (err) => {
    console.error('❌ Installation failed:', err.message)
    process.exit(1)
  }
  install().catch(reportFailure)
}
187
+
188
+ module.exports = { install, getCamoufoxPath, isInstalled, INSTALL_DIR }
package/src/cache.js ADDED
@@ -0,0 +1,82 @@
1
+ const Database = require('better-sqlite3')
2
+ const path = require('path')
3
+ const fs = require('fs')
4
+ const crypto = require('crypto')
5
+
6
/**
 * SQLite-backed cache with a per-entry TTL expressed in seconds.
 * Rows are keyed by the SHA-256 hex digest of "<type>:<key>" and hold the
 * JSON-serialised value.
 */
class Cache {
  /**
   * @param {object} [config]
   * @param {string} [config.path='./data/cache.db'] - Database file; its directory is created.
   * @param {number} [config.searchTtl=3600] - TTL for 'search' entries.
   * @param {number} [config.scrapeTtl=86400] - TTL for 'scrape' entries.
   * @param {number} [config.screenshotTtl=3600] - TTL for 'screenshot' entries.
   */
  constructor(config = {}) {
    const dbPath = config.path || './data/cache.db'
    fs.mkdirSync(path.dirname(dbPath), { recursive: true })

    this.db = new Database(dbPath)
    this.db.pragma('journal_mode = WAL')
    // `??` instead of `||`: an explicit TTL of 0 (entries expire at once)
    // must be honoured rather than silently replaced by the default.
    this.ttls = {
      search: config.searchTtl ?? 3600,
      scrape: config.scrapeTtl ?? 86400,
      screenshot: config.screenshotTtl ?? 3600
    }

    this._init()
  }

  /** Create the table if needed and purge rows that have already expired. */
  _init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS cache (
        key TEXT PRIMARY KEY,
        type TEXT NOT NULL,
        value TEXT NOT NULL,
        created_at INTEGER NOT NULL,
        ttl INTEGER NOT NULL
      )
    `)

    // Clean expired entries on startup
    this.db.prepare('DELETE FROM cache WHERE created_at + ttl < ?').run(now())
  }

  /**
   * Fetch a live entry.
   * @param {string} type - Entry category (part of the hashed key).
   * @param {string} key - Caller-supplied key.
   * @returns {*} The parsed value, or null when absent or expired.
   */
  get(type, key) {
    const hash = this._hash(type, key)
    const row = this.db.prepare(
      'SELECT value FROM cache WHERE key = ? AND created_at + ttl > ?'
    ).get(hash, now())

    return row ? JSON.parse(row.value) : null
  }

  /**
   * Store (or overwrite) an entry using the TTL configured for `type`;
   * types without a configured TTL use 3600 seconds.
   * @param {string} type
   * @param {string} key
   * @param {*} value - Must be JSON-serialisable.
   */
  set(type, key, value) {
    const hash = this._hash(type, key)
    const ttl = this.ttls[type] ?? 3600

    this.db.prepare(`
      INSERT OR REPLACE INTO cache (key, type, value, created_at, ttl)
      VALUES (?, ?, ?, ?, ?)
    `).run(hash, type, JSON.stringify(value), now(), ttl)
  }

  /** Remove a single entry. */
  invalidate(type, key) {
    const hash = this._hash(type, key)
    this.db.prepare('DELETE FROM cache WHERE key = ?').run(hash)
  }

  /**
   * Remove every entry of `type`, or the whole cache when `type` is falsy.
   * @param {string} [type]
   */
  clear(type) {
    if (type) {
      this.db.prepare('DELETE FROM cache WHERE type = ?').run(type)
    } else {
      this.db.prepare('DELETE FROM cache').run()
    }
  }

  /** Close the underlying database handle. */
  close() {
    this.db.close()
  }

  /** @returns {string} SHA-256 hex digest of "<type>:<key>". */
  _hash(type, key) {
    return crypto.createHash('sha256').update(`${type}:${key}`).digest('hex')
  }
}
77
+
78
/** @returns {number} Current Unix time in whole seconds. */
function now() {
  const millis = Date.now()
  return Math.floor(millis / 1000)
}
81
+
82
+ module.exports = { Cache }
package/src/cli.js ADDED
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env node
2
+
3
+ const { Spectrawl } = require('./index')
4
+ const { DEFAULTS } = require('./config')
5
+ const fs = require('fs')
6
+ const path = require('path')
7
+
8
+ const args = process.argv.slice(2)
9
+ const command = args[0]
10
+
11
/**
 * Dispatch the CLI sub-command given as the first argument.
 * A missing or unrecognised command prints the help text.
 */
async function main() {
  // A Map (not an object literal) so that a command such as "constructor"
  // cannot resolve to an inherited property.
  const routes = new Map([
    ['init', () => init()],
    ['search', () => search(args.slice(1).join(' '))],
    ['status', () => status()],
    ['serve', () => serve()],
    ['mcp', () => mcp()],
    ['install-stealth', () => installStealth()],
    ['version', () => { console.log('spectrawl v0.1.0') }],
  ])

  const run = routes.get(command) || (() => help())
  return run()
}
32
+
33
/**
 * Write a starter `spectrawl.json` into the current working directory,
 * seeded from DEFAULTS. Leaves an existing file untouched.
 */
function init() {
  const configPath = path.join(process.cwd(), 'spectrawl.json')
  if (fs.existsSync(configPath)) {
    console.log('spectrawl.json already exists')
    return
  }

  const { port, search: searchDefaults, cache: cacheDefaults, concurrency } = DEFAULTS
  // Only a subset of the defaults is written to the starter file.
  const config = {
    port,
    search: {
      cascade: searchDefaults.cascade,
      scrapeTop: searchDefaults.scrapeTop
    },
    cache: {
      path: cacheDefaults.path,
      searchTtl: cacheDefaults.searchTtl,
      scrapeTtl: cacheDefaults.scrapeTtl
    },
    concurrency
  }

  const serialized = JSON.stringify(config, null, 2)
  fs.writeFileSync(configPath, serialized)
  console.log('Created spectrawl.json')
}
57
+
58
/**
 * `spectrawl search`: run a query and print each source's title, URL and
 * (when present) the first 150 characters of its snippet.
 *
 * Exits the process with status 1 when no query is given.
 *
 * @param {string} query - Search terms joined from the command line.
 */
async function search(query) {
  if (!query) {
    console.error('Usage: spectrawl search "your query"')
    process.exit(1)
  }

  const web = new Spectrawl()
  try {
    console.log(`Searching: "${query}"...\n`)
    const results = await web.search(query, { summarize: false })

    if (results.answer) {
      console.log('Answer:', results.answer, '\n')
    }

    for (const source of results.sources) {
      console.log(` ${source.title}`)
      console.log(` ${source.url}`)
      // A source without a snippet used to print the literal "undefined";
      // the line is now omitted instead.
      if (typeof source.snippet === 'string' && source.snippet) {
        console.log(` ${source.snippet.slice(0, 150)}`)
      }
      console.log()
    }

    console.log(`${results.sources.length} results${results.cached ? ' (cached)' : ''}`)
  } finally {
    await web.close()
  }
}
85
+
86
/**
 * `spectrawl status`: print one line per configured account with an icon
 * for its auth state (valid / expiring / anything else), or a hint when no
 * accounts exist.
 */
async function status() {
  const web = new Spectrawl()
  try {
    const accounts = await web.status()

    if (accounts.length === 0) {
      console.log('No accounts configured. Run: spectrawl login <platform> --account @handle')
      return
    }

    for (const acc of accounts) {
      let icon = '❌'
      if (acc.status === 'valid') {
        icon = '✅'
      } else if (acc.status === 'expiring') {
        icon = '⚠️'
      }

      let extra = ''
      if (acc.expiresAt) {
        extra = ` (expires ${acc.expiresAt})`
      }

      console.log(`${icon} ${acc.platform}/${acc.account} — ${acc.status}${extra}`)
    }
  } finally {
    await web.close()
  }
}
106
+
107
/**
 * `spectrawl serve`: load the server module.
 * NOTE(review): presumably ./server starts listening as a side effect of
 * being loaded — confirm; nothing here passes it a `--port` value.
 */
async function serve() {
  void require('./server')
}
111
+
112
/**
 * `spectrawl mcp`: construct an MCPServer and start it
 * (stdio transport, per the original comment).
 */
async function mcp() {
  const mcpModule = require('./mcp')
  new mcpModule.MCPServer().start()
}
118
+
119
/**
 * `spectrawl install-stealth`: install Camoufox unless an executable is
 * already present, in which case its location is printed instead.
 */
async function installStealth() {
  const stealth = require('./browse/install-stealth')

  if (!stealth.isInstalled()) {
    await stealth.install()
    return
  }

  console.log(`Camoufox already installed at ${stealth.getCamoufoxPath()}`)
  console.log('Spectrawl will use it automatically.')
}
129
+
130
/** Print the command overview and usage examples. */
function help() {
  const usage = `
🌐 Spectrawl — The unified web layer for AI agents.

Commands:
init Create spectrawl.json config
search "query" Search the web
status Check auth health for all accounts
serve [--port N] Start HTTP server
mcp Start MCP server (stdio)
install-stealth Download Camoufox anti-detect browser
version Show version

Examples:
spectrawl init
spectrawl search "best dental clinics in seoul"
spectrawl status
spectrawl serve --port 3900
spectrawl mcp
`
  console.log(usage)
}
151
+
152
/**
 * Read the value that follows a command-line flag.
 * @param {string} flag - Flag to look for, e.g. `--port`.
 * @returns {string|null} The argument after the flag, or null when the flag
 *   is absent or is the last argument (no value follows it).
 */
function getFlag(flag) {
  const idx = args.indexOf(flag)
  if (idx === -1) return null
  // Indexing past the end yields undefined; normalise it to null so both
  // "no flag" and "flag without value" look the same to callers.
  const value = args[idx + 1]
  return value === undefined ? null : value
}
156
+
157
// Top-level failure handler: print the error message and exit non-zero.
function reportFatal(err) {
  console.error('Error:', err.message)
  process.exit(1)
}

main().catch(reportFatal)
package/src/config.js ADDED
@@ -0,0 +1,65 @@
1
+ const fs = require('fs')
2
+ const path = require('path')
3
+
4
// Built-in configuration; loadConfig() layers the user's spectrawl.json on top.
const DEFAULTS = {
  port: 3900,

  search: {
    cascade: ['searxng', 'ddg', 'brave', 'serper'],
    scrapeTop: 3,
    searxng: { url: 'http://localhost:8888' },
    // Shape when set: { provider, model, apiKey }
    llm: null
  },

  browse: {
    defaultEngine: 'playwright',
    // Shape when set: { type, host, port, username, password }
    proxy: null,
    humanlike: { minDelay: 500, maxDelay: 2000, scrollBehavior: true }
  },

  auth: {
    refreshInterval: '4h',
    cookieStore: './data/cookies.db'
  },

  cache: {
    path: './data/cache.db',
    searchTtl: 60 * 60, // 1 hour
    scrapeTtl: 24 * 60 * 60, // 24 hours
    screenshotTtl: 60 * 60 // 1 hour
  },

  rateLimit: {
    x: { postsPerHour: 5, minDelayMs: 30000 },
    reddit: { postsPerHour: 3, minDelayMs: 600000 }
  },

  concurrency: 3
}
37
+
38
/**
 * Load the user's configuration and merge it over DEFAULTS.
 *
 * A missing, unparsable, or non-object config file is not fatal: a warning
 * is printed (for the latter two) and the defaults are used unchanged.
 *
 * @param {string} [configPath] - Config file; defaults to `spectrawl.json` in the cwd.
 * @returns {object} DEFAULTS deep-merged with the user's settings.
 */
function loadConfig(configPath) {
  const filePath = configPath || path.join(process.cwd(), 'spectrawl.json')

  let userConfig = {}
  if (fs.existsSync(filePath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'))
      // Valid JSON can still be null, an array or a scalar; merging any of
      // those would throw or inject index keys into the config.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        userConfig = parsed
      } else {
        console.warn(`Warning: Ignoring ${filePath}: expected a JSON object at the top level`)
      }
    } catch (e) {
      console.warn(`Warning: Could not parse ${filePath}:`, e.message)
    }
  }

  return deepMerge(DEFAULTS, userConfig)
}
52
+
53
/**
 * Recursively merge `source` over `target` without mutating either.
 *
 * Plain objects are merged key by key; every other value (arrays, scalars,
 * null) from `source` replaces the target's value outright.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overrides.
 * @returns {object} A new object; nested values absent from `source` are shared with `target`.
 */
function deepMerge(target, source) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value)

  const result = { ...target }
  for (const key of Object.keys(source || {})) {
    // JSON.parse can produce an own "__proto__" key; assigning it would
    // replace the result's prototype instead of adding a property.
    if (key === '__proto__') continue

    const incoming = source[key]
    if (isPlainObject(incoming)) {
      // Only merge into an existing plain object. Spreading a string or an
      // array default would leak its characters/indices into the result.
      const hasOwnBase = Object.prototype.hasOwnProperty.call(result, key)
      const base = hasOwnBase && isPlainObject(result[key]) ? result[key] : {}
      result[key] = deepMerge(base, incoming)
    } else {
      result[key] = incoming
    }
  }
  return result
}
64
+
65
+ module.exports = { loadConfig, DEFAULTS }
package/src/events.js ADDED
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Event system for Spectrawl.
3
+ * Proactive notifications to agents about auth state changes.
4
+ */
5
+
6
/**
 * Minimal synchronous event emitter.
 * Handlers registered under '*' receive every event as `{ event, ...data }`.
 * A throwing handler is logged and does not stop the remaining handlers.
 */
class EventEmitter {
  constructor() {
    // Null-prototype map so event names such as "constructor" or "toString"
    // never collide with inherited Object.prototype members.
    this._handlers = Object.create(null)
  }

  /**
   * Register a handler.
   * @param {string} event - Event name, or '*' for all events.
   * @param {function} handler
   * @returns {this} For chaining.
   */
  on(event, handler) {
    if (!this._handlers[event]) this._handlers[event] = []
    this._handlers[event].push(handler)
    return this
  }

  /**
   * Remove every registration of `handler` for `event`.
   * @returns {this} For chaining, including when the event has no handlers.
   */
  off(event, handler) {
    if (this._handlers[event]) {
      this._handlers[event] = this._handlers[event].filter(h => h !== handler)
    }
    return this
  }

  /**
   * Call the handlers for `event` with `data`, then the wildcard handlers
   * with `{ event, ...data }`.
   * @param {string} event
   * @param {object} [data]
   */
  emit(event, data) {
    const describe = (err) => (err && err.message !== undefined ? err.message : err)

    // Iterate over a snapshot: a handler that registers another handler
    // for the same event must not extend the current dispatch.
    const handlers = (this._handlers[event] || []).slice()
    for (const handler of handlers) {
      try {
        handler(data)
      } catch (err) {
        console.warn(`Event handler error for ${event}:`, describe(err))
      }
    }

    // Also emit to wildcard handlers
    const wildcards = (this._handlers['*'] || []).slice()
    for (const handler of wildcards) {
      try {
        handler({ event, ...data })
      } catch (err) {
        console.warn(`Wildcard handler error:`, describe(err))
      }
    }
  }
}
44
+
45
// Names of the events Spectrawl emits, grouped by concern.
const EVENTS = {
  // Cookie lifetime
  COOKIE_EXPIRING: 'cookie_expiring',
  COOKIE_EXPIRED: 'cookie_expired',

  // Authentication
  AUTH_FAILED: 'auth_failed',
  AUTH_REFRESHED: 'auth_refreshed',

  // Actions and throttling
  RATE_LIMITED: 'rate_limited',
  ACTION_FAILED: 'action_failed',
  ACTION_SUCCESS: 'action_success',

  // Monitoring
  HEALTH_CHECK: 'health_check'
}
56
+
57
+ module.exports = { EventEmitter, EVENTS }
package/src/index.js ADDED
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Spectrawl — The unified web layer for AI agents.
3
+ * Search, browse, authenticate, act.
4
+ */
5
+
6
+ const { SearchEngine } = require('./search')
7
+ const { BrowseEngine } = require('./browse')
8
+ const { AuthManager } = require('./auth')
9
+ const { ActEngine } = require('./act')
10
+ const { Cache } = require('./cache')
11
+ const { EventEmitter, EVENTS } = require('./events')
12
+ const { CookieRefresher } = require('./auth/refresh')
13
+ const { loadConfig } = require('./config')
14
+
15
/**
 * Facade that wires together the search, browse, auth and act engines, the
 * cache, the event emitter and the cookie refresher from one config file.
 */
class Spectrawl {
  /**
   * @param {string} [configPath] - Passed to loadConfig(); see it for the default location.
   */
  constructor(configPath) {
    this.config = loadConfig(configPath)
    this.events = new EventEmitter()
    this.cache = new Cache(this.config.cache)
    this.searchEngine = new SearchEngine(this.config.search, this.cache)
    this.browseEngine = new BrowseEngine(this.config.browse, this.cache)
    this.auth = new AuthManager(this.config.auth)
    this.actEngine = new ActEngine(this.config, this.auth, this.browseEngine)
    this.refresher = new CookieRefresher(this.auth, this.events, this.config.auth)
  }

  /**
   * Search the web using free API cascade.
   * @param {string} query - Search query
   * @param {object} opts - { summarize, scrapeTop, engines }
   * @returns {Promise<{answer?, sources[], cached}>}
   */
  async search(query, opts = {}) {
    return this.searchEngine.search(query, opts)
  }

  /**
   * Browse a URL with stealth and optional auth.
   *
   * When `opts.auth` is set, the matching cookies are fetched and handed to
   * the browse engine as `_cookies`. The caller's `opts` object is left
   * unmodified; the engine receives a copy.
   *
   * @param {string} url - URL to browse
   * @param {object} opts - { auth, screenshot, extract, stealth }
   * @returns {Promise<{content, html, screenshot?, cookies?}>}
   */
  async browse(url, opts = {}) {
    if (!opts.auth) {
      return this.browseEngine.browse(url, opts)
    }
    const cookies = await this.auth.getCookies(opts.auth)
    return this.browseEngine.browse(url, { ...opts, _cookies: cookies })
  }

  /**
   * Perform an authenticated action on a platform and emit
   * ACTION_SUCCESS or ACTION_FAILED with the result.
   * @param {string} platform - Platform name (x, reddit, devto, etc.)
   * @param {string} action - Action name (post, comment, like, etc.)
   * @param {object} params - Action parameters
   * @returns {Promise<{success, data?, error?}>}
   */
  async act(platform, action, params = {}) {
    const result = await this.actEngine.execute(platform, action, params)

    const outcome = result.success ? EVENTS.ACTION_SUCCESS : EVENTS.ACTION_FAILED
    this.events.emit(outcome, { platform, action, ...result })

    return result
  }

  /**
   * Register event handler.
   * Events: cookie_expiring, cookie_expired, auth_failed, auth_refreshed,
   * rate_limited, action_failed, action_success, health_check
   * @param {string} event - Event name
   * @param {function} handler - Event handler
   * @returns {this} For chaining.
   */
  on(event, handler) {
    this.events.on(event, handler)
    return this
  }

  /**
   * Start the cookie refresh cron.
   * Call this when running as a server to auto-monitor auth health.
   */
  startRefreshCron() {
    this.refresher.start()
  }

  /**
   * Get health status of all authenticated sessions.
   * @returns {Promise<Array<{platform, account, status, expires?}>>}
   */
  async status() {
    return this.auth.getStatus()
  }

  /**
   * Shut down gracefully: stop the refresher, close the browser engine, and
   * close the cache even if closing the browser engine fails.
   */
  async close() {
    this.refresher.stop()
    try {
      await this.browseEngine.close()
    } finally {
      await this.cache.close()
    }
  }
}
107
+
108
+ module.exports = { Spectrawl, EVENTS }