spectrawl 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -59
- package/package.json +1 -1
- package/src/act/adapters/devto.js +18 -3
- package/src/search/index.js +1 -1
- package/src/search/summarizer.js +3 -3
package/README.md
CHANGED
|
@@ -12,6 +12,40 @@ AI agents need to interact with the web. That means searching, browsing pages, l
|
|
|
12
12
|
npm install spectrawl
|
|
13
13
|
```
|
|
14
14
|
|
|
15
|
+
## Real Output
|
|
16
|
+
|
|
17
|
+
Here's actual output from Spectrawl vs Tavily on the same query:
|
|
18
|
+
|
|
19
|
+
**Query:** `"best open source AI agent frameworks 2025"`
|
|
20
|
+
|
|
21
|
+
### Spectrawl (free)
|
|
22
|
+
```
|
|
23
|
+
Time: 16.8s | Sources: 19
|
|
24
|
+
|
|
25
|
+
Answer: The leading open-source AI agent frameworks for 2025 include AutoGen,
|
|
26
|
+
CrewAI, LangChain, LangGraph, and Semantic Kernel [1, 2, 3]. AutoGen is
|
|
27
|
+
recognized for enabling complex multi-agent conversations, while CrewAI
|
|
28
|
+
focuses on orchestrating collaborative AI agents [1, 2]. LangChain and its
|
|
29
|
+
component LangGraph provide robust tools for building sophisticated agent
|
|
30
|
+
workflows and state management [1, 2, 3]. Semantic Kernel, developed by
|
|
31
|
+
Microsoft, integrates large language models with conventional programming
|
|
32
|
+
languages [1, 2, 3].
|
|
33
|
+
|
|
34
|
+
Other prominent frameworks include LlamaIndex, Haystack, BabyAGI, AgentGPT,
|
|
35
|
+
SuperAGI, MetaGPT, and Open Interpreter [1, 2].
|
|
36
|
+
```
|
|
37
|
+
**12 frameworks named, inline citations, 19 sources**
|
|
38
|
+
|
|
39
|
+
### Tavily ($0.01/query)
|
|
40
|
+
```
|
|
41
|
+
Time: 2s | Sources: 10
|
|
42
|
+
|
|
43
|
+
Answer: In 2025, LangGraph and Microsoft's AutoGen + Semantic Kernel are
|
|
44
|
+
top open-source AI agent frameworks, favored for their robust orchestration
|
|
45
|
+
and enterprise security features.
|
|
46
|
+
```
|
|
47
|
+
**3 frameworks named, no citations, 10 sources**
|
|
48
|
+
|
|
15
49
|
## Quick Start
|
|
16
50
|
|
|
17
51
|
```bash
|
|
@@ -23,32 +57,35 @@ export GEMINI_API_KEY=your-free-key # Get one at aistudio.google.com
|
|
|
23
57
|
const { Spectrawl } = require('spectrawl')
|
|
24
58
|
const web = new Spectrawl()
|
|
25
59
|
|
|
26
|
-
// Deep search — like Tavily but free
|
|
27
|
-
const result = await web.deepSearch('
|
|
28
|
-
console.log(result.answer) // AI-generated answer with citations
|
|
60
|
+
// Deep search — like Tavily but free, with better answers
|
|
61
|
+
const result = await web.deepSearch('how to build an MCP server in Node.js')
|
|
62
|
+
console.log(result.answer) // AI-generated answer with [1] [2] citations
|
|
29
63
|
console.log(result.sources) // [{ title, url, content, score }]
|
|
30
64
|
|
|
31
|
-
// Fast mode — snippets only,
|
|
65
|
+
// Fast mode — snippets only, skip scraping
|
|
32
66
|
const fast = await web.deepSearch('query', { mode: 'fast' })
|
|
33
67
|
|
|
34
68
|
// Basic search — raw results, no AI
|
|
35
69
|
const basic = await web.search('query')
|
|
36
70
|
```
|
|
37
71
|
|
|
38
|
-
|
|
72
|
+
## vs Tavily
|
|
39
73
|
|
|
40
74
|
| | Tavily | Spectrawl |
|
|
41
75
|
|---|---|---|
|
|
42
|
-
| Speed | ~2s | ~
|
|
43
|
-
|
|
|
44
|
-
|
|
|
45
|
-
|
|
|
76
|
+
| Speed | ~2s ✅ | ~7-17s |
|
|
77
|
+
| Answer quality | Generic (3 items) | **Detailed** (12+ items) ✅ |
|
|
78
|
+
| Inline citations | ❌ | **[1] [2] [3]** ✅ |
|
|
79
|
+
| Results per query | 10 | **12-19** ✅ |
|
|
46
80
|
| Cost | $0.01/query | **Free** ✅ |
|
|
47
81
|
| Self-hosted | No | **Yes** ✅ |
|
|
82
|
+
| Source ranking | No | **Domain trust scoring** ✅ |
|
|
48
83
|
| Stealth scraping | No | **Yes** ✅ |
|
|
49
84
|
| Auth + posting | No | **24 adapters** ✅ |
|
|
50
85
|
| Cached repeats | No | **<1ms** ✅ |
|
|
51
86
|
|
|
87
|
+
Spectrawl wins on answer quality, result volume, features, and cost. Tavily wins on speed.
|
|
88
|
+
|
|
52
89
|
## Search
|
|
53
90
|
|
|
54
91
|
Default cascade: **Gemini Grounded → Brave → DDG**
|
|
@@ -70,10 +107,10 @@ Gemini Grounded Search gives you Google-quality results through the Gemini API.
|
|
|
70
107
|
|
|
71
108
|
```
|
|
72
109
|
Query → Gemini Grounded + DDG (parallel)
|
|
73
|
-
→ Merge & deduplicate (12-
|
|
110
|
+
→ Merge & deduplicate (12-19 results)
|
|
74
111
|
→ Source quality ranking (boost GitHub/SO/Reddit, penalize SEO spam)
|
|
75
112
|
→ Parallel scraping (Jina → readability → Playwright fallback)
|
|
76
|
-
→ AI summarization with [1] [2] citations
|
|
113
|
+
→ AI summarization with [1] [2] citations (gemini-2.5-flash)
|
|
77
114
|
```
|
|
78
115
|
|
|
79
116
|
### What you get without any keys
|
|
@@ -92,56 +129,55 @@ Stealth browsing with anti-detection. Three tiers (auto-detected):
|
|
|
92
129
|
const page = await web.browse('https://example.com')
|
|
93
130
|
console.log(page.content) // extracted text/markdown
|
|
94
131
|
console.log(page.screenshot) // PNG buffer (if requested)
|
|
95
|
-
|
|
96
|
-
// With screenshot
|
|
97
|
-
const page = await web.browse('https://example.com', { screenshot: true })
|
|
98
132
|
```
|
|
99
133
|
|
|
100
134
|
Auto-fallback: if Jina and readability return too little content (<200 chars), Spectrawl renders the page with Playwright and extracts from the rendered DOM. Tavily can't do this — it fails on JS-heavy pages.
|
|
101
135
|
|
|
102
136
|
## Auth
|
|
103
137
|
|
|
104
|
-
Persistent cookie storage (SQLite), multi-account management, automatic
|
|
138
|
+
Persistent cookie storage (SQLite), multi-account management, automatic expiry detection.
|
|
105
139
|
|
|
106
140
|
```js
|
|
107
|
-
//
|
|
108
|
-
await web.auth.
|
|
141
|
+
// Add account
|
|
142
|
+
await web.auth.add('x', { account: '@myhandle', method: 'cookie', cookies })
|
|
109
143
|
|
|
110
144
|
// Check health
|
|
111
|
-
const accounts = await web.
|
|
145
|
+
const accounts = await web.auth.getStatus()
|
|
112
146
|
// [{ platform: 'x', account: '@myhandle', status: 'valid', expiresAt: '...' }]
|
|
113
147
|
```
|
|
114
148
|
|
|
149
|
+
A cookie-refresh cron job fires `cookie_expiring` and `cookie_expired` events, so you can act before accounts go stale.
|
|
150
|
+
|
|
115
151
|
## Act — 24 Platform Adapters
|
|
116
152
|
|
|
117
|
-
Post to
|
|
153
|
+
Post to 24+ platforms with one API:
|
|
118
154
|
|
|
119
155
|
```js
|
|
120
|
-
await web.act('
|
|
156
|
+
await web.act('github', 'create-issue', { repo: 'user/repo', title: 'Bug report', body: '...' })
|
|
121
157
|
await web.act('reddit', 'post', { subreddit: 'node', title: '...', text: '...' })
|
|
122
|
-
await web.act('
|
|
158
|
+
await web.act('devto', 'post', { title: '...', body: '...', tags: ['ai'] })
|
|
159
|
+
await web.act('huggingface', 'create-repo', { name: 'my-model', type: 'model' })
|
|
123
160
|
```
|
|
124
161
|
|
|
162
|
+
**Live tested:** GitHub ✅, Reddit ✅, Dev.to ✅, HuggingFace ✅, X (reads) ✅
|
|
163
|
+
|
|
125
164
|
| Platform | Auth Method | Actions |
|
|
126
165
|
|----------|-------------|---------|
|
|
127
|
-
| X/Twitter |
|
|
128
|
-
| Reddit | Cookie API
|
|
129
|
-
| Dev.to | REST API | post |
|
|
166
|
+
| X/Twitter | Cookie + OAuth 1.0a | post |
|
|
167
|
+
| Reddit | Cookie API | post, comment, delete |
|
|
168
|
+
| Dev.to | REST API key | post, update |
|
|
130
169
|
| Hashnode | GraphQL API | post |
|
|
131
170
|
| LinkedIn | Cookie API (Voyager) | post |
|
|
132
|
-
| IndieHackers | Browser automation | post, comment
|
|
133
|
-
| Medium | REST API | post
|
|
171
|
+
| IndieHackers | Browser automation | post, comment |
|
|
172
|
+
| Medium | REST API | post |
|
|
134
173
|
| GitHub | REST v3 | repo, file, issue, release |
|
|
135
|
-
| Discord | Bot API
|
|
136
|
-
| Product Hunt | GraphQL v2 | launch, comment
|
|
137
|
-
| Hacker News | Cookie
|
|
138
|
-
| YouTube | Data API v3 | comment
|
|
139
|
-
| Quora | Browser automation | answer
|
|
174
|
+
| Discord | Bot API | send, thread |
|
|
175
|
+
| Product Hunt | GraphQL v2 | launch, comment |
|
|
176
|
+
| Hacker News | Cookie API | submit, comment |
|
|
177
|
+
| YouTube | Data API v3 | comment |
|
|
178
|
+
| Quora | Browser automation | answer |
|
|
140
179
|
| HuggingFace | Hub API | repo, model card, upload |
|
|
141
180
|
| BetaList | REST API | submit |
|
|
142
|
-
| AlternativeTo | Browser automation | submit |
|
|
143
|
-
| SaaSHub | Browser automation | submit |
|
|
144
|
-
| DevHunt | Browser automation | submit |
|
|
145
181
|
| **14 Directories** | Generic adapter | submit |
|
|
146
182
|
|
|
147
183
|
Built-in rate limiting, content dedup (MD5, 24h window), and a dead-letter queue for retries.
|
|
@@ -156,11 +192,9 @@ Spectrawl ranks results by domain trust — something Tavily doesn't do:
|
|
|
156
192
|
|
|
157
193
|
```js
|
|
158
194
|
const web = new Spectrawl({
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
block: ['spamsite.com']
|
|
163
|
-
}
|
|
195
|
+
sourceRanker: {
|
|
196
|
+
boost: ['github.com', 'news.ycombinator.com'],
|
|
197
|
+
block: ['spamsite.com']
|
|
164
198
|
}
|
|
165
199
|
})
|
|
166
200
|
```
|
|
@@ -174,14 +208,14 @@ npx spectrawl serve --port 3900
|
|
|
174
208
|
```
|
|
175
209
|
POST /search { "query": "...", "summarize": true }
|
|
176
210
|
POST /browse { "url": "...", "screenshot": true }
|
|
177
|
-
POST /act { "platform": "
|
|
178
|
-
GET /status
|
|
179
|
-
GET /health
|
|
211
|
+
POST /act { "platform": "github", "action": "create-issue", ... }
|
|
212
|
+
GET /status — auth account health
|
|
213
|
+
GET /health — server health
|
|
180
214
|
```
|
|
181
215
|
|
|
182
216
|
## MCP Server
|
|
183
217
|
|
|
184
|
-
Works with any MCP-compatible agent
|
|
218
|
+
Works with any MCP-compatible agent (Claude, Cursor, OpenClaw, LangChain):
|
|
185
219
|
|
|
186
220
|
```bash
|
|
187
221
|
npx spectrawl mcp
|
|
@@ -208,19 +242,12 @@ npx spectrawl install-stealth # download Camoufox browser
|
|
|
208
242
|
{
|
|
209
243
|
"search": {
|
|
210
244
|
"cascade": ["gemini-grounded", "brave", "ddg"],
|
|
211
|
-
"scrapeTop":
|
|
245
|
+
"scrapeTop": 5
|
|
212
246
|
},
|
|
213
247
|
"cache": {
|
|
214
248
|
"searchTtl": 3600,
|
|
215
249
|
"scrapeTtl": 86400
|
|
216
250
|
},
|
|
217
|
-
"proxy": {
|
|
218
|
-
"localPort": 8080,
|
|
219
|
-
"strategy": "round-robin",
|
|
220
|
-
"upstreams": [
|
|
221
|
-
{ "url": "http://user:pass@proxy.example.com:8080" }
|
|
222
|
-
]
|
|
223
|
-
},
|
|
224
251
|
"rateLimit": {
|
|
225
252
|
"x": { "postsPerHour": 3 },
|
|
226
253
|
"reddit": { "postsPerHour": 5 }
|
|
@@ -231,14 +258,14 @@ npx spectrawl install-stealth # download Camoufox browser
|
|
|
231
258
|
## Environment Variables
|
|
232
259
|
|
|
233
260
|
```
|
|
234
|
-
GEMINI_API_KEY
|
|
235
|
-
BRAVE_API_KEY Brave Search
|
|
236
|
-
SERPER_API_KEY Serper.dev
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
OPENAI_API_KEY
|
|
241
|
-
ANTHROPIC_API_KEY
|
|
261
|
+
GEMINI_API_KEY Free — primary search + summarization (aistudio.google.com)
|
|
262
|
+
BRAVE_API_KEY Brave Search (2,000 free/month)
|
|
263
|
+
SERPER_API_KEY Serper.dev (2,500 trial queries)
|
|
264
|
+
GITHUB_TOKEN For GitHub adapter
|
|
265
|
+
DEVTO_API_KEY For Dev.to adapter
|
|
266
|
+
HF_TOKEN For HuggingFace adapter
|
|
267
|
+
OPENAI_API_KEY Alternative LLM for summarization
|
|
268
|
+
ANTHROPIC_API_KEY Alternative LLM for summarization
|
|
242
269
|
```
|
|
243
270
|
|
|
244
271
|
## License
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "spectrawl",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.11",
|
|
4
4
|
"description": "The unified web layer for AI agents. Search (6 engines), stealth browse (Camoufox + Playwright), auth (cookies, multi-account), act (24 adapters, 30+ platforms), proxy rotation. Self-hosted, free.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
@@ -82,16 +82,31 @@ function jsonRequest(method, url, body, headers) {
|
|
|
82
82
|
const urlObj = new URL(url)
|
|
83
83
|
const opts = {
|
|
84
84
|
hostname: urlObj.hostname,
|
|
85
|
-
path: urlObj.pathname,
|
|
85
|
+
path: urlObj.pathname + urlObj.search,
|
|
86
86
|
method,
|
|
87
|
-
headers: {
|
|
87
|
+
headers: {
|
|
88
|
+
...headers,
|
|
89
|
+
'Content-Length': Buffer.byteLength(body),
|
|
90
|
+
'User-Agent': 'Spectrawl/0.3',
|
|
91
|
+
'Accept': 'application/json'
|
|
92
|
+
}
|
|
88
93
|
}
|
|
89
94
|
const req = https.request(opts, res => {
|
|
95
|
+
// Handle redirects
|
|
96
|
+
if ([301, 302, 307, 308].includes(res.statusCode) && res.headers.location) {
|
|
97
|
+
return jsonRequest(method, res.headers.location, body, headers).then(resolve).catch(reject)
|
|
98
|
+
}
|
|
90
99
|
let data = ''
|
|
91
100
|
res.on('data', c => data += c)
|
|
92
101
|
res.on('end', () => {
|
|
102
|
+
if (!data && (res.statusCode >= 200 && res.statusCode < 300)) {
|
|
103
|
+
return resolve({ success: true, statusCode: res.statusCode })
|
|
104
|
+
}
|
|
105
|
+
if (res.statusCode >= 400) {
|
|
106
|
+
return reject(new Error(`Dev.to API ${res.statusCode}: ${data.slice(0, 200)}`))
|
|
107
|
+
}
|
|
93
108
|
try { resolve(JSON.parse(data)) }
|
|
94
|
-
catch (e) { reject(new Error(`Invalid Dev.to response: ${data.slice(0, 200)}`)) }
|
|
109
|
+
catch (e) { reject(new Error(`Invalid Dev.to response (${res.statusCode}): ${data.slice(0, 200)}`)) }
|
|
95
110
|
})
|
|
96
111
|
})
|
|
97
112
|
req.on('error', reject)
|
package/src/search/index.js
CHANGED
|
@@ -29,7 +29,7 @@ class SearchEngine {
|
|
|
29
29
|
this.config = config
|
|
30
30
|
this.cache = cache
|
|
31
31
|
this.cascade = config.cascade || ['ddg', 'brave', 'serper']
|
|
32
|
-
this.scrapeTop = config.scrapeTop ||
|
|
32
|
+
this.scrapeTop = config.scrapeTop || 5
|
|
33
33
|
this.summarizer = config.llm ? new Summarizer(config.llm) : null
|
|
34
34
|
|
|
35
35
|
// Gemini-powered features (free tier)
|
package/src/search/summarizer.js
CHANGED
|
@@ -39,8 +39,8 @@ class Summarizer {
|
|
|
39
39
|
if (!this.apiKey) return null
|
|
40
40
|
|
|
41
41
|
const context = sources
|
|
42
|
-
.slice(0,
|
|
43
|
-
.map((s, i) => `[${i + 1}] ${s.title}\n${s.url}\n${(s.fullContent || s.snippet || '').slice(0,
|
|
42
|
+
.slice(0, 8)
|
|
43
|
+
.map((s, i) => `[${i + 1}] ${s.title}\n${s.url}\n${(s.fullContent || s.snippet || '').slice(0, 1500)}`)
|
|
44
44
|
.join('\n\n')
|
|
45
45
|
|
|
46
46
|
const prompt = `Answer this question directly: "${query}"
|
|
@@ -131,7 +131,7 @@ Answer:`
|
|
|
131
131
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${this.apiKey}`
|
|
132
132
|
const body = JSON.stringify({
|
|
133
133
|
contents: [{ parts: [{ text: prompt }] }],
|
|
134
|
-
generationConfig: { temperature: 0.3, maxOutputTokens:
|
|
134
|
+
generationConfig: { temperature: 0.3, maxOutputTokens: 2048 }
|
|
135
135
|
})
|
|
136
136
|
|
|
137
137
|
const data = await postJson(url, body, { 'Content-Type': 'application/json' })
|