spectrawl 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "spectrawl",
|
|
3
|
-
"version": "0.3.2",
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"description": "The unified web layer for AI agents. Search (6 engines), stealth browse (Camoufox + Playwright), auth (cookies, multi-account), act (24 adapters, 30+ platforms), proxy rotation. Self-hosted, free.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "index.d.ts",
|
package/src/search/index.js
CHANGED
|
@@ -144,8 +144,8 @@ class SearchEngine {
|
|
|
144
144
|
// Step 3: Merge and deduplicate
|
|
145
145
|
let results = dedupeResults(resultSets.flat())
|
|
146
146
|
|
|
147
|
-
// Step 4: Rerank by relevance
|
|
148
|
-
if (this.reranker && opts.rerank !== false) {
|
|
147
|
+
// Step 4: Rerank by relevance (skip for Gemini Grounded — it already returns scored results)
|
|
148
|
+
if (this.reranker && opts.rerank !== false && !usesGrounded) {
|
|
149
149
|
results = await this.reranker.rerank(query, results)
|
|
150
150
|
}
|
|
151
151
|
|
|
@@ -167,7 +167,7 @@ class SearchEngine {
|
|
|
167
167
|
let answer = null
|
|
168
168
|
const summarizer = this.summarizer || (this.reranker ? new Summarizer({
|
|
169
169
|
provider: 'gemini',
|
|
170
|
-
model: 'gemini-2.
|
|
170
|
+
model: 'gemini-2.5-flash',
|
|
171
171
|
apiKey: process.env.GEMINI_API_KEY
|
|
172
172
|
}) : null)
|
|
173
173
|
|
|
@@ -177,12 +177,12 @@ class SearchEngine {
|
|
|
177
177
|
|
|
178
178
|
const response = {
|
|
179
179
|
answer,
|
|
180
|
-
sources: results.map(r => ({
|
|
180
|
+
sources: results.map((r, i) => ({
|
|
181
181
|
title: r.title,
|
|
182
182
|
url: r.url,
|
|
183
183
|
snippet: r.snippet,
|
|
184
184
|
content: r.fullContent?.slice(0, 2000) || r.snippet || '',
|
|
185
|
-
score: r.score ||
|
|
185
|
+
score: r.score || r.confidence || Math.max(0.5, 1 - (i * 0.05))
|
|
186
186
|
})),
|
|
187
187
|
queries, // show which queries were used
|
|
188
188
|
cached: false
|
|
@@ -8,7 +8,7 @@ const https = require('https')
|
|
|
8
8
|
class QueryExpander {
|
|
9
9
|
constructor(config = {}) {
|
|
10
10
|
this.provider = config.provider || 'gemini'
|
|
11
|
-
this.model = config.model || 'gemini-2.
|
|
11
|
+
this.model = config.model || 'gemini-2.5-flash'
|
|
12
12
|
this.apiKey = config.apiKey || process.env.GEMINI_API_KEY
|
|
13
13
|
this.variants = config.variants || 3
|
|
14
14
|
}
|
|
@@ -69,7 +69,7 @@ Example: ["alternative query 1", "alternative query 2", "alternative query 3"]`
|
|
|
69
69
|
|
|
70
70
|
async _call(prompt) {
|
|
71
71
|
if (this.provider === 'gemini') {
|
|
72
|
-
const model = this.model || 'gemini-2.
|
|
72
|
+
const model = this.model || 'gemini-2.5-flash'
|
|
73
73
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${this.apiKey}`
|
|
74
74
|
const body = JSON.stringify({
|
|
75
75
|
contents: [{ parts: [{ text: prompt }] }],
|
package/src/search/reranker.js
CHANGED
|
@@ -8,7 +8,7 @@ const https = require('https')
|
|
|
8
8
|
class Reranker {
|
|
9
9
|
constructor(config = {}) {
|
|
10
10
|
this.provider = config.provider || 'gemini'
|
|
11
|
-
this.model = config.model || 'gemini-2.
|
|
11
|
+
this.model = config.model || 'gemini-2.5-flash'
|
|
12
12
|
this.apiKey = config.apiKey || process.env.GEMINI_API_KEY
|
|
13
13
|
}
|
|
14
14
|
|
|
@@ -55,7 +55,7 @@ No explanation, just the array.`
|
|
|
55
55
|
|
|
56
56
|
async _call(prompt) {
|
|
57
57
|
if (this.provider === 'gemini') {
|
|
58
|
-
const model = this.model || 'gemini-2.
|
|
58
|
+
const model = this.model || 'gemini-2.5-flash'
|
|
59
59
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${this.apiKey}`
|
|
60
60
|
const body = JSON.stringify({
|
|
61
61
|
contents: [{ parts: [{ text: prompt }] }],
|
package/src/search/scraper.js
CHANGED
|
@@ -35,13 +35,13 @@ async function scrapeUrls(urls, opts = {}) {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
async function scrapeUrl(url, opts = {}) {
|
|
38
|
-
const { timeout = 10000, engine = 'auto' } = opts
|
|
38
|
+
const { timeout = 10000, engine = 'auto', browse } = opts
|
|
39
39
|
|
|
40
40
|
// Try Jina first if available (better markdown output)
|
|
41
41
|
if (engine === 'jina' || engine === 'auto') {
|
|
42
42
|
try {
|
|
43
43
|
const result = await jinaExtract(url)
|
|
44
|
-
if (result.content && result.content.length >
|
|
44
|
+
if (result.content && result.content.length > 200) {
|
|
45
45
|
return result.content
|
|
46
46
|
}
|
|
47
47
|
} catch (e) {
|
|
@@ -49,9 +49,44 @@ async function scrapeUrl(url, opts = {}) {
|
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
-
// Readability fallback
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
// Readability fallback (HTTP fetch + HTML→markdown)
|
|
53
|
+
try {
|
|
54
|
+
const html = await fetchPage(url, timeout)
|
|
55
|
+
const content = extractMarkdown(html)
|
|
56
|
+
if (content && content.length > 200) {
|
|
57
|
+
return content
|
|
58
|
+
}
|
|
59
|
+
} catch (e) {
|
|
60
|
+
// Fall through to browser
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Browser fallback for JS-rendered pages or when extraction is too short
|
|
64
|
+
// This is where we beat Tavily — they can't render JS pages
|
|
65
|
+
if (browse !== false) {
|
|
66
|
+
try {
|
|
67
|
+
const { BrowseEngine } = require('../browse')
|
|
68
|
+
const browser = new BrowseEngine()
|
|
69
|
+
const result = await browser.browse(url, {
|
|
70
|
+
timeout,
|
|
71
|
+
extractText: true,
|
|
72
|
+
screenshot: false
|
|
73
|
+
})
|
|
74
|
+
await browser.close()
|
|
75
|
+
if (result.text && result.text.length > 200) {
|
|
76
|
+
return result.text
|
|
77
|
+
}
|
|
78
|
+
} catch (e) {
|
|
79
|
+
// All methods exhausted
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Return whatever we got, even if short
|
|
84
|
+
try {
|
|
85
|
+
const html = await fetchPage(url, timeout)
|
|
86
|
+
return extractMarkdown(html)
|
|
87
|
+
} catch (e) {
|
|
88
|
+
return ''
|
|
89
|
+
}
|
|
55
90
|
}
|
|
56
91
|
|
|
57
92
|
function fetchPage(url, timeout = 10000, redirects = 3) {
|
package/src/search/summarizer.js
CHANGED
|
@@ -110,7 +110,7 @@ Answer:`
|
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
async _gemini(prompt) {
|
|
113
|
-
const model = this.model || 'gemini-2.
|
|
113
|
+
const model = this.model || 'gemini-2.5-flash'
|
|
114
114
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${this.apiKey}`
|
|
115
115
|
const body = JSON.stringify({
|
|
116
116
|
contents: [{ parts: [{ text: prompt }] }],
|