spectrawl 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "spectrawl",
3
- "version": "0.4.2",
3
+ "version": "0.4.3",
4
4
  "description": "The unified web layer for AI agents. Search (8 engines), stealth browse, auth, act on 24 platforms. Self-hosted.",
5
5
  "main": "src/index.js",
6
6
  "types": "index.d.ts",
@@ -127,12 +127,21 @@ class BrowseEngine {
127
127
 
128
128
  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 })
129
129
 
130
- // Human-like delays
131
- await page.waitForTimeout(800 + Math.random() * 1500)
132
- await page.evaluate(() => {
133
- window.scrollBy({ top: Math.floor(Math.random() * 400) + 100, behavior: 'smooth' })
134
- })
135
- await page.waitForTimeout(300 + Math.random() * 700)
130
+ if (opts.fastMode) {
131
+ // Crawl mode: minimal delays, just enough for lazy-load triggers
132
+ await page.waitForTimeout(400)
133
+ await page.evaluate(() => {
134
+ window.scrollBy({ top: 500, behavior: 'instant' })
135
+ })
136
+ await page.waitForTimeout(200)
137
+ } else {
138
+ // Normal browse: full human-like delays
139
+ await page.waitForTimeout(800 + Math.random() * 1500)
140
+ await page.evaluate(() => {
141
+ window.scrollBy({ top: Math.floor(Math.random() * 400) + 100, behavior: 'smooth' })
142
+ })
143
+ await page.waitForTimeout(300 + Math.random() * 700)
144
+ }
136
145
 
137
146
  const result = {}
138
147
 
package/src/crawl.js CHANGED
@@ -260,8 +260,10 @@ class CrawlEngine {
260
260
  const totalMB = Math.floor(os.totalmem() / 1024 / 1024)
261
261
  const freeMB = Math.floor(os.freemem() / 1024 / 1024)
262
262
  const concurrency = detectConcurrency()
263
- // Estimate: each page takes ~4s with stealth delays
264
- const pagesPerMinute = concurrency * 15 // ~4s per page
263
+ // Realistic: ~0.8s per page with fast mode, limited by shared browser pipeline
264
+ // Concurrency helps, but not linearly: the shared browser is the bottleneck
265
+ const effectiveConcurrency = Math.min(concurrency, 5) // diminishing returns past 5
266
+ const pagesPerMinute = Math.floor(effectiveConcurrency * 30) // ~2s effective per page with overhead
265
267
  return {
266
268
  totalRamMB: totalMB,
267
269
  freeRamMB: freeMB,
@@ -279,7 +281,8 @@ class CrawlEngine {
279
281
  _cookies: cookies,
280
282
  timeout: config.timeout,
281
283
  html: true,
282
- noCache: true
284
+ noCache: true,
285
+ fastMode: true // crawl mode: reduced delays for speed
283
286
  })
284
287
  if (result?.content) {
285
288
  const linkSource = result.html || result.content