imperium-crawl 2.3.1 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -11
- package/dist/cli-explore.d.ts +30 -0
- package/dist/cli-explore.d.ts.map +1 -0
- package/dist/cli-explore.js +427 -0
- package/dist/cli-explore.js.map +1 -0
- package/dist/cli-recorder.d.ts +44 -0
- package/dist/cli-recorder.d.ts.map +1 -0
- package/dist/cli-recorder.js +67 -0
- package/dist/cli-recorder.js.map +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +51 -3
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +31 -1
- package/dist/constants.js.map +1 -1
- package/dist/flows/engine.d.ts +7 -0
- package/dist/flows/engine.d.ts.map +1 -0
- package/dist/flows/engine.js +183 -0
- package/dist/flows/engine.js.map +1 -0
- package/dist/flows/index.d.ts +6 -0
- package/dist/flows/index.d.ts.map +1 -0
- package/dist/flows/index.js +6 -0
- package/dist/flows/index.js.map +1 -0
- package/dist/flows/server.d.ts +11 -0
- package/dist/flows/server.d.ts.map +1 -0
- package/dist/flows/server.js +81 -0
- package/dist/flows/server.js.map +1 -0
- package/dist/flows/smart-target.d.ts +9 -0
- package/dist/flows/smart-target.d.ts.map +1 -0
- package/dist/flows/smart-target.js +84 -0
- package/dist/flows/smart-target.js.map +1 -0
- package/dist/flows/storage.d.ts +26 -0
- package/dist/flows/storage.d.ts.map +1 -0
- package/dist/flows/storage.js +118 -0
- package/dist/flows/storage.js.map +1 -0
- package/dist/flows/templates.d.ts +4 -0
- package/dist/flows/templates.d.ts.map +1 -0
- package/dist/flows/templates.js +35 -0
- package/dist/flows/templates.js.map +1 -0
- package/dist/flows/types.d.ts +3356 -0
- package/dist/flows/types.d.ts.map +1 -0
- package/dist/flows/types.js +133 -0
- package/dist/flows/types.js.map +1 -0
- package/dist/knowledge/index.d.ts +1 -0
- package/dist/knowledge/index.d.ts.map +1 -1
- package/dist/knowledge/index.js +1 -0
- package/dist/knowledge/index.js.map +1 -1
- package/dist/knowledge/record-browser.d.ts +17 -0
- package/dist/knowledge/record-browser.d.ts.map +1 -0
- package/dist/knowledge/record-browser.js +29 -0
- package/dist/knowledge/record-browser.js.map +1 -0
- package/dist/knowledge/store.d.ts +19 -0
- package/dist/knowledge/store.d.ts.map +1 -1
- package/dist/knowledge/store.js +63 -4
- package/dist/knowledge/store.js.map +1 -1
- package/dist/llm/retry.d.ts +4 -2
- package/dist/llm/retry.d.ts.map +1 -1
- package/dist/llm/retry.js +15 -4
- package/dist/llm/retry.js.map +1 -1
- package/dist/sessions/browser-connect.d.ts +30 -0
- package/dist/sessions/browser-connect.d.ts.map +1 -0
- package/dist/sessions/browser-connect.js +68 -0
- package/dist/sessions/browser-connect.js.map +1 -0
- package/dist/sessions/browser-state.d.ts +35 -0
- package/dist/sessions/browser-state.d.ts.map +1 -0
- package/dist/sessions/browser-state.js +74 -0
- package/dist/sessions/browser-state.js.map +1 -0
- package/dist/sessions/index.d.ts +1 -1
- package/dist/sessions/index.d.ts.map +1 -1
- package/dist/sessions/index.js +1 -1
- package/dist/sessions/index.js.map +1 -1
- package/dist/sessions/inject-cookies.d.ts +20 -0
- package/dist/sessions/inject-cookies.d.ts.map +1 -0
- package/dist/sessions/inject-cookies.js +57 -0
- package/dist/sessions/inject-cookies.js.map +1 -0
- package/dist/sessions/manager.d.ts +31 -1
- package/dist/sessions/manager.d.ts.map +1 -1
- package/dist/sessions/manager.js +97 -6
- package/dist/sessions/manager.js.map +1 -1
- package/dist/sessions/types.d.ts +2 -0
- package/dist/sessions/types.d.ts.map +1 -1
- package/dist/skills/chain.d.ts +61 -0
- package/dist/skills/chain.d.ts.map +1 -0
- package/dist/skills/chain.js +182 -0
- package/dist/skills/chain.js.map +1 -0
- package/dist/skills/conditions.d.ts +14 -0
- package/dist/skills/conditions.d.ts.map +1 -0
- package/dist/skills/conditions.js +208 -0
- package/dist/skills/conditions.js.map +1 -0
- package/dist/skills/manager.d.ts +47 -2
- package/dist/skills/manager.d.ts.map +1 -1
- package/dist/skills/manager.js.map +1 -1
- package/dist/skills/parameters.d.ts +49 -0
- package/dist/skills/parameters.d.ts.map +1 -0
- package/dist/skills/parameters.js +157 -0
- package/dist/skills/parameters.js.map +1 -0
- package/dist/snapshot/store.d.ts +8 -0
- package/dist/snapshot/store.d.ts.map +1 -1
- package/dist/snapshot/store.js +48 -0
- package/dist/snapshot/store.js.map +1 -1
- package/dist/stealth/antibot-detector.d.ts +1 -1
- package/dist/stealth/antibot-detector.d.ts.map +1 -1
- package/dist/stealth/antibot-detector.js +56 -0
- package/dist/stealth/antibot-detector.js.map +1 -1
- package/dist/stealth/browser-image-extract.d.ts +43 -0
- package/dist/stealth/browser-image-extract.d.ts.map +1 -0
- package/dist/stealth/browser-image-extract.js +268 -0
- package/dist/stealth/browser-image-extract.js.map +1 -0
- package/dist/stealth/browser.d.ts +5 -0
- package/dist/stealth/browser.d.ts.map +1 -1
- package/dist/stealth/browser.js +82 -1
- package/dist/stealth/browser.js.map +1 -1
- package/dist/stealth/chrome-profile.d.ts +1 -0
- package/dist/stealth/chrome-profile.d.ts.map +1 -1
- package/dist/stealth/chrome-profile.js +28 -5
- package/dist/stealth/chrome-profile.js.map +1 -1
- package/dist/stealth/detector.d.ts +10 -1
- package/dist/stealth/detector.d.ts.map +1 -1
- package/dist/stealth/detector.js +117 -25
- package/dist/stealth/detector.js.map +1 -1
- package/dist/stealth/headers.d.ts +1 -1
- package/dist/stealth/headers.d.ts.map +1 -1
- package/dist/stealth/headers.js +94 -2
- package/dist/stealth/headers.js.map +1 -1
- package/dist/stealth/index.d.ts +5 -0
- package/dist/stealth/index.d.ts.map +1 -1
- package/dist/stealth/index.js +257 -27
- package/dist/stealth/index.js.map +1 -1
- package/dist/stealth/proxy.d.ts +40 -1
- package/dist/stealth/proxy.d.ts.map +1 -1
- package/dist/stealth/proxy.js +90 -6
- package/dist/stealth/proxy.js.map +1 -1
- package/dist/tools/action-executor.d.ts +66 -0
- package/dist/tools/action-executor.d.ts.map +1 -0
- package/dist/tools/action-executor.js +403 -0
- package/dist/tools/action-executor.js.map +1 -0
- package/dist/tools/batch-download.d.ts +33 -0
- package/dist/tools/batch-download.d.ts.map +1 -0
- package/dist/tools/batch-download.js +208 -0
- package/dist/tools/batch-download.js.map +1 -0
- package/dist/tools/batch-scrape.d.ts +2 -2
- package/dist/tools/browser.d.ts +100 -0
- package/dist/tools/browser.d.ts.map +1 -0
- package/dist/tools/browser.js +448 -0
- package/dist/tools/browser.js.map +1 -0
- package/dist/tools/crawl.d.ts +2 -2
- package/dist/tools/create-skill.d.ts +2 -2
- package/dist/tools/discover-apis.d.ts +1 -1
- package/dist/tools/discover-apis.d.ts.map +1 -1
- package/dist/tools/discover-apis.js +3 -0
- package/dist/tools/discover-apis.js.map +1 -1
- package/dist/tools/download.d.ts +39 -6
- package/dist/tools/download.d.ts.map +1 -1
- package/dist/tools/download.js +248 -44
- package/dist/tools/download.js.map +1 -1
- package/dist/tools/extract.d.ts +1 -1
- package/dist/tools/image-search.d.ts +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +26 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/inspect-flow.d.ts +24 -0
- package/dist/tools/inspect-flow.d.ts.map +1 -0
- package/dist/tools/inspect-flow.js +23 -0
- package/dist/tools/inspect-flow.js.map +1 -0
- package/dist/tools/instagram.d.ts +2 -2
- package/dist/tools/interact.d.ts +91 -50
- package/dist/tools/interact.d.ts.map +1 -1
- package/dist/tools/interact.js +80 -299
- package/dist/tools/interact.js.map +1 -1
- package/dist/tools/knowledge.d.ts +24 -0
- package/dist/tools/knowledge.d.ts.map +1 -0
- package/dist/tools/knowledge.js +99 -0
- package/dist/tools/knowledge.js.map +1 -0
- package/dist/tools/list-flows.d.ts +21 -0
- package/dist/tools/list-flows.d.ts.map +1 -0
- package/dist/tools/list-flows.js +18 -0
- package/dist/tools/list-flows.js.map +1 -0
- package/dist/tools/list-skills.js +1 -1
- package/dist/tools/list-skills.js.map +1 -1
- package/dist/tools/manifest.d.ts.map +1 -1
- package/dist/tools/manifest.js +48 -0
- package/dist/tools/manifest.js.map +1 -1
- package/dist/tools/monitor-websocket.d.ts +1 -1
- package/dist/tools/monitor.d.ts +46 -0
- package/dist/tools/monitor.d.ts.map +1 -0
- package/dist/tools/monitor.js +213 -0
- package/dist/tools/monitor.js.map +1 -0
- package/dist/tools/news-search.d.ts +1 -1
- package/dist/tools/pdf-extract.d.ts +38 -0
- package/dist/tools/pdf-extract.d.ts.map +1 -0
- package/dist/tools/pdf-extract.js +244 -0
- package/dist/tools/pdf-extract.js.map +1 -0
- package/dist/tools/query-api.d.ts +6 -6
- package/dist/tools/readability.d.ts +2 -2
- package/dist/tools/record-flow.d.ts +39 -0
- package/dist/tools/record-flow.d.ts.map +1 -0
- package/dist/tools/record-flow.js +406 -0
- package/dist/tools/record-flow.js.map +1 -0
- package/dist/tools/reddit.d.ts +4 -4
- package/dist/tools/run-flow.d.ts +54 -0
- package/dist/tools/run-flow.d.ts.map +1 -0
- package/dist/tools/run-flow.js +47 -0
- package/dist/tools/run-flow.js.map +1 -0
- package/dist/tools/run-skill.d.ts +14 -4
- package/dist/tools/run-skill.d.ts.map +1 -1
- package/dist/tools/run-skill.js +74 -0
- package/dist/tools/run-skill.js.map +1 -1
- package/dist/tools/scrape.d.ts +9 -6
- package/dist/tools/scrape.d.ts.map +1 -1
- package/dist/tools/scrape.js +19 -1
- package/dist/tools/scrape.js.map +1 -1
- package/dist/tools/screenshot.d.ts.map +1 -1
- package/dist/tools/screenshot.js +6 -0
- package/dist/tools/screenshot.js.map +1 -1
- package/dist/tools/search.d.ts +1 -1
- package/dist/tools/serve-flow.d.ts +36 -0
- package/dist/tools/serve-flow.d.ts.map +1 -0
- package/dist/tools/serve-flow.js +42 -0
- package/dist/tools/serve-flow.js.map +1 -0
- package/dist/tools/snapshot.d.ts +5 -5
- package/dist/tools/snapshot.d.ts.map +1 -1
- package/dist/tools/snapshot.js +3 -0
- package/dist/tools/snapshot.js.map +1 -1
- package/dist/tools/validate-flow.d.ts +24 -0
- package/dist/tools/validate-flow.d.ts.map +1 -0
- package/dist/tools/validate-flow.js +23 -0
- package/dist/tools/validate-flow.js.map +1 -0
- package/dist/tools/video-search.d.ts +1 -1
- package/dist/tools/watch.d.ts +68 -0
- package/dist/tools/watch.d.ts.map +1 -0
- package/dist/tools/watch.js +224 -0
- package/dist/tools/watch.js.map +1 -0
- package/dist/tools/youtube.d.ts +2 -2
- package/dist/utils/fetcher.d.ts +13 -4
- package/dist/utils/fetcher.d.ts.map +1 -1
- package/dist/utils/fetcher.js +153 -23
- package/dist/utils/fetcher.js.map +1 -1
- package/package.json +19 -5
package/dist/utils/fetcher.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { smartFetch, StealthError } from "../stealth/index.js";
|
|
2
2
|
import { isAllowed } from "./robots.js";
|
|
3
3
|
import { getDomain } from "./url.js";
|
|
4
|
-
import { DEFAULT_CONCURRENCY } from "../constants.js";
|
|
4
|
+
import { DEFAULT_CONCURRENCY, DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS } from "../constants.js";
|
|
5
|
+
import { getKnowledgeEngine } from "../knowledge/index.js";
|
|
5
6
|
// ── Concurrency Limiter ──
|
|
6
7
|
export class ConcurrencyLimiter {
|
|
7
8
|
maxConcurrent;
|
|
@@ -30,22 +31,39 @@ export const defaultLimiter = new ConcurrencyLimiter();
|
|
|
30
31
|
const CIRCUIT_FAILURE_THRESHOLD = 5;
|
|
31
32
|
const CIRCUIT_OPEN_DURATION_MS = 60_000;
|
|
32
33
|
const CIRCUIT_PROBE_SUCCESSES = 3;
|
|
34
|
+
// Domain-level circuit: higher threshold — only opens when multiple endpoints fail
|
|
35
|
+
const DOMAIN_CIRCUIT_FAILURE_THRESHOLD = 10;
|
|
33
36
|
const circuits = new Map();
|
|
34
37
|
// Periodic cleanup: remove closed circuits idle for >1 hour
|
|
35
38
|
const CIRCUIT_STALE_MS = 3_600_000;
|
|
36
39
|
setInterval(() => {
|
|
37
40
|
const now = Date.now();
|
|
38
|
-
for (const [
|
|
41
|
+
for (const [key, circuit] of circuits) {
|
|
39
42
|
if (now - circuit.lastAccessed > CIRCUIT_STALE_MS) {
|
|
40
|
-
circuits.delete(
|
|
43
|
+
circuits.delete(key);
|
|
41
44
|
}
|
|
42
45
|
}
|
|
43
46
|
}, 300_000).unref();
|
|
44
|
-
|
|
45
|
-
|
|
47
|
+
/**
 * Derive the circuit-breaker key for a URL.
 *
 * The key is the URL's hostname followed by at most its first two non-empty
 * path segments, which gives endpoint-level granularity. A URL with no path
 * segments yields the bare hostname.
 *
 * @param {string} url - URL to derive the key from.
 * @returns {string} `hostname`, `hostname/seg1` or `hostname/seg1/seg2`; when
 *   `url` cannot be parsed by `new URL`, whatever `getDomain(url)` returns.
 */
function getCircuitKey(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        // Unparseable input: fall back to the project's domain helper.
        return getDomain(url);
    }
    const segments = target.pathname.split("/").filter((segment) => segment.length > 0);
    if (segments.length === 0) {
        return target.hostname;
    }
    return [target.hostname, ...segments.slice(0, 2)].join("/");
}
|
|
62
|
+
function getCircuit(key) {
|
|
63
|
+
let circuit = circuits.get(key);
|
|
46
64
|
if (!circuit) {
|
|
47
65
|
circuit = { state: "closed", failures: 0, openedAt: 0, probeSuccesses: 0, lastAccessed: Date.now() };
|
|
48
|
-
circuits.set(
|
|
66
|
+
circuits.set(key, circuit);
|
|
49
67
|
}
|
|
50
68
|
circuit.lastAccessed = Date.now();
|
|
51
69
|
// Check if open circuit should transition to half-open
|
|
@@ -55,8 +73,8 @@ function getCircuit(domain) {
|
|
|
55
73
|
}
|
|
56
74
|
return circuit;
|
|
57
75
|
}
|
|
58
|
-
function recordSuccess(
|
|
59
|
-
const circuit = getCircuit(
|
|
76
|
+
function recordSuccess(key) {
|
|
77
|
+
const circuit = getCircuit(key);
|
|
60
78
|
if (circuit.state === "half-open") {
|
|
61
79
|
circuit.probeSuccesses++;
|
|
62
80
|
if (circuit.probeSuccesses >= CIRCUIT_PROBE_SUCCESSES) {
|
|
@@ -69,8 +87,8 @@ function recordSuccess(domain) {
|
|
|
69
87
|
circuit.failures = 0;
|
|
70
88
|
}
|
|
71
89
|
}
|
|
72
|
-
function recordFailure(
|
|
73
|
-
const circuit = getCircuit(
|
|
90
|
+
function recordFailure(key) {
|
|
91
|
+
const circuit = getCircuit(key);
|
|
74
92
|
// Half-open probe failed → immediately reopen circuit
|
|
75
93
|
if (circuit.state === "half-open") {
|
|
76
94
|
circuit.state = "open";
|
|
@@ -84,14 +102,83 @@ function recordFailure(domain) {
|
|
|
84
102
|
circuit.openedAt = Date.now();
|
|
85
103
|
}
|
|
86
104
|
}
|
|
105
|
+
/**
 * Check the domain-level circuit: it is considered open when 3 or more
 * endpoint circuits belonging to this domain are currently in the "open" state.
 *
 * A circuit key belongs to the domain when it is exactly the domain (a URL
 * with no path) or starts with `domain + "/"`. A plain `startsWith(domain)`
 * would also count unrelated hosts that merely share a string prefix, such as
 * "example.com.evil.org/..." or "example.community" for "example.com".
 *
 * NOTE(review): this assumes `domain` is the same host string that prefixes
 * the circuit keys — confirm against the helper that produces `domain`.
 *
 * @param {string} domain - Host whose endpoint circuits should be counted.
 * @returns {boolean} true when at least 3 matching circuits are open.
 */
function isDomainCircuitOpen(domain) {
    const OPEN_ENDPOINTS_TO_TRIP = 3;
    const endpointPrefix = `${domain}/`;
    let openEndpoints = 0;
    for (const [key, circuit] of circuits) {
        if (circuit.state !== "open") {
            continue;
        }
        if (key === domain || key.startsWith(endpointPrefix)) {
            openEndpoints++;
            // Threshold reached — no need to scan the rest of the map.
            if (openEndpoints >= OPEN_ENDPOINTS_TO_TRIP) {
                return true;
            }
        }
    }
    return false;
}
|
|
87
117
|
// Exported for testing
|
|
88
|
-
export { circuits, getCircuit, recordSuccess, recordFailure, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
|
|
118
|
+
export { circuits, getCircuit, getCircuitKey, recordSuccess, recordFailure, isDomainCircuitOpen, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
|
|
89
119
|
// ── Exponential Backoff with Full Jitter (AWS pattern) ──
|
|
90
120
|
const BACKOFF_BASE_MS = 1000;
|
|
91
121
|
const BACKOFF_CAP_MS = 30_000;
|
|
92
|
-
|
|
122
|
+
const RATE_LIMIT_EXTRA_JITTER_MS = 10_000; // Extra jitter for 429 responses
|
|
123
|
+
/**
 * Compute a randomized retry delay in milliseconds.
 *
 * The base delay is a uniform random value in [0, ceiling), where ceiling is
 * BACKOFF_BASE_MS * 2^attempt capped at BACKOFF_CAP_MS. For 429 responses a
 * fixed 5000 ms plus a second random component of up to
 * RATE_LIMIT_EXTRA_JITTER_MS is added on top, so the result can exceed the cap.
 *
 * @param {number} attempt - Zero-based retry attempt number.
 * @param {boolean} [is429=false] - Whether the failure was an HTTP 429.
 * @returns {number} Delay in milliseconds.
 */
function fullJitterBackoff(attempt, is429 = false) {
    const ceiling = Math.min(BACKOFF_CAP_MS, BACKOFF_BASE_MS * 2 ** attempt);
    const jitter = Math.random() * ceiling;
    if (!is429) {
        return jitter;
    }
    // Rate-limited: push the retry further out and spread callers apart.
    return jitter + 5000 + Math.random() * RATE_LIMIT_EXTRA_JITTER_MS;
}
|
|
132
|
+
// ── Per-Domain Rate Limiter ──
|
|
133
|
+
const DEFAULT_DOMAIN_RATE_MS = parseInt(process.env.DOMAIN_RATE_LIMIT_MS || "500", 10);
|
|
134
|
+
/**
 * Spaces out requests to the same domain by a minimum delay.
 *
 * Each call reserves the next free time slot for its domain synchronously,
 * before it sleeps. Concurrent callers therefore queue one delay apart instead
 * of all reading the same timestamp and waking up together.
 */
class DomainThrottle {
    // domain -> epoch-ms slot reserved for the most recently scheduled request
    lastRequest = new Map();
    defaultDelay;
    /**
     * @param {number} [defaultDelayMs=DEFAULT_DOMAIN_RATE_MS] - Delay used when
     *   a call does not supply its own.
     */
    constructor(defaultDelayMs = DEFAULT_DOMAIN_RATE_MS) {
        this.defaultDelay = defaultDelayMs;
    }
    /**
     * Wait until this caller's reserved slot for the domain arrives.
     *
     * @param {string} domain - Key under which requests are spaced.
     * @param {number} [knowledgeDelayMs] - Per-call delay in ms; when null or
     *   undefined the instance default is used.
     * @returns {Promise<void>} Resolves once the request may proceed.
     */
    async throttle(domain, knowledgeDelayMs) {
        const delay = knowledgeDelayMs ?? this.defaultDelay;
        // Non-positive or NaN delay (e.g. an unparsable env value) disables throttling.
        if (!(delay > 0)) {
            return;
        }
        const now = Date.now();
        const last = this.lastRequest.get(domain) ?? 0;
        // Reserve the slot BEFORE awaiting so later callers see it immediately.
        const slot = Math.max(now, last + delay);
        this.lastRequest.set(domain, slot);
        if (slot > now) {
            await new Promise((r) => setTimeout(r, slot - now));
        }
    }
}
|
|
157
|
+
const domainThrottle = new DomainThrottle();
|
|
158
|
+
/**
 * Compute a request timeout from an observed average response time.
 *
 * The result is three times the average, clamped to the range
 * [DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS]. A missing, falsy or non-positive
 * average yields DEFAULT_TIMEOUT_MS.
 *
 * @param {number|undefined} avgResponseTimeMs - Average response time in ms.
 * @returns {number} Timeout in milliseconds.
 */
function computeAdaptiveTimeout(avgResponseTimeMs) {
    if (!avgResponseTimeMs) {
        return DEFAULT_TIMEOUT_MS;
    }
    if (avgResponseTimeMs <= 0) {
        return DEFAULT_TIMEOUT_MS;
    }
    const scaled = avgResponseTimeMs * 3;
    const floored = Math.max(DEFAULT_TIMEOUT_MS, scaled);
    return Math.min(MAX_TIMEOUT_MS, floored);
}
|
|
167
|
+
/**
 * Pick the stealth level to use for a given retry attempt.
 *
 * If the previous failure was a StealthError carrying a truthy
 * `antiBotSystem`, the result is always 3. Otherwise: attempt 0 uses the base
 * level (1 when `baseLevel` is falsy), attempt 1 uses base + 1 capped at 3,
 * and any other attempt uses 3.
 *
 * @param {number|undefined} baseLevel - Level requested by the caller.
 * @param {number} attempt - Zero-based attempt number.
 * @param {Error|undefined} lastError - Error from the previous attempt, if any.
 * @returns {number} Level to use for this attempt.
 */
function getEscalatedLevel(baseLevel, attempt, lastError) {
    const antiBotDetected = lastError instanceof StealthError && lastError.antiBotSystem;
    if (antiBotDetected) {
        return 3;
    }
    const startLevel = baseLevel || 1;
    switch (attempt) {
        case 0:
            return startLevel;
        case 1:
            return Math.min(startLevel + 1, 3);
        default:
            return 3;
    }
}
|
|
96
183
|
export async function fetchPage(url, options) {
|
|
97
184
|
const respectRobots = options?.respectRobots ?? (process.env.RESPECT_ROBOTS !== "false");
|
|
@@ -101,34 +188,77 @@ export async function fetchPage(url, options) {
|
|
|
101
188
|
throw new Error(`URL blocked by robots.txt: ${url}`);
|
|
102
189
|
}
|
|
103
190
|
}
|
|
104
|
-
//
|
|
191
|
+
// Per-endpoint circuit breaker check
|
|
105
192
|
const domain = getDomain(url);
|
|
106
|
-
const
|
|
193
|
+
const circuitKey = getCircuitKey(url);
|
|
194
|
+
const circuit = getCircuit(circuitKey);
|
|
107
195
|
if (circuit.state === "open") {
|
|
108
|
-
throw new Error(`Circuit breaker open for ${
|
|
196
|
+
throw new Error(`Circuit breaker open for endpoint ${circuitKey} — too many consecutive failures. Retry after cooldown.`);
|
|
197
|
+
}
|
|
198
|
+
// Domain-level circuit check (opens when 3+ endpoints are broken)
|
|
199
|
+
if (isDomainCircuitOpen(domain)) {
|
|
200
|
+
throw new Error(`Circuit breaker open for domain ${domain} — multiple endpoints failing. Retry after cooldown.`);
|
|
109
201
|
}
|
|
202
|
+
// Per-domain rate limiting — use knowledge engine's safe_rate_limit if available
|
|
203
|
+
const engine = getKnowledgeEngine();
|
|
204
|
+
const knowledge = await engine.get(domain);
|
|
205
|
+
const knowledgeDelayMs = knowledge?.safe_rate_limit
|
|
206
|
+
? Math.round(60_000 / knowledge.safe_rate_limit)
|
|
207
|
+
: undefined;
|
|
208
|
+
await domainThrottle.throttle(domain, knowledgeDelayMs);
|
|
209
|
+
// ── Adaptive timeout from knowledge engine ──
|
|
210
|
+
const adaptiveTimeout = computeAdaptiveTimeout(knowledge?.avg_response_time_ms);
|
|
211
|
+
const timeout = options?.timeout || adaptiveTimeout;
|
|
110
212
|
const retries = options?.retries ?? 2;
|
|
111
213
|
let lastError;
|
|
214
|
+
let lastHttpStatus = 0;
|
|
112
215
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
113
216
|
try {
|
|
114
|
-
|
|
115
|
-
|
|
217
|
+
// ── Smart retry: escalate stealth level on each attempt ──
|
|
218
|
+
const escalatedLevel = getEscalatedLevel(options?.forceLevel, attempt, lastError);
|
|
219
|
+
const attemptOptions = {
|
|
220
|
+
...options,
|
|
221
|
+
timeout,
|
|
222
|
+
// On retry, escalate stealth level (unless user forced a specific level)
|
|
223
|
+
forceLevel: attempt > 0 && !options?.forceLevel ? escalatedLevel : options?.forceLevel,
|
|
224
|
+
};
|
|
225
|
+
const result = await smartFetch(url, attemptOptions);
|
|
226
|
+
recordSuccess(circuitKey);
|
|
227
|
+
// Feed successful strategy back to knowledge engine
|
|
228
|
+
if (attempt > 0 && result.level > 1) {
|
|
229
|
+
engine.record({
|
|
230
|
+
url, domain,
|
|
231
|
+
levelUsed: result.level,
|
|
232
|
+
success: true,
|
|
233
|
+
responseTimeMs: 0, // Already recorded by smartFetch
|
|
234
|
+
antiBotSystem: result.antiBotSystem || null,
|
|
235
|
+
captchaType: result.captchaSolved ? "detected" : null,
|
|
236
|
+
proxyUsed: !!result.proxyUsed,
|
|
237
|
+
blocked: false,
|
|
238
|
+
httpStatus: result.status,
|
|
239
|
+
});
|
|
240
|
+
}
|
|
116
241
|
return result;
|
|
117
242
|
}
|
|
118
243
|
catch (err) {
|
|
119
244
|
lastError = err instanceof Error ? err : new Error(String(err));
|
|
120
|
-
recordFailure(
|
|
121
|
-
//
|
|
122
|
-
|
|
245
|
+
recordFailure(circuitKey);
|
|
246
|
+
// Track HTTP status for backoff decisions
|
|
247
|
+
if (err instanceof StealthError) {
|
|
248
|
+
lastHttpStatus = err.httpStatus;
|
|
249
|
+
}
|
|
250
|
+
// Check if endpoint circuit just opened
|
|
251
|
+
const updatedCircuit = getCircuit(circuitKey);
|
|
123
252
|
if (updatedCircuit.state === "open") {
|
|
124
253
|
// Enrich error message with StealthError info if available
|
|
125
254
|
const detail = err instanceof StealthError
|
|
126
255
|
? `L${err.lastLevel} HTTP ${err.httpStatus}${err.antiBotSystem ? ` [${err.antiBotSystem}]` : ""}`
|
|
127
256
|
: "";
|
|
128
|
-
throw new Error(`Circuit breaker opened for ${
|
|
257
|
+
throw new Error(`Circuit breaker opened for ${circuitKey}${detail ? ` (${detail})` : ""}: ${lastError.message}`);
|
|
129
258
|
}
|
|
130
259
|
if (attempt < retries) {
|
|
131
|
-
const
|
|
260
|
+
const is429 = lastHttpStatus === 429;
|
|
261
|
+
const delay = fullJitterBackoff(attempt, is429);
|
|
132
262
|
await new Promise((r) => setTimeout(r, delay));
|
|
133
263
|
}
|
|
134
264
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAA4D,MAAM,qBAAqB,CAAC;AACzH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAC1F,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAE3D,4BAA4B;AAE5B,MAAM,OAAO,kBAAkB;IAIT;IAHZ,OAAO,GAAG,CAAC,CAAC;IACZ,KAAK,GAAsB,EAAE,CAAC;IAEtC,YAAoB,gBAAwB,mBAAmB;QAA3C,kBAAa,GAAb,aAAa,CAA8B;IAAG,CAAC;IAEnE,KAAK,CAAC,GAAG,CAAI,EAAoB;QAC/B,OAAO,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YAC1C,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC;CACF;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;AAcvD,MAAM,yBAAyB,GAAG,CAAC,CAAC;AACpC,MAAM,wBAAwB,GAAG,MAAM,CAAC;AACxC,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAClC,mFAAmF;AACnF,MAAM,gCAAgC,GAAG,EAAE,CAAC;AAE5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B,CAAC;AAEnD,4DAA4D;AAC5D,MAAM,gBAAgB,GAAG,SAAS,CAAC;AACnC,WAAW,CAAC,GAAG,EAAE;IACf,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;QACtC,IAAI,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,gBAAgB,EAAE,CAAC;YAClD,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;AACH,CAAC,EAAE,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;AAEpB;;;GAGG;AACH,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnD,OAAO,UAAU,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC;IAC3E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;IACxB,CAA
C;AACH,CAAC;AAED,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QACrG,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,uDAAuD;IACvD,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,QAAQ,IAAI,wBAAwB,EAAE,CAAC;QAC1F,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC;QAC5B,OAAO,CAAC,cAAc,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,cAAc,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,cAAc,IAAI,uBAAuB,EAAE,CAAC;YACtD,kBAAkB;YAClB,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC;YACzB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,sDAAsD;IACtD,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,OAAO,CAAC,QAAQ,GAAG,yBAAyB,CAAC;QAC7C,OAAO;IACT,CAAC;IACD,OAAO,CAAC,QAAQ,EAAE,CAAC;IACnB,IAAI,OAAO,CAAC,QAAQ,IAAI,yBAAyB,EAAE,CAAC;QAClD,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAChC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,MAAc;IACzC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;QACtC,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;YACvD,aAAa,EAAE,CAAC;QAClB,CAAC;IACH,CAAC;IACD,OAAO,aAAa,IAAI,CAAC,CAAC;AAC5B,CAAC;AAED,uBAAuB;AACvB,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAElM,2DAA2D;AAE3D,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,cAAc,GAAG,MAAM,CAAC;AAC9B,MAAM,0BAA0B,GAAG,MAAM,CAAC,CAAC,iCAAiC;AAE5E,S
AAS,iBAAiB,CAAC,OAAe,EAAE,KAAK,GAAG,KAAK;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;IAClF,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,QAAQ,CAAC;IAC5C,2DAA2D;IAC3D,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,0BAA0B,CAAC;IACxE,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,gCAAgC;AAEhC,MAAM,sBAAsB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,KAAK,EAAE,EAAE,CAAC,CAAC;AAEvF,MAAM,cAAc;IACV,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,YAAY,CAAS;IAE7B,YAAY,iBAAyB,sBAAsB;QACzD,IAAI,CAAC,YAAY,GAAG,cAAc,CAAC;IACrC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,gBAAyB;QACtD,MAAM,KAAK,GAAG,gBAAgB,IAAI,IAAI,CAAC,YAAY,CAAC;QACpD,IAAI,KAAK,IAAI,CAAC;YAAE,OAAO;QAEvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,CAAC;QAE3B,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC;AAS5C;;;GAGG;AACH,SAAS,sBAAsB,CAAC,iBAAqC;IACnE,IAAI,CAAC,iBAAiB,IAAI,iBAAiB,IAAI,CAAC;QAAE,OAAO,kBAAkB,CAAC;IAC5E,OAAO,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,kBAAkB,EAAE,iBAAiB,GAAG,CAAC,CAAC,CAAC,CAAC;AACvF,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CACxB,SAAmC,EACnC,OAAe,EACf,SAA4B;IAE5B,wEAAwE;IACxE,IAAI,SAAS,YAAY,YAAY,IAAI,SAAS,CAAC,aAAa,EAAE,CAAC;QACjE,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,IAAI,GAAG,SAAS,IAAI,CAAC,CAAC;IAC5B,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC,CAAiB,CAAC;IAChE,OAAO,CAAC,CAAC,CAAC,2BAA2B;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAA2B;IACtE,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,OAAO,CAAC,CAAC;IAEzF,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,GAA
G,CAAC,CAAC;QACrC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;IACvC,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,qCAAqC,UAAU,yDAAyD,CAAC,CAAC;IAC5H,CAAC;IAED,kEAAkE;IAClE,IAAI,mBAAmB,CAAC,MAAM,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,mCAAmC,MAAM,sDAAsD,CAAC,CAAC;IACnH,CAAC;IAED,iFAAiF;IACjF,MAAM,MAAM,GAAG,kBAAkB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,gBAAgB,GAAG,SAAS,EAAE,eAAe;QACjD,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,SAAS,CAAC,eAAe,CAAC;QAChD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAExD,+CAA+C;IAC/C,MAAM,eAAe,GAAG,sBAAsB,CAAC,SAAS,EAAE,oBAAoB,CAAC,CAAC;IAChF,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,eAAe,CAAC;IAEpD,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,CAAC,CAAC;IACtC,IAAI,SAA4B,CAAC;IACjC,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,4DAA4D;YAC5D,MAAM,cAAc,GAAG,iBAAiB,CAAC,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;YAClF,MAAM,cAAc,GAAmB;gBACrC,GAAG,OAAO;gBACV,OAAO;gBACP,yEAAyE;gBACzE,UAAU,EAAE,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,OAAO,EAAE,UAAU;aACvF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;YACrD,aAAa,CAAC,UAAU,CAAC,CAAC;YAE1B,oDAAoD;YACpD,IAAI,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACpC,MAAM,CAAC,MAAM,CAAC;oBACZ,GAAG,EAAE,MAAM;oBACX,SAAS,EAAE,MAAM,CAAC,KAAK;oBACvB,OAAO,EAAE,IAAI;oBACb,cAAc,EAAE,CAAC,EAAE,iCAAiC;oBACpD,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI;oBAC3C,WAAW,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI;oBACrD,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,SAAS;oBAC7B,OAAO,EAAE,KAAK;oBACd,UAAU,EAAE,MAAM,CAAC,MAAM;iBAC1B,CAAC,CAAC;YACL,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,G
AAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAChE,aAAa,CAAC,UAAU,CAAC,CAAC;YAE1B,0CAA0C;YAC1C,IAAI,GAAG,YAAY,YAAY,EAAE,CAAC;gBAChC,cAAc,GAAG,GAAG,CAAC,UAAU,CAAC;YAClC,CAAC;YAED,wCAAwC;YACxC,MAAM,cAAc,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;YAC9C,IAAI,cAAc,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBACpC,2DAA2D;gBAC3D,MAAM,MAAM,GAAG,GAAG,YAAY,YAAY;oBACxC,CAAC,CAAC,IAAI,GAAG,CAAC,SAAS,SAAS,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBACjG,CAAC,CAAC,EAAE,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC;YACnH,CAAC;YAED,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC;gBACtB,MAAM,KAAK,GAAG,cAAc,KAAK,GAAG,CAAC;gBACrC,MAAM,KAAK,GAAG,iBAAiB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBAChD,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAU,CAAC;AACnB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "imperium-crawl",
|
|
3
|
-
"version": "2.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "2.5.1",
|
|
4
|
+
"description": "39-tool open-source CLI for web scraping, PDF extraction, content monitoring, reusable browser flows, RSS aggregation, and custom skills. Zero API keys for core tools.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"imperium-crawl": "dist/index.js",
|
|
@@ -11,7 +11,10 @@
|
|
|
11
11
|
"exports": {
|
|
12
12
|
".": "./dist/index.js",
|
|
13
13
|
"./tools/*": "./dist/tools/*.js",
|
|
14
|
-
"./stealth/*": "./dist/stealth/*.js"
|
|
14
|
+
"./stealth/*": "./dist/stealth/*.js",
|
|
15
|
+
"./sessions": "./dist/sessions/index.js",
|
|
16
|
+
"./sessions/*": "./dist/sessions/*.js",
|
|
17
|
+
"./utils/*": "./dist/utils/*.js"
|
|
15
18
|
},
|
|
16
19
|
"files": [
|
|
17
20
|
"dist",
|
|
@@ -23,7 +26,9 @@
|
|
|
23
26
|
"start": "node dist/index.js",
|
|
24
27
|
"test": "vitest run",
|
|
25
28
|
"test:watch": "vitest",
|
|
26
|
-
"prepublishOnly": "npm run build"
|
|
29
|
+
"prepublishOnly": "npm run build",
|
|
30
|
+
"autoresearch": "tsx autoresearch/eval.ts",
|
|
31
|
+
"autoresearch:baseline": "tsx autoresearch/eval.ts --baseline --verbose"
|
|
27
32
|
},
|
|
28
33
|
"keywords": [
|
|
29
34
|
"scraping",
|
|
@@ -31,7 +36,15 @@
|
|
|
31
36
|
"web-search",
|
|
32
37
|
"brave-search",
|
|
33
38
|
"firecrawl",
|
|
34
|
-
"cli"
|
|
39
|
+
"cli",
|
|
40
|
+
"pdf-extract",
|
|
41
|
+
"web-monitoring",
|
|
42
|
+
"url-watch",
|
|
43
|
+
"content-diff",
|
|
44
|
+
"intelligence-digest",
|
|
45
|
+
"browser-workflows",
|
|
46
|
+
"workflow-recorder",
|
|
47
|
+
"flow-api"
|
|
35
48
|
],
|
|
36
49
|
"author": "ImperiumTech",
|
|
37
50
|
"license": "MIT",
|
|
@@ -52,6 +65,7 @@
|
|
|
52
65
|
"normalize-url": "^8.1.1",
|
|
53
66
|
"ora": "^8.2.0",
|
|
54
67
|
"p-queue": "^8.1.1",
|
|
68
|
+
"pdfjs-dist": "^4.0.379",
|
|
55
69
|
"playwright": "1.52",
|
|
56
70
|
"robots-parser": "^3.0.1",
|
|
57
71
|
"rss-parser": "^3.13.0",
|