imperium-crawl 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/README.md +86 -9
  2. package/dist/cli.d.ts.map +1 -1
  3. package/dist/cli.js +23 -3
  4. package/dist/cli.js.map +1 -1
  5. package/dist/constants.d.ts +1 -1
  6. package/dist/constants.d.ts.map +1 -1
  7. package/dist/constants.js +31 -1
  8. package/dist/constants.js.map +1 -1
  9. package/dist/flows/engine.d.ts +7 -0
  10. package/dist/flows/engine.d.ts.map +1 -0
  11. package/dist/flows/engine.js +183 -0
  12. package/dist/flows/engine.js.map +1 -0
  13. package/dist/flows/index.d.ts +6 -0
  14. package/dist/flows/index.d.ts.map +1 -0
  15. package/dist/flows/index.js +6 -0
  16. package/dist/flows/index.js.map +1 -0
  17. package/dist/flows/server.d.ts +11 -0
  18. package/dist/flows/server.d.ts.map +1 -0
  19. package/dist/flows/server.js +81 -0
  20. package/dist/flows/server.js.map +1 -0
  21. package/dist/flows/smart-target.d.ts +9 -0
  22. package/dist/flows/smart-target.d.ts.map +1 -0
  23. package/dist/flows/smart-target.js +84 -0
  24. package/dist/flows/smart-target.js.map +1 -0
  25. package/dist/flows/storage.d.ts +26 -0
  26. package/dist/flows/storage.d.ts.map +1 -0
  27. package/dist/flows/storage.js +118 -0
  28. package/dist/flows/storage.js.map +1 -0
  29. package/dist/flows/templates.d.ts +4 -0
  30. package/dist/flows/templates.d.ts.map +1 -0
  31. package/dist/flows/templates.js +35 -0
  32. package/dist/flows/templates.js.map +1 -0
  33. package/dist/flows/types.d.ts +3356 -0
  34. package/dist/flows/types.d.ts.map +1 -0
  35. package/dist/flows/types.js +133 -0
  36. package/dist/flows/types.js.map +1 -0
  37. package/dist/knowledge/store.d.ts +19 -0
  38. package/dist/knowledge/store.d.ts.map +1 -1
  39. package/dist/knowledge/store.js +63 -4
  40. package/dist/knowledge/store.js.map +1 -1
  41. package/dist/sessions/browser-connect.d.ts +30 -0
  42. package/dist/sessions/browser-connect.d.ts.map +1 -0
  43. package/dist/sessions/browser-connect.js +68 -0
  44. package/dist/sessions/browser-connect.js.map +1 -0
  45. package/dist/sessions/browser-state.d.ts +35 -0
  46. package/dist/sessions/browser-state.d.ts.map +1 -0
  47. package/dist/sessions/browser-state.js +74 -0
  48. package/dist/sessions/browser-state.js.map +1 -0
  49. package/dist/sessions/inject-cookies.d.ts +20 -0
  50. package/dist/sessions/inject-cookies.d.ts.map +1 -0
  51. package/dist/sessions/inject-cookies.js +57 -0
  52. package/dist/sessions/inject-cookies.js.map +1 -0
  53. package/dist/sessions/manager.d.ts +11 -1
  54. package/dist/sessions/manager.d.ts.map +1 -1
  55. package/dist/sessions/manager.js +40 -6
  56. package/dist/sessions/manager.js.map +1 -1
  57. package/dist/snapshot/store.d.ts +8 -0
  58. package/dist/snapshot/store.d.ts.map +1 -1
  59. package/dist/snapshot/store.js +48 -0
  60. package/dist/snapshot/store.js.map +1 -1
  61. package/dist/stealth/antibot-detector.d.ts +1 -1
  62. package/dist/stealth/antibot-detector.d.ts.map +1 -1
  63. package/dist/stealth/antibot-detector.js +56 -0
  64. package/dist/stealth/antibot-detector.js.map +1 -1
  65. package/dist/stealth/browser-image-extract.d.ts +43 -0
  66. package/dist/stealth/browser-image-extract.d.ts.map +1 -0
  67. package/dist/stealth/browser-image-extract.js +268 -0
  68. package/dist/stealth/browser-image-extract.js.map +1 -0
  69. package/dist/stealth/browser.d.ts +5 -0
  70. package/dist/stealth/browser.d.ts.map +1 -1
  71. package/dist/stealth/browser.js +82 -1
  72. package/dist/stealth/browser.js.map +1 -1
  73. package/dist/stealth/chrome-profile.d.ts +1 -0
  74. package/dist/stealth/chrome-profile.d.ts.map +1 -1
  75. package/dist/stealth/chrome-profile.js +28 -5
  76. package/dist/stealth/chrome-profile.js.map +1 -1
  77. package/dist/stealth/detector.d.ts +10 -1
  78. package/dist/stealth/detector.d.ts.map +1 -1
  79. package/dist/stealth/detector.js +117 -25
  80. package/dist/stealth/detector.js.map +1 -1
  81. package/dist/stealth/headers.d.ts +1 -1
  82. package/dist/stealth/headers.d.ts.map +1 -1
  83. package/dist/stealth/headers.js +94 -2
  84. package/dist/stealth/headers.js.map +1 -1
  85. package/dist/stealth/index.d.ts +4 -0
  86. package/dist/stealth/index.d.ts.map +1 -1
  87. package/dist/stealth/index.js +207 -25
  88. package/dist/stealth/index.js.map +1 -1
  89. package/dist/stealth/proxy.d.ts +40 -1
  90. package/dist/stealth/proxy.d.ts.map +1 -1
  91. package/dist/stealth/proxy.js +90 -6
  92. package/dist/stealth/proxy.js.map +1 -1
  93. package/dist/tools/action-executor.d.ts +2 -0
  94. package/dist/tools/action-executor.d.ts.map +1 -1
  95. package/dist/tools/action-executor.js +38 -0
  96. package/dist/tools/action-executor.js.map +1 -1
  97. package/dist/tools/batch-download.d.ts +33 -0
  98. package/dist/tools/batch-download.d.ts.map +1 -0
  99. package/dist/tools/batch-download.js +208 -0
  100. package/dist/tools/batch-download.js.map +1 -0
  101. package/dist/tools/browser.d.ts +100 -0
  102. package/dist/tools/browser.d.ts.map +1 -0
  103. package/dist/tools/browser.js +448 -0
  104. package/dist/tools/browser.js.map +1 -0
  105. package/dist/tools/download.d.ts +35 -2
  106. package/dist/tools/download.d.ts.map +1 -1
  107. package/dist/tools/download.js +245 -44
  108. package/dist/tools/download.js.map +1 -1
  109. package/dist/tools/index.d.ts.map +1 -1
  110. package/dist/tools/index.js +23 -0
  111. package/dist/tools/index.js.map +1 -1
  112. package/dist/tools/inspect-flow.d.ts +24 -0
  113. package/dist/tools/inspect-flow.d.ts.map +1 -0
  114. package/dist/tools/inspect-flow.js +23 -0
  115. package/dist/tools/inspect-flow.js.map +1 -0
  116. package/dist/tools/interact.d.ts +28 -15
  117. package/dist/tools/interact.d.ts.map +1 -1
  118. package/dist/tools/interact.js +48 -1
  119. package/dist/tools/interact.js.map +1 -1
  120. package/dist/tools/list-flows.d.ts +21 -0
  121. package/dist/tools/list-flows.d.ts.map +1 -0
  122. package/dist/tools/list-flows.js +18 -0
  123. package/dist/tools/list-flows.js.map +1 -0
  124. package/dist/tools/manifest.d.ts.map +1 -1
  125. package/dist/tools/manifest.js +43 -0
  126. package/dist/tools/manifest.js.map +1 -1
  127. package/dist/tools/monitor.d.ts +46 -0
  128. package/dist/tools/monitor.d.ts.map +1 -0
  129. package/dist/tools/monitor.js +213 -0
  130. package/dist/tools/monitor.js.map +1 -0
  131. package/dist/tools/pdf-extract.d.ts +38 -0
  132. package/dist/tools/pdf-extract.d.ts.map +1 -0
  133. package/dist/tools/pdf-extract.js +244 -0
  134. package/dist/tools/pdf-extract.js.map +1 -0
  135. package/dist/tools/record-flow.d.ts +39 -0
  136. package/dist/tools/record-flow.d.ts.map +1 -0
  137. package/dist/tools/record-flow.js +406 -0
  138. package/dist/tools/record-flow.js.map +1 -0
  139. package/dist/tools/run-flow.d.ts +54 -0
  140. package/dist/tools/run-flow.d.ts.map +1 -0
  141. package/dist/tools/run-flow.js +47 -0
  142. package/dist/tools/run-flow.js.map +1 -0
  143. package/dist/tools/run-skill.d.ts +2 -2
  144. package/dist/tools/run-skill.d.ts.map +1 -1
  145. package/dist/tools/run-skill.js +1 -0
  146. package/dist/tools/run-skill.js.map +1 -1
  147. package/dist/tools/scrape.d.ts.map +1 -1
  148. package/dist/tools/scrape.js +17 -1
  149. package/dist/tools/scrape.js.map +1 -1
  150. package/dist/tools/serve-flow.d.ts +36 -0
  151. package/dist/tools/serve-flow.d.ts.map +1 -0
  152. package/dist/tools/serve-flow.js +42 -0
  153. package/dist/tools/serve-flow.js.map +1 -0
  154. package/dist/tools/validate-flow.d.ts +24 -0
  155. package/dist/tools/validate-flow.d.ts.map +1 -0
  156. package/dist/tools/validate-flow.js +23 -0
  157. package/dist/tools/validate-flow.js.map +1 -0
  158. package/dist/tools/watch.d.ts +68 -0
  159. package/dist/tools/watch.d.ts.map +1 -0
  160. package/dist/tools/watch.js +224 -0
  161. package/dist/tools/watch.js.map +1 -0
  162. package/dist/utils/fetcher.d.ts +13 -4
  163. package/dist/utils/fetcher.d.ts.map +1 -1
  164. package/dist/utils/fetcher.js +121 -24
  165. package/dist/utils/fetcher.js.map +1 -1
  166. package/package.json +15 -4
@@ -0,0 +1,224 @@
1
+ /**
2
+ * watch — one-shot change detector for URLs.
3
+ *
4
+ * v2.5.0: one-shot mode only. Snapshots content, hashes it, diffs against
5
+ * the previous snapshot for the same URL. Fires a webhook on change.
6
+ *
7
+ * Daemon mode (SIGINT loop) is deferred to v2.6.0 — use cron externally:
8
+ * 0,30 * * * * imperium-crawl watch --url X --output-dir /var/watch
9
+ */
10
+ import { z } from "zod";
11
+ import { createHash } from "node:crypto";
12
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
13
+ import { existsSync } from "node:fs";
14
+ import { join, resolve as resolvePath } from "node:path";
15
+ import { JSDOM } from "jsdom";
16
+ import { Readability } from "@mozilla/readability";
17
+ import { fetchPage } from "../utils/fetcher.js";
18
+ import { htmlToMarkdown } from "../utils/markdown.js";
19
+ import { toolResult, errorResult } from "../utils/tool-response.js";
20
+ import { debugLog } from "../utils/debug.js";
21
+ import { MAX_URL_LENGTH } from "../constants.js";
22
/** Name of this tool. */
export const name = "watch";

/** Human-readable summary of the tool. */
export const description = "One-shot change detector: scrape a URL, hash its content, and compare against the last snapshot. Fires a webhook on change. Run via cron for periodic monitoring.";

/**
 * Input schema for the watch tool.
 *
 * Every field except `url` is optional: `webhook` is explicitly optional and
 * the remaining fields carry defaults.
 */
export const schema = z.object({
    // Page to check; length is capped at MAX_URL_LENGTH.
    url: z.string().max(MAX_URL_LENGTH).describe("URL to watch"),
    // Where snapshots and the state file are written.
    output_dir: z.string().default("./data/watch").describe("Directory to persist snapshots and state"),
    // Which representation of the page is hashed and diffed.
    hash_on: z.enum(["content", "readability", "markdown"]).default("readability").describe("What to hash: full HTML, readability main content, or markdown"),
    // Optional endpoint notified when a change is detected.
    webhook: z.string().max(MAX_URL_LENGTH).optional().describe("If set, POST a JSON payload to this URL on detected change"),
    // Shape of the `diff` field in the result.
    diff_format: z.enum(["unified", "json"]).default("unified").describe("Diff representation in the result"),
    // Accepted for forward compatibility; see the field description.
    one_shot: z.boolean().default(true).describe("v2.5.0: always true. Daemon mode lands in v2.6.0."),
});
51
/**
 * Derive a short, fixed-length identifier for a URL.
 *
 * @param {string} url - URL to identify.
 * @returns {string} The first 16 hex characters of the URL's SHA-1 digest.
 */
function slugify(url) {
    const digest = createHash("sha1");
    digest.update(url);
    const hex = digest.digest("hex");
    return hex.substring(0, 16);
}
54
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param {string} s - Text to hash.
 * @returns {string} Lowercase hex digest (64 characters).
 */
function hashString(s) {
    const hasher = createHash("sha256");
    hasher.update(s);
    return hasher.digest("hex");
}
57
/**
 * Produce a compact, order-insensitive line diff between two texts.
 *
 * This is not a real unified diff: lines are matched by content, not by
 * position. Matching is count-aware, so if a line occurs three times in
 * `prev` and once in `next`, two occurrences are reported as removed.
 * (A plain set-membership check would report nothing in that case even
 * though the content changed.)
 *
 * @param {string} prev - Previous text.
 * @param {string} next - Current text.
 * @param {number} [maxLines=200] - Maximum number of diff lines in the body;
 *   the remainder is summarised as "... (N more lines)".
 * @returns {string} A header `@@ -<prev line count> +<next line count> @@
 *   (<removed> removed, <added> added)` followed by `- ` lines, then `+ ` lines.
 */
function simpleUnifiedDiff(prev, next, maxLines = 200) {
    const prevLines = prev.split("\n");
    const nextLines = next.split("\n");
    // Lines of `lines` that have no remaining counterpart in `other`.
    const unmatched = (lines, other) => {
        const budget = new Map();
        for (const line of other) {
            budget.set(line, (budget.get(line) ?? 0) + 1);
        }
        return lines.filter((line) => {
            const remaining = budget.get(line) ?? 0;
            if (remaining === 0) {
                return true;
            }
            budget.set(line, remaining - 1);
            return false;
        });
    };
    const removedLines = unmatched(prevLines, nextLines);
    const addedLines = unmatched(nextLines, prevLines);
    const out = [
        ...removedLines.map((line) => `- ${line}`),
        ...addedLines.map((line) => `+ ${line}`),
    ];
    const header = `@@ -${prevLines.length} +${nextLines.length} @@ (${removedLines.length} removed, ${addedLines.length} added)`;
    const body = out.slice(0, maxLines).join("\n");
    const truncated = out.length > maxLines ? `\n... (${out.length - maxLines} more lines)` : "";
    return `${header}\n${body}${truncated}`;
}
82
/**
 * Produce an order-insensitive line diff between two texts as a JSON string.
 *
 * Lines are matched by content with occurrence counts taken into account, so
 * a change in how many times a line repeats is reported rather than silently
 * dropped.
 *
 * @param {string} prev - Previous text.
 * @param {string} next - Current text.
 * @returns {string} JSON of the form
 *   `{ removed: string[], added: string[], prev_lines: number, next_lines: number }`.
 */
function jsonDiff(prev, next) {
    const prevLines = prev.split("\n");
    const nextLines = next.split("\n");
    // Lines of `lines` that have no remaining counterpart in `other`.
    const unmatched = (lines, other) => {
        const budget = new Map();
        for (const line of other) {
            budget.set(line, (budget.get(line) ?? 0) + 1);
        }
        return lines.filter((line) => {
            const remaining = budget.get(line) ?? 0;
            if (remaining === 0) {
                return true;
            }
            budget.set(line, remaining - 1);
            return false;
        });
    };
    const removed = unmatched(prevLines, nextLines);
    const added = unmatched(nextLines, prevLines);
    return JSON.stringify({ removed, added, prev_lines: prevLines.length, next_lines: nextLines.length });
}
91
/**
 * Reduce a page to the text that is hashed and diffed.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} url - Page URL, passed to JSDOM as the document URL.
 * @param {"content"|"readability"|"markdown"} hashOn - Which representation to use:
 *   - "content": the HTML unchanged;
 *   - "markdown": `htmlToMarkdown(html)`;
 *   - anything else (i.e. "readability"): the trimmed text content extracted by
 *     Readability, falling back to markdown when extraction throws or yields
 *     no usable text.
 * @returns {Promise<string>} The signature text.
 */
export async function computeSignature(html, url, hashOn) {
    if (hashOn === "content")
        return html;
    if (hashOn === "markdown")
        return htmlToMarkdown(html);
    // readability
    try {
        const dom = new JSDOM(html, { url });
        const article = new Readability(dom.window.document).parse();
        // Trim BEFORE testing for emptiness: whitespace-only text is truthy but
        // trims to "", which would give every such page the same signature.
        const text = article?.textContent?.trim();
        if (text)
            return text;
    }
    catch {
        // Extraction is best-effort; fall through to the markdown fallback.
    }
    return htmlToMarkdown(html);
}
109
/**
 * Load the persisted watch state (a map of URL -> state record).
 *
 * Loading is best-effort: a missing, unreadable or malformed file yields an
 * empty state so the run proceeds as a first run.
 *
 * @param {string} stateFile - Path to the JSON state file.
 * @returns {Promise<object>} The parsed state object, or `{}`.
 */
async function loadState(stateFile) {
    if (!existsSync(stateFile))
        return {};
    try {
        const raw = await readFile(stateFile, "utf-8");
        const parsed = JSON.parse(raw);
        // Valid JSON is not necessarily a record: "null", "[]" or "42" all parse.
        // The caller indexes the result by URL, which throws on null and is
        // meaningless on arrays/primitives, so treat those as an empty state.
        const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
        return isRecord ? parsed : {};
    }
    catch {
        return {};
    }
}
120
/**
 * Persist the watch state as pretty-printed (2-space) JSON.
 *
 * @param {string} stateFile - Path of the state file to write.
 * @param {object} state - State object to serialise.
 * @returns {Promise<void>}
 */
async function saveState(stateFile, state) {
    const serialized = JSON.stringify(state, null, 2);
    await writeFile(stateFile, serialized, "utf-8");
}
123
/**
 * POST a JSON payload to a webhook, best-effort.
 *
 * Never throws: any failure (invalid URL, network error, timeout) is logged
 * via `debugLog` and reported as `{ fired: false }`. Any HTTP response,
 * including non-2xx, counts as fired; the status code is returned so the
 * caller can inspect it.
 *
 * @param {string} webhook - Destination URL.
 * @param {unknown} payload - JSON-serialisable body.
 * @param {number} [timeoutMs=10000] - Abort the request after this many
 *   milliseconds so an unresponsive endpoint cannot stall the run.
 * @returns {Promise<{fired: boolean, status?: number}>}
 */
async function fireWebhook(webhook, payload, timeoutMs = 10_000) {
    try {
        const res = await fetch(webhook, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(payload),
            // Without a signal, fetch waits indefinitely on a hanging endpoint.
            signal: AbortSignal.timeout(timeoutMs),
        });
        return { fired: true, status: res.status };
    }
    catch (err) {
        debugLog("watch", "webhook failed", err);
        return { fired: false };
    }
}
137
/**
 * Run a single watch check for one URL.
 *
 * Steps, in order:
 *  1. Ensure `output_dir` exists and load `<output_dir>/.state.json`.
 *  2. Fetch the page, derive its signature for `hash_on`, and hash it.
 *  3. Compare the hash against the stored `last_hash` for this URL.
 *  4. On change, copy the old snapshot to `<slug>.previous.txt`; always
 *     overwrite `<slug>.snapshot.txt` with the new signature.
 *  5. Update and persist the per-URL state record.
 *  6. On change, build the diff and (if configured) fire the webhook.
 *
 * A run is only reported as `changed` when the stored record was taken with
 * the same `hash_on` mode. Hashes of different representations of a page are
 * not comparable, so a mode switch is treated as a fresh baseline rather than
 * as a content change (which would otherwise fire a spurious webhook and
 * produce a diff between two unrelated representations). Records without a
 * stored `hash_on` are treated as comparable.
 *
 * @param {object} input - Parsed tool input (see `schema`): `url`,
 *   `output_dir`, `hash_on`, `diff_format`, optional `webhook`.
 * @returns {Promise<object>} Result with `url`, `changed`, `first_run`,
 *   `previous_hash`, `current_hash`, `hash_on`, `snapshot_file`, `diff`
 *   (string or null), `webhook_fired`, `webhook_status`, `state`, `checked_at`.
 * @throws Propagates errors from directory creation, fetching, and file writes.
 */
export async function runWatchOnce(input) {
    const outDir = resolvePath(input.output_dir);
    await mkdir(outDir, { recursive: true });
    const stateFile = join(outDir, ".state.json");
    const state = await loadState(stateFile);
    const fetched = await fetchPage(input.url);
    const signature = await computeSignature(fetched.html, input.url, input.hash_on);
    const currentHash = hashString(signature);
    const slug = slugify(input.url);
    const snapshotFile = join(outDir, `${slug}.snapshot.txt`);
    const prevSnapshotFile = join(outDir, `${slug}.previous.txt`);
    const existing = state[input.url];
    const firstRun = !existing;
    // Only compare hashes that were computed over the same representation.
    const sameMode = !existing?.hash_on || existing.hash_on === input.hash_on;
    if (!firstRun && !sameMode) {
        debugLog("watch", `hash_on changed from ${existing.hash_on} to ${input.hash_on}; re-baselining`);
    }
    const changed = !firstRun && sameMode && existing.last_hash !== currentHash;
    let previousSig = null;
    if (existsSync(snapshotFile)) {
        try {
            previousSig = await readFile(snapshotFile, "utf-8");
        }
        catch {
            // An unreadable snapshot only costs us the diff, not the detection.
            previousSig = null;
        }
    }
    // Rotate previous snapshot only when content changed
    if (changed && previousSig !== null) {
        await writeFile(prevSnapshotFile, previousSig, "utf-8");
    }
    await writeFile(snapshotFile, signature, "utf-8");
    const nowIso = new Date().toISOString();
    const newState = {
        url: input.url,
        last_hash: currentHash,
        last_checked: nowIso,
        last_changed: changed ? nowIso : (existing?.last_changed ?? null),
        hash_on: input.hash_on,
        check_count: (existing?.check_count ?? 0) + 1,
        change_count: (existing?.change_count ?? 0) + (changed ? 1 : 0),
    };
    state[input.url] = newState;
    // State is persisted before the webhook so a slow/failing webhook cannot
    // cause the same change to be re-detected on the next run.
    await saveState(stateFile, state);
    let diff = null;
    if (changed && previousSig !== null) {
        diff =
            input.diff_format === "unified"
                ? simpleUnifiedDiff(previousSig, signature)
                : jsonDiff(previousSig, signature);
    }
    let webhookFired = false;
    let webhookStatus;
    if (changed && input.webhook) {
        const payload = {
            event: "watch.change",
            url: input.url,
            previous_hash: existing?.last_hash ?? null,
            current_hash: currentHash,
            detected_at: nowIso,
            diff,
        };
        const res = await fireWebhook(input.webhook, payload);
        webhookFired = res.fired;
        webhookStatus = res.status;
    }
    return {
        url: input.url,
        changed,
        first_run: firstRun,
        previous_hash: existing?.last_hash ?? null,
        current_hash: currentHash,
        hash_on: input.hash_on,
        snapshot_file: snapshotFile,
        diff,
        webhook_fired: webhookFired,
        webhook_status: webhookStatus,
        state: newState,
        checked_at: nowIso,
    };
}
214
/**
 * Tool entry point: run one watch check and wrap the outcome.
 *
 * @param {object} input - Parsed tool input, passed through to `runWatchOnce`.
 * @returns {Promise<*>} `toolResult(result)` on success; on any thrown value,
 *   the failure is logged and `errorResult(message)` is returned instead.
 */
export async function execute(input) {
    try {
        const outcome = await runWatchOnce(input);
        return toolResult(outcome);
    }
    catch (err) {
        debugLog("watch", "failed", err);
        // Non-Error throwables (strings, objects) are stringified as-is.
        const message = err instanceof Error ? err.message : String(err);
        return errorResult(message);
    }
}
224
+ //# sourceMappingURL=watch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"watch.js","sourceRoot":"","sources":["../../src/tools/watch.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AACzD,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,CAAC,MAAM,IAAI,GAAG,OAAO,CAAC;AAE5B,MAAM,CAAC,MAAM,WAAW,GACtB,mKAAmK,CAAC;AAEtK,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7B,GAAG,EAAE,CAAC;SACH,MAAM,EAAE;SACR,GAAG,CAAC,cAAc,CAAC;SACnB,QAAQ,CAAC,cAAc,CAAC;IAC3B,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,OAAO,CAAC,cAAc,CAAC;SACvB,QAAQ,CAAC,0CAA0C,CAAC;IACvD,OAAO,EAAE,CAAC;SACP,IAAI,CAAC,CAAC,SAAS,EAAE,aAAa,EAAE,UAAU,CAAC,CAAC;SAC5C,OAAO,CAAC,aAAa,CAAC;SACtB,QAAQ,CAAC,gEAAgE,CAAC;IAC7E,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,cAAc,CAAC;SACnB,QAAQ,EAAE;SACV,QAAQ,CAAC,4DAA4D,CAAC;IACzE,WAAW,EAAE,CAAC;SACX,IAAI,CAAC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;SACzB,OAAO,CAAC,SAAS,CAAC;SAClB,QAAQ,CAAC,mCAAmC,CAAC;IAChD,QAAQ,EAAE,CAAC;SACR,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,mDAAmD,CAAC;CACjE,CAAC,CAAC;AA6BH,SAAS,OAAO,CAAC,GAAW;IAC1B,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,SAAS,UAAU,CAAC,CAAS;IAC3B,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY,EAAE,IAAY,EAAE,QAAQ,GAAG,GAAG;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,C
AAC;IAEnC,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;YACtB,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;YACtB,KAAK,EAAE,CAAC;QACV,CAAC;IACH,CAAC;IACD,MAAM,MAAM,GAAG,OAAO,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM,QAAQ,OAAO,aAAa,KAAK,SAAS,CAAC;IACtG,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,MAAM,GAAG,QAAQ,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;IAC7F,OAAO,GAAG,MAAM,KAAK,IAAI,GAAG,SAAS,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY,EAAE,IAAY;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,SAAS,CAAC,MAAM,EAAE,UAAU,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;AACxG,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,GAAW,EACX,MAA6B;IAE7B,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACtC,IAAI,MAAM,KAAK,UAAU;QAAE,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAEvD,cAAc;IACd,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACpD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,OAAO,EAAE,WAAW;YAAE,OAAO,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAC5D,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAC
9B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,SAAiB;IACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,CAAC;IACtC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAA+B,CAAC;IACvD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,SAAiB,EAAE,KAAiC;IAC3E,MAAM,SAAS,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACtE,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,OAAe,EACf,OAAgB;IAEhB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YAC/B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAC9B,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,CAAC,CAAC;QACzC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAiB;IAClD,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC7C,MAAM,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAC9C,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,CAAC;IAEzC,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IACjF,MAAM,WAAW,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IAE1C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,CAAC;IAC1D,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,CAAC;IAE9D,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAG,CAAC,QAAQ,CAAC;IAC3B,MAAM,OAAO,GAAG,CAAC,QAAQ,IAAI,QAAQ,CAAC,SAAS,KAAK,WAAW,CAAC;IAEhE,IAAI,WAAW,GAAkB,IAAI,CAAC;IACtC,IAAI,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,WAAW,GAAG,MAAM,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QACtD,CAAC;QAAC,MAAM,CAAC;YACP,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QA
CpC,MAAM,SAAS,CAAC,gBAAgB,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAC1D,CAAC;IACD,MAAM,SAAS,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAElD,MAAM,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACxC,MAAM,QAAQ,GAAe;QAC3B,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,SAAS,EAAE,WAAW;QACtB,YAAY,EAAE,MAAM;QACpB,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE,YAAY,IAAI,IAAI;QAC/D,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,WAAW,EAAE,CAAC,QAAQ,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC;QAC7C,YAAY,EAAE,CAAC,QAAQ,EAAE,YAAY,IAAI,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KAChE,CAAC;IACF,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC;IAC5B,MAAM,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAElC,IAAI,IAAI,GAAkB,IAAI,CAAC;IAC/B,IAAI,OAAO,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QACpC,IAAI;YACF,KAAK,CAAC,WAAW,KAAK,SAAS;gBAC7B,CAAC,CAAC,iBAAiB,CAAC,WAAW,EAAE,SAAS,CAAC;gBAC3C,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,aAAiC,CAAC;IACtC,IAAI,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG;YACd,KAAK,EAAE,cAAc;YACrB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,aAAa,EAAE,QAAQ,EAAE,SAAS,IAAI,IAAI;YAC1C,YAAY,EAAE,WAAW;YACzB,WAAW,EAAE,MAAM;YACnB,IAAI;SACL,CAAC;QACF,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACtD,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC;QACzB,aAAa,GAAG,GAAG,CAAC,MAAM,CAAC;IAC7B,CAAC;IAED,OAAO;QACL,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,OAAO;QACP,SAAS,EAAE,QAAQ;QACnB,aAAa,EAAE,QAAQ,EAAE,SAAS,IAAI,IAAI;QAC1C,YAAY,EAAE,WAAW;QACzB,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,aAAa,EAAE,YAAY;QAC3B,IAAI;QACJ,aAAa,EAAE,YAAY;QAC3B,cAAc,EAAE,aAAa;QAC7B,KAAK,EAAE,QAAQ;QACf,UAAU,EAAE,MAAM;KACnB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,KAAiB;IAC7C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,OAAO,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;QACjC,OAAO,WAAW,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,CAAC;AACH,CAAC"}
@@ -20,10 +20,19 @@ declare const CIRCUIT_OPEN_DURATION_MS = 60000;
20
20
  declare const CIRCUIT_PROBE_SUCCESSES = 3;
21
21
  declare const circuits: Map<string, CircuitBreaker>;
22
22
  declare const CIRCUIT_STALE_MS = 3600000;
23
- declare function getCircuit(domain: string): CircuitBreaker;
24
- declare function recordSuccess(domain: string): void;
25
- declare function recordFailure(domain: string): void;
26
- export { circuits, getCircuit, recordSuccess, recordFailure, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
23
+ /**
24
+ * Get circuit breaker key for a URL.
25
+ * Uses domain + first 2 path segments for endpoint-level granularity.
26
+ */
27
+ declare function getCircuitKey(url: string): string;
28
+ declare function getCircuit(key: string): CircuitBreaker;
29
+ declare function recordSuccess(key: string): void;
30
+ declare function recordFailure(key: string): void;
31
+ /**
32
+ * Check domain-level circuit: opens when 3+ endpoint circuits are open for this domain.
33
+ */
34
+ declare function isDomainCircuitOpen(domain: string): boolean;
35
+ export { circuits, getCircuit, getCircuitKey, recordSuccess, recordFailure, isDomainCircuitOpen, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
27
36
  export interface SmartFetchOptions extends StealthOptions {
28
37
  respectRobots?: boolean;
29
38
  retries?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAA4B,KAAK,WAAW,EAAE,KAAK,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAQtG,qBAAa,kBAAkB;IAIjB,OAAO,CAAC,aAAa;IAHjC,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,KAAK,CAAyB;gBAElB,aAAa,GAAE,MAA4B;IAEzD,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;CAa/C;AAED,eAAO,MAAM,cAAc,oBAA2B,CAAC;AAIvD,KAAK,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;AAEpD,UAAU,cAAc;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,QAAA,MAAM,yBAAyB,IAAI,CAAC;AACpC,QAAA,MAAM,wBAAwB,QAAS,CAAC;AACxC,QAAA,MAAM,uBAAuB,IAAI,CAAC;AAElC,QAAA,MAAM,QAAQ,6BAAoC,CAAC;AAGnD,QAAA,MAAM,gBAAgB,UAAY,CAAC;AAUnC,iBAAS,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,cAAc,CAgBlD;AAED,iBAAS,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAY3C;AAED,iBAAS,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAc3C;AAGD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAgD9J,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACvD,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC,CAsD9F"}
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAA4B,KAAK,WAAW,EAAqB,KAAK,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAQzH,qBAAa,kBAAkB;IAIjB,OAAO,CAAC,aAAa;IAHjC,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,KAAK,CAAyB;gBAElB,aAAa,GAAE,MAA4B;IAEzD,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;CAa/C;AAED,eAAO,MAAM,cAAc,oBAA2B,CAAC;AAIvD,KAAK,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;AAEpD,UAAU,cAAc;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,QAAA,MAAM,yBAAyB,IAAI,CAAC;AACpC,QAAA,MAAM,wBAAwB,QAAS,CAAC;AACxC,QAAA,MAAM,uBAAuB,IAAI,CAAC;AAIlC,QAAA,MAAM,QAAQ,6BAAoC,CAAC;AAGnD,QAAA,MAAM,gBAAgB,UAAY,CAAC;AAUnC;;;GAGG;AACH,iBAAS,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAS1C;AAED,iBAAS,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAgB/C;AAED,iBAAS,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAYxC;AAED,iBAAS,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAcxC;AAED;;GAEG;AACH,iBAAS,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAQpD;AAGD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAsDlM,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACvD,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA+BD,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,WAAW,CAAC,CAiG9F"}
@@ -1,7 +1,7 @@
1
1
  import { smartFetch, StealthError } from "../stealth/index.js";
2
2
  import { isAllowed } from "./robots.js";
3
3
  import { getDomain } from "./url.js";
4
- import { DEFAULT_CONCURRENCY } from "../constants.js";
4
+ import { DEFAULT_CONCURRENCY, DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS } from "../constants.js";
5
5
  import { getKnowledgeEngine } from "../knowledge/index.js";
6
6
  // ── Concurrency Limiter ──
7
7
  export class ConcurrencyLimiter {
@@ -31,22 +31,39 @@ export const defaultLimiter = new ConcurrencyLimiter();
31
31
  const CIRCUIT_FAILURE_THRESHOLD = 5;
32
32
  const CIRCUIT_OPEN_DURATION_MS = 60_000;
33
33
  const CIRCUIT_PROBE_SUCCESSES = 3;
34
+ // Domain-level circuit: higher threshold — only opens when multiple endpoints fail
35
+ const DOMAIN_CIRCUIT_FAILURE_THRESHOLD = 10;
34
36
  const circuits = new Map();
35
37
  // Periodic cleanup: remove closed circuits idle for >1 hour
36
38
  const CIRCUIT_STALE_MS = 3_600_000;
37
39
// Every 5 minutes, drop any circuit (regardless of its state) that has not
// been accessed within CIRCUIT_STALE_MS. The timer is unref'd so it never
// keeps the process alive on its own.
setInterval(() => {
    const cutoff = Date.now() - CIRCUIT_STALE_MS;
    circuits.forEach((circuit, key) => {
        if (circuit.lastAccessed < cutoff) {
            circuits.delete(key);
        }
    });
}, 300_000).unref();
- function getCircuit(domain) {
46
- let circuit = circuits.get(domain);
47
/**
 * Build the circuit-breaker key for a URL.
 *
 * The key is the URL's hostname followed by at most its first two non-empty
 * path segments (e.g. `example.com/api/v1`), or the hostname alone when the
 * path has no segments. Port, query and fragment are not part of the key.
 * If the URL cannot be parsed, the result of `getDomain(url)` is used.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} Circuit key.
 */
function getCircuitKey(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return getDomain(url);
    }
    const segments = [];
    for (const part of parsed.pathname.split("/")) {
        if (part) {
            segments.push(part);
        }
        if (segments.length === 2) {
            break;
        }
    }
    if (segments.length === 0) {
        return parsed.hostname;
    }
    return `${parsed.hostname}/${segments.join("/")}`;
}
62
+ function getCircuit(key) {
63
+ let circuit = circuits.get(key);
47
64
  if (!circuit) {
48
65
  circuit = { state: "closed", failures: 0, openedAt: 0, probeSuccesses: 0, lastAccessed: Date.now() };
49
- circuits.set(domain, circuit);
66
+ circuits.set(key, circuit);
50
67
  }
51
68
  circuit.lastAccessed = Date.now();
52
69
  // Check if open circuit should transition to half-open
@@ -56,8 +73,8 @@ function getCircuit(domain) {
56
73
  }
57
74
  return circuit;
58
75
  }
59
- function recordSuccess(domain) {
60
- const circuit = getCircuit(domain);
76
+ function recordSuccess(key) {
77
+ const circuit = getCircuit(key);
61
78
  if (circuit.state === "half-open") {
62
79
  circuit.probeSuccesses++;
63
80
  if (circuit.probeSuccesses >= CIRCUIT_PROBE_SUCCESSES) {
@@ -70,8 +87,8 @@ function recordSuccess(domain) {
70
87
  circuit.failures = 0;
71
88
  }
72
89
  }
73
- function recordFailure(domain) {
74
- const circuit = getCircuit(domain);
90
+ function recordFailure(key) {
91
+ const circuit = getCircuit(key);
75
92
  // Half-open probe failed → immediately reopen circuit
76
93
  if (circuit.state === "half-open") {
77
94
  circuit.state = "open";
@@ -85,14 +102,32 @@ function recordFailure(domain) {
85
102
  circuit.openedAt = Date.now();
86
103
  }
87
104
  }
105
/**
 * Check the domain-level circuit: it counts as open when 3 or more circuits
 * belonging to this domain are in the "open" state.
 *
 * A circuit belongs to the domain when its key is exactly `domain` or starts
 * with `domain + "/"` (the `hostname/path-prefix` shape built by
 * getCircuitKey). A bare string-prefix test is not enough: it would also
 * count keys of unrelated hosts that merely share the prefix, such as
 * `example.com.evil.org/x` or `example.community/y` for `example.com`.
 *
 * NOTE(review): only `circuit.state` is inspected here. Whether an "open"
 * entry whose cooldown has already elapsed should still count is not decided
 * in this function — confirm against getCircuit's half-open transition.
 *
 * @param {string} domain - Domain whose endpoint circuits are inspected.
 * @returns {boolean} true when at least 3 matching circuits are open.
 */
function isDomainCircuitOpen(domain) {
    const openEndpointThreshold = 3;
    const endpointPrefix = `${domain}/`;
    let openEndpoints = 0;
    for (const [key, circuit] of circuits) {
        if (circuit.state !== "open") {
            continue;
        }
        // Match on a host boundary so sibling hosts sharing a prefix are ignored.
        if (key !== domain && !key.startsWith(endpointPrefix)) {
            continue;
        }
        openEndpoints++;
        if (openEndpoints >= openEndpointThreshold) {
            return true; // threshold reached, no need to scan the rest
        }
    }
    return false;
}
88
117
  // Exported for testing
89
- export { circuits, getCircuit, recordSuccess, recordFailure, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
118
+ export { circuits, getCircuit, getCircuitKey, recordSuccess, recordFailure, isDomainCircuitOpen, CIRCUIT_FAILURE_THRESHOLD, CIRCUIT_OPEN_DURATION_MS, CIRCUIT_PROBE_SUCCESSES, CIRCUIT_STALE_MS };
90
119
  // ── Exponential Backoff with Full Jitter (AWS pattern) ──
91
120
  const BACKOFF_BASE_MS = 1000;
92
121
  const BACKOFF_CAP_MS = 30_000;
93
- function fullJitterBackoff(attempt) {
122
+ const RATE_LIMIT_EXTRA_JITTER_MS = 10_000; // Extra jitter for 429 responses
123
+ function fullJitterBackoff(attempt, is429 = false) {
94
124
  const expDelay = Math.min(BACKOFF_CAP_MS, BACKOFF_BASE_MS * Math.pow(2, attempt));
95
- return Math.random() * expDelay;
125
+ const baseJitter = Math.random() * expDelay;
126
+ // On 429, add extra random jitter to avoid thundering herd
127
+ if (is429) {
128
+ return baseJitter + 5000 + Math.random() * RATE_LIMIT_EXTRA_JITTER_MS;
129
+ }
130
+ return baseJitter;
96
131
  }
97
132
  // ── Per-Domain Rate Limiter ──
98
133
  const DEFAULT_DOMAIN_RATE_MS = parseInt(process.env.DOMAIN_RATE_LIMIT_MS || "500", 10);
@@ -120,6 +155,31 @@ class DomainThrottle {
120
155
  }
121
156
  }
122
157
  const domainThrottle = new DomainThrottle();
158
/**
 * Derive a request timeout from an observed average response time.
 *
 * The result is three times the average, raised to at least
 * DEFAULT_TIMEOUT_MS and then capped at MAX_TIMEOUT_MS. A missing, zero or
 * negative average yields DEFAULT_TIMEOUT_MS.
 *
 * @param {number|undefined|null} avgResponseTimeMs - Average response time in ms.
 * @returns {number} Timeout in milliseconds.
 */
function computeAdaptiveTimeout(avgResponseTimeMs) {
    if (!avgResponseTimeMs) {
        return DEFAULT_TIMEOUT_MS;
    }
    if (avgResponseTimeMs <= 0) {
        return DEFAULT_TIMEOUT_MS;
    }
    const tripled = avgResponseTimeMs * 3;
    const atLeastDefault = Math.max(DEFAULT_TIMEOUT_MS, tripled);
    return Math.min(MAX_TIMEOUT_MS, atLeastDefault);
}
167
/**
 * Pick the stealth level to use for a given retry attempt.
 *
 * - If the previous failure is a StealthError carrying a truthy
 *   `antiBotSystem`, the result is 3 regardless of the attempt number.
 * - Otherwise attempt 0 uses the base level (1 when none is given),
 *   attempt 1 uses base + 1 capped at 3, and any other attempt uses 3.
 *
 * @param {number|undefined} baseLevel - Level requested by the caller, if any.
 * @param {number} attempt - Zero-based attempt index.
 * @param {unknown} lastError - Error from the previous attempt, if any.
 * @returns {number} Stealth level for this attempt.
 */
function getEscalatedLevel(baseLevel, attempt, lastError) {
    const antiBotDetected = lastError instanceof StealthError && Boolean(lastError.antiBotSystem);
    if (antiBotDetected) {
        return 3;
    }
    const startLevel = baseLevel || 1;
    switch (attempt) {
        case 0:
            return startLevel;
        case 1:
            return Math.min(startLevel + 1, 3);
        default:
            return 3; // every later attempt goes straight to the top level
    }
}
123
183
  export async function fetchPage(url, options) {
124
184
  const respectRobots = options?.respectRobots ?? (process.env.RESPECT_ROBOTS !== "false");
125
185
  if (respectRobots) {
@@ -128,40 +188,77 @@ export async function fetchPage(url, options) {
128
188
  throw new Error(`URL blocked by robots.txt: ${url}`);
129
189
  }
130
190
  }
131
- // Circuit breaker check
191
+ // Per-endpoint circuit breaker check
132
192
  const domain = getDomain(url);
133
- const circuit = getCircuit(domain);
193
+ const circuitKey = getCircuitKey(url);
194
+ const circuit = getCircuit(circuitKey);
134
195
  if (circuit.state === "open") {
135
- throw new Error(`Circuit breaker open for ${domain} — too many consecutive failures. Retry after cooldown.`);
196
+ throw new Error(`Circuit breaker open for endpoint ${circuitKey} — too many consecutive failures. Retry after cooldown.`);
197
+ }
198
+ // Domain-level circuit check (opens when 3+ endpoints are broken)
199
+ if (isDomainCircuitOpen(domain)) {
200
+ throw new Error(`Circuit breaker open for domain ${domain} — multiple endpoints failing. Retry after cooldown.`);
136
201
  }
137
202
  // Per-domain rate limiting — use knowledge engine's safe_rate_limit if available
138
- const knowledge = await getKnowledgeEngine().get(domain);
203
+ const engine = getKnowledgeEngine();
204
+ const knowledge = await engine.get(domain);
139
205
  const knowledgeDelayMs = knowledge?.safe_rate_limit
140
206
  ? Math.round(60_000 / knowledge.safe_rate_limit)
141
207
  : undefined;
142
208
  await domainThrottle.throttle(domain, knowledgeDelayMs);
209
+ // ── Adaptive timeout from knowledge engine ──
210
+ const adaptiveTimeout = computeAdaptiveTimeout(knowledge?.avg_response_time_ms);
211
+ const timeout = options?.timeout || adaptiveTimeout;
143
212
  const retries = options?.retries ?? 2;
144
213
  let lastError;
214
+ let lastHttpStatus = 0;
145
215
  for (let attempt = 0; attempt <= retries; attempt++) {
146
216
  try {
147
- const result = await smartFetch(url, options);
148
- recordSuccess(domain);
217
+ // ── Smart retry: escalate stealth level on each attempt ──
218
+ const escalatedLevel = getEscalatedLevel(options?.forceLevel, attempt, lastError);
219
+ const attemptOptions = {
220
+ ...options,
221
+ timeout,
222
+ // On retry, escalate stealth level (unless user forced a specific level)
223
+ forceLevel: attempt > 0 && !options?.forceLevel ? escalatedLevel : options?.forceLevel,
224
+ };
225
+ const result = await smartFetch(url, attemptOptions);
226
+ recordSuccess(circuitKey);
227
+ // Feed successful strategy back to knowledge engine
228
+ if (attempt > 0 && result.level > 1) {
229
+ engine.record({
230
+ url, domain,
231
+ levelUsed: result.level,
232
+ success: true,
233
+ responseTimeMs: 0, // Already recorded by smartFetch
234
+ antiBotSystem: result.antiBotSystem || null,
235
+ captchaType: result.captchaSolved ? "detected" : null,
236
+ proxyUsed: !!result.proxyUsed,
237
+ blocked: false,
238
+ httpStatus: result.status,
239
+ });
240
+ }
149
241
  return result;
150
242
  }
151
243
  catch (err) {
152
244
  lastError = err instanceof Error ? err : new Error(String(err));
153
- recordFailure(domain);
154
- // Check if circuit just opened
155
- const updatedCircuit = getCircuit(domain);
245
+ recordFailure(circuitKey);
246
+ // Track HTTP status for backoff decisions
247
+ if (err instanceof StealthError) {
248
+ lastHttpStatus = err.httpStatus;
249
+ }
250
+ // Check if endpoint circuit just opened
251
+ const updatedCircuit = getCircuit(circuitKey);
156
252
  if (updatedCircuit.state === "open") {
157
253
  // Enrich error message with StealthError info if available
158
254
  const detail = err instanceof StealthError
159
255
  ? `L${err.lastLevel} HTTP ${err.httpStatus}${err.antiBotSystem ? ` [${err.antiBotSystem}]` : ""}`
160
256
  : "";
161
- throw new Error(`Circuit breaker opened for ${domain}${detail ? ` (${detail})` : ""}: ${lastError.message}`);
257
+ throw new Error(`Circuit breaker opened for ${circuitKey}${detail ? ` (${detail})` : ""}: ${lastError.message}`);
162
258
  }
163
259
  if (attempt < retries) {
164
- const delay = fullJitterBackoff(attempt);
260
+ const is429 = lastHttpStatus === 429;
261
+ const delay = fullJitterBackoff(attempt, is429);
165
262
  await new Promise((r) => setTimeout(r, delay));
166
263
  }
167
264
  }
@@ -1 +1 @@
1
- {"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAAyC,MAAM,qBAAqB,CAAC;AACtG,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAE3D,4BAA4B;AAE5B,MAAM,OAAO,kBAAkB;IAIT;IAHZ,OAAO,GAAG,CAAC,CAAC;IACZ,KAAK,GAAsB,EAAE,CAAC;IAEtC,YAAoB,gBAAwB,mBAAmB;QAA3C,kBAAa,GAAb,aAAa,CAA8B;IAAG,CAAC;IAEnE,KAAK,CAAC,GAAG,CAAI,EAAoB;QAC/B,OAAO,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YAC1C,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC;CACF;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;AAcvD,MAAM,yBAAyB,GAAG,CAAC,CAAC;AACpC,MAAM,wBAAwB,GAAG,MAAM,CAAC;AACxC,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAElC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B,CAAC;AAEnD,4DAA4D;AAC5D,MAAM,gBAAgB,GAAG,SAAS,CAAC;AACnC,WAAW,CAAC,GAAG,EAAE;IACf,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzC,IAAI,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,gBAAgB,EAAE,CAAC;YAClD,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC,EAAE,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;AAEpB,SAAS,UAAU,CAAC,MAAc;IAChC,IAAI,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QACrG,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,uDAAuD;IACvD,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,QAAQ,IAAI,wBAAwB,EAAE,CAAC;QAC1F,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC;QAC5B,
OAAO,CAAC,cAAc,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,cAAc,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,cAAc,IAAI,uBAAuB,EAAE,CAAC;YACtD,kBAAkB;YAClB,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC;YACzB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,sDAAsD;IACtD,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,OAAO,CAAC,QAAQ,GAAG,yBAAyB,CAAC;QAC7C,OAAO;IACT,CAAC;IACD,OAAO,CAAC,QAAQ,EAAE,CAAC;IACnB,IAAI,OAAO,CAAC,QAAQ,IAAI,yBAAyB,EAAE,CAAC;QAClD,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAChC,CAAC;AACH,CAAC;AAED,uBAAuB;AACvB,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAE9J,2DAA2D;AAE3D,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,cAAc,GAAG,MAAM,CAAC;AAE9B,SAAS,iBAAiB,CAAC,OAAe;IACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;IAClF,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG,QAAQ,CAAC;AAClC,CAAC;AAED,gCAAgC;AAEhC,MAAM,sBAAsB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,KAAK,EAAE,EAAE,CAAC,CAAC;AAEvF,MAAM,cAAc;IACV,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,YAAY,CAAS;IAE7B,YAAY,iBAAyB,sBAAsB;QACzD,IAAI,CAAC,YAAY,GAAG,cAAc,CAAC;IACrC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,gBAAyB;QACtD,MAAM,KAAK,GAAG,gBAAgB,IAAI,IAAI,CAAC,YAAY,CAAC;QACpD,IAAI,KAAK,IAAI,CAAC;YAAE,OAAO;QAEvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,CAAC;QAE3B,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAED,IAAI,
CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC;AAS5C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAA2B;IACtE,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,OAAO,CAAC,CAAC;IAEzF,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACnC,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,yDAAyD,CAAC,CAAC;IAC/G,CAAC;IAED,iFAAiF;IACjF,MAAM,SAAS,GAAG,MAAM,kBAAkB,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACzD,MAAM,gBAAgB,GAAG,SAAS,EAAE,eAAe;QACjD,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,SAAS,CAAC,eAAe,CAAC;QAChD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAExD,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,CAAC,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC9C,aAAa,CAAC,MAAM,CAAC,CAAC;YACtB,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAChE,aAAa,CAAC,MAAM,CAAC,CAAC;YAEtB,+BAA+B;YAC/B,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;YAC1C,IAAI,cAAc,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBACpC,2DAA2D;gBAC3D,MAAM,MAAM,GAAG,GAAG,YAAY,YAAY;oBACxC,CAAC,CAAC,IAAI,GAAG,CAAC,SAAS,SAAS,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBACjG,CAAC,CAAC,EAAE,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/G,CAAC;YAED,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC;gBACtB,MAAM,KAAK,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBACzC,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAA
C,EAAE,KAAK,CAAC,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAU,CAAC;AACnB,CAAC"}
1
+ {"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../src/utils/fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAA4D,MAAM,qBAAqB,CAAC;AACzH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAC1F,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAE3D,4BAA4B;AAE5B,MAAM,OAAO,kBAAkB;IAIT;IAHZ,OAAO,GAAG,CAAC,CAAC;IACZ,KAAK,GAAsB,EAAE,CAAC;IAEtC,YAAoB,gBAAwB,mBAAmB;QAA3C,kBAAa,GAAb,aAAa,CAA8B;IAAG,CAAC;IAEnE,KAAK,CAAC,GAAG,CAAI,EAAoB;QAC/B,OAAO,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YAC1C,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC;CACF;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;AAcvD,MAAM,yBAAyB,GAAG,CAAC,CAAC;AACpC,MAAM,wBAAwB,GAAG,MAAM,CAAC;AACxC,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAClC,mFAAmF;AACnF,MAAM,gCAAgC,GAAG,EAAE,CAAC;AAE5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B,CAAC;AAEnD,4DAA4D;AAC5D,MAAM,gBAAgB,GAAG,SAAS,CAAC;AACnC,WAAW,CAAC,GAAG,EAAE;IACf,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;QACtC,IAAI,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,gBAAgB,EAAE,CAAC;YAClD,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;AACH,CAAC,EAAE,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;AAEpB;;;GAGG;AACH,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnD,OAAO,UAAU,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC;IAC3E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;IACxB,C
AAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QACrG,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAElC,uDAAuD;IACvD,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,QAAQ,IAAI,wBAAwB,EAAE,CAAC;QAC1F,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC;QAC5B,OAAO,CAAC,cAAc,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,cAAc,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,cAAc,IAAI,uBAAuB,EAAE,CAAC;YACtD,kBAAkB;YAClB,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC;YACzB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,sDAAsD;IACtD,IAAI,OAAO,CAAC,KAAK,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,OAAO,CAAC,QAAQ,GAAG,yBAAyB,CAAC;QAC7C,OAAO;IACT,CAAC;IACD,OAAO,CAAC,QAAQ,EAAE,CAAC;IACnB,IAAI,OAAO,CAAC,QAAQ,IAAI,yBAAyB,EAAE,CAAC;QAClD,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAChC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,MAAc;IACzC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;QACtC,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;YACvD,aAAa,EAAE,CAAC;QAClB,CAAC;IACH,CAAC;IACD,OAAO,aAAa,IAAI,CAAC,CAAC;AAC5B,CAAC;AAED,uBAAuB;AACvB,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,CAAC;AAElM,2DAA2D;AAE3D,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,cAAc,GAAG,MAAM,CAAC;AAC9B,MAAM,0BAA0B,GAAG,MAAM,CAAC,CAAC,iCAAiC;AAE5E
,SAAS,iBAAiB,CAAC,OAAe,EAAE,KAAK,GAAG,KAAK;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;IAClF,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,QAAQ,CAAC;IAC5C,2DAA2D;IAC3D,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,0BAA0B,CAAC;IACxE,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,gCAAgC;AAEhC,MAAM,sBAAsB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,KAAK,EAAE,EAAE,CAAC,CAAC;AAEvF,MAAM,cAAc;IACV,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,YAAY,CAAS;IAE7B,YAAY,iBAAyB,sBAAsB;QACzD,IAAI,CAAC,YAAY,GAAG,cAAc,CAAC;IACrC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,gBAAyB;QACtD,MAAM,KAAK,GAAG,gBAAgB,IAAI,IAAI,CAAC,YAAY,CAAC;QACpD,IAAI,KAAK,IAAI,CAAC;YAAE,OAAO;QAEvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,CAAC;QAE3B,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;YACpB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC;AAS5C;;;GAGG;AACH,SAAS,sBAAsB,CAAC,iBAAqC;IACnE,IAAI,CAAC,iBAAiB,IAAI,iBAAiB,IAAI,CAAC;QAAE,OAAO,kBAAkB,CAAC;IAC5E,OAAO,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,kBAAkB,EAAE,iBAAiB,GAAG,CAAC,CAAC,CAAC,CAAC;AACvF,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CACxB,SAAmC,EACnC,OAAe,EACf,SAA4B;IAE5B,wEAAwE;IACxE,IAAI,SAAS,YAAY,YAAY,IAAI,SAAS,CAAC,aAAa,EAAE,CAAC;QACjE,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,IAAI,GAAG,SAAS,IAAI,CAAC,CAAC;IAC5B,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC,CAAiB,CAAC;IAChE,OAAO,CAAC,CAAC,CAAC,2BAA2B;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,OAA2B;IACtE,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,OAAO,CAAC,CAAC;IAEzF,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,G
AAG,CAAC,CAAC;QACrC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;IACvC,IAAI,OAAO,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,qCAAqC,UAAU,yDAAyD,CAAC,CAAC;IAC5H,CAAC;IAED,kEAAkE;IAClE,IAAI,mBAAmB,CAAC,MAAM,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,mCAAmC,MAAM,sDAAsD,CAAC,CAAC;IACnH,CAAC;IAED,iFAAiF;IACjF,MAAM,MAAM,GAAG,kBAAkB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,gBAAgB,GAAG,SAAS,EAAE,eAAe;QACjD,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,SAAS,CAAC,eAAe,CAAC;QAChD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAExD,+CAA+C;IAC/C,MAAM,eAAe,GAAG,sBAAsB,CAAC,SAAS,EAAE,oBAAoB,CAAC,CAAC;IAChF,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,eAAe,CAAC;IAEpD,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,CAAC,CAAC;IACtC,IAAI,SAA4B,CAAC;IACjC,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,4DAA4D;YAC5D,MAAM,cAAc,GAAG,iBAAiB,CAAC,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;YAClF,MAAM,cAAc,GAAmB;gBACrC,GAAG,OAAO;gBACV,OAAO;gBACP,yEAAyE;gBACzE,UAAU,EAAE,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,OAAO,EAAE,UAAU;aACvF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;YACrD,aAAa,CAAC,UAAU,CAAC,CAAC;YAE1B,oDAAoD;YACpD,IAAI,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACpC,MAAM,CAAC,MAAM,CAAC;oBACZ,GAAG,EAAE,MAAM;oBACX,SAAS,EAAE,MAAM,CAAC,KAAK;oBACvB,OAAO,EAAE,IAAI;oBACb,cAAc,EAAE,CAAC,EAAE,iCAAiC;oBACpD,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI;oBAC3C,WAAW,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI;oBACrD,SAAS,EAAE,CAAC,CAAC,MAAM,CAAC,SAAS;oBAC7B,OAAO,EAAE,KAAK;oBACd,UAAU,EAAE,MAAM,CAAC,MAAM;iBAC1B,CAAC,CAAC;YACL,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG
,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAChE,aAAa,CAAC,UAAU,CAAC,CAAC;YAE1B,0CAA0C;YAC1C,IAAI,GAAG,YAAY,YAAY,EAAE,CAAC;gBAChC,cAAc,GAAG,GAAG,CAAC,UAAU,CAAC;YAClC,CAAC;YAED,wCAAwC;YACxC,MAAM,cAAc,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;YAC9C,IAAI,cAAc,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBACpC,2DAA2D;gBAC3D,MAAM,MAAM,GAAG,GAAG,YAAY,YAAY;oBACxC,CAAC,CAAC,IAAI,GAAG,CAAC,SAAS,SAAS,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;oBACjG,CAAC,CAAC,EAAE,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC;YACnH,CAAC;YAED,IAAI,OAAO,GAAG,OAAO,EAAE,CAAC;gBACtB,MAAM,KAAK,GAAG,cAAc,KAAK,GAAG,CAAC;gBACrC,MAAM,KAAK,GAAG,iBAAiB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBAChD,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAU,CAAC;AACnB,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "imperium-crawl",
3
- "version": "2.4.0",
4
- "description": "Open-source CLI tool for web scraping, crawling, search, and custom skills",
3
+ "version": "2.5.1",
4
+ "description": "39-tool open-source CLI for web scraping, PDF extraction, content monitoring, reusable browser flows, RSS aggregation, and custom skills. Zero API keys for core tools.",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "imperium-crawl": "dist/index.js",
@@ -26,7 +26,9 @@
26
26
  "start": "node dist/index.js",
27
27
  "test": "vitest run",
28
28
  "test:watch": "vitest",
29
- "prepublishOnly": "npm run build"
29
+ "prepublishOnly": "npm run build",
30
+ "autoresearch": "tsx autoresearch/eval.ts",
31
+ "autoresearch:baseline": "tsx autoresearch/eval.ts --baseline --verbose"
30
32
  },
31
33
  "keywords": [
32
34
  "scraping",
@@ -34,7 +36,15 @@
34
36
  "web-search",
35
37
  "brave-search",
36
38
  "firecrawl",
37
- "cli"
39
+ "cli",
40
+ "pdf-extract",
41
+ "web-monitoring",
42
+ "url-watch",
43
+ "content-diff",
44
+ "intelligence-digest",
45
+ "browser-workflows",
46
+ "workflow-recorder",
47
+ "flow-api"
38
48
  ],
39
49
  "author": "ImperiumTech",
40
50
  "license": "MIT",
@@ -55,6 +65,7 @@
55
65
  "normalize-url": "^8.1.1",
56
66
  "ora": "^8.2.0",
57
67
  "p-queue": "^8.1.1",
68
+ "pdfjs-dist": "^4.0.379",
58
69
  "playwright": "1.52",
59
70
  "robots-parser": "^3.0.1",
60
71
  "rss-parser": "^3.13.0",