illuma-agents 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/LICENSE +1 -5
  2. package/dist/cjs/common/enum.cjs +1 -2
  3. package/dist/cjs/common/enum.cjs.map +1 -1
  4. package/dist/cjs/events.cjs +11 -0
  5. package/dist/cjs/events.cjs.map +1 -1
  6. package/dist/cjs/graphs/Graph.cjs +2 -1
  7. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  8. package/dist/cjs/instrumentation.cjs +3 -1
  9. package/dist/cjs/instrumentation.cjs.map +1 -1
  10. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  11. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
  12. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
  14. package/dist/cjs/llm/bedrock/index.cjs +99 -0
  15. package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
  16. package/dist/cjs/llm/fake.cjs.map +1 -1
  17. package/dist/cjs/llm/openai/index.cjs +102 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/openai/utils/index.cjs +87 -1
  20. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  21. package/dist/cjs/llm/openrouter/index.cjs +175 -1
  22. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  23. package/dist/cjs/llm/providers.cjs +13 -16
  24. package/dist/cjs/llm/providers.cjs.map +1 -1
  25. package/dist/cjs/llm/text.cjs.map +1 -1
  26. package/dist/cjs/messages/core.cjs +14 -14
  27. package/dist/cjs/messages/core.cjs.map +1 -1
  28. package/dist/cjs/messages/ids.cjs.map +1 -1
  29. package/dist/cjs/messages/prune.cjs.map +1 -1
  30. package/dist/cjs/run.cjs +18 -1
  31. package/dist/cjs/run.cjs.map +1 -1
  32. package/dist/cjs/splitStream.cjs.map +1 -1
  33. package/dist/cjs/stream.cjs +24 -1
  34. package/dist/cjs/stream.cjs.map +1 -1
  35. package/dist/cjs/tools/ToolNode.cjs +20 -1
  36. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  37. package/dist/cjs/tools/handlers.cjs +29 -25
  38. package/dist/cjs/tools/handlers.cjs.map +1 -1
  39. package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
  40. package/dist/cjs/tools/search/content.cjs.map +1 -1
  41. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  42. package/dist/cjs/tools/search/format.cjs.map +1 -1
  43. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  44. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  45. package/dist/cjs/tools/search/schema.cjs +27 -25
  46. package/dist/cjs/tools/search/schema.cjs.map +1 -1
  47. package/dist/cjs/tools/search/search.cjs +6 -1
  48. package/dist/cjs/tools/search/search.cjs.map +1 -1
  49. package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
  50. package/dist/cjs/tools/search/tool.cjs +182 -35
  51. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  52. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  53. package/dist/cjs/utils/graph.cjs.map +1 -1
  54. package/dist/cjs/utils/llm.cjs +0 -1
  55. package/dist/cjs/utils/llm.cjs.map +1 -1
  56. package/dist/cjs/utils/misc.cjs.map +1 -1
  57. package/dist/cjs/utils/run.cjs.map +1 -1
  58. package/dist/cjs/utils/title.cjs +7 -7
  59. package/dist/cjs/utils/title.cjs.map +1 -1
  60. package/dist/esm/common/enum.mjs +1 -2
  61. package/dist/esm/common/enum.mjs.map +1 -1
  62. package/dist/esm/events.mjs +11 -0
  63. package/dist/esm/events.mjs.map +1 -1
  64. package/dist/esm/graphs/Graph.mjs +2 -1
  65. package/dist/esm/graphs/Graph.mjs.map +1 -1
  66. package/dist/esm/instrumentation.mjs +3 -1
  67. package/dist/esm/instrumentation.mjs.map +1 -1
  68. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
  72. package/dist/esm/llm/bedrock/index.mjs +97 -0
  73. package/dist/esm/llm/bedrock/index.mjs.map +1 -0
  74. package/dist/esm/llm/fake.mjs.map +1 -1
  75. package/dist/esm/llm/openai/index.mjs +103 -1
  76. package/dist/esm/llm/openai/index.mjs.map +1 -1
  77. package/dist/esm/llm/openai/utils/index.mjs +88 -2
  78. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  79. package/dist/esm/llm/openrouter/index.mjs +175 -1
  80. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  81. package/dist/esm/llm/providers.mjs +2 -5
  82. package/dist/esm/llm/providers.mjs.map +1 -1
  83. package/dist/esm/llm/text.mjs.map +1 -1
  84. package/dist/esm/messages/core.mjs +14 -14
  85. package/dist/esm/messages/core.mjs.map +1 -1
  86. package/dist/esm/messages/ids.mjs.map +1 -1
  87. package/dist/esm/messages/prune.mjs.map +1 -1
  88. package/dist/esm/run.mjs +18 -1
  89. package/dist/esm/run.mjs.map +1 -1
  90. package/dist/esm/splitStream.mjs.map +1 -1
  91. package/dist/esm/stream.mjs +24 -1
  92. package/dist/esm/stream.mjs.map +1 -1
  93. package/dist/esm/tools/ToolNode.mjs +20 -1
  94. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  95. package/dist/esm/tools/handlers.mjs +30 -26
  96. package/dist/esm/tools/handlers.mjs.map +1 -1
  97. package/dist/esm/tools/search/anthropic.mjs.map +1 -1
  98. package/dist/esm/tools/search/content.mjs.map +1 -1
  99. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  100. package/dist/esm/tools/search/format.mjs.map +1 -1
  101. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  102. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  103. package/dist/esm/tools/search/schema.mjs +27 -25
  104. package/dist/esm/tools/search/schema.mjs.map +1 -1
  105. package/dist/esm/tools/search/search.mjs +6 -1
  106. package/dist/esm/tools/search/search.mjs.map +1 -1
  107. package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
  108. package/dist/esm/tools/search/tool.mjs +182 -35
  109. package/dist/esm/tools/search/tool.mjs.map +1 -1
  110. package/dist/esm/tools/search/utils.mjs.map +1 -1
  111. package/dist/esm/utils/graph.mjs.map +1 -1
  112. package/dist/esm/utils/llm.mjs +0 -1
  113. package/dist/esm/utils/llm.mjs.map +1 -1
  114. package/dist/esm/utils/misc.mjs.map +1 -1
  115. package/dist/esm/utils/run.mjs.map +1 -1
  116. package/dist/esm/utils/title.mjs +7 -7
  117. package/dist/esm/utils/title.mjs.map +1 -1
  118. package/dist/types/common/enum.d.ts +1 -2
  119. package/dist/types/llm/bedrock/index.d.ts +36 -0
  120. package/dist/types/llm/openai/index.d.ts +1 -0
  121. package/dist/types/llm/openai/utils/index.d.ts +10 -1
  122. package/dist/types/llm/openrouter/index.d.ts +4 -1
  123. package/dist/types/tools/search/types.d.ts +2 -0
  124. package/dist/types/types/llm.d.ts +3 -8
  125. package/package.json +16 -12
  126. package/src/common/enum.ts +1 -2
  127. package/src/common/index.ts +1 -1
  128. package/src/events.ts +11 -0
  129. package/src/graphs/Graph.ts +2 -1
  130. package/src/instrumentation.ts +25 -22
  131. package/src/llm/anthropic/llm.spec.ts +1442 -1442
  132. package/src/llm/anthropic/types.ts +140 -140
  133. package/src/llm/anthropic/utils/message_inputs.ts +757 -660
  134. package/src/llm/anthropic/utils/output_parsers.ts +133 -133
  135. package/src/llm/anthropic/utils/tools.ts +29 -29
  136. package/src/llm/bedrock/index.ts +128 -0
  137. package/src/llm/fake.ts +133 -133
  138. package/src/llm/google/llm.spec.ts +3 -1
  139. package/src/llm/google/utils/tools.ts +160 -160
  140. package/src/llm/openai/index.ts +126 -0
  141. package/src/llm/openai/types.ts +24 -24
  142. package/src/llm/openai/utils/index.ts +116 -1
  143. package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
  144. package/src/llm/openrouter/index.ts +222 -1
  145. package/src/llm/providers.ts +2 -7
  146. package/src/llm/text.ts +94 -94
  147. package/src/messages/core.ts +463 -463
  148. package/src/messages/formatAgentMessages.tools.test.ts +400 -400
  149. package/src/messages/formatMessage.test.ts +693 -693
  150. package/src/messages/ids.ts +26 -26
  151. package/src/messages/prune.ts +567 -567
  152. package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
  153. package/src/mockStream.ts +98 -98
  154. package/src/prompts/collab.ts +5 -5
  155. package/src/prompts/index.ts +1 -1
  156. package/src/prompts/taskmanager.ts +61 -61
  157. package/src/run.ts +22 -4
  158. package/src/scripts/ant_web_search_edge_case.ts +162 -0
  159. package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
  160. package/src/scripts/args.ts +48 -48
  161. package/src/scripts/caching.ts +123 -123
  162. package/src/scripts/code_exec_files.ts +193 -193
  163. package/src/scripts/empty_input.ts +137 -137
  164. package/src/scripts/memory.ts +97 -97
  165. package/src/scripts/test-tools-before-handoff.ts +1 -5
  166. package/src/scripts/thinking.ts +149 -149
  167. package/src/scripts/tools.ts +1 -4
  168. package/src/specs/anthropic.simple.test.ts +67 -0
  169. package/src/specs/spec.utils.ts +3 -3
  170. package/src/specs/token-distribution-edge-case.test.ts +316 -316
  171. package/src/specs/tool-error.test.ts +193 -193
  172. package/src/splitStream.test.ts +691 -691
  173. package/src/splitStream.ts +234 -234
  174. package/src/stream.test.ts +94 -94
  175. package/src/stream.ts +30 -1
  176. package/src/tools/ToolNode.ts +24 -1
  177. package/src/tools/handlers.ts +32 -28
  178. package/src/tools/search/anthropic.ts +51 -51
  179. package/src/tools/search/content.test.ts +173 -173
  180. package/src/tools/search/content.ts +147 -147
  181. package/src/tools/search/direct-url.test.ts +530 -0
  182. package/src/tools/search/firecrawl.ts +210 -210
  183. package/src/tools/search/format.ts +250 -250
  184. package/src/tools/search/highlights.ts +320 -320
  185. package/src/tools/search/index.ts +2 -2
  186. package/src/tools/search/jina-reranker.test.ts +126 -126
  187. package/src/tools/search/output.md +2775 -2775
  188. package/src/tools/search/rerankers.ts +242 -242
  189. package/src/tools/search/schema.ts +65 -63
  190. package/src/tools/search/search.ts +766 -759
  191. package/src/tools/search/serper-scraper.ts +155 -155
  192. package/src/tools/search/test.html +883 -883
  193. package/src/tools/search/test.md +642 -642
  194. package/src/tools/search/test.ts +159 -159
  195. package/src/tools/search/tool.ts +641 -471
  196. package/src/tools/search/types.ts +689 -687
  197. package/src/tools/search/utils.ts +79 -79
  198. package/src/types/index.ts +6 -6
  199. package/src/types/llm.ts +2 -8
  200. package/src/utils/graph.ts +10 -10
  201. package/src/utils/llm.ts +26 -27
  202. package/src/utils/llmConfig.ts +13 -5
  203. package/src/utils/logging.ts +48 -48
  204. package/src/utils/misc.ts +57 -57
  205. package/src/utils/run.ts +100 -100
  206. package/src/utils/title.ts +165 -165
  207. package/dist/cjs/llm/ollama/index.cjs +0 -70
  208. package/dist/cjs/llm/ollama/index.cjs.map +0 -1
  209. package/dist/cjs/llm/ollama/utils.cjs +0 -158
  210. package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
  211. package/dist/esm/llm/ollama/index.mjs +0 -68
  212. package/dist/esm/llm/ollama/index.mjs.map +0 -1
  213. package/dist/esm/llm/ollama/utils.mjs +0 -155
  214. package/dist/esm/llm/ollama/utils.mjs.map +0 -1
  215. package/dist/types/llm/ollama/index.d.ts +0 -8
  216. package/dist/types/llm/ollama/utils.d.ts +0 -7
  217. package/src/llm/ollama/index.ts +0 -92
  218. package/src/llm/ollama/utils.ts +0 -193
  219. package/src/proto/CollabGraph.ts +0 -269
  220. package/src/proto/TaskManager.ts +0 -243
  221. package/src/proto/collab.ts +0 -200
  222. package/src/proto/collab_design.ts +0 -184
  223. package/src/proto/collab_design_v2.ts +0 -224
  224. package/src/proto/collab_design_v3.ts +0 -255
  225. package/src/proto/collab_design_v4.ts +0 -220
  226. package/src/proto/collab_design_v5.ts +0 -251
  227. package/src/proto/collab_graph.ts +0 -181
  228. package/src/proto/collab_original.ts +0 -123
  229. package/src/proto/example.ts +0 -93
  230. package/src/proto/example_new.ts +0 -68
  231. package/src/proto/example_old.ts +0 -201
  232. package/src/proto/example_test.ts +0 -152
  233. package/src/proto/example_test_anthropic.ts +0 -100
  234. package/src/proto/log_stream.ts +0 -202
  235. package/src/proto/main_collab_community_event.ts +0 -133
  236. package/src/proto/main_collab_design_v2.ts +0 -96
  237. package/src/proto/main_collab_design_v4.ts +0 -100
  238. package/src/proto/main_collab_design_v5.ts +0 -135
  239. package/src/proto/main_collab_global_analysis.ts +0 -122
  240. package/src/proto/main_collab_hackathon_event.ts +0 -153
  241. package/src/proto/main_collab_space_mission.ts +0 -153
  242. package/src/proto/main_philosophy.ts +0 -210
  243. package/src/proto/original_script.ts +0 -126
  244. package/src/proto/standard.ts +0 -100
  245. package/src/proto/stream.ts +0 -56
  246. package/src/proto/tasks.ts +0 -118
  247. package/src/proto/tools/global_analysis_tools.ts +0 -86
  248. package/src/proto/tools/space_mission_tools.ts +0 -60
  249. package/src/proto/vertexai.ts +0 -54
  250. package/src/scripts/image.ts +0 -178
@@ -1,210 +1,210 @@
1
- import axios from 'axios';
2
- import { processContent } from './content';
3
- import type * as t from './types';
4
- import { createDefaultLogger } from './utils';
5
-
6
- /**
7
- * Firecrawl scraper implementation
8
- * Uses the Firecrawl API to scrape web pages
9
- */
10
- export class FirecrawlScraper implements t.BaseScraper {
11
- private apiKey: string;
12
- private apiUrl: string;
13
- private version: string;
14
- private defaultFormats: string[];
15
- private timeout: number;
16
- private logger: t.Logger;
17
- private includeTags?: string[];
18
- private excludeTags?: string[];
19
- private waitFor?: number;
20
- private maxAge?: number;
21
- private mobile?: boolean;
22
- private skipTlsVerification?: boolean;
23
- private blockAds?: boolean;
24
- private removeBase64Images?: boolean;
25
- private parsePDF?: boolean;
26
- private storeInCache?: boolean;
27
- private zeroDataRetention?: boolean;
28
- private headers?: Record<string, string>;
29
- private location?: { country?: string; languages?: string[] };
30
- private onlyMainContent?: boolean;
31
- private changeTrackingOptions?: object;
32
-
33
- constructor(config: t.FirecrawlScraperConfig = {}) {
34
- this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
35
-
36
- this.version = config.version ?? 'v2';
37
-
38
- const baseUrl =
39
- config.apiUrl ??
40
- process.env.FIRECRAWL_BASE_URL ??
41
- 'https://api.firecrawl.dev';
42
- this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
43
-
44
- this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
45
- this.timeout = config.timeout ?? 7500;
46
-
47
- this.logger = config.logger || createDefaultLogger();
48
-
49
- this.includeTags = config.includeTags;
50
- this.excludeTags = config.excludeTags;
51
- this.waitFor = config.waitFor;
52
- this.maxAge = config.maxAge;
53
- this.mobile = config.mobile;
54
- this.skipTlsVerification = config.skipTlsVerification;
55
- this.blockAds = config.blockAds;
56
- this.removeBase64Images = config.removeBase64Images;
57
- this.parsePDF = config.parsePDF;
58
- this.storeInCache = config.storeInCache;
59
- this.zeroDataRetention = config.zeroDataRetention;
60
- this.headers = config.headers;
61
- this.location = config.location;
62
- this.onlyMainContent = config.onlyMainContent;
63
- this.changeTrackingOptions = config.changeTrackingOptions;
64
-
65
- if (!this.apiKey) {
66
- this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
67
- }
68
-
69
- this.logger.debug(
70
- `Firecrawl scraper initialized with API URL: ${this.apiUrl}`
71
- );
72
- }
73
-
74
- /**
75
- * Scrape a single URL
76
- * @param url URL to scrape
77
- * @param options Scrape options
78
- * @returns Scrape response
79
- */
80
- async scrapeUrl(
81
- url: string,
82
- options: t.FirecrawlScrapeOptions = {}
83
- ): Promise<[string, t.FirecrawlScrapeResponse]> {
84
- if (!this.apiKey) {
85
- return [
86
- url,
87
- {
88
- success: false,
89
- error: 'FIRECRAWL_API_KEY is not set',
90
- },
91
- ];
92
- }
93
-
94
- try {
95
- const payload = omitUndefined({
96
- url,
97
- formats: options.formats ?? this.defaultFormats,
98
- includeTags: options.includeTags ?? this.includeTags,
99
- excludeTags: options.excludeTags ?? this.excludeTags,
100
- headers: options.headers ?? this.headers,
101
- waitFor: options.waitFor ?? this.waitFor,
102
- timeout: options.timeout ?? this.timeout,
103
- onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
104
- maxAge: options.maxAge ?? this.maxAge,
105
- mobile: options.mobile ?? this.mobile,
106
- skipTlsVerification:
107
- options.skipTlsVerification ?? this.skipTlsVerification,
108
- parsePDF: options.parsePDF ?? this.parsePDF,
109
- location: options.location ?? this.location,
110
- removeBase64Images:
111
- options.removeBase64Images ?? this.removeBase64Images,
112
- blockAds: options.blockAds ?? this.blockAds,
113
- storeInCache: options.storeInCache ?? this.storeInCache,
114
- zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
115
- changeTrackingOptions:
116
- options.changeTrackingOptions ?? this.changeTrackingOptions,
117
- });
118
- const response = await axios.post(this.apiUrl, payload, {
119
- headers: {
120
- 'Content-Type': 'application/json',
121
- Authorization: `Bearer ${this.apiKey}`,
122
- },
123
- timeout: this.timeout,
124
- });
125
-
126
- return [url, response.data];
127
- } catch (error) {
128
- const errorMessage =
129
- error instanceof Error ? error.message : String(error);
130
- return [
131
- url,
132
- {
133
- success: false,
134
- error: `Firecrawl API request failed: ${errorMessage}`,
135
- },
136
- ];
137
- }
138
- }
139
-
140
- /**
141
- * Extract content from scrape response
142
- * @param response Scrape response
143
- * @returns Extracted content or empty string if not available
144
- */
145
- extractContent(
146
- response: t.FirecrawlScrapeResponse
147
- ): [string, undefined | t.References] {
148
- if (!response.success || !response.data) {
149
- return ['', undefined];
150
- }
151
-
152
- if (response.data.markdown != null && response.data.html != null) {
153
- try {
154
- const { markdown, ...rest } = processContent(
155
- response.data.html,
156
- response.data.markdown
157
- );
158
- return [markdown, rest];
159
- } catch (error) {
160
- this.logger.error('Error processing content:', error);
161
- return [response.data.markdown, undefined];
162
- }
163
- } else if (response.data.markdown != null) {
164
- return [response.data.markdown, undefined];
165
- }
166
-
167
- // Fall back to HTML content
168
- if (response.data.html != null) {
169
- return [response.data.html, undefined];
170
- }
171
-
172
- // Fall back to raw HTML content
173
- if (response.data.rawHtml != null) {
174
- return [response.data.rawHtml, undefined];
175
- }
176
-
177
- return ['', undefined];
178
- }
179
-
180
- /**
181
- * Extract metadata from scrape response
182
- * @param response Scrape response
183
- * @returns Metadata object
184
- */
185
- extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {
186
- if (!response.success || !response.data || !response.data.metadata) {
187
- return {};
188
- }
189
-
190
- return response.data.metadata;
191
- }
192
- }
193
-
194
- /**
195
- * Create a Firecrawl scraper instance
196
- * @param config Scraper configuration
197
- * @returns Firecrawl scraper instance
198
- */
199
- export const createFirecrawlScraper = (
200
- config: t.FirecrawlScraperConfig = {}
201
- ): FirecrawlScraper => {
202
- return new FirecrawlScraper(config);
203
- };
204
-
205
- // Helper function to clean up payload for firecrawl
206
- function omitUndefined<T extends object>(obj: T): Partial<T> {
207
- return Object.fromEntries(
208
- Object.entries(obj).filter(([, v]) => v !== undefined)
209
- ) as Partial<T>;
210
- }
1
+ import axios from 'axios';
2
+ import { processContent } from './content';
3
+ import type * as t from './types';
4
+ import { createDefaultLogger } from './utils';
5
+
6
+ /**
7
+ * Firecrawl scraper implementation
8
+ * Uses the Firecrawl API to scrape web pages
9
+ */
10
+ export class FirecrawlScraper implements t.BaseScraper {
11
+ private apiKey: string;
12
+ private apiUrl: string;
13
+ private version: string;
14
+ private defaultFormats: string[];
15
+ private timeout: number;
16
+ private logger: t.Logger;
17
+ private includeTags?: string[];
18
+ private excludeTags?: string[];
19
+ private waitFor?: number;
20
+ private maxAge?: number;
21
+ private mobile?: boolean;
22
+ private skipTlsVerification?: boolean;
23
+ private blockAds?: boolean;
24
+ private removeBase64Images?: boolean;
25
+ private parsePDF?: boolean;
26
+ private storeInCache?: boolean;
27
+ private zeroDataRetention?: boolean;
28
+ private headers?: Record<string, string>;
29
+ private location?: { country?: string; languages?: string[] };
30
+ private onlyMainContent?: boolean;
31
+ private changeTrackingOptions?: object;
32
+
33
+ constructor(config: t.FirecrawlScraperConfig = {}) {
34
+ this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
35
+
36
+ this.version = config.version ?? 'v2';
37
+
38
+ const baseUrl =
39
+ config.apiUrl ??
40
+ process.env.FIRECRAWL_BASE_URL ??
41
+ 'https://api.firecrawl.dev';
42
+ this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
43
+
44
+ this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
45
+ this.timeout = config.timeout ?? 7500;
46
+
47
+ this.logger = config.logger || createDefaultLogger();
48
+
49
+ this.includeTags = config.includeTags;
50
+ this.excludeTags = config.excludeTags;
51
+ this.waitFor = config.waitFor;
52
+ this.maxAge = config.maxAge;
53
+ this.mobile = config.mobile;
54
+ this.skipTlsVerification = config.skipTlsVerification;
55
+ this.blockAds = config.blockAds;
56
+ this.removeBase64Images = config.removeBase64Images;
57
+ this.parsePDF = config.parsePDF;
58
+ this.storeInCache = config.storeInCache;
59
+ this.zeroDataRetention = config.zeroDataRetention;
60
+ this.headers = config.headers;
61
+ this.location = config.location;
62
+ this.onlyMainContent = config.onlyMainContent;
63
+ this.changeTrackingOptions = config.changeTrackingOptions;
64
+
65
+ if (!this.apiKey) {
66
+ this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
67
+ }
68
+
69
+ this.logger.debug(
70
+ `Firecrawl scraper initialized with API URL: ${this.apiUrl}`
71
+ );
72
+ }
73
+
74
+ /**
75
+ * Scrape a single URL
76
+ * @param url URL to scrape
77
+ * @param options Scrape options
78
+ * @returns Scrape response
79
+ */
80
+ async scrapeUrl(
81
+ url: string,
82
+ options: t.FirecrawlScrapeOptions = {}
83
+ ): Promise<[string, t.FirecrawlScrapeResponse]> {
84
+ if (!this.apiKey) {
85
+ return [
86
+ url,
87
+ {
88
+ success: false,
89
+ error: 'FIRECRAWL_API_KEY is not set',
90
+ },
91
+ ];
92
+ }
93
+
94
+ try {
95
+ const payload = omitUndefined({
96
+ url,
97
+ formats: options.formats ?? this.defaultFormats,
98
+ includeTags: options.includeTags ?? this.includeTags,
99
+ excludeTags: options.excludeTags ?? this.excludeTags,
100
+ headers: options.headers ?? this.headers,
101
+ waitFor: options.waitFor ?? this.waitFor,
102
+ timeout: options.timeout ?? this.timeout,
103
+ onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
104
+ maxAge: options.maxAge ?? this.maxAge,
105
+ mobile: options.mobile ?? this.mobile,
106
+ skipTlsVerification:
107
+ options.skipTlsVerification ?? this.skipTlsVerification,
108
+ parsePDF: options.parsePDF ?? this.parsePDF,
109
+ location: options.location ?? this.location,
110
+ removeBase64Images:
111
+ options.removeBase64Images ?? this.removeBase64Images,
112
+ blockAds: options.blockAds ?? this.blockAds,
113
+ storeInCache: options.storeInCache ?? this.storeInCache,
114
+ zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
115
+ changeTrackingOptions:
116
+ options.changeTrackingOptions ?? this.changeTrackingOptions,
117
+ });
118
+ const response = await axios.post(this.apiUrl, payload, {
119
+ headers: {
120
+ 'Content-Type': 'application/json',
121
+ Authorization: `Bearer ${this.apiKey}`,
122
+ },
123
+ timeout: this.timeout,
124
+ });
125
+
126
+ return [url, response.data];
127
+ } catch (error) {
128
+ const errorMessage =
129
+ error instanceof Error ? error.message : String(error);
130
+ return [
131
+ url,
132
+ {
133
+ success: false,
134
+ error: `Firecrawl API request failed: ${errorMessage}`,
135
+ },
136
+ ];
137
+ }
138
+ }
139
+
140
+ /**
141
+ * Extract content from scrape response
142
+ * @param response Scrape response
143
+ * @returns Extracted content or empty string if not available
144
+ */
145
+ extractContent(
146
+ response: t.FirecrawlScrapeResponse
147
+ ): [string, undefined | t.References] {
148
+ if (!response.success || !response.data) {
149
+ return ['', undefined];
150
+ }
151
+
152
+ if (response.data.markdown != null && response.data.html != null) {
153
+ try {
154
+ const { markdown, ...rest } = processContent(
155
+ response.data.html,
156
+ response.data.markdown
157
+ );
158
+ return [markdown, rest];
159
+ } catch (error) {
160
+ this.logger.error('Error processing content:', error);
161
+ return [response.data.markdown, undefined];
162
+ }
163
+ } else if (response.data.markdown != null) {
164
+ return [response.data.markdown, undefined];
165
+ }
166
+
167
+ // Fall back to HTML content
168
+ if (response.data.html != null) {
169
+ return [response.data.html, undefined];
170
+ }
171
+
172
+ // Fall back to raw HTML content
173
+ if (response.data.rawHtml != null) {
174
+ return [response.data.rawHtml, undefined];
175
+ }
176
+
177
+ return ['', undefined];
178
+ }
179
+
180
+ /**
181
+ * Extract metadata from scrape response
182
+ * @param response Scrape response
183
+ * @returns Metadata object
184
+ */
185
+ extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {
186
+ if (!response.success || !response.data || !response.data.metadata) {
187
+ return {};
188
+ }
189
+
190
+ return response.data.metadata;
191
+ }
192
+ }
193
+
194
+ /**
195
+ * Create a Firecrawl scraper instance
196
+ * @param config Scraper configuration
197
+ * @returns Firecrawl scraper instance
198
+ */
199
+ export const createFirecrawlScraper = (
200
+ config: t.FirecrawlScraperConfig = {}
201
+ ): FirecrawlScraper => {
202
+ return new FirecrawlScraper(config);
203
+ };
204
+
205
+ // Helper function to clean up payload for firecrawl
206
+ function omitUndefined<T extends object>(obj: T): Partial<T> {
207
+ return Object.fromEntries(
208
+ Object.entries(obj).filter(([, v]) => v !== undefined)
209
+ ) as Partial<T>;
210
+ }