illuma-agents 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/LICENSE +1 -5
  2. package/dist/cjs/common/enum.cjs +1 -2
  3. package/dist/cjs/common/enum.cjs.map +1 -1
  4. package/dist/cjs/instrumentation.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  6. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
  7. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  8. package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/index.cjs +99 -0
  10. package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
  11. package/dist/cjs/llm/fake.cjs.map +1 -1
  12. package/dist/cjs/llm/google/index.cjs +78 -9
  13. package/dist/cjs/llm/google/index.cjs.map +1 -1
  14. package/dist/cjs/llm/google/utils/common.cjs +185 -28
  15. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  16. package/dist/cjs/llm/providers.cjs +13 -16
  17. package/dist/cjs/llm/providers.cjs.map +1 -1
  18. package/dist/cjs/llm/text.cjs.map +1 -1
  19. package/dist/cjs/messages/core.cjs +14 -14
  20. package/dist/cjs/messages/core.cjs.map +1 -1
  21. package/dist/cjs/messages/ids.cjs.map +1 -1
  22. package/dist/cjs/messages/prune.cjs.map +1 -1
  23. package/dist/cjs/run.cjs +10 -1
  24. package/dist/cjs/run.cjs.map +1 -1
  25. package/dist/cjs/splitStream.cjs.map +1 -1
  26. package/dist/cjs/stream.cjs +4 -1
  27. package/dist/cjs/stream.cjs.map +1 -1
  28. package/dist/cjs/tools/ToolNode.cjs +163 -55
  29. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  30. package/dist/cjs/tools/handlers.cjs +29 -25
  31. package/dist/cjs/tools/handlers.cjs.map +1 -1
  32. package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
  33. package/dist/cjs/tools/search/content.cjs.map +1 -1
  34. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  35. package/dist/cjs/tools/search/format.cjs.map +1 -1
  36. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  37. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  38. package/dist/cjs/tools/search/schema.cjs +25 -25
  39. package/dist/cjs/tools/search/schema.cjs.map +1 -1
  40. package/dist/cjs/tools/search/search.cjs +6 -1
  41. package/dist/cjs/tools/search/search.cjs.map +1 -1
  42. package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
  43. package/dist/cjs/tools/search/tool.cjs +162 -35
  44. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  45. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  46. package/dist/cjs/utils/graph.cjs.map +1 -1
  47. package/dist/cjs/utils/llm.cjs +0 -1
  48. package/dist/cjs/utils/llm.cjs.map +1 -1
  49. package/dist/cjs/utils/misc.cjs.map +1 -1
  50. package/dist/cjs/utils/run.cjs.map +1 -1
  51. package/dist/cjs/utils/title.cjs +7 -7
  52. package/dist/cjs/utils/title.cjs.map +1 -1
  53. package/dist/esm/common/enum.mjs +1 -2
  54. package/dist/esm/common/enum.mjs.map +1 -1
  55. package/dist/esm/instrumentation.mjs.map +1 -1
  56. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  57. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
  58. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  59. package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
  60. package/dist/esm/llm/bedrock/index.mjs +97 -0
  61. package/dist/esm/llm/bedrock/index.mjs.map +1 -0
  62. package/dist/esm/llm/fake.mjs.map +1 -1
  63. package/dist/esm/llm/google/index.mjs +79 -10
  64. package/dist/esm/llm/google/index.mjs.map +1 -1
  65. package/dist/esm/llm/google/utils/common.mjs +184 -30
  66. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  67. package/dist/esm/llm/providers.mjs +2 -5
  68. package/dist/esm/llm/providers.mjs.map +1 -1
  69. package/dist/esm/llm/text.mjs.map +1 -1
  70. package/dist/esm/messages/core.mjs +14 -14
  71. package/dist/esm/messages/core.mjs.map +1 -1
  72. package/dist/esm/messages/ids.mjs.map +1 -1
  73. package/dist/esm/messages/prune.mjs.map +1 -1
  74. package/dist/esm/run.mjs +10 -1
  75. package/dist/esm/run.mjs.map +1 -1
  76. package/dist/esm/splitStream.mjs.map +1 -1
  77. package/dist/esm/stream.mjs +4 -1
  78. package/dist/esm/stream.mjs.map +1 -1
  79. package/dist/esm/tools/ToolNode.mjs +164 -56
  80. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  81. package/dist/esm/tools/handlers.mjs +30 -26
  82. package/dist/esm/tools/handlers.mjs.map +1 -1
  83. package/dist/esm/tools/search/anthropic.mjs.map +1 -1
  84. package/dist/esm/tools/search/content.mjs.map +1 -1
  85. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  86. package/dist/esm/tools/search/format.mjs.map +1 -1
  87. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  88. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  89. package/dist/esm/tools/search/schema.mjs +25 -25
  90. package/dist/esm/tools/search/schema.mjs.map +1 -1
  91. package/dist/esm/tools/search/search.mjs +6 -1
  92. package/dist/esm/tools/search/search.mjs.map +1 -1
  93. package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
  94. package/dist/esm/tools/search/tool.mjs +162 -35
  95. package/dist/esm/tools/search/tool.mjs.map +1 -1
  96. package/dist/esm/tools/search/utils.mjs.map +1 -1
  97. package/dist/esm/utils/graph.mjs.map +1 -1
  98. package/dist/esm/utils/llm.mjs +0 -1
  99. package/dist/esm/utils/llm.mjs.map +1 -1
  100. package/dist/esm/utils/misc.mjs.map +1 -1
  101. package/dist/esm/utils/run.mjs.map +1 -1
  102. package/dist/esm/utils/title.mjs +7 -7
  103. package/dist/esm/utils/title.mjs.map +1 -1
  104. package/dist/types/common/enum.d.ts +1 -2
  105. package/dist/types/llm/bedrock/index.d.ts +36 -0
  106. package/dist/types/llm/google/index.d.ts +10 -0
  107. package/dist/types/llm/google/types.d.ts +11 -1
  108. package/dist/types/llm/google/utils/common.d.ts +17 -2
  109. package/dist/types/tools/ToolNode.d.ts +9 -1
  110. package/dist/types/tools/search/types.d.ts +2 -0
  111. package/dist/types/types/llm.d.ts +3 -8
  112. package/dist/types/types/tools.d.ts +1 -1
  113. package/package.json +15 -11
  114. package/src/common/enum.ts +1 -2
  115. package/src/common/index.ts +1 -1
  116. package/src/instrumentation.ts +22 -22
  117. package/src/llm/anthropic/llm.spec.ts +1442 -1442
  118. package/src/llm/anthropic/types.ts +140 -140
  119. package/src/llm/anthropic/utils/message_inputs.ts +757 -660
  120. package/src/llm/anthropic/utils/output_parsers.ts +133 -133
  121. package/src/llm/anthropic/utils/tools.ts +29 -29
  122. package/src/llm/bedrock/index.ts +128 -0
  123. package/src/llm/fake.ts +133 -133
  124. package/src/llm/google/data/gettysburg10.wav +0 -0
  125. package/src/llm/google/data/hotdog.jpg +0 -0
  126. package/src/llm/google/index.ts +129 -14
  127. package/src/llm/google/llm.spec.ts +932 -0
  128. package/src/llm/google/types.ts +56 -43
  129. package/src/llm/google/utils/common.ts +873 -660
  130. package/src/llm/google/utils/tools.ts +160 -160
  131. package/src/llm/openai/types.ts +24 -24
  132. package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
  133. package/src/llm/providers.ts +2 -7
  134. package/src/llm/text.ts +94 -94
  135. package/src/messages/core.ts +463 -463
  136. package/src/messages/formatAgentMessages.tools.test.ts +400 -400
  137. package/src/messages/formatMessage.test.ts +693 -693
  138. package/src/messages/ids.ts +26 -26
  139. package/src/messages/prune.ts +567 -567
  140. package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
  141. package/src/mockStream.ts +98 -98
  142. package/src/prompts/collab.ts +5 -5
  143. package/src/prompts/index.ts +1 -1
  144. package/src/prompts/taskmanager.ts +61 -61
  145. package/src/run.ts +13 -4
  146. package/src/scripts/ant_web_search_edge_case.ts +162 -0
  147. package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
  148. package/src/scripts/args.ts +48 -48
  149. package/src/scripts/caching.ts +123 -123
  150. package/src/scripts/code_exec_files.ts +193 -193
  151. package/src/scripts/empty_input.ts +137 -137
  152. package/src/scripts/image.ts +178 -178
  153. package/src/scripts/memory.ts +97 -97
  154. package/src/scripts/thinking.ts +149 -149
  155. package/src/specs/anthropic.simple.test.ts +67 -0
  156. package/src/specs/spec.utils.ts +3 -3
  157. package/src/specs/token-distribution-edge-case.test.ts +316 -316
  158. package/src/specs/tool-error.test.ts +193 -193
  159. package/src/splitStream.test.ts +691 -691
  160. package/src/splitStream.ts +234 -234
  161. package/src/stream.test.ts +94 -94
  162. package/src/stream.ts +4 -1
  163. package/src/tools/ToolNode.ts +206 -64
  164. package/src/tools/handlers.ts +32 -28
  165. package/src/tools/search/anthropic.ts +51 -51
  166. package/src/tools/search/content.test.ts +173 -173
  167. package/src/tools/search/content.ts +147 -147
  168. package/src/tools/search/direct-url.test.ts +530 -0
  169. package/src/tools/search/firecrawl.ts +210 -210
  170. package/src/tools/search/format.ts +250 -250
  171. package/src/tools/search/highlights.ts +320 -320
  172. package/src/tools/search/index.ts +2 -2
  173. package/src/tools/search/jina-reranker.test.ts +126 -126
  174. package/src/tools/search/output.md +2775 -2775
  175. package/src/tools/search/rerankers.ts +242 -242
  176. package/src/tools/search/schema.ts +63 -63
  177. package/src/tools/search/search.ts +766 -759
  178. package/src/tools/search/serper-scraper.ts +155 -155
  179. package/src/tools/search/test.html +883 -883
  180. package/src/tools/search/test.md +642 -642
  181. package/src/tools/search/test.ts +159 -159
  182. package/src/tools/search/tool.ts +619 -471
  183. package/src/tools/search/types.ts +689 -687
  184. package/src/tools/search/utils.ts +79 -79
  185. package/src/types/index.ts +6 -6
  186. package/src/types/llm.ts +2 -8
  187. package/src/types/tools.ts +80 -80
  188. package/src/utils/graph.ts +10 -10
  189. package/src/utils/llm.ts +26 -27
  190. package/src/utils/llmConfig.ts +5 -3
  191. package/src/utils/logging.ts +48 -48
  192. package/src/utils/misc.ts +57 -57
  193. package/src/utils/run.ts +100 -100
  194. package/src/utils/title.ts +165 -165
  195. package/dist/cjs/llm/ollama/index.cjs +0 -70
  196. package/dist/cjs/llm/ollama/index.cjs.map +0 -1
  197. package/dist/cjs/llm/ollama/utils.cjs +0 -158
  198. package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
  199. package/dist/esm/llm/ollama/index.mjs +0 -68
  200. package/dist/esm/llm/ollama/index.mjs.map +0 -1
  201. package/dist/esm/llm/ollama/utils.mjs +0 -155
  202. package/dist/esm/llm/ollama/utils.mjs.map +0 -1
  203. package/dist/types/llm/ollama/index.d.ts +0 -8
  204. package/dist/types/llm/ollama/utils.d.ts +0 -7
  205. package/src/llm/ollama/index.ts +0 -92
  206. package/src/llm/ollama/utils.ts +0 -193
  207. package/src/proto/CollabGraph.ts +0 -269
  208. package/src/proto/TaskManager.ts +0 -243
  209. package/src/proto/collab.ts +0 -200
  210. package/src/proto/collab_design.ts +0 -184
  211. package/src/proto/collab_design_v2.ts +0 -224
  212. package/src/proto/collab_design_v3.ts +0 -255
  213. package/src/proto/collab_design_v4.ts +0 -220
  214. package/src/proto/collab_design_v5.ts +0 -251
  215. package/src/proto/collab_graph.ts +0 -181
  216. package/src/proto/collab_original.ts +0 -123
  217. package/src/proto/example.ts +0 -93
  218. package/src/proto/example_new.ts +0 -68
  219. package/src/proto/example_old.ts +0 -201
  220. package/src/proto/example_test.ts +0 -152
  221. package/src/proto/example_test_anthropic.ts +0 -100
  222. package/src/proto/log_stream.ts +0 -202
  223. package/src/proto/main_collab_community_event.ts +0 -133
  224. package/src/proto/main_collab_design_v2.ts +0 -96
  225. package/src/proto/main_collab_design_v4.ts +0 -100
  226. package/src/proto/main_collab_design_v5.ts +0 -135
  227. package/src/proto/main_collab_global_analysis.ts +0 -122
  228. package/src/proto/main_collab_hackathon_event.ts +0 -153
  229. package/src/proto/main_collab_space_mission.ts +0 -153
  230. package/src/proto/main_philosophy.ts +0 -210
  231. package/src/proto/original_script.ts +0 -126
  232. package/src/proto/standard.ts +0 -100
  233. package/src/proto/stream.ts +0 -56
  234. package/src/proto/tasks.ts +0 -118
  235. package/src/proto/tools/global_analysis_tools.ts +0 -86
  236. package/src/proto/tools/space_mission_tools.ts +0 -60
  237. package/src/proto/vertexai.ts +0 -54
@@ -1,210 +1,210 @@
1
- import axios from 'axios';
2
- import { processContent } from './content';
3
- import type * as t from './types';
4
- import { createDefaultLogger } from './utils';
5
-
6
- /**
7
- * Firecrawl scraper implementation
8
- * Uses the Firecrawl API to scrape web pages
9
- */
10
- export class FirecrawlScraper implements t.BaseScraper {
11
- private apiKey: string;
12
- private apiUrl: string;
13
- private version: string;
14
- private defaultFormats: string[];
15
- private timeout: number;
16
- private logger: t.Logger;
17
- private includeTags?: string[];
18
- private excludeTags?: string[];
19
- private waitFor?: number;
20
- private maxAge?: number;
21
- private mobile?: boolean;
22
- private skipTlsVerification?: boolean;
23
- private blockAds?: boolean;
24
- private removeBase64Images?: boolean;
25
- private parsePDF?: boolean;
26
- private storeInCache?: boolean;
27
- private zeroDataRetention?: boolean;
28
- private headers?: Record<string, string>;
29
- private location?: { country?: string; languages?: string[] };
30
- private onlyMainContent?: boolean;
31
- private changeTrackingOptions?: object;
32
-
33
- constructor(config: t.FirecrawlScraperConfig = {}) {
34
- this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
35
-
36
- this.version = config.version ?? 'v2';
37
-
38
- const baseUrl =
39
- config.apiUrl ??
40
- process.env.FIRECRAWL_BASE_URL ??
41
- 'https://api.firecrawl.dev';
42
- this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
43
-
44
- this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
45
- this.timeout = config.timeout ?? 7500;
46
-
47
- this.logger = config.logger || createDefaultLogger();
48
-
49
- this.includeTags = config.includeTags;
50
- this.excludeTags = config.excludeTags;
51
- this.waitFor = config.waitFor;
52
- this.maxAge = config.maxAge;
53
- this.mobile = config.mobile;
54
- this.skipTlsVerification = config.skipTlsVerification;
55
- this.blockAds = config.blockAds;
56
- this.removeBase64Images = config.removeBase64Images;
57
- this.parsePDF = config.parsePDF;
58
- this.storeInCache = config.storeInCache;
59
- this.zeroDataRetention = config.zeroDataRetention;
60
- this.headers = config.headers;
61
- this.location = config.location;
62
- this.onlyMainContent = config.onlyMainContent;
63
- this.changeTrackingOptions = config.changeTrackingOptions;
64
-
65
- if (!this.apiKey) {
66
- this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
67
- }
68
-
69
- this.logger.debug(
70
- `Firecrawl scraper initialized with API URL: ${this.apiUrl}`
71
- );
72
- }
73
-
74
- /**
75
- * Scrape a single URL
76
- * @param url URL to scrape
77
- * @param options Scrape options
78
- * @returns Scrape response
79
- */
80
- async scrapeUrl(
81
- url: string,
82
- options: t.FirecrawlScrapeOptions = {}
83
- ): Promise<[string, t.FirecrawlScrapeResponse]> {
84
- if (!this.apiKey) {
85
- return [
86
- url,
87
- {
88
- success: false,
89
- error: 'FIRECRAWL_API_KEY is not set',
90
- },
91
- ];
92
- }
93
-
94
- try {
95
- const payload = omitUndefined({
96
- url,
97
- formats: options.formats ?? this.defaultFormats,
98
- includeTags: options.includeTags ?? this.includeTags,
99
- excludeTags: options.excludeTags ?? this.excludeTags,
100
- headers: options.headers ?? this.headers,
101
- waitFor: options.waitFor ?? this.waitFor,
102
- timeout: options.timeout ?? this.timeout,
103
- onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
104
- maxAge: options.maxAge ?? this.maxAge,
105
- mobile: options.mobile ?? this.mobile,
106
- skipTlsVerification:
107
- options.skipTlsVerification ?? this.skipTlsVerification,
108
- parsePDF: options.parsePDF ?? this.parsePDF,
109
- location: options.location ?? this.location,
110
- removeBase64Images:
111
- options.removeBase64Images ?? this.removeBase64Images,
112
- blockAds: options.blockAds ?? this.blockAds,
113
- storeInCache: options.storeInCache ?? this.storeInCache,
114
- zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
115
- changeTrackingOptions:
116
- options.changeTrackingOptions ?? this.changeTrackingOptions,
117
- });
118
- const response = await axios.post(this.apiUrl, payload, {
119
- headers: {
120
- 'Content-Type': 'application/json',
121
- Authorization: `Bearer ${this.apiKey}`,
122
- },
123
- timeout: this.timeout,
124
- });
125
-
126
- return [url, response.data];
127
- } catch (error) {
128
- const errorMessage =
129
- error instanceof Error ? error.message : String(error);
130
- return [
131
- url,
132
- {
133
- success: false,
134
- error: `Firecrawl API request failed: ${errorMessage}`,
135
- },
136
- ];
137
- }
138
- }
139
-
140
- /**
141
- * Extract content from scrape response
142
- * @param response Scrape response
143
- * @returns Extracted content or empty string if not available
144
- */
145
- extractContent(
146
- response: t.FirecrawlScrapeResponse
147
- ): [string, undefined | t.References] {
148
- if (!response.success || !response.data) {
149
- return ['', undefined];
150
- }
151
-
152
- if (response.data.markdown != null && response.data.html != null) {
153
- try {
154
- const { markdown, ...rest } = processContent(
155
- response.data.html,
156
- response.data.markdown
157
- );
158
- return [markdown, rest];
159
- } catch (error) {
160
- this.logger.error('Error processing content:', error);
161
- return [response.data.markdown, undefined];
162
- }
163
- } else if (response.data.markdown != null) {
164
- return [response.data.markdown, undefined];
165
- }
166
-
167
- // Fall back to HTML content
168
- if (response.data.html != null) {
169
- return [response.data.html, undefined];
170
- }
171
-
172
- // Fall back to raw HTML content
173
- if (response.data.rawHtml != null) {
174
- return [response.data.rawHtml, undefined];
175
- }
176
-
177
- return ['', undefined];
178
- }
179
-
180
- /**
181
- * Extract metadata from scrape response
182
- * @param response Scrape response
183
- * @returns Metadata object
184
- */
185
- extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {
186
- if (!response.success || !response.data || !response.data.metadata) {
187
- return {};
188
- }
189
-
190
- return response.data.metadata;
191
- }
192
- }
193
-
194
- /**
195
- * Create a Firecrawl scraper instance
196
- * @param config Scraper configuration
197
- * @returns Firecrawl scraper instance
198
- */
199
- export const createFirecrawlScraper = (
200
- config: t.FirecrawlScraperConfig = {}
201
- ): FirecrawlScraper => {
202
- return new FirecrawlScraper(config);
203
- };
204
-
205
- // Helper function to clean up payload for firecrawl
206
- function omitUndefined<T extends object>(obj: T): Partial<T> {
207
- return Object.fromEntries(
208
- Object.entries(obj).filter(([, v]) => v !== undefined)
209
- ) as Partial<T>;
210
- }
1
+ import axios from 'axios';
2
+ import { processContent } from './content';
3
+ import type * as t from './types';
4
+ import { createDefaultLogger } from './utils';
5
+
6
+ /**
7
+ * Firecrawl scraper implementation
8
+ * Uses the Firecrawl API to scrape web pages
9
+ */
10
+ export class FirecrawlScraper implements t.BaseScraper {
11
+ private apiKey: string;
12
+ private apiUrl: string;
13
+ private version: string;
14
+ private defaultFormats: string[];
15
+ private timeout: number;
16
+ private logger: t.Logger;
17
+ private includeTags?: string[];
18
+ private excludeTags?: string[];
19
+ private waitFor?: number;
20
+ private maxAge?: number;
21
+ private mobile?: boolean;
22
+ private skipTlsVerification?: boolean;
23
+ private blockAds?: boolean;
24
+ private removeBase64Images?: boolean;
25
+ private parsePDF?: boolean;
26
+ private storeInCache?: boolean;
27
+ private zeroDataRetention?: boolean;
28
+ private headers?: Record<string, string>;
29
+ private location?: { country?: string; languages?: string[] };
30
+ private onlyMainContent?: boolean;
31
+ private changeTrackingOptions?: object;
32
+
33
+ constructor(config: t.FirecrawlScraperConfig = {}) {
34
+ this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
35
+
36
+ this.version = config.version ?? 'v2';
37
+
38
+ const baseUrl =
39
+ config.apiUrl ??
40
+ process.env.FIRECRAWL_BASE_URL ??
41
+ 'https://api.firecrawl.dev';
42
+ this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
43
+
44
+ this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
45
+ this.timeout = config.timeout ?? 7500;
46
+
47
+ this.logger = config.logger || createDefaultLogger();
48
+
49
+ this.includeTags = config.includeTags;
50
+ this.excludeTags = config.excludeTags;
51
+ this.waitFor = config.waitFor;
52
+ this.maxAge = config.maxAge;
53
+ this.mobile = config.mobile;
54
+ this.skipTlsVerification = config.skipTlsVerification;
55
+ this.blockAds = config.blockAds;
56
+ this.removeBase64Images = config.removeBase64Images;
57
+ this.parsePDF = config.parsePDF;
58
+ this.storeInCache = config.storeInCache;
59
+ this.zeroDataRetention = config.zeroDataRetention;
60
+ this.headers = config.headers;
61
+ this.location = config.location;
62
+ this.onlyMainContent = config.onlyMainContent;
63
+ this.changeTrackingOptions = config.changeTrackingOptions;
64
+
65
+ if (!this.apiKey) {
66
+ this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
67
+ }
68
+
69
+ this.logger.debug(
70
+ `Firecrawl scraper initialized with API URL: ${this.apiUrl}`
71
+ );
72
+ }
73
+
74
+ /**
75
+ * Scrape a single URL
76
+ * @param url URL to scrape
77
+ * @param options Scrape options
78
+ * @returns Scrape response
79
+ */
80
+ async scrapeUrl(
81
+ url: string,
82
+ options: t.FirecrawlScrapeOptions = {}
83
+ ): Promise<[string, t.FirecrawlScrapeResponse]> {
84
+ if (!this.apiKey) {
85
+ return [
86
+ url,
87
+ {
88
+ success: false,
89
+ error: 'FIRECRAWL_API_KEY is not set',
90
+ },
91
+ ];
92
+ }
93
+
94
+ try {
95
+ const payload = omitUndefined({
96
+ url,
97
+ formats: options.formats ?? this.defaultFormats,
98
+ includeTags: options.includeTags ?? this.includeTags,
99
+ excludeTags: options.excludeTags ?? this.excludeTags,
100
+ headers: options.headers ?? this.headers,
101
+ waitFor: options.waitFor ?? this.waitFor,
102
+ timeout: options.timeout ?? this.timeout,
103
+ onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
104
+ maxAge: options.maxAge ?? this.maxAge,
105
+ mobile: options.mobile ?? this.mobile,
106
+ skipTlsVerification:
107
+ options.skipTlsVerification ?? this.skipTlsVerification,
108
+ parsePDF: options.parsePDF ?? this.parsePDF,
109
+ location: options.location ?? this.location,
110
+ removeBase64Images:
111
+ options.removeBase64Images ?? this.removeBase64Images,
112
+ blockAds: options.blockAds ?? this.blockAds,
113
+ storeInCache: options.storeInCache ?? this.storeInCache,
114
+ zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
115
+ changeTrackingOptions:
116
+ options.changeTrackingOptions ?? this.changeTrackingOptions,
117
+ });
118
+ const response = await axios.post(this.apiUrl, payload, {
119
+ headers: {
120
+ 'Content-Type': 'application/json',
121
+ Authorization: `Bearer ${this.apiKey}`,
122
+ },
123
+ timeout: this.timeout,
124
+ });
125
+
126
+ return [url, response.data];
127
+ } catch (error) {
128
+ const errorMessage =
129
+ error instanceof Error ? error.message : String(error);
130
+ return [
131
+ url,
132
+ {
133
+ success: false,
134
+ error: `Firecrawl API request failed: ${errorMessage}`,
135
+ },
136
+ ];
137
+ }
138
+ }
139
+
140
+ /**
141
+ * Extract content from scrape response
142
+ * @param response Scrape response
143
+ * @returns Extracted content or empty string if not available
144
+ */
145
+ extractContent(
146
+ response: t.FirecrawlScrapeResponse
147
+ ): [string, undefined | t.References] {
148
+ if (!response.success || !response.data) {
149
+ return ['', undefined];
150
+ }
151
+
152
+ if (response.data.markdown != null && response.data.html != null) {
153
+ try {
154
+ const { markdown, ...rest } = processContent(
155
+ response.data.html,
156
+ response.data.markdown
157
+ );
158
+ return [markdown, rest];
159
+ } catch (error) {
160
+ this.logger.error('Error processing content:', error);
161
+ return [response.data.markdown, undefined];
162
+ }
163
+ } else if (response.data.markdown != null) {
164
+ return [response.data.markdown, undefined];
165
+ }
166
+
167
+ // Fall back to HTML content
168
+ if (response.data.html != null) {
169
+ return [response.data.html, undefined];
170
+ }
171
+
172
+ // Fall back to raw HTML content
173
+ if (response.data.rawHtml != null) {
174
+ return [response.data.rawHtml, undefined];
175
+ }
176
+
177
+ return ['', undefined];
178
+ }
179
+
180
+ /**
181
+ * Extract metadata from scrape response
182
+ * @param response Scrape response
183
+ * @returns Metadata object
184
+ */
185
+ extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {
186
+ if (!response.success || !response.data || !response.data.metadata) {
187
+ return {};
188
+ }
189
+
190
+ return response.data.metadata;
191
+ }
192
+ }
193
+
194
+ /**
195
+ * Create a Firecrawl scraper instance
196
+ * @param config Scraper configuration
197
+ * @returns Firecrawl scraper instance
198
+ */
199
+ export const createFirecrawlScraper = (
200
+ config: t.FirecrawlScraperConfig = {}
201
+ ): FirecrawlScraper => {
202
+ return new FirecrawlScraper(config);
203
+ };
204
+
205
+ // Helper function to clean up payload for firecrawl
206
+ function omitUndefined<T extends object>(obj: T): Partial<T> {
207
+ return Object.fromEntries(
208
+ Object.entries(obj).filter(([, v]) => v !== undefined)
209
+ ) as Partial<T>;
210
+ }