illuma-agents 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. package/LICENSE +1 -5
  2. package/dist/cjs/common/enum.cjs +1 -2
  3. package/dist/cjs/common/enum.cjs.map +1 -1
  4. package/dist/cjs/events.cjs +11 -0
  5. package/dist/cjs/events.cjs.map +1 -1
  6. package/dist/cjs/graphs/Graph.cjs +2 -1
  7. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  8. package/dist/cjs/instrumentation.cjs +3 -1
  9. package/dist/cjs/instrumentation.cjs.map +1 -1
  10. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  11. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
  12. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
  14. package/dist/cjs/llm/bedrock/index.cjs +99 -0
  15. package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
  16. package/dist/cjs/llm/fake.cjs.map +1 -1
  17. package/dist/cjs/llm/openai/index.cjs +102 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/openai/utils/index.cjs +87 -1
  20. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  21. package/dist/cjs/llm/openrouter/index.cjs +175 -1
  22. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  23. package/dist/cjs/llm/providers.cjs +13 -16
  24. package/dist/cjs/llm/providers.cjs.map +1 -1
  25. package/dist/cjs/llm/text.cjs.map +1 -1
  26. package/dist/cjs/messages/core.cjs +14 -14
  27. package/dist/cjs/messages/core.cjs.map +1 -1
  28. package/dist/cjs/messages/ids.cjs.map +1 -1
  29. package/dist/cjs/messages/prune.cjs.map +1 -1
  30. package/dist/cjs/run.cjs +18 -1
  31. package/dist/cjs/run.cjs.map +1 -1
  32. package/dist/cjs/splitStream.cjs.map +1 -1
  33. package/dist/cjs/stream.cjs +24 -1
  34. package/dist/cjs/stream.cjs.map +1 -1
  35. package/dist/cjs/tools/ToolNode.cjs +20 -1
  36. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  37. package/dist/cjs/tools/handlers.cjs +29 -25
  38. package/dist/cjs/tools/handlers.cjs.map +1 -1
  39. package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
  40. package/dist/cjs/tools/search/content.cjs.map +1 -1
  41. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  42. package/dist/cjs/tools/search/format.cjs.map +1 -1
  43. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  44. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  45. package/dist/cjs/tools/search/schema.cjs +27 -25
  46. package/dist/cjs/tools/search/schema.cjs.map +1 -1
  47. package/dist/cjs/tools/search/search.cjs +6 -1
  48. package/dist/cjs/tools/search/search.cjs.map +1 -1
  49. package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
  50. package/dist/cjs/tools/search/tool.cjs +182 -35
  51. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  52. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  53. package/dist/cjs/utils/graph.cjs.map +1 -1
  54. package/dist/cjs/utils/llm.cjs +0 -1
  55. package/dist/cjs/utils/llm.cjs.map +1 -1
  56. package/dist/cjs/utils/misc.cjs.map +1 -1
  57. package/dist/cjs/utils/run.cjs.map +1 -1
  58. package/dist/cjs/utils/title.cjs +7 -7
  59. package/dist/cjs/utils/title.cjs.map +1 -1
  60. package/dist/esm/common/enum.mjs +1 -2
  61. package/dist/esm/common/enum.mjs.map +1 -1
  62. package/dist/esm/events.mjs +11 -0
  63. package/dist/esm/events.mjs.map +1 -1
  64. package/dist/esm/graphs/Graph.mjs +2 -1
  65. package/dist/esm/graphs/Graph.mjs.map +1 -1
  66. package/dist/esm/instrumentation.mjs +3 -1
  67. package/dist/esm/instrumentation.mjs.map +1 -1
  68. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
  72. package/dist/esm/llm/bedrock/index.mjs +97 -0
  73. package/dist/esm/llm/bedrock/index.mjs.map +1 -0
  74. package/dist/esm/llm/fake.mjs.map +1 -1
  75. package/dist/esm/llm/openai/index.mjs +103 -1
  76. package/dist/esm/llm/openai/index.mjs.map +1 -1
  77. package/dist/esm/llm/openai/utils/index.mjs +88 -2
  78. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  79. package/dist/esm/llm/openrouter/index.mjs +175 -1
  80. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  81. package/dist/esm/llm/providers.mjs +2 -5
  82. package/dist/esm/llm/providers.mjs.map +1 -1
  83. package/dist/esm/llm/text.mjs.map +1 -1
  84. package/dist/esm/messages/core.mjs +14 -14
  85. package/dist/esm/messages/core.mjs.map +1 -1
  86. package/dist/esm/messages/ids.mjs.map +1 -1
  87. package/dist/esm/messages/prune.mjs.map +1 -1
  88. package/dist/esm/run.mjs +18 -1
  89. package/dist/esm/run.mjs.map +1 -1
  90. package/dist/esm/splitStream.mjs.map +1 -1
  91. package/dist/esm/stream.mjs +24 -1
  92. package/dist/esm/stream.mjs.map +1 -1
  93. package/dist/esm/tools/ToolNode.mjs +20 -1
  94. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  95. package/dist/esm/tools/handlers.mjs +30 -26
  96. package/dist/esm/tools/handlers.mjs.map +1 -1
  97. package/dist/esm/tools/search/anthropic.mjs.map +1 -1
  98. package/dist/esm/tools/search/content.mjs.map +1 -1
  99. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  100. package/dist/esm/tools/search/format.mjs.map +1 -1
  101. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  102. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  103. package/dist/esm/tools/search/schema.mjs +27 -25
  104. package/dist/esm/tools/search/schema.mjs.map +1 -1
  105. package/dist/esm/tools/search/search.mjs +6 -1
  106. package/dist/esm/tools/search/search.mjs.map +1 -1
  107. package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
  108. package/dist/esm/tools/search/tool.mjs +182 -35
  109. package/dist/esm/tools/search/tool.mjs.map +1 -1
  110. package/dist/esm/tools/search/utils.mjs.map +1 -1
  111. package/dist/esm/utils/graph.mjs.map +1 -1
  112. package/dist/esm/utils/llm.mjs +0 -1
  113. package/dist/esm/utils/llm.mjs.map +1 -1
  114. package/dist/esm/utils/misc.mjs.map +1 -1
  115. package/dist/esm/utils/run.mjs.map +1 -1
  116. package/dist/esm/utils/title.mjs +7 -7
  117. package/dist/esm/utils/title.mjs.map +1 -1
  118. package/dist/types/common/enum.d.ts +1 -2
  119. package/dist/types/llm/bedrock/index.d.ts +36 -0
  120. package/dist/types/llm/openai/index.d.ts +1 -0
  121. package/dist/types/llm/openai/utils/index.d.ts +10 -1
  122. package/dist/types/llm/openrouter/index.d.ts +4 -1
  123. package/dist/types/tools/search/types.d.ts +2 -0
  124. package/dist/types/types/llm.d.ts +3 -8
  125. package/package.json +16 -12
  126. package/src/common/enum.ts +1 -2
  127. package/src/common/index.ts +1 -1
  128. package/src/events.ts +11 -0
  129. package/src/graphs/Graph.ts +2 -1
  130. package/src/instrumentation.ts +25 -22
  131. package/src/llm/anthropic/llm.spec.ts +1442 -1442
  132. package/src/llm/anthropic/types.ts +140 -140
  133. package/src/llm/anthropic/utils/message_inputs.ts +757 -660
  134. package/src/llm/anthropic/utils/output_parsers.ts +133 -133
  135. package/src/llm/anthropic/utils/tools.ts +29 -29
  136. package/src/llm/bedrock/index.ts +128 -0
  137. package/src/llm/fake.ts +133 -133
  138. package/src/llm/google/llm.spec.ts +3 -1
  139. package/src/llm/google/utils/tools.ts +160 -160
  140. package/src/llm/openai/index.ts +126 -0
  141. package/src/llm/openai/types.ts +24 -24
  142. package/src/llm/openai/utils/index.ts +116 -1
  143. package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
  144. package/src/llm/openrouter/index.ts +222 -1
  145. package/src/llm/providers.ts +2 -7
  146. package/src/llm/text.ts +94 -94
  147. package/src/messages/core.ts +463 -463
  148. package/src/messages/formatAgentMessages.tools.test.ts +400 -400
  149. package/src/messages/formatMessage.test.ts +693 -693
  150. package/src/messages/ids.ts +26 -26
  151. package/src/messages/prune.ts +567 -567
  152. package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
  153. package/src/mockStream.ts +98 -98
  154. package/src/prompts/collab.ts +5 -5
  155. package/src/prompts/index.ts +1 -1
  156. package/src/prompts/taskmanager.ts +61 -61
  157. package/src/run.ts +22 -4
  158. package/src/scripts/ant_web_search_edge_case.ts +162 -0
  159. package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
  160. package/src/scripts/args.ts +48 -48
  161. package/src/scripts/caching.ts +123 -123
  162. package/src/scripts/code_exec_files.ts +193 -193
  163. package/src/scripts/empty_input.ts +137 -137
  164. package/src/scripts/memory.ts +97 -97
  165. package/src/scripts/test-tools-before-handoff.ts +1 -5
  166. package/src/scripts/thinking.ts +149 -149
  167. package/src/scripts/tools.ts +1 -4
  168. package/src/specs/anthropic.simple.test.ts +67 -0
  169. package/src/specs/spec.utils.ts +3 -3
  170. package/src/specs/token-distribution-edge-case.test.ts +316 -316
  171. package/src/specs/tool-error.test.ts +193 -193
  172. package/src/splitStream.test.ts +691 -691
  173. package/src/splitStream.ts +234 -234
  174. package/src/stream.test.ts +94 -94
  175. package/src/stream.ts +30 -1
  176. package/src/tools/ToolNode.ts +24 -1
  177. package/src/tools/handlers.ts +32 -28
  178. package/src/tools/search/anthropic.ts +51 -51
  179. package/src/tools/search/content.test.ts +173 -173
  180. package/src/tools/search/content.ts +147 -147
  181. package/src/tools/search/direct-url.test.ts +530 -0
  182. package/src/tools/search/firecrawl.ts +210 -210
  183. package/src/tools/search/format.ts +250 -250
  184. package/src/tools/search/highlights.ts +320 -320
  185. package/src/tools/search/index.ts +2 -2
  186. package/src/tools/search/jina-reranker.test.ts +126 -126
  187. package/src/tools/search/output.md +2775 -2775
  188. package/src/tools/search/rerankers.ts +242 -242
  189. package/src/tools/search/schema.ts +65 -63
  190. package/src/tools/search/search.ts +766 -759
  191. package/src/tools/search/serper-scraper.ts +155 -155
  192. package/src/tools/search/test.html +883 -883
  193. package/src/tools/search/test.md +642 -642
  194. package/src/tools/search/test.ts +159 -159
  195. package/src/tools/search/tool.ts +641 -471
  196. package/src/tools/search/types.ts +689 -687
  197. package/src/tools/search/utils.ts +79 -79
  198. package/src/types/index.ts +6 -6
  199. package/src/types/llm.ts +2 -8
  200. package/src/utils/graph.ts +10 -10
  201. package/src/utils/llm.ts +26 -27
  202. package/src/utils/llmConfig.ts +13 -5
  203. package/src/utils/logging.ts +48 -48
  204. package/src/utils/misc.ts +57 -57
  205. package/src/utils/run.ts +100 -100
  206. package/src/utils/title.ts +165 -165
  207. package/dist/cjs/llm/ollama/index.cjs +0 -70
  208. package/dist/cjs/llm/ollama/index.cjs.map +0 -1
  209. package/dist/cjs/llm/ollama/utils.cjs +0 -158
  210. package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
  211. package/dist/esm/llm/ollama/index.mjs +0 -68
  212. package/dist/esm/llm/ollama/index.mjs.map +0 -1
  213. package/dist/esm/llm/ollama/utils.mjs +0 -155
  214. package/dist/esm/llm/ollama/utils.mjs.map +0 -1
  215. package/dist/types/llm/ollama/index.d.ts +0 -8
  216. package/dist/types/llm/ollama/utils.d.ts +0 -7
  217. package/src/llm/ollama/index.ts +0 -92
  218. package/src/llm/ollama/utils.ts +0 -193
  219. package/src/proto/CollabGraph.ts +0 -269
  220. package/src/proto/TaskManager.ts +0 -243
  221. package/src/proto/collab.ts +0 -200
  222. package/src/proto/collab_design.ts +0 -184
  223. package/src/proto/collab_design_v2.ts +0 -224
  224. package/src/proto/collab_design_v3.ts +0 -255
  225. package/src/proto/collab_design_v4.ts +0 -220
  226. package/src/proto/collab_design_v5.ts +0 -251
  227. package/src/proto/collab_graph.ts +0 -181
  228. package/src/proto/collab_original.ts +0 -123
  229. package/src/proto/example.ts +0 -93
  230. package/src/proto/example_new.ts +0 -68
  231. package/src/proto/example_old.ts +0 -201
  232. package/src/proto/example_test.ts +0 -152
  233. package/src/proto/example_test_anthropic.ts +0 -100
  234. package/src/proto/log_stream.ts +0 -202
  235. package/src/proto/main_collab_community_event.ts +0 -133
  236. package/src/proto/main_collab_design_v2.ts +0 -96
  237. package/src/proto/main_collab_design_v4.ts +0 -100
  238. package/src/proto/main_collab_design_v5.ts +0 -135
  239. package/src/proto/main_collab_global_analysis.ts +0 -122
  240. package/src/proto/main_collab_hackathon_event.ts +0 -153
  241. package/src/proto/main_collab_space_mission.ts +0 -153
  242. package/src/proto/main_philosophy.ts +0 -210
  243. package/src/proto/original_script.ts +0 -126
  244. package/src/proto/standard.ts +0 -100
  245. package/src/proto/stream.ts +0 -56
  246. package/src/proto/tasks.ts +0 -118
  247. package/src/proto/tools/global_analysis_tools.ts +0 -86
  248. package/src/proto/tools/space_mission_tools.ts +0 -60
  249. package/src/proto/vertexai.ts +0 -54
  250. package/src/scripts/image.ts +0 -178
@@ -1,471 +1,641 @@
1
- import { z } from 'zod';
2
- import { tool, DynamicStructuredTool } from '@langchain/core/tools';
3
- import type { RunnableConfig } from '@langchain/core/runnables';
4
- import type * as t from './types';
5
- import {
6
- DATE_RANGE,
7
- querySchema,
8
- dateSchema,
9
- countrySchema,
10
- imagesSchema,
11
- videosSchema,
12
- newsSchema,
13
- } from './schema';
14
- import { createSearchAPI, createSourceProcessor } from './search';
15
- import { createSerperScraper } from './serper-scraper';
16
- import { createFirecrawlScraper } from './firecrawl';
17
- import { expandHighlights } from './highlights';
18
- import { formatResultsForLLM } from './format';
19
- import { createDefaultLogger } from './utils';
20
- import { createReranker } from './rerankers';
21
- import { Constants } from '@/common';
22
-
23
- /**
24
- * Executes parallel searches and merges the results
25
- */
26
- async function executeParallelSearches({
27
- searchAPI,
28
- query,
29
- date,
30
- country,
31
- safeSearch,
32
- images,
33
- videos,
34
- news,
35
- logger,
36
- }: {
37
- searchAPI: ReturnType<typeof createSearchAPI>;
38
- query: string;
39
- date?: DATE_RANGE;
40
- country?: string;
41
- safeSearch: t.SearchToolConfig['safeSearch'];
42
- images: boolean;
43
- videos: boolean;
44
- news: boolean;
45
- logger: t.Logger;
46
- }): Promise<t.SearchResult> {
47
- // Prepare all search tasks to run in parallel
48
- const searchTasks: Promise<t.SearchResult>[] = [
49
- // Main search
50
- searchAPI.getSources({
51
- query,
52
- date,
53
- country,
54
- safeSearch,
55
- }),
56
- ];
57
-
58
- if (images) {
59
- searchTasks.push(
60
- searchAPI
61
- .getSources({
62
- query,
63
- date,
64
- country,
65
- safeSearch,
66
- type: 'images',
67
- })
68
- .catch((error) => {
69
- logger.error('Error fetching images:', error);
70
- return {
71
- success: false,
72
- error: `Images search failed: ${error instanceof Error ? error.message : String(error)}`,
73
- };
74
- })
75
- );
76
- }
77
- if (videos) {
78
- searchTasks.push(
79
- searchAPI
80
- .getSources({
81
- query,
82
- date,
83
- country,
84
- safeSearch,
85
- type: 'videos',
86
- })
87
- .catch((error) => {
88
- logger.error('Error fetching videos:', error);
89
- return {
90
- success: false,
91
- error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,
92
- };
93
- })
94
- );
95
- }
96
- if (news) {
97
- searchTasks.push(
98
- searchAPI
99
- .getSources({
100
- query,
101
- date,
102
- country,
103
- safeSearch,
104
- type: 'news',
105
- })
106
- .catch((error) => {
107
- logger.error('Error fetching news:', error);
108
- return {
109
- success: false,
110
- error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,
111
- };
112
- })
113
- );
114
- }
115
-
116
- // Run all searches in parallel
117
- const results = await Promise.all(searchTasks);
118
-
119
- // Get the main search result (first result)
120
- const mainResult = results[0];
121
- if (!mainResult.success) {
122
- throw new Error(mainResult.error ?? 'Search failed');
123
- }
124
-
125
- // Merge additional results with the main results
126
- const mergedResults = { ...mainResult.data };
127
-
128
- // Convert existing news to topStories if present
129
- if (mergedResults.news !== undefined && mergedResults.news.length > 0) {
130
- const existingNewsAsTopStories = mergedResults.news
131
- .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')
132
- .map((newsItem) => ({
133
- title: newsItem.title ?? '',
134
- link: newsItem.link ?? '',
135
- source: newsItem.source ?? '',
136
- date: newsItem.date ?? '',
137
- imageUrl: newsItem.imageUrl ?? '',
138
- processed: false,
139
- }));
140
- mergedResults.topStories = [
141
- ...(mergedResults.topStories ?? []),
142
- ...existingNewsAsTopStories,
143
- ];
144
- delete mergedResults.news;
145
- }
146
-
147
- results.slice(1).forEach((result) => {
148
- if (result.success && result.data !== undefined) {
149
- if (result.data.images !== undefined && result.data.images.length > 0) {
150
- mergedResults.images = [
151
- ...(mergedResults.images ?? []),
152
- ...result.data.images,
153
- ];
154
- }
155
- if (result.data.videos !== undefined && result.data.videos.length > 0) {
156
- mergedResults.videos = [
157
- ...(mergedResults.videos ?? []),
158
- ...result.data.videos,
159
- ];
160
- }
161
- if (result.data.news !== undefined && result.data.news.length > 0) {
162
- const newsAsTopStories = result.data.news.map((newsItem) => ({
163
- ...newsItem,
164
- link: newsItem.link ?? '',
165
- }));
166
- mergedResults.topStories = [
167
- ...(mergedResults.topStories ?? []),
168
- ...newsAsTopStories,
169
- ];
170
- }
171
- }
172
- });
173
-
174
- return { success: true, data: mergedResults };
175
- }
176
-
177
- function createSearchProcessor({
178
- searchAPI,
179
- safeSearch,
180
- sourceProcessor,
181
- onGetHighlights,
182
- logger,
183
- }: {
184
- safeSearch: t.SearchToolConfig['safeSearch'];
185
- searchAPI: ReturnType<typeof createSearchAPI>;
186
- sourceProcessor: ReturnType<typeof createSourceProcessor>;
187
- onGetHighlights: t.SearchToolConfig['onGetHighlights'];
188
- logger: t.Logger;
189
- }) {
190
- return async function ({
191
- query,
192
- date,
193
- country,
194
- proMode = true,
195
- maxSources = 5,
196
- onSearchResults,
197
- images = false,
198
- videos = false,
199
- news = false,
200
- }: {
201
- query: string;
202
- country?: string;
203
- date?: DATE_RANGE;
204
- proMode?: boolean;
205
- maxSources?: number;
206
- onSearchResults: t.SearchToolConfig['onSearchResults'];
207
- images?: boolean;
208
- videos?: boolean;
209
- news?: boolean;
210
- }): Promise<t.SearchResultData> {
211
- try {
212
- // Execute parallel searches and merge results
213
- const searchResult = await executeParallelSearches({
214
- searchAPI,
215
- query,
216
- date,
217
- country,
218
- safeSearch,
219
- images,
220
- videos,
221
- news,
222
- logger,
223
- });
224
-
225
- onSearchResults?.(searchResult);
226
-
227
- const processedSources = await sourceProcessor.processSources({
228
- query,
229
- news,
230
- result: searchResult,
231
- proMode,
232
- onGetHighlights,
233
- numElements: maxSources,
234
- });
235
-
236
- return expandHighlights(processedSources);
237
- } catch (error) {
238
- logger.error('Error in search:', error);
239
- return {
240
- organic: [],
241
- topStories: [],
242
- images: [],
243
- videos: [],
244
- news: [],
245
- relatedSearches: [],
246
- error: error instanceof Error ? error.message : String(error),
247
- };
248
- }
249
- };
250
- }
251
-
252
- function createOnSearchResults({
253
- runnableConfig,
254
- onSearchResults,
255
- }: {
256
- runnableConfig: RunnableConfig;
257
- onSearchResults: t.SearchToolConfig['onSearchResults'];
258
- }) {
259
- return function (results: t.SearchResult): void {
260
- if (!onSearchResults) {
261
- return;
262
- }
263
- onSearchResults(results, runnableConfig);
264
- };
265
- }
266
-
267
- function createTool({
268
- schema,
269
- search,
270
- onSearchResults: _onSearchResults,
271
- }: {
272
- schema: t.SearchToolSchema;
273
- search: ReturnType<typeof createSearchProcessor>;
274
- onSearchResults: t.SearchToolConfig['onSearchResults'];
275
- }): DynamicStructuredTool<typeof schema> {
276
- return tool<typeof schema>(
277
- async (params, runnableConfig) => {
278
- const { query, date, country: _c, images, videos, news } = params;
279
- const country = typeof _c === 'string' && _c ? _c : undefined;
280
- const searchResult = await search({
281
- query,
282
- date,
283
- country,
284
- images,
285
- videos,
286
- news,
287
- onSearchResults: createOnSearchResults({
288
- runnableConfig,
289
- onSearchResults: _onSearchResults,
290
- }),
291
- });
292
- const turn = runnableConfig.toolCall?.turn ?? 0;
293
- const { output, references } = formatResultsForLLM(turn, searchResult);
294
- const data: t.SearchResultData = { turn, ...searchResult, references };
295
- return [output, { [Constants.WEB_SEARCH]: data }];
296
- },
297
- {
298
- name: Constants.WEB_SEARCH,
299
- description: `Real-time search. Results have required citation anchors.
300
-
301
- Note: Use ONCE per reply unless instructed otherwise.
302
-
303
- Anchors:
304
- - \\ue202turnXtypeY
305
- - X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx
306
-
307
- Special Markers:
308
- - \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
309
- - \\ue200...\\ue201 — group block (e.g. \\ue200\\ue202turn0search1\\ue202turn0news2\\ue201)
310
-
311
- **CITE EVERY NON-OBVIOUS FACT/QUOTE:**
312
- Use anchor marker(s) immediately after the statement:
313
- - Standalone: "Pure functions produce same output. \\ue202turn0search0"
314
- - Standalone (multiple): "Today's News \\ue202turn0search0\\ue202turn0news0"
315
- - Highlight: "\\ue203Highlight text.\\ue204\\ue202turn0news1"
316
- - Group: "Sources. \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
317
- - Group Highlight: "\\ue203Highlight for group.\\ue204 \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
318
- - Image: "See photo \\ue202turn0image0."
319
-
320
- **NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**
321
- `.trim(),
322
- schema: schema,
323
- responseFormat: Constants.CONTENT_AND_ARTIFACT,
324
- }
325
- );
326
- }
327
-
328
- /**
329
- * Creates a search tool with a schema that dynamically includes the country field
330
- * only when the searchProvider is 'serper'.
331
- *
332
- * Supports multiple scraper providers:
333
- * - Firecrawl (default): Full-featured web scraping with multiple formats
334
- * - Serper: Lightweight scraping using Serper's scrape API
335
- *
336
- * @example
337
- * ```typescript
338
- * // Using Firecrawl scraper (default)
339
- * const searchTool = createSearchTool({
340
- * searchProvider: 'serper',
341
- * scraperProvider: 'firecrawl',
342
- * firecrawlApiKey: 'your-firecrawl-key'
343
- * });
344
- *
345
- * // Using Serper scraper
346
- * const searchTool = createSearchTool({
347
- * searchProvider: 'serper',
348
- * scraperProvider: 'serper',
349
- * serperApiKey: 'your-serper-key'
350
- * });
351
- * ```
352
- *
353
- * @param config - The search tool configuration
354
- * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
355
- */
356
- export const createSearchTool = (
357
- config: t.SearchToolConfig = {}
358
- ): DynamicStructuredTool<typeof toolSchema> => {
359
- const {
360
- searchProvider = 'serper',
361
- serperApiKey,
362
- searxngInstanceUrl,
363
- searxngApiKey,
364
- rerankerType = 'cohere',
365
- topResults = 5,
366
- strategies = ['no_extraction'],
367
- filterContent = true,
368
- safeSearch = 1,
369
- scraperProvider = 'firecrawl',
370
- firecrawlApiKey,
371
- firecrawlApiUrl,
372
- firecrawlVersion,
373
- firecrawlOptions,
374
- serperScraperOptions,
375
- scraperTimeout,
376
- jinaApiKey,
377
- jinaApiUrl,
378
- cohereApiKey,
379
- onSearchResults: _onSearchResults,
380
- onGetHighlights,
381
- } = config;
382
-
383
- const logger = config.logger || createDefaultLogger();
384
-
385
- const schemaObject: {
386
- query: z.ZodString;
387
- date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;
388
- country?: z.ZodOptional<z.ZodString>;
389
- images: z.ZodOptional<z.ZodBoolean>;
390
- videos: z.ZodOptional<z.ZodBoolean>;
391
- news: z.ZodOptional<z.ZodBoolean>;
392
- } = {
393
- query: querySchema,
394
- date: dateSchema,
395
- images: imagesSchema,
396
- videos: videosSchema,
397
- news: newsSchema,
398
- };
399
-
400
- if (searchProvider === 'serper') {
401
- schemaObject.country = countrySchema;
402
- }
403
-
404
- const toolSchema = z.object(schemaObject);
405
-
406
- const searchAPI = createSearchAPI({
407
- searchProvider,
408
- serperApiKey,
409
- searxngInstanceUrl,
410
- searxngApiKey,
411
- });
412
-
413
- /** Create scraper based on scraperProvider */
414
- let scraperInstance: t.BaseScraper;
415
-
416
- if (scraperProvider === 'serper') {
417
- scraperInstance = createSerperScraper({
418
- ...serperScraperOptions,
419
- apiKey: serperApiKey,
420
- timeout: scraperTimeout ?? serperScraperOptions?.timeout,
421
- logger,
422
- });
423
- } else {
424
- scraperInstance = createFirecrawlScraper({
425
- ...firecrawlOptions,
426
- apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
427
- apiUrl: firecrawlApiUrl,
428
- version: firecrawlVersion,
429
- timeout: scraperTimeout ?? firecrawlOptions?.timeout,
430
- formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
431
- logger,
432
- });
433
- }
434
-
435
- const selectedReranker = createReranker({
436
- rerankerType,
437
- jinaApiKey,
438
- jinaApiUrl,
439
- cohereApiKey,
440
- logger,
441
- });
442
-
443
- if (!selectedReranker) {
444
- logger.warn('No reranker selected. Using default ranking.');
445
- }
446
-
447
- const sourceProcessor = createSourceProcessor(
448
- {
449
- reranker: selectedReranker,
450
- topResults,
451
- strategies,
452
- filterContent,
453
- logger,
454
- },
455
- scraperInstance
456
- );
457
-
458
- const search = createSearchProcessor({
459
- searchAPI,
460
- safeSearch,
461
- sourceProcessor,
462
- onGetHighlights,
463
- logger,
464
- });
465
-
466
- return createTool({
467
- search,
468
- schema: toolSchema,
469
- onSearchResults: _onSearchResults,
470
- });
471
- };
1
+ import { z } from 'zod';
2
+ import { tool, DynamicStructuredTool } from '@langchain/core/tools';
3
+ import type { RunnableConfig } from '@langchain/core/runnables';
4
+ import type * as t from './types';
5
+ import {
6
+ DATE_RANGE,
7
+ querySchema,
8
+ dateSchema,
9
+ countrySchema,
10
+ imagesSchema,
11
+ videosSchema,
12
+ newsSchema,
13
+ } from './schema';
14
+ import { createSearchAPI, createSourceProcessor } from './search';
15
+ import { createSerperScraper } from './serper-scraper';
16
+ import { createFirecrawlScraper } from './firecrawl';
17
+ import { expandHighlights } from './highlights';
18
+ import { formatResultsForLLM } from './format';
19
+ import { createDefaultLogger } from './utils';
20
+ import { createReranker } from './rerankers';
21
+ import { Constants } from '@/common';
22
+
23
+ /**
24
+ * URL regex pattern to detect direct URLs in query
25
+ */
26
+ const URL_PATTERN = /https?:\/\/[^\s<>"{}|\\^`[\]]+/gi;
27
+
28
+ /**
29
+ * Extracts URLs from a query string
30
+ * @param query - The search query
31
+ * @returns Array of URLs found in the query
32
+ */
33
+ function extractUrlsFromQuery(query: string): string[] {
34
+ const matches = query.match(URL_PATTERN);
35
+ return matches ?? [];
36
+ }
37
+
38
+ /**
39
+ * Checks if the query is primarily a URL request (contains URL and minimal other text)
40
+ * @param query - The search query
41
+ * @returns True if the query appears to be a direct URL request
42
+ */
43
+ function isDirectUrlRequest(query: string): boolean {
44
+ const urls = extractUrlsFromQuery(query);
45
+ if (urls.length === 0) {
46
+ return false;
47
+ }
48
+
49
+ // Remove URLs from query and check remaining text
50
+ let remainingText = query;
51
+ for (const url of urls) {
52
+ remainingText = remainingText.replace(url, '');
53
+ }
54
+
55
+ // Clean up and check if remaining text is minimal (just filler words or questions about the URL)
56
+ remainingText = remainingText.trim().toLowerCase();
57
+
58
+ // If very little text remains, it's likely a direct URL request
59
+ if (remainingText.length < 50) {
60
+ return true;
61
+ }
62
+
63
+ return false;
64
+ }
65
+
66
+ /**
67
+ * Directly extracts content from URLs using the scraper
68
+ * @param urls - URLs to extract content from
69
+ * @param scraper - The scraper instance to use
70
+ * @param logger - Logger instance
71
+ * @returns Search result with extracted content
72
+ */
73
+ async function extractDirectUrlContent({
74
+ urls,
75
+ scraper,
76
+ logger,
77
+ }: {
78
+ urls: string[];
79
+ scraper: t.BaseScraper;
80
+ logger: t.Logger;
81
+ }): Promise<t.SearchResult> {
82
+ try {
83
+ const results: t.ProcessedOrganic[] = [];
84
+
85
+ for (const url of urls) {
86
+ try {
87
+ logger.debug(`Direct URL extraction: ${url}`);
88
+ const [, response] = await scraper.scrapeUrl(url);
89
+
90
+ if (response.success && response.data) {
91
+ const [content, references] = scraper.extractContent(response);
92
+ const metadata = scraper.extractMetadata(response);
93
+
94
+ // Helper to safely extract string from metadata
95
+ const getString = (value: unknown): string | undefined => {
96
+ return typeof value === 'string' ? value : undefined;
97
+ };
98
+
99
+ results.push({
100
+ position: results.length + 1,
101
+ title: getString(metadata.title) ?? getString(metadata.ogTitle) ?? url,
102
+ link: url,
103
+ snippet: getString(metadata.description) ?? getString(metadata.ogDescription) ?? '',
104
+ content: content,
105
+ references: references,
106
+ processed: true,
107
+ });
108
+ } else {
109
+ logger.warn(`Failed to extract content from ${url}: ${response.error}`);
110
+ // Still add the URL as a result, but without content
111
+ results.push({
112
+ position: results.length + 1,
113
+ title: url,
114
+ link: url,
115
+ snippet: response.error ?? 'Failed to extract content',
116
+ processed: false,
117
+ });
118
+ }
119
+ } catch (error) {
120
+ logger.error(`Error extracting URL ${url}:`, error);
121
+ results.push({
122
+ position: results.length + 1,
123
+ title: url,
124
+ link: url,
125
+ snippet: error instanceof Error ? error.message : String(error),
126
+ processed: false,
127
+ });
128
+ }
129
+ }
130
+
131
+ return {
132
+ success: true,
133
+ data: {
134
+ organic: results,
135
+ topStories: [],
136
+ images: [],
137
+ videos: [],
138
+ relatedSearches: [],
139
+ },
140
+ };
141
+ } catch (error) {
142
+ logger.error('Error in direct URL extraction:', error);
143
+ return {
144
+ success: false,
145
+ error: error instanceof Error ? error.message : String(error),
146
+ };
147
+ }
148
+ }
149
+
150
/**
 * Runs the main search together with any requested image, video and news
 * searches concurrently, then merges everything into a single result.
 *
 * Failure handling:
 * - A supplementary (images/videos/news) search that rejects is logged and
 *   replaced by an unsuccessful result, which is then ignored by the merge.
 * - The main search is not guarded: if it rejects, the rejection propagates;
 *   if it resolves unsuccessfully, an `Error` carrying its message is thrown.
 *
 * News handling: news items found on the main result are moved into
 * `topStories` (and `news` is removed from the merged data); news items from
 * the supplementary news search are appended to `topStories` as well. In both
 * cases items without a non-empty `link` are skipped.
 *
 * @returns `{ success: true, data }` where `data` is a shallow copy of the
 *          main result's data with the supplementary results merged in.
 * @throws Error when the main search reports failure.
 */
async function executeParallelSearches({
  searchAPI,
  query,
  date,
  country,
  safeSearch,
  images,
  videos,
  news,
  logger,
}: {
  searchAPI: ReturnType<typeof createSearchAPI>;
  query: string;
  date?: DATE_RANGE;
  country?: string;
  safeSearch: t.SearchToolConfig['safeSearch'];
  images: boolean;
  videos: boolean;
  news: boolean;
  logger: t.Logger;
}): Promise<t.SearchResult> {
  const baseParams = { query, date, country, safeSearch };

  /**
   * Starts one supplementary search. A rejection is converted into an
   * unsuccessful result so that it cannot fail the whole batch.
   */
  const runSupplementarySearch = (
    type: 'images' | 'videos' | 'news'
  ): Promise<t.SearchResult> =>
    searchAPI.getSources({ ...baseParams, type }).catch((error: unknown) => {
      logger.error(`Error fetching ${type}:`, error);
      const label = type.charAt(0).toUpperCase() + type.slice(1);
      return {
        success: false,
        error: `${label} search failed: ${error instanceof Error ? error.message : String(error)}`,
      };
    });

  // The main search always comes first so it can be picked out by position.
  const searchTasks: Promise<t.SearchResult>[] = [
    searchAPI.getSources({ ...baseParams }),
  ];
  if (images) {
    searchTasks.push(runSupplementarySearch('images'));
  }
  if (videos) {
    searchTasks.push(runSupplementarySearch('videos'));
  }
  if (news) {
    searchTasks.push(runSupplementarySearch('news'));
  }

  const [mainResult, ...supplementaryResults] = await Promise.all(searchTasks);
  if (mainResult === undefined || !mainResult.success) {
    throw new Error(mainResult?.error ?? 'Search failed');
  }

  // Shallow copy: the caller's main result object is left untouched.
  const mergedResults = { ...mainResult.data };

  // Convert news already present on the main result into top stories.
  if (mergedResults.news !== undefined && mergedResults.news.length > 0) {
    const existingNewsAsTopStories = mergedResults.news
      .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')
      .map((newsItem) => ({
        title: newsItem.title ?? '',
        link: newsItem.link ?? '',
        source: newsItem.source ?? '',
        date: newsItem.date ?? '',
        imageUrl: newsItem.imageUrl ?? '',
        processed: false,
      }));
    mergedResults.topStories = [
      ...(mergedResults.topStories ?? []),
      ...existingNewsAsTopStories,
    ];
    delete mergedResults.news;
  }

  for (const result of supplementaryResults) {
    if (!result.success || result.data === undefined) {
      continue;
    }
    const { data } = result;

    if (data.images !== undefined && data.images.length > 0) {
      mergedResults.images = [...(mergedResults.images ?? []), ...data.images];
    }
    if (data.videos !== undefined && data.videos.length > 0) {
      mergedResults.videos = [...(mergedResults.videos ?? []), ...data.videos];
    }
    if (data.news !== undefined && data.news.length > 0) {
      // Same link requirement as for news found on the main result.
      const newsAsTopStories = data.news
        .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')
        .map((newsItem) => ({
          ...newsItem,
          link: newsItem.link ?? '',
        }));
      mergedResults.topStories = [
        ...(mergedResults.topStories ?? []),
        ...newsAsTopStories,
      ];
    }
  }

  return { success: true, data: mergedResults };
}
303
+
304
/**
 * Builds the search routine used by the web search tool.
 *
 * The returned async function works in one of two modes:
 * - Direct URL extraction: when the query is recognised as a direct URL
 *   request and at least one URL was extracted from it, the search API is
 *   bypassed and the URLs are fetched with `scraper`.
 * - Normal search: otherwise the main search plus any requested
 *   image/video/news searches are executed and merged.
 *
 * In both modes the raw result is reported through `onSearchResults` (when
 * provided), handed to `sourceProcessor.processSources`, and the processed
 * sources are returned after `expandHighlights`.
 *
 * The returned function does not reject for errors raised inside it: any such
 * error is logged and an empty result set with an `error` message is
 * returned instead.
 */
function createSearchProcessor({
  searchAPI,
  safeSearch,
  sourceProcessor,
  scraper,
  onGetHighlights,
  logger,
}: {
  safeSearch: t.SearchToolConfig['safeSearch'];
  searchAPI: ReturnType<typeof createSearchAPI>;
  sourceProcessor: ReturnType<typeof createSourceProcessor>;
  scraper: t.BaseScraper;
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
  logger: t.Logger;
}) {
  return async function ({
    query,
    date,
    country,
    proMode = true,
    maxSources = 5,
    onSearchResults,
    images = false,
    videos = false,
    news = false,
  }: {
    query: string;
    country?: string;
    date?: DATE_RANGE;
    proMode?: boolean;
    maxSources?: number;
    onSearchResults: t.SearchToolConfig['onSearchResults'];
    images?: boolean;
    videos?: boolean;
    news?: boolean;
  }): Promise<t.SearchResultData> {
    try {
      // Check if query contains direct URLs for extraction
      const urls = extractUrlsFromQuery(query);
      // One flag decides both which branch runs and whether scraping is
      // skipped afterwards, so the two can never disagree.
      const useDirectExtraction = isDirectUrlRequest(query) && urls.length > 0;

      let searchResult: t.SearchResult;

      if (useDirectExtraction) {
        // Direct URL extraction mode - skip search API and extract directly
        logger.debug(`Direct URL extraction mode for: ${urls.join(', ')}`);
        searchResult = await extractDirectUrlContent({
          urls,
          scraper,
          logger,
        });
      } else {
        // Normal search mode - execute parallel searches and merge results
        searchResult = await executeParallelSearches({
          searchAPI,
          query,
          date,
          country,
          safeSearch,
          images,
          videos,
          news,
          logger,
        });
      }

      onSearchResults?.(searchResult);

      const processedSources = await sourceProcessor.processSources({
        query,
        news,
        result: searchResult,
        proMode,
        onGetHighlights,
        numElements: maxSources,
        // Skip additional scraping only when content was actually extracted
        // directly from the URLs above.
        skipScraping: useDirectExtraction,
      });

      return expandHighlights(processedSources);
    } catch (error) {
      logger.error('Error in search:', error);
      return {
        organic: [],
        topStories: [],
        images: [],
        videos: [],
        news: [],
        relatedSearches: [],
        error: error instanceof Error ? error.message : String(error),
      };
    }
  };
}
398
+
399
/**
 * Binds an optional `onSearchResults` callback to a specific runnable config.
 *
 * The returned function takes only the search results and forwards them,
 * together with `runnableConfig`, to the callback. When no callback was
 * supplied the returned function does nothing. The callback's return value is
 * discarded.
 */
function createOnSearchResults({
  runnableConfig,
  onSearchResults,
}: {
  runnableConfig: RunnableConfig;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
}) {
  const forwardResults = (results: t.SearchResult): void => {
    if (onSearchResults) {
      onSearchResults(results, runnableConfig);
    }
  };
  return forwardResults;
}
413
+
414
/**
 * Wraps a search routine in a structured tool named `Constants.WEB_SEARCH`.
 *
 * On each invocation the tool:
 * 1. reads `query`, `date`, `country`, `images`, `videos` and `news` from the
 *    parameters (a non-string or empty `country` is treated as not provided),
 * 2. logs the incoming query and any URLs detected in it at debug level,
 * 3. runs `search`, reporting raw results through `onSearchResults` bound to
 *    the current runnable config,
 * 4. formats the results for the model and returns `[output, artifact]`, where
 *    the artifact holds the search data under the `Constants.WEB_SEARCH` key.
 *
 * @param logger - Optional logger for the per-call debug output. When omitted,
 *                 a default logger is created for each invocation.
 */
function createTool({
  schema,
  search,
  onSearchResults: _onSearchResults,
  logger,
}: {
  schema: t.SearchToolSchema;
  search: ReturnType<typeof createSearchProcessor>;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
  logger?: t.Logger;
}): DynamicStructuredTool<typeof schema> {
  return tool<typeof schema>(
    async (params, runnableConfig) => {
      const { query, date, country: _c, images, videos, news } = params;
      const country = typeof _c === 'string' && _c ? _c : undefined;

      // Log the incoming query for debugging URL detection
      const toolLogger = logger ?? createDefaultLogger();
      toolLogger.debug(`[web_search] Received query: "${query}"`);
      const detectedUrls = extractUrlsFromQuery(query);
      if (detectedUrls.length > 0) {
        toolLogger.debug(`[web_search] Detected URLs in query: ${detectedUrls.join(', ')}`);
      }

      const searchResult = await search({
        query,
        date,
        country,
        images,
        videos,
        news,
        onSearchResults: createOnSearchResults({
          runnableConfig,
          onSearchResults: _onSearchResults,
        }),
      });
      // The turn index is embedded in the citation anchors; default to 0
      // when the runnable config carries no tool call information.
      const turn = runnableConfig.toolCall?.turn ?? 0;
      const { output, references } = formatResultsForLLM(turn, searchResult);
      const data: t.SearchResultData = { turn, ...searchResult, references };
      return [output, { [Constants.WEB_SEARCH]: data }];
    },
    {
      name: Constants.WEB_SEARCH,
      description: `Real-time web search and direct URL content extraction. Results have required citation anchors.

**CAPABILITIES:**
- Search: Query the web for information on any topic
- Direct URL: Fetch and extract content from a specific URL for summarization or analysis

**CRITICAL - URL HANDLING:**
When user provides a URL (e.g., "summarize https://example.com/article"), you MUST include the FULL URL in the query parameter.
- CORRECT: query = "https://example.com/article" or "summarize https://example.com/article"
- WRONG: query = "example article summary" (do NOT convert URLs to search terms)

**USAGE:**
- For search: Use concise search terms as query
- For URL extraction: Pass the complete URL in the query field

Note: Use ONCE per reply unless instructed otherwise.

Anchors:
- \\ue202turnXtypeY
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx

Special Markers:
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
- \\ue200...\\ue201 — group block (e.g. \\ue200\\ue202turn0search1\\ue202turn0news2\\ue201)

**CITE EVERY NON-OBVIOUS FACT/QUOTE:**
Use anchor marker(s) immediately after the statement:
- Standalone: "Pure functions produce same output. \\ue202turn0search0"
- Standalone (multiple): "Today's News \\ue202turn0search0\\ue202turn0news0"
- Highlight: "\\ue203Highlight text.\\ue204\\ue202turn0news1"
- Group: "Sources. \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Group Highlight: "\\ue203Highlight for group.\\ue204 \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Image: "See photo \\ue202turn0image0."

**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**
`.trim(),
      schema: schema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}

/**
 * Creates a search tool with a schema that dynamically includes the country field
 * only when the searchProvider is 'serper'.
 *
 * Supports multiple scraper providers:
 * - Firecrawl (default): Full-featured web scraping with multiple formats
 * - Serper: Lightweight scraping using Serper's scrape API
 *
 * The logger taken from `config.logger` (or a default logger when none is
 * given) is shared by the scraper, the reranker, the source processor, the
 * search routine and the tool's own debug output.
 *
 * @example
 * ```typescript
 * // Using Firecrawl scraper (default)
 * const searchTool = createSearchTool({
 *   searchProvider: 'serper',
 *   scraperProvider: 'firecrawl',
 *   firecrawlApiKey: 'your-firecrawl-key'
 * });
 *
 * // Using Serper scraper
 * const searchTool = createSearchTool({
 *   searchProvider: 'serper',
 *   scraperProvider: 'serper',
 *   serperApiKey: 'your-serper-key'
 * });
 * ```
 *
 * @param config - The search tool configuration
 * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
 */
export const createSearchTool = (
  config: t.SearchToolConfig = {}
): DynamicStructuredTool<typeof toolSchema> => {
  const {
    searchProvider = 'serper',
    serperApiKey,
    searxngInstanceUrl,
    searxngApiKey,
    rerankerType = 'cohere',
    topResults = 5,
    strategies = ['no_extraction'],
    filterContent = true,
    safeSearch = 1,
    scraperProvider = 'firecrawl',
    firecrawlApiKey,
    firecrawlApiUrl,
    firecrawlVersion,
    firecrawlOptions,
    serperScraperOptions,
    scraperTimeout,
    jinaApiKey,
    jinaApiUrl,
    cohereApiKey,
    onSearchResults: _onSearchResults,
    onGetHighlights,
  } = config;

  const logger = config.logger ?? createDefaultLogger();

  const schemaObject: {
    query: z.ZodString;
    date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;
    country?: z.ZodOptional<z.ZodString>;
    images: z.ZodOptional<z.ZodBoolean>;
    videos: z.ZodOptional<z.ZodBoolean>;
    news: z.ZodOptional<z.ZodBoolean>;
  } = {
    query: querySchema,
    date: dateSchema,
    images: imagesSchema,
    videos: videosSchema,
    news: newsSchema,
  };

  // The country field is only added to the schema for the serper provider.
  if (searchProvider === 'serper') {
    schemaObject.country = countrySchema;
  }

  const toolSchema = z.object(schemaObject);

  const searchAPI = createSearchAPI({
    searchProvider,
    serperApiKey,
    searxngInstanceUrl,
    searxngApiKey,
  });

  /** Create scraper based on scraperProvider */
  let scraperInstance: t.BaseScraper;

  if (scraperProvider === 'serper') {
    scraperInstance = createSerperScraper({
      ...serperScraperOptions,
      apiKey: serperApiKey,
      // The top-level scraperTimeout wins over the provider-specific option.
      timeout: scraperTimeout ?? serperScraperOptions?.timeout,
      logger,
    });
  } else {
    scraperInstance = createFirecrawlScraper({
      ...firecrawlOptions,
      apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
      apiUrl: firecrawlApiUrl,
      version: firecrawlVersion,
      // The top-level scraperTimeout wins over the provider-specific option.
      timeout: scraperTimeout ?? firecrawlOptions?.timeout,
      formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
      logger,
    });
  }

  const selectedReranker = createReranker({
    rerankerType,
    jinaApiKey,
    jinaApiUrl,
    cohereApiKey,
    logger,
  });

  if (!selectedReranker) {
    logger.warn('No reranker selected. Using default ranking.');
  }

  const sourceProcessor = createSourceProcessor(
    {
      reranker: selectedReranker,
      topResults,
      strategies,
      filterContent,
      logger,
    },
    scraperInstance
  );

  const search = createSearchProcessor({
    searchAPI,
    safeSearch,
    sourceProcessor,
    scraper: scraperInstance,
    onGetHighlights,
    logger,
  });

  return createTool({
    search,
    schema: toolSchema,
    onSearchResults: _onSearchResults,
    // Route the tool's own debug output through the configured logger.
    logger,
  });
};