illuma-agents 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -5
- package/dist/cjs/common/enum.cjs +1 -2
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +99 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
- package/dist/cjs/llm/fake.cjs.map +1 -1
- package/dist/cjs/llm/providers.cjs +13 -16
- package/dist/cjs/llm/providers.cjs.map +1 -1
- package/dist/cjs/llm/text.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +14 -14
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +10 -1
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +4 -1
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +10 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +29 -25
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
- package/dist/cjs/tools/search/content.cjs.map +1 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +25 -25
- package/dist/cjs/tools/search/schema.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +6 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -35
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/cjs/utils/graph.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +0 -1
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/misc.cjs.map +1 -1
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/title.cjs +7 -7
- package/dist/cjs/utils/title.cjs.map +1 -1
- package/dist/esm/common/enum.mjs +1 -2
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +97 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -0
- package/dist/esm/llm/fake.mjs.map +1 -1
- package/dist/esm/llm/providers.mjs +2 -5
- package/dist/esm/llm/providers.mjs.map +1 -1
- package/dist/esm/llm/text.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +14 -14
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +10 -1
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +4 -1
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +10 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +30 -26
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/anthropic.mjs.map +1 -1
- package/dist/esm/tools/search/content.mjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +25 -25
- package/dist/esm/tools/search/schema.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +6 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +162 -35
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/esm/utils/graph.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +0 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/misc.mjs.map +1 -1
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/title.mjs +7 -7
- package/dist/esm/utils/title.mjs.map +1 -1
- package/dist/types/common/enum.d.ts +1 -2
- package/dist/types/llm/bedrock/index.d.ts +36 -0
- package/dist/types/tools/search/types.d.ts +2 -0
- package/dist/types/types/llm.d.ts +3 -8
- package/package.json +15 -11
- package/src/common/enum.ts +1 -2
- package/src/common/index.ts +1 -1
- package/src/instrumentation.ts +22 -22
- package/src/llm/anthropic/llm.spec.ts +1442 -1442
- package/src/llm/anthropic/types.ts +140 -140
- package/src/llm/anthropic/utils/message_inputs.ts +757 -660
- package/src/llm/anthropic/utils/output_parsers.ts +133 -133
- package/src/llm/anthropic/utils/tools.ts +29 -29
- package/src/llm/bedrock/index.ts +128 -0
- package/src/llm/fake.ts +133 -133
- package/src/llm/google/utils/tools.ts +160 -160
- package/src/llm/openai/types.ts +24 -24
- package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
- package/src/llm/providers.ts +2 -7
- package/src/llm/text.ts +94 -94
- package/src/messages/core.ts +463 -463
- package/src/messages/formatAgentMessages.tools.test.ts +400 -400
- package/src/messages/formatMessage.test.ts +693 -693
- package/src/messages/ids.ts +26 -26
- package/src/messages/prune.ts +567 -567
- package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
- package/src/mockStream.ts +98 -98
- package/src/prompts/collab.ts +5 -5
- package/src/prompts/index.ts +1 -1
- package/src/prompts/taskmanager.ts +61 -61
- package/src/run.ts +13 -4
- package/src/scripts/ant_web_search_edge_case.ts +162 -0
- package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
- package/src/scripts/args.ts +48 -48
- package/src/scripts/caching.ts +123 -123
- package/src/scripts/code_exec_files.ts +193 -193
- package/src/scripts/empty_input.ts +137 -137
- package/src/scripts/image.ts +178 -178
- package/src/scripts/memory.ts +97 -97
- package/src/scripts/thinking.ts +149 -149
- package/src/specs/anthropic.simple.test.ts +67 -0
- package/src/specs/spec.utils.ts +3 -3
- package/src/specs/token-distribution-edge-case.test.ts +316 -316
- package/src/specs/tool-error.test.ts +193 -193
- package/src/splitStream.test.ts +691 -691
- package/src/splitStream.ts +234 -234
- package/src/stream.test.ts +94 -94
- package/src/stream.ts +4 -1
- package/src/tools/ToolNode.ts +12 -1
- package/src/tools/handlers.ts +32 -28
- package/src/tools/search/anthropic.ts +51 -51
- package/src/tools/search/content.test.ts +173 -173
- package/src/tools/search/content.ts +147 -147
- package/src/tools/search/direct-url.test.ts +530 -0
- package/src/tools/search/firecrawl.ts +210 -210
- package/src/tools/search/format.ts +250 -250
- package/src/tools/search/highlights.ts +320 -320
- package/src/tools/search/index.ts +2 -2
- package/src/tools/search/jina-reranker.test.ts +126 -126
- package/src/tools/search/output.md +2775 -2775
- package/src/tools/search/rerankers.ts +242 -242
- package/src/tools/search/schema.ts +63 -63
- package/src/tools/search/search.ts +766 -759
- package/src/tools/search/serper-scraper.ts +155 -155
- package/src/tools/search/test.html +883 -883
- package/src/tools/search/test.md +642 -642
- package/src/tools/search/test.ts +159 -159
- package/src/tools/search/tool.ts +619 -471
- package/src/tools/search/types.ts +689 -687
- package/src/tools/search/utils.ts +79 -79
- package/src/types/index.ts +6 -6
- package/src/types/llm.ts +2 -8
- package/src/utils/graph.ts +10 -10
- package/src/utils/llm.ts +26 -27
- package/src/utils/llmConfig.ts +5 -3
- package/src/utils/logging.ts +48 -48
- package/src/utils/misc.ts +57 -57
- package/src/utils/run.ts +100 -100
- package/src/utils/title.ts +165 -165
- package/dist/cjs/llm/ollama/index.cjs +0 -70
- package/dist/cjs/llm/ollama/index.cjs.map +0 -1
- package/dist/cjs/llm/ollama/utils.cjs +0 -158
- package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
- package/dist/esm/llm/ollama/index.mjs +0 -68
- package/dist/esm/llm/ollama/index.mjs.map +0 -1
- package/dist/esm/llm/ollama/utils.mjs +0 -155
- package/dist/esm/llm/ollama/utils.mjs.map +0 -1
- package/dist/types/llm/ollama/index.d.ts +0 -8
- package/dist/types/llm/ollama/utils.d.ts +0 -7
- package/src/llm/ollama/index.ts +0 -92
- package/src/llm/ollama/utils.ts +0 -193
- package/src/proto/CollabGraph.ts +0 -269
- package/src/proto/TaskManager.ts +0 -243
- package/src/proto/collab.ts +0 -200
- package/src/proto/collab_design.ts +0 -184
- package/src/proto/collab_design_v2.ts +0 -224
- package/src/proto/collab_design_v3.ts +0 -255
- package/src/proto/collab_design_v4.ts +0 -220
- package/src/proto/collab_design_v5.ts +0 -251
- package/src/proto/collab_graph.ts +0 -181
- package/src/proto/collab_original.ts +0 -123
- package/src/proto/example.ts +0 -93
- package/src/proto/example_new.ts +0 -68
- package/src/proto/example_old.ts +0 -201
- package/src/proto/example_test.ts +0 -152
- package/src/proto/example_test_anthropic.ts +0 -100
- package/src/proto/log_stream.ts +0 -202
- package/src/proto/main_collab_community_event.ts +0 -133
- package/src/proto/main_collab_design_v2.ts +0 -96
- package/src/proto/main_collab_design_v4.ts +0 -100
- package/src/proto/main_collab_design_v5.ts +0 -135
- package/src/proto/main_collab_global_analysis.ts +0 -122
- package/src/proto/main_collab_hackathon_event.ts +0 -153
- package/src/proto/main_collab_space_mission.ts +0 -153
- package/src/proto/main_philosophy.ts +0 -210
- package/src/proto/original_script.ts +0 -126
- package/src/proto/standard.ts +0 -100
- package/src/proto/stream.ts +0 -56
- package/src/proto/tasks.ts +0 -118
- package/src/proto/tools/global_analysis_tools.ts +0 -86
- package/src/proto/tools/space_mission_tools.ts +0 -60
- package/src/proto/vertexai.ts +0 -54
package/src/tools/search/tool.ts
CHANGED
|
@@ -1,471 +1,619 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
3
|
-
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
4
|
-
import type * as t from './types';
|
|
5
|
-
import {
|
|
6
|
-
DATE_RANGE,
|
|
7
|
-
querySchema,
|
|
8
|
-
dateSchema,
|
|
9
|
-
countrySchema,
|
|
10
|
-
imagesSchema,
|
|
11
|
-
videosSchema,
|
|
12
|
-
newsSchema,
|
|
13
|
-
} from './schema';
|
|
14
|
-
import { createSearchAPI, createSourceProcessor } from './search';
|
|
15
|
-
import { createSerperScraper } from './serper-scraper';
|
|
16
|
-
import { createFirecrawlScraper } from './firecrawl';
|
|
17
|
-
import { expandHighlights } from './highlights';
|
|
18
|
-
import { formatResultsForLLM } from './format';
|
|
19
|
-
import { createDefaultLogger } from './utils';
|
|
20
|
-
import { createReranker } from './rerankers';
|
|
21
|
-
import { Constants } from '@/common';
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
*
|
|
25
|
-
*/
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
.
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
}
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
3
|
+
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
4
|
+
import type * as t from './types';
|
|
5
|
+
import {
|
|
6
|
+
DATE_RANGE,
|
|
7
|
+
querySchema,
|
|
8
|
+
dateSchema,
|
|
9
|
+
countrySchema,
|
|
10
|
+
imagesSchema,
|
|
11
|
+
videosSchema,
|
|
12
|
+
newsSchema,
|
|
13
|
+
} from './schema';
|
|
14
|
+
import { createSearchAPI, createSourceProcessor } from './search';
|
|
15
|
+
import { createSerperScraper } from './serper-scraper';
|
|
16
|
+
import { createFirecrawlScraper } from './firecrawl';
|
|
17
|
+
import { expandHighlights } from './highlights';
|
|
18
|
+
import { formatResultsForLLM } from './format';
|
|
19
|
+
import { createDefaultLogger } from './utils';
|
|
20
|
+
import { createReranker } from './rerankers';
|
|
21
|
+
import { Constants } from '@/common';
|
|
22
|
+
|
|
23
|
+
/**
 * URL regex pattern to detect direct URLs in query
 */
const URL_PATTERN = /https?:\/\/[^\s<>"{}|\\^`[\]]+/gi;

/** Sentence punctuation that URL_PATTERN greedily captures at the end of a match. */
const TRAILING_SENTENCE_PUNCTUATION = /[.,;:!?']+$/;

/** Matches a candidate that consists of the scheme only (no host left). */
const SCHEME_ONLY = /^https?:\/\/$/i;

/**
 * Removes characters at the end of a matched URL that belong to the
 * surrounding sentence rather than to the URL itself.
 *
 * A closing parenthesis is only removed when it has no matching opening
 * parenthesis inside the URL, so links such as
 * `https://en.wikipedia.org/wiki/Rust_(programming_language)` stay intact.
 *
 * @param rawUrl - A raw match produced by URL_PATTERN
 * @returns The URL without trailing sentence punctuation
 */
function stripTrailingUrlPunctuation(rawUrl: string): string {
  let url = rawUrl;
  // Each pass strictly shortens the string, so the loop always terminates.
  for (;;) {
    let next = url.replace(TRAILING_SENTENCE_PUNCTUATION, '');
    const hasUnmatchedClosingParen =
      next.endsWith(')') && next.split(')').length > next.split('(').length;
    if (hasUnmatchedClosingParen) {
      next = next.slice(0, -1);
    }
    if (next === url) {
      return url;
    }
    url = next;
  }
}

/**
 * Extracts URLs from a query string
 *
 * Trailing sentence punctuation (e.g. the final "." in "read https://a.com.")
 * is stripped from every match, and matches that contain nothing but the
 * scheme after stripping are discarded. Duplicates are kept, in order of
 * appearance.
 *
 * @param query - The search query
 * @returns Array of URLs found in the query
 */
function extractUrlsFromQuery(query: string): string[] {
  const matches: string[] = query.match(URL_PATTERN) ?? [];
  return matches
    .map((match) => stripTrailingUrlPunctuation(match))
    .filter((url) => !SCHEME_ONLY.test(url));
}
|
|
37
|
+
|
|
38
|
+
/**
 * Decides whether a query is essentially "fetch this URL" rather than a
 * free-text search.
 *
 * The query qualifies when it contains at least one URL and, once each
 * extracted URL has been removed (one occurrence per extracted entry), fewer
 * than 50 characters of trimmed text are left over.
 *
 * @param query - The search query
 * @returns True if the query appears to be a direct URL request
 */
function isDirectUrlRequest(query: string): boolean {
  const foundUrls = extractUrlsFromQuery(query);
  if (foundUrls.length === 0) {
    return false;
  }

  // Strip every extracted URL from the query, then normalise what is left.
  const leftover = foundUrls
    .reduce((text, url) => text.replace(url, ''), query)
    .trim()
    .toLowerCase();

  // Only a short remainder (filler words, a brief question) counts as direct.
  return leftover.length < 50;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Scrapes the given URLs one after another and packages the outcome as a
 * search result whose `organic` list has exactly one entry per URL.
 *
 * A URL that scrapes successfully yields an entry with content, references
 * and `processed: true`. A URL whose scrape reports failure or throws still
 * yields an entry (`processed: false`) whose snippet carries the error text.
 *
 * @param urls - URLs to extract content from
 * @param scraper - The scraper instance to use
 * @param logger - Logger instance
 * @returns Search result with extracted content
 */
async function extractDirectUrlContent({
  urls,
  scraper,
  logger,
}: {
  urls: string[];
  scraper: t.BaseScraper;
  logger: t.Logger;
}): Promise<t.SearchResult> {
  // Metadata values are untyped; only accept them when they are strings.
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  try {
    const organic: t.ProcessedOrganic[] = [];

    for (const url of urls) {
      // Exactly one entry is appended per URL, so this is the 1-based rank.
      const position = organic.length + 1;
      let entry: t.ProcessedOrganic;

      try {
        logger.debug(`Direct URL extraction: ${url}`);
        const [, response] = await scraper.scrapeUrl(url);

        if (response.success && response.data) {
          const [content, references] = scraper.extractContent(response);
          const metadata = scraper.extractMetadata(response);

          entry = {
            position,
            title: asString(metadata.title) ?? asString(metadata.ogTitle) ?? url,
            link: url,
            snippet:
              asString(metadata.description) ??
              asString(metadata.ogDescription) ??
              '',
            content,
            references,
            processed: true,
          };
        } else {
          logger.warn(`Failed to extract content from ${url}: ${response.error}`);
          // Keep the URL in the results, just without any content.
          entry = {
            position,
            title: url,
            link: url,
            snippet: response.error ?? 'Failed to extract content',
            processed: false,
          };
        }
      } catch (error) {
        logger.error(`Error extracting URL ${url}:`, error);
        entry = {
          position,
          title: url,
          link: url,
          snippet: error instanceof Error ? error.message : String(error),
          processed: false,
        };
      }

      organic.push(entry);
    }

    return {
      success: true,
      data: {
        organic,
        topStories: [],
        images: [],
        videos: [],
        relatedSearches: [],
      },
    };
  } catch (error) {
    logger.error('Error in direct URL extraction:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
|
|
149
|
+
|
|
150
|
+
/**
 * Runs the main search plus any requested image / video / news searches
 * concurrently and folds everything into a single result.
 *
 * Failures of the optional searches are logged and turned into unsuccessful
 * results, so they never abort the call. An unsuccessful main search throws.
 * News items (from the main result and from the news search) are appended to
 * `topStories`.
 *
 * @returns A successful search result holding the merged data
 * @throws Error when the main search reports failure
 */
async function executeParallelSearches({
  searchAPI,
  query,
  date,
  country,
  safeSearch,
  images,
  videos,
  news,
  logger,
}: {
  searchAPI: ReturnType<typeof createSearchAPI>;
  query: string;
  date?: DATE_RANGE;
  country?: string;
  safeSearch: t.SearchToolConfig['safeSearch'];
  images: boolean;
  videos: boolean;
  news: boolean;
  logger: t.Logger;
}): Promise<t.SearchResult> {
  const sharedParams = { query, date, country, safeSearch };
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // The main search always goes first so it ends up at index 0.
  const pending: Promise<t.SearchResult>[] = [
    searchAPI.getSources({ ...sharedParams }),
  ];

  if (images) {
    pending.push(
      searchAPI
        .getSources({ ...sharedParams, type: 'images' })
        .catch((error) => {
          logger.error('Error fetching images:', error);
          return {
            success: false,
            error: `Images search failed: ${describeError(error)}`,
          };
        })
    );
  }

  if (videos) {
    pending.push(
      searchAPI
        .getSources({ ...sharedParams, type: 'videos' })
        .catch((error) => {
          logger.error('Error fetching videos:', error);
          return {
            success: false,
            error: `Videos search failed: ${describeError(error)}`,
          };
        })
    );
  }

  if (news) {
    pending.push(
      searchAPI
        .getSources({ ...sharedParams, type: 'news' })
        .catch((error) => {
          logger.error('Error fetching news:', error);
          return {
            success: false,
            error: `News search failed: ${describeError(error)}`,
          };
        })
    );
  }

  const [mainResult, ...extraResults] = await Promise.all(pending);
  if (!mainResult.success) {
    throw new Error(mainResult.error ?? 'Search failed');
  }

  // Shallow copy so the merge below does not touch the main result's object.
  const mergedResults = { ...mainResult.data };

  // News that came back with the main search is re-homed under topStories.
  if (mergedResults.news !== undefined && mergedResults.news.length > 0) {
    const linkedNews = mergedResults.news.filter(
      (newsItem) => newsItem.link !== undefined && newsItem.link !== ''
    );
    const storiesFromNews = linkedNews.map((newsItem) => ({
      title: newsItem.title ?? '',
      link: newsItem.link ?? '',
      source: newsItem.source ?? '',
      date: newsItem.date ?? '',
      imageUrl: newsItem.imageUrl ?? '',
      processed: false,
    }));
    mergedResults.topStories = [
      ...(mergedResults.topStories ?? []),
      ...storiesFromNews,
    ];
    delete mergedResults.news;
  }

  for (const extra of extraResults) {
    if (!extra.success || extra.data === undefined) {
      continue;
    }

    const extraImages = extra.data.images;
    if (extraImages !== undefined && extraImages.length > 0) {
      mergedResults.images = [...(mergedResults.images ?? []), ...extraImages];
    }

    const extraVideos = extra.data.videos;
    if (extraVideos !== undefined && extraVideos.length > 0) {
      mergedResults.videos = [...(mergedResults.videos ?? []), ...extraVideos];
    }

    const extraNews = extra.data.news;
    if (extraNews !== undefined && extraNews.length > 0) {
      const storiesFromExtraNews = extraNews.map((newsItem) => ({
        ...newsItem,
        link: newsItem.link ?? '',
      }));
      mergedResults.topStories = [
        ...(mergedResults.topStories ?? []),
        ...storiesFromExtraNews,
      ];
    }
  }

  return { success: true, data: mergedResults };
}
|
|
303
|
+
|
|
304
|
+
/**
 * Builds the search function used by the tool.
 *
 * The returned function either extracts content straight from URLs found in
 * the query (direct URL mode) or runs the regular parallel searches. It then
 * reports the raw result through `onSearchResults`, hands it to the source
 * processor and expands highlights. Any error is logged and converted into
 * an empty result carrying an `error` message instead of being thrown.
 */
function createSearchProcessor({
  searchAPI,
  safeSearch,
  sourceProcessor,
  scraper,
  onGetHighlights,
  logger,
}: {
  safeSearch: t.SearchToolConfig['safeSearch'];
  searchAPI: ReturnType<typeof createSearchAPI>;
  sourceProcessor: ReturnType<typeof createSourceProcessor>;
  scraper: t.BaseScraper;
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
  logger: t.Logger;
}) {
  return async ({
    query,
    date,
    country,
    proMode = true,
    maxSources = 5,
    onSearchResults,
    images = false,
    videos = false,
    news = false,
  }: {
    query: string;
    country?: string;
    date?: DATE_RANGE;
    proMode?: boolean;
    maxSources?: number;
    onSearchResults: t.SearchToolConfig['onSearchResults'];
    images?: boolean;
    videos?: boolean;
    news?: boolean;
  }): Promise<t.SearchResultData> => {
    try {
      const urls = extractUrlsFromQuery(query);
      const isDirectUrl = isDirectUrlRequest(query);
      const useDirectExtraction = isDirectUrl && urls.length > 0;

      if (useDirectExtraction) {
        logger.debug(`Direct URL extraction mode for: ${urls.join(', ')}`);
      }

      // Direct mode bypasses the search API; otherwise search as usual.
      const searchResult: t.SearchResult = useDirectExtraction
        ? await extractDirectUrlContent({ urls, scraper, logger })
        : await executeParallelSearches({
          searchAPI,
          query,
          date,
          country,
          safeSearch,
          images,
          videos,
          news,
          logger,
        });

      onSearchResults?.(searchResult);

      const processedSources = await sourceProcessor.processSources({
        query,
        news,
        result: searchResult,
        proMode,
        onGetHighlights,
        numElements: maxSources,
        // Content was already extracted in direct URL mode.
        skipScraping: isDirectUrl,
      });

      return expandHighlights(processedSources);
    } catch (error) {
      logger.error('Error in search:', error);
      const message = error instanceof Error ? error.message : String(error);
      return {
        organic: [],
        topStories: [],
        images: [],
        videos: [],
        news: [],
        relatedSearches: [],
        error: message,
      };
    }
  };
}
|
|
398
|
+
|
|
399
|
+
/**
 * Binds a runnable config to an optional `onSearchResults` callback.
 *
 * The returned function forwards the results together with the captured
 * config, and does nothing when no callback was supplied.
 */
function createOnSearchResults({
  runnableConfig,
  onSearchResults,
}: {
  runnableConfig: RunnableConfig;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
}) {
  return (results: t.SearchResult): void => {
    if (onSearchResults) {
      onSearchResults(results, runnableConfig);
    }
  };
}
|
|
413
|
+
|
|
414
|
+
/**
 * Wraps the search function in a LangChain structured tool.
 *
 * The tool runs the search, formats the results for the LLM using the turn
 * taken from `runnableConfig.toolCall`, and returns the formatted text along
 * with an artifact keyed by `Constants.WEB_SEARCH`.
 */
function createTool({
  schema,
  search,
  onSearchResults: _onSearchResults,
}: {
  schema: t.SearchToolSchema;
  search: ReturnType<typeof createSearchProcessor>;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
}): DynamicStructuredTool<typeof schema> {
  const description = `Real-time search. Results have required citation anchors.

Note: Use ONCE per reply unless instructed otherwise.

Anchors:
- \\ue202turnXtypeY
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx

Special Markers:
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
- \\ue200...\\ue201 — group block (e.g. \\ue200\\ue202turn0search1\\ue202turn0news2\\ue201)

**CITE EVERY NON-OBVIOUS FACT/QUOTE:**
Use anchor marker(s) immediately after the statement:
- Standalone: "Pure functions produce same output. \\ue202turn0search0"
- Standalone (multiple): "Today's News \\ue202turn0search0\\ue202turn0news0"
- Highlight: "\\ue203Highlight text.\\ue204\\ue202turn0news1"
- Group: "Sources. \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Group Highlight: "\\ue203Highlight for group.\\ue204 \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Image: "See photo \\ue202turn0image0."

**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**
`.trim();

  return tool<typeof schema>(
    async (params, runnableConfig) => {
      const { query, date, country: rawCountry, images, videos, news } = params;
      // Treat anything other than a non-empty string as "no country given".
      const country =
        typeof rawCountry === 'string' && rawCountry ? rawCountry : undefined;

      const forwardResults = createOnSearchResults({
        runnableConfig,
        onSearchResults: _onSearchResults,
      });
      const searchResult = await search({
        query,
        date,
        country,
        images,
        videos,
        news,
        onSearchResults: forwardResults,
      });

      const turn = runnableConfig.toolCall?.turn ?? 0;
      const { output, references } = formatResultsForLLM(turn, searchResult);
      const data: t.SearchResultData = { turn, ...searchResult, references };
      return [output, { [Constants.WEB_SEARCH]: data }];
    },
    {
      name: Constants.WEB_SEARCH,
      description,
      schema: schema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
|
|
474
|
+
|
|
475
|
+
/**
 * Assembles the web search tool from its parts: a search API client, a page
 * scraper, an optional reranker, a source processor, and the search processor
 * that ties them together.
 *
 * The tool's input schema always contains `query`, `date`, `images`, `videos`
 * and `news`; the `country` field is added only when `searchProvider` is
 * 'serper'.
 *
 * Scraper selection:
 * - 'firecrawl' (default): uses `firecrawlApiKey`, falling back to the
 *   `FIRECRAWL_API_KEY` environment variable, and requests the
 *   'markdown' and 'rawHtml' formats unless `firecrawlOptions.formats` is set.
 * - 'serper': uses `serperApiKey` together with `serperScraperOptions`.
 *
 * In both cases `scraperTimeout`, when provided, takes precedence over the
 * timeout given in the provider-specific options.
 *
 * @example
 * ```typescript
 * // Using Firecrawl scraper (default)
 * const searchTool = createSearchTool({
 *   searchProvider: 'serper',
 *   scraperProvider: 'firecrawl',
 *   firecrawlApiKey: 'your-firecrawl-key'
 * });
 *
 * // Using Serper scraper
 * const searchTool = createSearchTool({
 *   searchProvider: 'serper',
 *   scraperProvider: 'serper',
 *   serperApiKey: 'your-serper-key'
 * });
 * ```
 *
 * @param config - The search tool configuration
 * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
 */
export const createSearchTool = (
  config: t.SearchToolConfig = {}
): DynamicStructuredTool<typeof toolSchema> => {
  const {
    searchProvider = 'serper',
    serperApiKey,
    searxngInstanceUrl,
    searxngApiKey,
    rerankerType = 'cohere',
    topResults = 5,
    strategies = ['no_extraction'],
    filterContent = true,
    safeSearch = 1,
    scraperProvider = 'firecrawl',
    firecrawlApiKey,
    firecrawlApiUrl,
    firecrawlVersion,
    firecrawlOptions,
    serperScraperOptions,
    scraperTimeout,
    jinaApiKey,
    jinaApiUrl,
    cohereApiKey,
    onSearchResults: resultsHandler,
    onGetHighlights,
  } = config;

  const logger = config.logger ? config.logger : createDefaultLogger();

  // `country` goes last so the schema's key order matches a post-hoc assignment.
  const schemaObject: {
    query: z.ZodString;
    date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;
    country?: z.ZodOptional<z.ZodString>;
    images: z.ZodOptional<z.ZodBoolean>;
    videos: z.ZodOptional<z.ZodBoolean>;
    news: z.ZodOptional<z.ZodBoolean>;
  } = {
    query: querySchema,
    date: dateSchema,
    images: imagesSchema,
    videos: videosSchema,
    news: newsSchema,
    ...(searchProvider === 'serper' ? { country: countrySchema } : {}),
  };

  const toolSchema = z.object(schemaObject);

  const searchAPI = createSearchAPI({
    searchProvider,
    serperApiKey,
    searxngInstanceUrl,
    searxngApiKey,
  });

  /** Scraper chosen by scraperProvider; anything other than 'serper' uses Firecrawl. */
  const scraperInstance: t.BaseScraper =
    scraperProvider === 'serper'
      ? createSerperScraper({
        ...serperScraperOptions,
        apiKey: serperApiKey,
        timeout: scraperTimeout ?? serperScraperOptions?.timeout,
        logger,
      })
      : createFirecrawlScraper({
        ...firecrawlOptions,
        apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
        apiUrl: firecrawlApiUrl,
        version: firecrawlVersion,
        timeout: scraperTimeout ?? firecrawlOptions?.timeout,
        formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
        logger,
      });

  const selectedReranker = createReranker({
    rerankerType,
    jinaApiKey,
    jinaApiUrl,
    cohereApiKey,
    logger,
  });

  if (!selectedReranker) {
    logger.warn('No reranker selected. Using default ranking.');
  }

  const sourceProcessorOptions = {
    reranker: selectedReranker,
    topResults,
    strategies,
    filterContent,
    logger,
  };
  const sourceProcessor = createSourceProcessor(
    sourceProcessorOptions,
    scraperInstance
  );

  const search = createSearchProcessor({
    searchAPI,
    safeSearch,
    sourceProcessor,
    scraper: scraperInstance,
    onGetHighlights,
    logger,
  });

  return createTool({
    search,
    schema: toolSchema,
    onSearchResults: resultsHandler,
  });
};
|