@librechat/agents 2.4.321 → 3.0.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +218 -0
- package/dist/cjs/agents/AgentContext.cjs.map +1 -0
- package/dist/cjs/common/enum.cjs +14 -5
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +10 -6
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +309 -212
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs +322 -0
- package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -0
- package/dist/cjs/llm/anthropic/index.cjs +54 -9
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +52 -6
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +22 -2
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/tools.cjs +29 -0
- package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -0
- package/dist/cjs/llm/google/index.cjs +144 -0
- package/dist/cjs/llm/google/index.cjs.map +1 -0
- package/dist/cjs/llm/google/utils/common.cjs +477 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -0
- package/dist/cjs/llm/ollama/index.cjs +67 -0
- package/dist/cjs/llm/ollama/index.cjs.map +1 -0
- package/dist/cjs/llm/ollama/utils.cjs +158 -0
- package/dist/cjs/llm/ollama/utils.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +389 -3
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +672 -0
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -0
- package/dist/cjs/llm/providers.cjs +15 -15
- package/dist/cjs/llm/providers.cjs.map +1 -1
- package/dist/cjs/llm/text.cjs +14 -3
- package/dist/cjs/llm/text.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +330 -0
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -0
- package/dist/cjs/main.cjs +11 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/run.cjs +120 -81
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +85 -51
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +10 -4
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +119 -13
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/anthropic.cjs +40 -0
- package/dist/cjs/tools/search/anthropic.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +61 -13
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +9 -3
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +35 -50
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +145 -38
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +165 -48
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +34 -5
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/cjs/utils/events.cjs +31 -0
- package/dist/cjs/utils/events.cjs.map +1 -0
- package/dist/cjs/utils/title.cjs +57 -21
- package/dist/cjs/utils/title.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +54 -7
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +216 -0
- package/dist/esm/agents/AgentContext.mjs.map +1 -0
- package/dist/esm/common/enum.mjs +15 -6
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +10 -6
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +311 -214
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs +320 -0
- package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -0
- package/dist/esm/llm/anthropic/index.mjs +54 -9
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +52 -6
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs +22 -2
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/tools.mjs +27 -0
- package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -0
- package/dist/esm/llm/google/index.mjs +142 -0
- package/dist/esm/llm/google/index.mjs.map +1 -0
- package/dist/esm/llm/google/utils/common.mjs +471 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -0
- package/dist/esm/llm/ollama/index.mjs +65 -0
- package/dist/esm/llm/ollama/index.mjs.map +1 -0
- package/dist/esm/llm/ollama/utils.mjs +155 -0
- package/dist/esm/llm/ollama/utils.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +388 -4
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +666 -0
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -0
- package/dist/esm/llm/providers.mjs +5 -5
- package/dist/esm/llm/providers.mjs.map +1 -1
- package/dist/esm/llm/text.mjs +14 -3
- package/dist/esm/llm/text.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +328 -0
- package/dist/esm/llm/vertexai/index.mjs.map +1 -0
- package/dist/esm/main.mjs +6 -5
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/run.mjs +121 -83
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +87 -54
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +10 -4
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +119 -15
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/anthropic.mjs +37 -0
- package/dist/esm/tools/search/anthropic.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +61 -13
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +10 -4
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +35 -50
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +146 -39
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +164 -47
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +33 -6
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/esm/utils/events.mjs +29 -0
- package/dist/esm/utils/events.mjs.map +1 -0
- package/dist/esm/utils/title.mjs +57 -22
- package/dist/esm/utils/title.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +54 -8
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +91 -0
- package/dist/types/common/enum.d.ts +15 -6
- package/dist/types/events.d.ts +5 -4
- package/dist/types/graphs/Graph.d.ts +64 -67
- package/dist/types/graphs/MultiAgentGraph.d.ts +37 -0
- package/dist/types/graphs/index.d.ts +1 -0
- package/dist/types/llm/anthropic/index.d.ts +11 -0
- package/dist/types/llm/anthropic/types.d.ts +9 -3
- package/dist/types/llm/anthropic/utils/message_inputs.d.ts +1 -1
- package/dist/types/llm/anthropic/utils/output_parsers.d.ts +4 -4
- package/dist/types/llm/anthropic/utils/tools.d.ts +3 -0
- package/dist/types/llm/google/index.d.ts +13 -0
- package/dist/types/llm/google/types.d.ts +32 -0
- package/dist/types/llm/google/utils/common.d.ts +19 -0
- package/dist/types/llm/google/utils/tools.d.ts +10 -0
- package/dist/types/llm/google/utils/zod_to_genai_parameters.d.ts +14 -0
- package/dist/types/llm/ollama/index.d.ts +7 -0
- package/dist/types/llm/ollama/utils.d.ts +7 -0
- package/dist/types/llm/openai/index.d.ts +72 -3
- package/dist/types/llm/openai/types.d.ts +10 -0
- package/dist/types/llm/openai/utils/index.d.ts +20 -0
- package/dist/types/llm/text.d.ts +1 -1
- package/dist/types/llm/vertexai/index.d.ts +293 -0
- package/dist/types/messages/reducer.d.ts +9 -0
- package/dist/types/run.d.ts +19 -12
- package/dist/types/scripts/ant_web_search.d.ts +1 -0
- package/dist/types/scripts/args.d.ts +2 -1
- package/dist/types/scripts/handoff-test.d.ts +1 -0
- package/dist/types/scripts/multi-agent-conditional.d.ts +1 -0
- package/dist/types/scripts/multi-agent-parallel.d.ts +1 -0
- package/dist/types/scripts/multi-agent-sequence.d.ts +1 -0
- package/dist/types/scripts/multi-agent-test.d.ts +1 -0
- package/dist/types/stream.d.ts +10 -3
- package/dist/types/tools/CodeExecutor.d.ts +2 -2
- package/dist/types/tools/ToolNode.d.ts +1 -1
- package/dist/types/tools/handlers.d.ts +17 -4
- package/dist/types/tools/search/anthropic.d.ts +16 -0
- package/dist/types/tools/search/firecrawl.d.ts +16 -0
- package/dist/types/tools/search/rerankers.d.ts +8 -5
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/tool.d.ts +13 -0
- package/dist/types/tools/search/types.d.ts +64 -9
- package/dist/types/tools/search/utils.d.ts +9 -2
- package/dist/types/types/graph.d.ts +95 -15
- package/dist/types/types/llm.d.ts +24 -10
- package/dist/types/types/run.d.ts +46 -8
- package/dist/types/types/stream.d.ts +16 -2
- package/dist/types/types/tools.d.ts +1 -1
- package/dist/types/utils/events.d.ts +6 -0
- package/dist/types/utils/title.d.ts +2 -1
- package/dist/types/utils/tokens.d.ts +24 -0
- package/package.json +35 -18
- package/src/agents/AgentContext.ts +315 -0
- package/src/common/enum.ts +14 -5
- package/src/events.ts +24 -13
- package/src/graphs/Graph.ts +495 -312
- package/src/graphs/MultiAgentGraph.ts +381 -0
- package/src/graphs/index.ts +2 -1
- package/src/llm/anthropic/Jacob_Lee_Resume_2023.pdf +0 -0
- package/src/llm/anthropic/index.ts +78 -13
- package/src/llm/anthropic/llm.spec.ts +491 -115
- package/src/llm/anthropic/types.ts +39 -3
- package/src/llm/anthropic/utils/message_inputs.ts +67 -11
- package/src/llm/anthropic/utils/message_outputs.ts +21 -2
- package/src/llm/anthropic/utils/output_parsers.ts +25 -6
- package/src/llm/anthropic/utils/tools.ts +29 -0
- package/src/llm/google/index.ts +218 -0
- package/src/llm/google/types.ts +43 -0
- package/src/llm/google/utils/common.ts +646 -0
- package/src/llm/google/utils/tools.ts +160 -0
- package/src/llm/google/utils/zod_to_genai_parameters.ts +86 -0
- package/src/llm/ollama/index.ts +89 -0
- package/src/llm/ollama/utils.ts +193 -0
- package/src/llm/openai/index.ts +600 -14
- package/src/llm/openai/types.ts +24 -0
- package/src/llm/openai/utils/index.ts +912 -0
- package/src/llm/openai/utils/isReasoningModel.test.ts +90 -0
- package/src/llm/providers.ts +10 -9
- package/src/llm/text.ts +26 -7
- package/src/llm/vertexai/index.ts +360 -0
- package/src/messages/reducer.ts +80 -0
- package/src/run.ts +181 -112
- package/src/scripts/ant_web_search.ts +158 -0
- package/src/scripts/args.ts +12 -8
- package/src/scripts/cli4.ts +29 -21
- package/src/scripts/cli5.ts +29 -21
- package/src/scripts/code_exec.ts +54 -23
- package/src/scripts/code_exec_files.ts +48 -17
- package/src/scripts/code_exec_simple.ts +46 -27
- package/src/scripts/handoff-test.ts +135 -0
- package/src/scripts/image.ts +52 -20
- package/src/scripts/multi-agent-conditional.ts +220 -0
- package/src/scripts/multi-agent-example-output.md +110 -0
- package/src/scripts/multi-agent-parallel.ts +337 -0
- package/src/scripts/multi-agent-sequence.ts +212 -0
- package/src/scripts/multi-agent-test.ts +186 -0
- package/src/scripts/search.ts +4 -12
- package/src/scripts/simple.ts +25 -10
- package/src/scripts/tools.ts +48 -18
- package/src/specs/anthropic.simple.test.ts +150 -34
- package/src/specs/azure.simple.test.ts +325 -0
- package/src/specs/openai.simple.test.ts +140 -33
- package/src/specs/openrouter.simple.test.ts +107 -0
- package/src/specs/prune.test.ts +4 -9
- package/src/specs/reasoning.test.ts +80 -44
- package/src/specs/token-memoization.test.ts +39 -0
- package/src/stream.test.ts +94 -0
- package/src/stream.ts +139 -60
- package/src/tools/ToolNode.ts +21 -7
- package/src/tools/handlers.ts +192 -18
- package/src/tools/search/anthropic.ts +51 -0
- package/src/tools/search/firecrawl.ts +78 -24
- package/src/tools/search/format.ts +10 -5
- package/src/tools/search/rerankers.ts +50 -62
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +167 -34
- package/src/tools/search/tool.ts +222 -46
- package/src/tools/search/types.ts +65 -10
- package/src/tools/search/utils.ts +37 -5
- package/src/types/graph.ts +272 -103
- package/src/types/llm.ts +25 -12
- package/src/types/run.ts +51 -13
- package/src/types/stream.ts +22 -1
- package/src/types/tools.ts +16 -10
- package/src/utils/events.ts +32 -0
- package/src/utils/llmConfig.ts +20 -8
- package/src/utils/title.ts +104 -30
- package/src/utils/tokens.ts +69 -10
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
/* eslint-disable no-console */
|
|
2
1
|
import axios from 'axios';
|
|
3
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
4
3
|
import type * as t from './types';
|
|
4
|
+
import { getAttribution, createDefaultLogger } from './utils';
|
|
5
5
|
import { FirecrawlScraper } from './firecrawl';
|
|
6
6
|
import { BaseReranker } from './rerankers';
|
|
7
|
-
import { getAttribution } from './utils';
|
|
8
7
|
|
|
9
8
|
const chunker = {
|
|
10
9
|
cleanText: (text: string): string => {
|
|
@@ -52,12 +51,14 @@ const chunker = {
|
|
|
52
51
|
chunkSize?: number;
|
|
53
52
|
chunkOverlap?: number;
|
|
54
53
|
separators?: string[];
|
|
55
|
-
}
|
|
54
|
+
},
|
|
55
|
+
logger?: t.Logger
|
|
56
56
|
): Promise<string[][]> => {
|
|
57
57
|
// Split multiple texts
|
|
58
|
+
const logger_ = logger || createDefaultLogger();
|
|
58
59
|
const promises = texts.map((text) =>
|
|
59
60
|
chunker.splitText(text, options).catch((error) => {
|
|
60
|
-
|
|
61
|
+
logger_.error('Error splitting text:', error);
|
|
61
62
|
return [text];
|
|
62
63
|
})
|
|
63
64
|
);
|
|
@@ -82,18 +83,22 @@ const getHighlights = async ({
|
|
|
82
83
|
content,
|
|
83
84
|
reranker,
|
|
84
85
|
topResults = 5,
|
|
86
|
+
logger,
|
|
85
87
|
}: {
|
|
86
88
|
content: string;
|
|
87
89
|
query: string;
|
|
88
90
|
reranker?: BaseReranker;
|
|
89
91
|
topResults?: number;
|
|
92
|
+
logger?: t.Logger;
|
|
90
93
|
}): Promise<t.Highlight[] | undefined> => {
|
|
94
|
+
const logger_ = logger || createDefaultLogger();
|
|
95
|
+
|
|
91
96
|
if (!content) {
|
|
92
|
-
|
|
97
|
+
logger_.warn('No content provided for highlights');
|
|
93
98
|
return;
|
|
94
99
|
}
|
|
95
100
|
if (!reranker) {
|
|
96
|
-
|
|
101
|
+
logger_.warn('No reranker provided for highlights');
|
|
97
102
|
return;
|
|
98
103
|
}
|
|
99
104
|
|
|
@@ -102,14 +107,14 @@ const getHighlights = async ({
|
|
|
102
107
|
if (Array.isArray(documents)) {
|
|
103
108
|
return await reranker.rerank(query, documents, topResults);
|
|
104
109
|
} else {
|
|
105
|
-
|
|
110
|
+
logger_.error(
|
|
106
111
|
'Expected documents to be an array, got:',
|
|
107
112
|
typeof documents
|
|
108
113
|
);
|
|
109
114
|
return;
|
|
110
115
|
}
|
|
111
116
|
} catch (error) {
|
|
112
|
-
|
|
117
|
+
logger_.error('Error in content processing:', error);
|
|
113
118
|
return;
|
|
114
119
|
}
|
|
115
120
|
};
|
|
@@ -131,25 +136,49 @@ const createSerperAPI = (
|
|
|
131
136
|
|
|
132
137
|
const getSources = async ({
|
|
133
138
|
query,
|
|
139
|
+
date,
|
|
134
140
|
country,
|
|
141
|
+
safeSearch,
|
|
135
142
|
numResults = 8,
|
|
143
|
+
type,
|
|
136
144
|
}: t.GetSourcesParams): Promise<t.SearchResult> => {
|
|
137
145
|
if (!query.trim()) {
|
|
138
146
|
return { success: false, error: 'Query cannot be empty' };
|
|
139
147
|
}
|
|
140
148
|
|
|
141
149
|
try {
|
|
150
|
+
const safe = ['off', 'moderate', 'active'] as const;
|
|
142
151
|
const payload: t.SerperSearchPayload = {
|
|
143
152
|
q: query,
|
|
153
|
+
safe: safe[safeSearch ?? 1],
|
|
144
154
|
num: Math.min(Math.max(1, numResults), 10),
|
|
145
155
|
};
|
|
146
156
|
|
|
157
|
+
// Set the search type if provided
|
|
158
|
+
if (type) {
|
|
159
|
+
payload.type = type;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (date != null) {
|
|
163
|
+
payload.tbs = `qdr:${date}`;
|
|
164
|
+
}
|
|
165
|
+
|
|
147
166
|
if (country != null && country !== '') {
|
|
148
167
|
payload['gl'] = country.toLowerCase();
|
|
149
168
|
}
|
|
150
169
|
|
|
170
|
+
// Determine the API endpoint based on the search type
|
|
171
|
+
let apiEndpoint = config.apiUrl;
|
|
172
|
+
if (type === 'images') {
|
|
173
|
+
apiEndpoint = 'https://google.serper.dev/images';
|
|
174
|
+
} else if (type === 'videos') {
|
|
175
|
+
apiEndpoint = 'https://google.serper.dev/videos';
|
|
176
|
+
} else if (type === 'news') {
|
|
177
|
+
apiEndpoint = 'https://google.serper.dev/news';
|
|
178
|
+
}
|
|
179
|
+
|
|
151
180
|
const response = await axios.post<t.SerperResultData>(
|
|
152
|
-
|
|
181
|
+
apiEndpoint,
|
|
153
182
|
payload,
|
|
154
183
|
{
|
|
155
184
|
headers: {
|
|
@@ -169,6 +198,8 @@ const createSerperAPI = (
|
|
|
169
198
|
peopleAlsoAsk: data.peopleAlsoAsk,
|
|
170
199
|
knowledgeGraph: data.knowledgeGraph,
|
|
171
200
|
relatedSearches: data.relatedSearches,
|
|
201
|
+
videos: data.videos ?? [],
|
|
202
|
+
news: data.news ?? [],
|
|
172
203
|
};
|
|
173
204
|
|
|
174
205
|
return { success: true, data: results };
|
|
@@ -202,6 +233,8 @@ const createSearXNGAPI = (
|
|
|
202
233
|
const getSources = async ({
|
|
203
234
|
query,
|
|
204
235
|
numResults = 8,
|
|
236
|
+
safeSearch,
|
|
237
|
+
type,
|
|
205
238
|
}: t.GetSourcesParams): Promise<t.SearchResult> => {
|
|
206
239
|
if (!query.trim()) {
|
|
207
240
|
return { success: false, error: 'Query cannot be empty' };
|
|
@@ -218,14 +251,24 @@ const createSearXNGAPI = (
|
|
|
218
251
|
searchUrl = searchUrl.replace(/\/$/, '') + '/search';
|
|
219
252
|
}
|
|
220
253
|
|
|
254
|
+
// Determine the search category based on the type
|
|
255
|
+
let category = 'general';
|
|
256
|
+
if (type === 'images') {
|
|
257
|
+
category = 'images';
|
|
258
|
+
} else if (type === 'videos') {
|
|
259
|
+
category = 'videos';
|
|
260
|
+
} else if (type === 'news') {
|
|
261
|
+
category = 'news';
|
|
262
|
+
}
|
|
263
|
+
|
|
221
264
|
// Prepare parameters for SearXNG
|
|
222
265
|
const params: t.SearxNGSearchPayload = {
|
|
223
266
|
q: query,
|
|
224
267
|
format: 'json',
|
|
225
268
|
pageno: 1,
|
|
226
|
-
categories:
|
|
269
|
+
categories: category,
|
|
227
270
|
language: 'all',
|
|
228
|
-
safesearch:
|
|
271
|
+
safesearch: safeSearch,
|
|
229
272
|
engines: 'google,bing,duckduckgo',
|
|
230
273
|
};
|
|
231
274
|
|
|
@@ -245,32 +288,111 @@ const createSearXNGAPI = (
|
|
|
245
288
|
|
|
246
289
|
const data = response.data;
|
|
247
290
|
|
|
291
|
+
// Helper function to identify news results since SearXNG doesn't provide that classification by default
|
|
292
|
+
const isNewsResult = (result: t.SearXNGResult): boolean => {
|
|
293
|
+
const url = result.url?.toLowerCase() ?? '';
|
|
294
|
+
const title = result.title?.toLowerCase() ?? '';
|
|
295
|
+
|
|
296
|
+
// News-related keywords in title/content
|
|
297
|
+
const newsKeywords = [
|
|
298
|
+
'breaking news',
|
|
299
|
+
'latest news',
|
|
300
|
+
'top stories',
|
|
301
|
+
'news today',
|
|
302
|
+
'developing story',
|
|
303
|
+
'trending news',
|
|
304
|
+
'news',
|
|
305
|
+
];
|
|
306
|
+
|
|
307
|
+
// Check if title/content contains news keywords
|
|
308
|
+
const hasNewsKeywords = newsKeywords.some(
|
|
309
|
+
(keyword) => title.toLowerCase().includes(keyword) // just title probably fine, content parsing is overkill for what we need: || content.includes(keyword)
|
|
310
|
+
);
|
|
311
|
+
|
|
312
|
+
// Check if URL contains news-related paths
|
|
313
|
+
const hasNewsPath =
|
|
314
|
+
url.includes('/news/') ||
|
|
315
|
+
url.includes('/world/') ||
|
|
316
|
+
url.includes('/politics/') ||
|
|
317
|
+
url.includes('/breaking/');
|
|
318
|
+
|
|
319
|
+
return hasNewsKeywords || hasNewsPath;
|
|
320
|
+
};
|
|
321
|
+
|
|
248
322
|
// Transform SearXNG results to match SerperAPI format
|
|
249
323
|
const organicResults = (data.results ?? [])
|
|
250
324
|
.slice(0, numResults)
|
|
251
|
-
.map((result: t.SearXNGResult) =>
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
325
|
+
.map((result: t.SearXNGResult, index: number) => {
|
|
326
|
+
let attribution = '';
|
|
327
|
+
try {
|
|
328
|
+
attribution = new URL(result.url ?? '').hostname;
|
|
329
|
+
} catch {
|
|
330
|
+
attribution = '';
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
return {
|
|
334
|
+
position: index + 1,
|
|
335
|
+
title: result.title ?? '',
|
|
336
|
+
link: result.url ?? '',
|
|
337
|
+
snippet: result.content ?? '',
|
|
338
|
+
date: result.publishedDate ?? '',
|
|
339
|
+
attribution,
|
|
340
|
+
};
|
|
341
|
+
});
|
|
257
342
|
|
|
258
|
-
// Extract image results if available
|
|
259
343
|
const imageResults = (data.results ?? [])
|
|
260
344
|
.filter((result: t.SearXNGResult) => result.img_src)
|
|
261
345
|
.slice(0, 6)
|
|
262
|
-
.map((result: t.SearXNGResult) => ({
|
|
346
|
+
.map((result: t.SearXNGResult, index: number) => ({
|
|
263
347
|
title: result.title ?? '',
|
|
264
348
|
imageUrl: result.img_src ?? '',
|
|
349
|
+
position: index + 1,
|
|
350
|
+
source: new URL(result.url ?? '').hostname,
|
|
351
|
+
domain: new URL(result.url ?? '').hostname,
|
|
352
|
+
link: result.url ?? '',
|
|
265
353
|
}));
|
|
266
354
|
|
|
267
|
-
//
|
|
355
|
+
// Extract news results from organic results
|
|
356
|
+
const newsResults = (data.results ?? [])
|
|
357
|
+
.filter(isNewsResult)
|
|
358
|
+
.map((result: t.SearXNGResult, index: number) => {
|
|
359
|
+
let attribution = '';
|
|
360
|
+
try {
|
|
361
|
+
attribution = new URL(result.url ?? '').hostname;
|
|
362
|
+
} catch {
|
|
363
|
+
attribution = '';
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
return {
|
|
367
|
+
title: result.title ?? '',
|
|
368
|
+
link: result.url ?? '',
|
|
369
|
+
snippet: result.content ?? '',
|
|
370
|
+
date: result.publishedDate ?? '',
|
|
371
|
+
source: attribution,
|
|
372
|
+
imageUrl: result.img_src ?? '',
|
|
373
|
+
position: index + 1,
|
|
374
|
+
};
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
const topStories = newsResults.slice(0, 5);
|
|
378
|
+
|
|
379
|
+
const relatedSearches = Array.isArray(data.suggestions)
|
|
380
|
+
? data.suggestions.map((suggestion: string) => ({ query: suggestion }))
|
|
381
|
+
: [];
|
|
382
|
+
|
|
268
383
|
const results: t.SearchResultData = {
|
|
269
384
|
organic: organicResults,
|
|
270
385
|
images: imageResults,
|
|
271
|
-
topStories:
|
|
272
|
-
|
|
273
|
-
|
|
386
|
+
topStories: topStories, // Use first 5 extracted news as top stories
|
|
387
|
+
relatedSearches,
|
|
388
|
+
videos: [],
|
|
389
|
+
news: newsResults,
|
|
390
|
+
// Add empty arrays for other Serper fields to maintain parity
|
|
391
|
+
places: [],
|
|
392
|
+
shopping: [],
|
|
393
|
+
peopleAlsoAsk: [],
|
|
394
|
+
knowledgeGraph: undefined,
|
|
395
|
+
answerBox: undefined,
|
|
274
396
|
};
|
|
275
397
|
|
|
276
398
|
return { success: true, data: results };
|
|
@@ -327,8 +449,10 @@ export const createSourceProcessor = (
|
|
|
327
449
|
// strategies = ['no_extraction'],
|
|
328
450
|
// filterContent = true,
|
|
329
451
|
reranker,
|
|
452
|
+
logger,
|
|
330
453
|
} = config;
|
|
331
454
|
|
|
455
|
+
const logger_ = logger || createDefaultLogger();
|
|
332
456
|
const firecrawlScraper = scraperInstance;
|
|
333
457
|
|
|
334
458
|
const webScraper = {
|
|
@@ -341,7 +465,7 @@ export const createSourceProcessor = (
|
|
|
341
465
|
links: string[];
|
|
342
466
|
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
343
467
|
}): Promise<Array<t.ScrapeResult>> => {
|
|
344
|
-
|
|
468
|
+
logger_.debug(`Scraping ${links.length} links with Firecrawl`);
|
|
345
469
|
const promises: Array<Promise<t.ScrapeResult>> = [];
|
|
346
470
|
try {
|
|
347
471
|
for (let i = 0; i < links.length; i++) {
|
|
@@ -349,7 +473,11 @@ export const createSourceProcessor = (
|
|
|
349
473
|
const promise: Promise<t.ScrapeResult> = firecrawlScraper
|
|
350
474
|
.scrapeUrl(currentLink, {})
|
|
351
475
|
.then(([url, response]) => {
|
|
352
|
-
const attribution = getAttribution(
|
|
476
|
+
const attribution = getAttribution(
|
|
477
|
+
url,
|
|
478
|
+
response.data?.metadata,
|
|
479
|
+
logger_
|
|
480
|
+
);
|
|
353
481
|
if (response.success && response.data) {
|
|
354
482
|
const [content, references] =
|
|
355
483
|
firecrawlScraper.extractContent(response);
|
|
@@ -359,6 +487,10 @@ export const createSourceProcessor = (
|
|
|
359
487
|
attribution,
|
|
360
488
|
content: chunker.cleanText(content),
|
|
361
489
|
} as t.ScrapeResult;
|
|
490
|
+
} else {
|
|
491
|
+
logger_.error(
|
|
492
|
+
`Error scraping ${url}: ${response.error ?? 'Unknown error'}`
|
|
493
|
+
);
|
|
362
494
|
}
|
|
363
495
|
|
|
364
496
|
return {
|
|
@@ -371,7 +503,7 @@ export const createSourceProcessor = (
|
|
|
371
503
|
.then(async (result) => {
|
|
372
504
|
try {
|
|
373
505
|
if (result.error != null) {
|
|
374
|
-
|
|
506
|
+
logger_.error(
|
|
375
507
|
`Error scraping ${result.url}: ${result.content}`
|
|
376
508
|
);
|
|
377
509
|
return {
|
|
@@ -382,6 +514,7 @@ export const createSourceProcessor = (
|
|
|
382
514
|
query,
|
|
383
515
|
reranker,
|
|
384
516
|
content: result.content,
|
|
517
|
+
logger: logger_,
|
|
385
518
|
});
|
|
386
519
|
if (onGetHighlights) {
|
|
387
520
|
onGetHighlights(result.url);
|
|
@@ -391,14 +524,14 @@ export const createSourceProcessor = (
|
|
|
391
524
|
highlights,
|
|
392
525
|
};
|
|
393
526
|
} catch (error) {
|
|
394
|
-
|
|
527
|
+
logger_.error('Error processing scraped content:', error);
|
|
395
528
|
return {
|
|
396
529
|
...result,
|
|
397
530
|
};
|
|
398
531
|
}
|
|
399
532
|
})
|
|
400
533
|
.catch((error) => {
|
|
401
|
-
|
|
534
|
+
logger_.error(`Error scraping ${currentLink}:`, error);
|
|
402
535
|
return {
|
|
403
536
|
url: currentLink,
|
|
404
537
|
error: true,
|
|
@@ -409,7 +542,7 @@ export const createSourceProcessor = (
|
|
|
409
542
|
}
|
|
410
543
|
return await Promise.all(promises);
|
|
411
544
|
} catch (error) {
|
|
412
|
-
|
|
545
|
+
logger_.error('Error in scrapeMany:', error);
|
|
413
546
|
return [];
|
|
414
547
|
}
|
|
415
548
|
},
|
|
@@ -453,6 +586,7 @@ export const createSourceProcessor = (
|
|
|
453
586
|
result,
|
|
454
587
|
numElements,
|
|
455
588
|
query,
|
|
589
|
+
news,
|
|
456
590
|
proMode = true,
|
|
457
591
|
onGetHighlights,
|
|
458
592
|
}: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
|
|
@@ -520,7 +654,7 @@ export const createSourceProcessor = (
|
|
|
520
654
|
organicLinksSet
|
|
521
655
|
);
|
|
522
656
|
|
|
523
|
-
if (organicLinks.length === 0 && topStoryLinks.length === 0) {
|
|
657
|
+
if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
|
|
524
658
|
return result.data;
|
|
525
659
|
}
|
|
526
660
|
|
|
@@ -541,7 +675,7 @@ export const createSourceProcessor = (
|
|
|
541
675
|
}
|
|
542
676
|
|
|
543
677
|
// Process top story links
|
|
544
|
-
if (topStoryLinks.length > 0) {
|
|
678
|
+
if (news && topStoryLinks.length > 0) {
|
|
545
679
|
promises.push(
|
|
546
680
|
fetchContents({
|
|
547
681
|
query,
|
|
@@ -555,18 +689,17 @@ export const createSourceProcessor = (
|
|
|
555
689
|
|
|
556
690
|
await Promise.all(promises);
|
|
557
691
|
|
|
558
|
-
// Update sources with scraped content
|
|
559
692
|
if (result.data.organic.length > 0) {
|
|
560
693
|
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
561
694
|
}
|
|
562
695
|
|
|
563
|
-
if (topStories.length > 0) {
|
|
696
|
+
if (news && topStories.length > 0) {
|
|
564
697
|
updateSourcesWithContent(topStories, sourceMap);
|
|
565
698
|
}
|
|
566
699
|
|
|
567
700
|
return result.data;
|
|
568
701
|
} catch (error) {
|
|
569
|
-
|
|
702
|
+
logger_.error('Error in processSources:', error);
|
|
570
703
|
return {
|
|
571
704
|
organic: [],
|
|
572
705
|
topStories: [],
|