@librechat/agents 2.4.321 → 2.4.322
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/firecrawl.cjs +6 -4
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +6 -0
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +62 -25
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -47
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +34 -5
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +6 -4
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +7 -1
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +63 -26
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +161 -46
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +33 -6
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +1 -0
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/tool.d.ts +13 -0
- package/dist/types/tools/search/types.d.ts +34 -0
- package/dist/types/tools/search/utils.d.ts +9 -2
- package/package.json +3 -2
- package/src/scripts/search.ts +3 -3
- package/src/tools/search/firecrawl.ts +9 -4
- package/src/tools/search/format.ts +8 -1
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +74 -22
- package/src/tools/search/tool.ts +217 -44
- package/src/tools/search/types.ts +35 -0
- package/src/tools/search/utils.ts +37 -5
- package/src/utils/llmConfig.ts +1 -1
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
+
import { createDefaultLogger } from './utils.mjs';
|
|
2
3
|
|
|
3
|
-
/* eslint-disable no-console */
|
|
4
4
|
class BaseReranker {
|
|
5
5
|
apiKey;
|
|
6
|
-
|
|
6
|
+
logger;
|
|
7
|
+
constructor(logger) {
|
|
7
8
|
// Each specific reranker will set its API key
|
|
9
|
+
this.logger = logger || createDefaultLogger();
|
|
8
10
|
}
|
|
9
11
|
getDefaultRanking(documents, topK) {
|
|
10
12
|
return documents
|
|
@@ -12,22 +14,22 @@ class BaseReranker {
|
|
|
12
14
|
.map((doc) => ({ text: doc, score: 0 }));
|
|
13
15
|
}
|
|
14
16
|
logDocumentSamples(documents) {
|
|
15
|
-
|
|
17
|
+
this.logger.debug('Sample documents being sent to API:');
|
|
16
18
|
for (let i = 0; i < Math.min(3, documents.length); i++) {
|
|
17
|
-
|
|
19
|
+
this.logger.debug(`Document ${i}: ${documents[i].substring(0, 100)}...`);
|
|
18
20
|
}
|
|
19
21
|
}
|
|
20
22
|
}
|
|
21
23
|
class JinaReranker extends BaseReranker {
|
|
22
|
-
constructor({ apiKey = process.env.JINA_API_KEY }) {
|
|
23
|
-
super();
|
|
24
|
+
constructor({ apiKey = process.env.JINA_API_KEY, logger, }) {
|
|
25
|
+
super(logger);
|
|
24
26
|
this.apiKey = apiKey;
|
|
25
27
|
}
|
|
26
28
|
async rerank(query, documents, topK = 5) {
|
|
27
|
-
|
|
29
|
+
this.logger.debug(`Reranking ${documents.length} documents with Jina`);
|
|
28
30
|
try {
|
|
29
31
|
if (this.apiKey == null || this.apiKey === '') {
|
|
30
|
-
|
|
32
|
+
this.logger.warn('JINA_API_KEY is not set. Using default ranking.');
|
|
31
33
|
return this.getDefaultRanking(documents, topK);
|
|
32
34
|
}
|
|
33
35
|
this.logDocumentSamples(documents);
|
|
@@ -45,13 +47,13 @@ class JinaReranker extends BaseReranker {
|
|
|
45
47
|
},
|
|
46
48
|
});
|
|
47
49
|
// Log the response data structure
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
this.logger.debug('Jina API response structure:');
|
|
51
|
+
this.logger.debug('Model:', response.data?.model);
|
|
52
|
+
this.logger.debug('Usage:', response.data?.usage);
|
|
53
|
+
this.logger.debug('Results count:', response.data?.results.length);
|
|
52
54
|
// Log a sample of the results
|
|
53
55
|
if ((response.data?.results.length ?? 0) > 0) {
|
|
54
|
-
|
|
56
|
+
this.logger.debug('Sample result:', JSON.stringify(response.data?.results[0], null, 2));
|
|
55
57
|
}
|
|
56
58
|
if (response.data && response.data.results.length) {
|
|
57
59
|
return response.data.results.map((result) => {
|
|
@@ -76,27 +78,27 @@ class JinaReranker extends BaseReranker {
|
|
|
76
78
|
});
|
|
77
79
|
}
|
|
78
80
|
else {
|
|
79
|
-
|
|
81
|
+
this.logger.warn('Unexpected response format from Jina API. Using default ranking.');
|
|
80
82
|
return this.getDefaultRanking(documents, topK);
|
|
81
83
|
}
|
|
82
84
|
}
|
|
83
85
|
catch (error) {
|
|
84
|
-
|
|
86
|
+
this.logger.error('Error using Jina reranker:', error);
|
|
85
87
|
// Fallback to default ranking on error
|
|
86
88
|
return this.getDefaultRanking(documents, topK);
|
|
87
89
|
}
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
92
|
class CohereReranker extends BaseReranker {
|
|
91
|
-
constructor({ apiKey = process.env.COHERE_API_KEY }) {
|
|
92
|
-
super();
|
|
93
|
+
constructor({ apiKey = process.env.COHERE_API_KEY, logger, }) {
|
|
94
|
+
super(logger);
|
|
93
95
|
this.apiKey = apiKey;
|
|
94
96
|
}
|
|
95
97
|
async rerank(query, documents, topK = 5) {
|
|
96
|
-
|
|
98
|
+
this.logger.debug(`Reranking ${documents.length} documents with Cohere`);
|
|
97
99
|
try {
|
|
98
100
|
if (this.apiKey == null || this.apiKey === '') {
|
|
99
|
-
|
|
101
|
+
this.logger.warn('COHERE_API_KEY is not set. Using default ranking.');
|
|
100
102
|
return this.getDefaultRanking(documents, topK);
|
|
101
103
|
}
|
|
102
104
|
this.logDocumentSamples(documents);
|
|
@@ -113,13 +115,13 @@ class CohereReranker extends BaseReranker {
|
|
|
113
115
|
},
|
|
114
116
|
});
|
|
115
117
|
// Log the response data structure
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
this.logger.debug('Cohere API response structure:');
|
|
119
|
+
this.logger.debug('ID:', response.data?.id);
|
|
120
|
+
this.logger.debug('Meta:', response.data?.meta);
|
|
121
|
+
this.logger.debug('Results count:', response.data?.results.length);
|
|
120
122
|
// Log a sample of the results
|
|
121
123
|
if ((response.data?.results.length ?? 0) > 0) {
|
|
122
|
-
|
|
124
|
+
this.logger.debug('Sample result:', JSON.stringify(response.data?.results[0], null, 2));
|
|
123
125
|
}
|
|
124
126
|
if (response.data && response.data.results.length) {
|
|
125
127
|
return response.data.results.map((result) => {
|
|
@@ -130,24 +132,24 @@ class CohereReranker extends BaseReranker {
|
|
|
130
132
|
});
|
|
131
133
|
}
|
|
132
134
|
else {
|
|
133
|
-
|
|
135
|
+
this.logger.warn('Unexpected response format from Cohere API. Using default ranking.');
|
|
134
136
|
return this.getDefaultRanking(documents, topK);
|
|
135
137
|
}
|
|
136
138
|
}
|
|
137
139
|
catch (error) {
|
|
138
|
-
|
|
140
|
+
this.logger.error('Error using Cohere reranker:', error);
|
|
139
141
|
// Fallback to default ranking on error
|
|
140
142
|
return this.getDefaultRanking(documents, topK);
|
|
141
143
|
}
|
|
142
144
|
}
|
|
143
145
|
}
|
|
144
146
|
class InfinityReranker extends BaseReranker {
|
|
145
|
-
constructor() {
|
|
146
|
-
super();
|
|
147
|
+
constructor(logger) {
|
|
148
|
+
super(logger);
|
|
147
149
|
// No API key needed for the placeholder implementation
|
|
148
150
|
}
|
|
149
151
|
async rerank(query, documents, topK = 5) {
|
|
150
|
-
|
|
152
|
+
this.logger.debug(`Reranking ${documents.length} documents with Infinity (placeholder)`);
|
|
151
153
|
// This would be replaced with actual Infinity reranker implementation
|
|
152
154
|
return this.getDefaultRanking(documents, topK);
|
|
153
155
|
}
|
|
@@ -156,20 +158,25 @@ class InfinityReranker extends BaseReranker {
|
|
|
156
158
|
* Creates the appropriate reranker based on type and configuration
|
|
157
159
|
*/
|
|
158
160
|
const createReranker = (config) => {
  // Builds the reranker selected by `config.rerankerType`, matched
  // case-insensitively (`toLowerCase()` is called on it, so it must be a string).
  //   - 'jina'     -> JinaReranker, given `config.jinaApiKey`
  //   - 'cohere'   -> CohereReranker, given `config.cohereApiKey`
  //   - 'infinity' -> InfinityReranker
  //   - 'none'     -> returns undefined (no reranker is created)
  //   - otherwise  -> logs a warning and falls back to JinaReranker
  // `config.logger` is optional; the same logger instance is handed to
  // whichever reranker is created.
  const { rerankerType, jinaApiKey, cohereApiKey, logger } = config;
  // Create a default logger if none is provided
  const defaultLogger = logger || createDefaultLogger();
  switch (rerankerType.toLowerCase()) {
    case 'jina':
      return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
    case 'cohere':
      return new CohereReranker({
        apiKey: cohereApiKey,
        logger: defaultLogger,
      });
    case 'infinity':
      return new InfinityReranker(defaultLogger);
    case 'none':
      defaultLogger.debug('Skipping reranking as reranker is set to "none"');
      return undefined;
    default:
      // The fallback constructed below is the Jina reranker, so the warning
      // names it; the earlier wording claimed InfinityReranker.
      defaultLogger.warn(`Unknown reranker type: ${rerankerType}. Defaulting to JinaReranker.`);
      return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
  }
};
|
|
175
182
|
// Example usage:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rerankers.mjs","sources":["../../../../src/tools/search/rerankers.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport type * as t from './types';\n\nexport abstract class BaseReranker {\n protected apiKey: string | undefined;\n\n constructor() {\n // Each specific reranker will set its API key\n }\n\n abstract rerank(\n query: string,\n documents: string[],\n topK?: number\n ): Promise<t.Highlight[]>;\n\n protected getDefaultRanking(\n documents: string[],\n topK: number\n ): t.Highlight[] {\n return documents\n .slice(0, Math.min(topK, documents.length))\n .map((doc) => ({ text: doc, score: 0 }));\n }\n\n protected logDocumentSamples(documents: string[]): void {\n console.log('Sample documents being sent to API:');\n for (let i = 0; i < Math.min(3, documents.length); i++) {\n console.log(`Document ${i}: ${documents[i].substring(0, 100)}...`);\n }\n }\n}\n\nexport class JinaReranker extends BaseReranker {\n constructor({ apiKey = process.env.JINA_API_KEY }: { apiKey?: string }) {\n super();\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(`Reranking ${documents.length} documents with Jina`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n console.warn('JINA_API_KEY is not set. 
Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'jina-reranker-v2-base-multilingual',\n query: query,\n top_n: topK,\n documents: documents,\n return_documents: true,\n };\n\n const response = await axios.post<t.JinaRerankerResponse | undefined>(\n 'https://api.jina.ai/v1/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n console.log('Jina API response structure:');\n console.log('Model:', response.data?.model);\n console.log('Usage:', response.data?.usage);\n console.log('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n console.log(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n let text = '';\n\n // If return_documents is true, the document field will be present\n if (result.document != null) {\n const doc = result.document;\n if (typeof doc === 'object' && 'text' in doc) {\n text = doc.text;\n } else if (typeof doc === 'string') {\n text = doc;\n }\n } else {\n // Otherwise, use the index to get the document\n text = documents[docIndex];\n }\n\n return { text, score };\n });\n } else {\n console.warn(\n 'Unexpected response format from Jina API. 
Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n console.error('Error using Jina reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class CohereReranker extends BaseReranker {\n constructor({ apiKey = process.env.COHERE_API_KEY }: { apiKey?: string }) {\n super();\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(`Reranking ${documents.length} documents with Cohere`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n console.warn('COHERE_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'rerank-v3.5',\n query: query,\n top_n: topK,\n documents: documents,\n };\n\n const response = await axios.post<t.CohereRerankerResponse | undefined>(\n 'https://api.cohere.com/v2/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n console.log('Cohere API response structure:');\n console.log('ID:', response.data?.id);\n console.log('Meta:', response.data?.meta);\n console.log('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n console.log(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n const text = documents[docIndex];\n return { text, score };\n });\n } else {\n console.warn(\n 'Unexpected response format from Cohere API. 
Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n console.error('Error using Cohere reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class InfinityReranker extends BaseReranker {\n constructor() {\n super();\n // No API key needed for the placeholder implementation\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(\n `Reranking ${documents.length} documents with Infinity (placeholder)`\n );\n // This would be replaced with actual Infinity reranker implementation\n return this.getDefaultRanking(documents, topK);\n }\n}\n\n/**\n * Creates the appropriate reranker based on type and configuration\n */\nexport const createReranker = (config: {\n rerankerType: t.RerankerType;\n jinaApiKey?: string;\n cohereApiKey?: string;\n}): BaseReranker | undefined => {\n const { rerankerType, jinaApiKey, cohereApiKey } = config;\n\n switch (rerankerType.toLowerCase()) {\n case 'jina':\n return new JinaReranker({ apiKey: jinaApiKey });\n case 'cohere':\n return new CohereReranker({ apiKey: cohereApiKey });\n case 'infinity':\n return new InfinityReranker();\n case 'none':\n console.log('Skipping reranking as reranker is set to \"none\"');\n return undefined;\n default:\n console.warn(\n `Unknown reranker type: ${rerankerType}. 
Defaulting to InfinityReranker.`\n );\n return new JinaReranker({ apiKey: jinaApiKey });\n }\n};\n\n// Example usage:\n// const jinaReranker = new JinaReranker();\n// const cohereReranker = new CohereReranker();\n// const infinityReranker = new InfinityReranker();\n"],"names":[],"mappings":";;AAAA;MAIsB,YAAY,CAAA;AACtB,IAAA,MAAM;AAEhB,IAAA,WAAA,GAAA;;;IAUU,iBAAiB,CACzB,SAAmB,EACnB,IAAY,EAAA;AAEZ,QAAA,OAAO;AACJ,aAAA,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,MAAM,CAAC;AACzC,aAAA,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;;AAGlC,IAAA,kBAAkB,CAAC,SAAmB,EAAA;AAC9C,QAAA,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC;QAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE;AACtD,YAAA,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA,EAAA,EAAK,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA,GAAA,CAAK,CAAC;;;AAGvE;AAEK,MAAO,YAAa,SAAQ,YAAY,CAAA;IAC5C,WAAY,CAAA,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,EAAuB,EAAA;AACpE,QAAA,KAAK,EAAE;AACP,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CAAC,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAsB,oBAAA,CAAA,CAAC;AAEhE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,OAAO,CAAC,IAAI,CAAC,iDAAiD,CAAC;gBAC/D,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,oCAAoC;AAC3C,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;AACpB,gBAAA,gBAAgB,EAAE,IAAI;aACvB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,+BAA+B,EAC/B,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AAC3C,YAAA,OA
AO,CAAC,GAAG,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAG5D,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,OAAO,CAAC,GAAG,CACT,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;oBACpC,IAAI,IAAI,GAAG,EAAE;;AAGb,oBAAA,IAAI,MAAM,CAAC,QAAQ,IAAI,IAAI,EAAE;AAC3B,wBAAA,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ;wBAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,MAAM,IAAI,GAAG,EAAE;AAC5C,4BAAA,IAAI,GAAG,GAAG,CAAC,IAAI;;AACV,6BAAA,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE;4BAClC,IAAI,GAAG,GAAG;;;yBAEP;;AAEL,wBAAA,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;;AAG5B,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,OAAO,CAAC,IAAI,CACV,kEAAkE,CACnE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC;;YAElD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,cAAe,SAAQ,YAAY,CAAA;IAC9C,WAAY,CAAA,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,EAAuB,EAAA;AACtE,QAAA,KAAK,EAAE;AACP,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CAAC,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAwB,sBAAA,CAAA,CAAC;AAElE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,OAAO,CAAC,IAAI,CAAC,mDAAmD,CAAC;gBACjE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,aAAa;AACpB,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;aACrB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,kCAAkC,EAClC,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CA
CF;;AAGD,YAAA,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;AACzC,YAAA,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAG5D,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,OAAO,CAAC,GAAG,CACT,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;AACpC,oBAAA,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;AAChC,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,OAAO,CAAC,IAAI,CACV,oEAAoE,CACrE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;;YAEpD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,gBAAiB,SAAQ,YAAY,CAAA;AAChD,IAAA,WAAA,GAAA;AACE,QAAA,KAAK,EAAE;;;IAIT,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CACT,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAwC,sCAAA,CAAA,CACtE;;QAED,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAEjD;AAED;;AAEG;AACU,MAAA,cAAc,GAAG,CAAC,MAI9B,KAA8B;IAC7B,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,GAAG,MAAM;AAEzD,IAAA,QAAQ,YAAY,CAAC,WAAW,EAAE;AAClC,QAAA,KAAK,MAAM;YACT,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;AACjD,QAAA,KAAK,QAAQ;YACX,OAAO,IAAI,cAAc,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;AACrD,QAAA,KAAK,UAAU;YACb,OAAO,IAAI,gBAAgB,EAAE;AAC/B,QAAA,KAAK,MAAM;AACT,YAAA,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC;AAC9D,YAAA,OAAO,SAAS;AAClB,QAAA;AACE,YAAA,OAAO,CAAC,IAAI,CACV,0BAA0B,YAAY,CAAA,iCAAA,CAAmC,CAC1E;YACD,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;;AAEnD;AAEA;AACA;AACA;AACA;;;;"}
|
|
1
|
+
{"version":3,"file":"rerankers.mjs","sources":["../../../../src/tools/search/rerankers.ts"],"sourcesContent":["import axios from 'axios';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\nexport abstract class BaseReranker {\n protected apiKey: string | undefined;\n protected logger: t.Logger;\n\n constructor(logger?: t.Logger) {\n // Each specific reranker will set its API key\n this.logger = logger || createDefaultLogger();\n }\n\n abstract rerank(\n query: string,\n documents: string[],\n topK?: number\n ): Promise<t.Highlight[]>;\n\n protected getDefaultRanking(\n documents: string[],\n topK: number\n ): t.Highlight[] {\n return documents\n .slice(0, Math.min(topK, documents.length))\n .map((doc) => ({ text: doc, score: 0 }));\n }\n\n protected logDocumentSamples(documents: string[]): void {\n this.logger.debug('Sample documents being sent to API:');\n for (let i = 0; i < Math.min(3, documents.length); i++) {\n this.logger.debug(`Document ${i}: ${documents[i].substring(0, 100)}...`);\n }\n }\n}\n\nexport class JinaReranker extends BaseReranker {\n constructor({\n apiKey = process.env.JINA_API_KEY,\n logger,\n }: {\n apiKey?: string;\n logger?: t.Logger;\n }) {\n super(logger);\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(`Reranking ${documents.length} documents with Jina`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n this.logger.warn('JINA_API_KEY is not set. 
Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'jina-reranker-v2-base-multilingual',\n query: query,\n top_n: topK,\n documents: documents,\n return_documents: true,\n };\n\n const response = await axios.post<t.JinaRerankerResponse | undefined>(\n 'https://api.jina.ai/v1/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n this.logger.debug('Jina API response structure:');\n this.logger.debug('Model:', response.data?.model);\n this.logger.debug('Usage:', response.data?.usage);\n this.logger.debug('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n this.logger.debug(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n let text = '';\n\n // If return_documents is true, the document field will be present\n if (result.document != null) {\n const doc = result.document;\n if (typeof doc === 'object' && 'text' in doc) {\n text = doc.text;\n } else if (typeof doc === 'string') {\n text = doc;\n }\n } else {\n // Otherwise, use the index to get the document\n text = documents[docIndex];\n }\n\n return { text, score };\n });\n } else {\n this.logger.warn(\n 'Unexpected response format from Jina API. 
Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n this.logger.error('Error using Jina reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class CohereReranker extends BaseReranker {\n constructor({\n apiKey = process.env.COHERE_API_KEY,\n logger,\n }: {\n apiKey?: string;\n logger?: t.Logger;\n }) {\n super(logger);\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(`Reranking ${documents.length} documents with Cohere`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n this.logger.warn('COHERE_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'rerank-v3.5',\n query: query,\n top_n: topK,\n documents: documents,\n };\n\n const response = await axios.post<t.CohereRerankerResponse | undefined>(\n 'https://api.cohere.com/v2/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n this.logger.debug('Cohere API response structure:');\n this.logger.debug('ID:', response.data?.id);\n this.logger.debug('Meta:', response.data?.meta);\n this.logger.debug('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 
0) > 0) {\n this.logger.debug(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n const text = documents[docIndex];\n return { text, score };\n });\n } else {\n this.logger.warn(\n 'Unexpected response format from Cohere API. Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n this.logger.error('Error using Cohere reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class InfinityReranker extends BaseReranker {\n constructor(logger?: t.Logger) {\n super(logger);\n // No API key needed for the placeholder implementation\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(\n `Reranking ${documents.length} documents with Infinity (placeholder)`\n );\n // This would be replaced with actual Infinity reranker implementation\n return this.getDefaultRanking(documents, topK);\n }\n}\n\n/**\n * Creates the appropriate reranker based on type and configuration\n */\nexport const createReranker = (config: {\n rerankerType: t.RerankerType;\n jinaApiKey?: string;\n cohereApiKey?: string;\n logger?: t.Logger;\n}): BaseReranker | undefined => {\n const { rerankerType, jinaApiKey, cohereApiKey, logger } = config;\n\n // Create a default logger if none is provided\n const defaultLogger = logger || createDefaultLogger();\n\n switch (rerankerType.toLowerCase()) {\n case 'jina':\n return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });\n case 'cohere':\n return new CohereReranker({\n apiKey: cohereApiKey,\n logger: defaultLogger,\n });\n case 'infinity':\n return new InfinityReranker(defaultLogger);\n case 'none':\n defaultLogger.debug('Skipping reranking 
as reranker is set to \"none\"');\n return undefined;\n default:\n defaultLogger.warn(\n `Unknown reranker type: ${rerankerType}. Defaulting to InfinityReranker.`\n );\n return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });\n }\n};\n\n// Example usage:\n// const jinaReranker = new JinaReranker();\n// const cohereReranker = new CohereReranker();\n// const infinityReranker = new InfinityReranker();\n"],"names":[],"mappings":";;;MAIsB,YAAY,CAAA;AACtB,IAAA,MAAM;AACN,IAAA,MAAM;AAEhB,IAAA,WAAA,CAAY,MAAiB,EAAA;;AAE3B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,IAAI,mBAAmB,EAAE;;IASrC,iBAAiB,CACzB,SAAmB,EACnB,IAAY,EAAA;AAEZ,QAAA,OAAO;AACJ,aAAA,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,MAAM,CAAC;AACzC,aAAA,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;;AAGlC,IAAA,kBAAkB,CAAC,SAAmB,EAAA;AAC9C,QAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,qCAAqC,CAAC;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE;YACtD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAY,SAAA,EAAA,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAK,GAAA,CAAA,CAAC;;;AAG7E;AAEK,MAAO,YAAa,SAAQ,YAAY,CAAA;IAC5C,WAAY,CAAA,EACV,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,EACjC,MAAM,GAIP,EAAA;QACC,KAAK,CAAC,MAAM,CAAC;AACb,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAsB,oBAAA,CAAA,CAAC;AAEtE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC;gBACnE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,oCAAoC;AAC3C,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;AACpB,gBAAA,gBAAgB,EAAE,IAAI;aACvB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,+BAA+B,EAC/B,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB
;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAGlE,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;oBACpC,IAAI,IAAI,GAAG,EAAE;;AAGb,oBAAA,IAAI,MAAM,CAAC,QAAQ,IAAI,IAAI,EAAE;AAC3B,wBAAA,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ;wBAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,MAAM,IAAI,GAAG,EAAE;AAC5C,4BAAA,IAAI,GAAG,GAAG,CAAC,IAAI;;AACV,6BAAA,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE;4BAClC,IAAI,GAAG,GAAG;;;yBAEP;;AAEL,wBAAA,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;;AAG5B,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,kEAAkE,CACnE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC;;YAEtD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,cAAe,SAAQ,YAAY,CAAA;IAC9C,WAAY,CAAA,EACV,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,EACnC,MAAM,GAIP,EAAA;QACC,KAAK,CAAC,MAAM,CAAC;AACb,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAwB,sBAAA,CAAA,CAAC;AAExE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,mDAAmD,CAAC;gBACrE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC
;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,aAAa;AACpB,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;aACrB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,kCAAkC,EAClC,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gCAAgC,CAAC;AACnD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;AAC3C,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;AAC/C,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAGlE,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;AACpC,oBAAA,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;AAChC,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,oEAAoE,CACrE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;;YAExD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,gBAAiB,SAAQ,YAAY,CAAA;AAChD,IAAA,WAAA,CAAY,MAAiB,EAAA;QAC3B,KAAK,CAAC,MAAM,CAAC;;;IAIf,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAwC,sCAAA,CAAA,CACtE;;QAED,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAEjD;AAED;;AAEG;AACU,MAAA,cAAc,GAAG,CAAC,MAK9B,KAA8B;IAC7B,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,MAAM;;AAGjE,IAAA,MAAM,aAAa,GAAG,MAAM,IAAI,mBAAmB,EAAE;AAErD,IAAA,QAAQ,YAAY,CAAC,WAA
W,EAAE;AAClC,QAAA,KAAK,MAAM;AACT,YAAA,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AACxE,QAAA,KAAK,QAAQ;YACX,OAAO,IAAI,cAAc,CAAC;AACxB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,MAAM,EAAE,aAAa;AACtB,aAAA,CAAC;AACJ,QAAA,KAAK,UAAU;AACb,YAAA,OAAO,IAAI,gBAAgB,CAAC,aAAa,CAAC;AAC5C,QAAA,KAAK,MAAM;AACT,YAAA,aAAa,CAAC,KAAK,CAAC,iDAAiD,CAAC;AACtE,YAAA,OAAO,SAAS;AAClB,QAAA;AACE,YAAA,aAAa,CAAC,IAAI,CAChB,0BAA0B,YAAY,CAAA,iCAAA,CAAmC,CAC1E;AACD,YAAA,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;;AAE1E;AAEA;AACA;AACA;AACA;;;;"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
 * Relative date-range codes accepted for the search `date` argument.
 *
 * Each key names a look-back window and maps to the single-letter code that
 * is passed along as the value. The object is frozen so that importers
 * cannot add, remove or overwrite entries of this shared lookup table.
 *
 * @readonly
 * @enum {string}
 */
const DATE_RANGE = Object.freeze({
  PAST_HOUR: 'h',
  PAST_24_HOURS: 'd',
  PAST_WEEK: 'w',
  PAST_MONTH: 'm',
  PAST_YEAR: 'y',
});
|
|
11
|
+
/**
 * Default description text attached to the search `query` schema.
 *
 * Built from an explicit list of lines (joined with `\n`) so that each
 * guideline and technique is a separate, easily diffable entry. The result
 * has no leading or trailing whitespace.
 */
const DEFAULT_QUERY_DESCRIPTION = [
  'GUIDELINES:',
  '- Start broad, then narrow: Begin with key concepts, then refine with specifics',
  '- Think like sources: Use terminology experts would use in the field',
  '- Consider perspective: Frame queries from different viewpoints for better results',
  '- Quality over quantity: A precise 3-4 word query often beats lengthy sentences',
  '',
  'TECHNIQUES (combine for power searches):',
  '- EXACT PHRASES: Use quotes ("climate change report")',
  '- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)',
  '- SITE-SPECIFIC: Restrict to websites (site:edu research)',
  '- FILETYPE: Find specific documents (filetype:pdf study)',
  '- OR OPERATOR: Find alternatives (electric OR hybrid cars)',
  '- DATE RANGE: Recent information (data after:2020)',
  '- WILDCARDS: Use * for unknown terms (how to * bread)',
  '- SPECIFIC QUESTIONS: Use who/what/when/where/why/how',
  '- DOMAIN TERMS: Include technical terminology for specialized topics',
  '- CONCISE TERMS: Prioritize keywords over sentences',
].join('\n');
|
|
30
|
+
/**
 * Default description text attached to the search `country` schema.
 *
 * Assembled from individual lines joined with `\n`; the result carries no
 * leading or trailing whitespace.
 */
const DEFAULT_COUNTRY_DESCRIPTION = [
  'Country code to localize search results.',
  'Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.',
  'Provide this when the search should return results specific to a particular country.',
  'Examples:',
  '- "us" for United States (default)',
  '- "de" for Germany',
  '- "in" for India',
].join('\n');
|
|
38
|
+
/**
 * Marks `schema` as optional and then attaches `description` to it, in that
 * order (`schema.optional().describe(description)`).
 */
const describeOptional = (schema, description) =>
  schema.optional().describe(description);

/** Required search query string, described by DEFAULT_QUERY_DESCRIPTION. */
const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);

/** Optional date-range filter; must be one of the DATE_RANGE values. */
const dateSchema = describeOptional(
  z.nativeEnum(DATE_RANGE),
  'Date range for search results.',
);

/** Optional country code string, described by DEFAULT_COUNTRY_DESCRIPTION. */
const countrySchema = describeOptional(z.string(), DEFAULT_COUNTRY_DESCRIPTION);

/** Optional boolean flag for an additional image search. */
const imagesSchema = describeOptional(
  z.boolean(),
  'Whether to also run an image search.',
);

/** Optional boolean flag for an additional video search. */
const videosSchema = describeOptional(
  z.boolean(),
  'Whether to also run a video search.',
);

/** Optional boolean flag for an additional news search. */
const newsSchema = describeOptional(
  z.boolean(),
  'Whether to also run a news search.',
);
|
|
59
|
+
|
|
60
|
+
export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema };
|
|
61
|
+
//# sourceMappingURL=schema.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.mjs","sources":["../../../../src/tools/search/schema.ts"],"sourcesContent":["import { z } from 'zod';\n\nexport enum DATE_RANGE {\n PAST_HOUR = 'h',\n PAST_24_HOURS = 'd',\n PAST_WEEK = 'w',\n PAST_MONTH = 'm',\n PAST_YEAR = 'y',\n}\n\nexport const DEFAULT_QUERY_DESCRIPTION = `\nGUIDELINES:\n- Start broad, then narrow: Begin with key concepts, then refine with specifics\n- Think like sources: Use terminology experts would use in the field\n- Consider perspective: Frame queries from different viewpoints for better results\n- Quality over quantity: A precise 3-4 word query often beats lengthy sentences\n\nTECHNIQUES (combine for power searches):\n- EXACT PHRASES: Use quotes (\"climate change report\")\n- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)\n- SITE-SPECIFIC: Restrict to websites (site:edu research)\n- FILETYPE: Find specific documents (filetype:pdf study)\n- OR OPERATOR: Find alternatives (electric OR hybrid cars)\n- DATE RANGE: Recent information (data after:2020)\n- WILDCARDS: Use * for unknown terms (how to * bread)\n- SPECIFIC QUESTIONS: Use who/what/when/where/why/how\n- DOMAIN TERMS: Include technical terminology for specialized topics\n- CONCISE TERMS: Prioritize keywords over sentences\n`.trim();\n\nexport const DEFAULT_COUNTRY_DESCRIPTION =\n `Country code to localize search results.\nUse standard 2-letter country codes: \"us\", \"uk\", \"ca\", \"de\", \"fr\", \"jp\", \"br\", etc.\nProvide this when the search should return results specific to a particular country.\nExamples:\n- \"us\" for United States (default)\n- \"de\" for Germany\n- \"in\" for India\n`.trim();\n\nexport const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);\nexport const dateSchema = z\n .nativeEnum(DATE_RANGE)\n .optional()\n .describe('Date range for search results.');\nexport const countrySchema = z\n .string()\n .optional()\n .describe(DEFAULT_COUNTRY_DESCRIPTION);\nexport const imagesSchema = z\n .boolean()\n 
.optional()\n .describe('Whether to also run an image search.');\n\nexport const videosSchema = z\n .boolean()\n .optional()\n .describe('Whether to also run a video search.');\n\nexport const newsSchema = z\n .boolean()\n .optional()\n .describe('Whether to also run a news search.');\n"],"names":[],"mappings":";;IAEY;AAAZ,CAAA,UAAY,UAAU,EAAA;AACpB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACf,IAAA,UAAA,CAAA,eAAA,CAAA,GAAA,GAAmB;AACnB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACf,IAAA,UAAA,CAAA,YAAA,CAAA,GAAA,GAAgB;AAChB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACjB,CAAC,EANW,UAAU,KAAV,UAAU,GAMrB,EAAA,CAAA,CAAA;AAEY,MAAA,yBAAyB,GAAG;;;;;;;;;;;;;;;;;;CAkBxC,CAAC,IAAI;AAEO,MAAA,2BAA2B,GACtC,CAAA;;;;;;;CAOD,CAAC,IAAI;AAEC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB;AACjE,MAAM,UAAU,GAAG;KACvB,UAAU,CAAC,UAAU;AACrB,KAAA,QAAQ;KACR,QAAQ,CAAC,gCAAgC;AACrC,MAAM,aAAa,GAAG;AAC1B,KAAA,MAAM;AACN,KAAA,QAAQ;KACR,QAAQ,CAAC,2BAA2B;AAChC,MAAM,YAAY,GAAG;AACzB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,sCAAsC;AAE3C,MAAM,YAAY,GAAG;AACzB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,qCAAqC;AAE1C,MAAM,UAAU,GAAG;AACvB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,oCAAoC;;;;"}
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
3
|
-
import { getAttribution } from './utils.mjs';
|
|
3
|
+
import { createDefaultLogger, getAttribution } from './utils.mjs';
|
|
4
4
|
|
|
5
|
-
/* eslint-disable no-console */
|
|
6
5
|
const chunker = {
|
|
7
6
|
cleanText: (text) => {
|
|
8
7
|
if (!text)
|
|
@@ -29,10 +28,11 @@ const chunker = {
|
|
|
29
28
|
});
|
|
30
29
|
return await splitter.splitText(text);
|
|
31
30
|
},
|
|
32
|
-
splitTexts: async (texts, options) => {
|
|
31
|
+
splitTexts: async (texts, options, logger) => {
|
|
33
32
|
// Split multiple texts
|
|
33
|
+
const logger_ = logger || createDefaultLogger();
|
|
34
34
|
const promises = texts.map((text) => chunker.splitText(text, options).catch((error) => {
|
|
35
|
-
|
|
35
|
+
logger_.error('Error splitting text:', error);
|
|
36
36
|
return [text];
|
|
37
37
|
}));
|
|
38
38
|
return Promise.all(promises);
|
|
@@ -49,13 +49,14 @@ function createSourceUpdateCallback(sourceMap) {
|
|
|
49
49
|
}
|
|
50
50
|
};
|
|
51
51
|
}
|
|
52
|
-
const getHighlights = async ({ query, content, reranker, topResults = 5, }) => {
|
|
52
|
+
const getHighlights = async ({ query, content, reranker, topResults = 5, logger, }) => {
|
|
53
|
+
const logger_ = logger || createDefaultLogger();
|
|
53
54
|
if (!content) {
|
|
54
|
-
|
|
55
|
+
logger_.warn('No content provided for highlights');
|
|
55
56
|
return;
|
|
56
57
|
}
|
|
57
58
|
if (!reranker) {
|
|
58
|
-
|
|
59
|
+
logger_.warn('No reranker provided for highlights');
|
|
59
60
|
return;
|
|
60
61
|
}
|
|
61
62
|
try {
|
|
@@ -64,12 +65,12 @@ const getHighlights = async ({ query, content, reranker, topResults = 5, }) => {
|
|
|
64
65
|
return await reranker.rerank(query, documents, topResults);
|
|
65
66
|
}
|
|
66
67
|
else {
|
|
67
|
-
|
|
68
|
+
logger_.error('Expected documents to be an array, got:', typeof documents);
|
|
68
69
|
return;
|
|
69
70
|
}
|
|
70
71
|
}
|
|
71
72
|
catch (error) {
|
|
72
|
-
|
|
73
|
+
logger_.error('Error in content processing:', error);
|
|
73
74
|
return;
|
|
74
75
|
}
|
|
75
76
|
};
|
|
@@ -82,19 +83,39 @@ const createSerperAPI = (apiKey) => {
|
|
|
82
83
|
if (config.apiKey == null || config.apiKey === '') {
|
|
83
84
|
throw new Error('SERPER_API_KEY is required for SerperAPI');
|
|
84
85
|
}
|
|
85
|
-
const getSources = async ({ query, country, numResults = 8, }) => {
|
|
86
|
+
const getSources = async ({ query, date, country, safeSearch, numResults = 8, type, }) => {
|
|
86
87
|
if (!query.trim()) {
|
|
87
88
|
return { success: false, error: 'Query cannot be empty' };
|
|
88
89
|
}
|
|
89
90
|
try {
|
|
91
|
+
const safe = ['off', 'moderate', 'active'];
|
|
90
92
|
const payload = {
|
|
91
93
|
q: query,
|
|
94
|
+
safe: safe[safeSearch ?? 1],
|
|
92
95
|
num: Math.min(Math.max(1, numResults), 10),
|
|
93
96
|
};
|
|
97
|
+
// Set the search type if provided
|
|
98
|
+
if (type) {
|
|
99
|
+
payload.type = type;
|
|
100
|
+
}
|
|
101
|
+
if (date != null) {
|
|
102
|
+
payload.tbs = `qdr:${date}`;
|
|
103
|
+
}
|
|
94
104
|
if (country != null && country !== '') {
|
|
95
105
|
payload['gl'] = country.toLowerCase();
|
|
96
106
|
}
|
|
97
|
-
|
|
107
|
+
// Determine the API endpoint based on the search type
|
|
108
|
+
let apiEndpoint = config.apiUrl;
|
|
109
|
+
if (type === 'images') {
|
|
110
|
+
apiEndpoint = 'https://google.serper.dev/images';
|
|
111
|
+
}
|
|
112
|
+
else if (type === 'videos') {
|
|
113
|
+
apiEndpoint = 'https://google.serper.dev/videos';
|
|
114
|
+
}
|
|
115
|
+
else if (type === 'news') {
|
|
116
|
+
apiEndpoint = 'https://google.serper.dev/news';
|
|
117
|
+
}
|
|
118
|
+
const response = await axios.post(apiEndpoint, payload, {
|
|
98
119
|
headers: {
|
|
99
120
|
'X-API-KEY': config.apiKey,
|
|
100
121
|
'Content-Type': 'application/json',
|
|
@@ -110,6 +131,8 @@ const createSerperAPI = (apiKey) => {
|
|
|
110
131
|
peopleAlsoAsk: data.peopleAlsoAsk,
|
|
111
132
|
knowledgeGraph: data.knowledgeGraph,
|
|
112
133
|
relatedSearches: data.relatedSearches,
|
|
134
|
+
videos: data.videos ?? [],
|
|
135
|
+
news: data.news ?? [],
|
|
113
136
|
};
|
|
114
137
|
return { success: true, data: results };
|
|
115
138
|
}
|
|
@@ -129,7 +152,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
129
152
|
if (config.instanceUrl == null || config.instanceUrl === '') {
|
|
130
153
|
throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
|
|
131
154
|
}
|
|
132
|
-
const getSources = async ({ query, numResults = 8, }) => {
|
|
155
|
+
const getSources = async ({ query, numResults = 8, type, }) => {
|
|
133
156
|
if (!query.trim()) {
|
|
134
157
|
return { success: false, error: 'Query cannot be empty' };
|
|
135
158
|
}
|
|
@@ -142,12 +165,23 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
142
165
|
if (!searchUrl.endsWith('/search')) {
|
|
143
166
|
searchUrl = searchUrl.replace(/\/$/, '') + '/search';
|
|
144
167
|
}
|
|
168
|
+
// Determine the search category based on the type
|
|
169
|
+
let category = 'general';
|
|
170
|
+
if (type === 'images') {
|
|
171
|
+
category = 'images';
|
|
172
|
+
}
|
|
173
|
+
else if (type === 'videos') {
|
|
174
|
+
category = 'videos';
|
|
175
|
+
}
|
|
176
|
+
else if (type === 'news') {
|
|
177
|
+
category = 'news';
|
|
178
|
+
}
|
|
145
179
|
// Prepare parameters for SearXNG
|
|
146
180
|
const params = {
|
|
147
181
|
q: query,
|
|
148
182
|
format: 'json',
|
|
149
183
|
pageno: 1,
|
|
150
|
-
categories:
|
|
184
|
+
categories: category,
|
|
151
185
|
language: 'all',
|
|
152
186
|
safesearch: 0,
|
|
153
187
|
engines: 'google,bing,duckduckgo',
|
|
@@ -188,6 +222,8 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
188
222
|
topStories: [],
|
|
189
223
|
// Use undefined instead of null for optional properties
|
|
190
224
|
relatedSearches: data.suggestions ?? [],
|
|
225
|
+
videos: [],
|
|
226
|
+
news: [],
|
|
191
227
|
};
|
|
192
228
|
return { success: true, data: results };
|
|
193
229
|
}
|
|
@@ -220,11 +256,12 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
220
256
|
const { topResults = 5,
|
|
221
257
|
// strategies = ['no_extraction'],
|
|
222
258
|
// filterContent = true,
|
|
223
|
-
reranker, } = config;
|
|
259
|
+
reranker, logger, } = config;
|
|
260
|
+
const logger_ = logger || createDefaultLogger();
|
|
224
261
|
const firecrawlScraper = scraperInstance;
|
|
225
262
|
const webScraper = {
|
|
226
263
|
scrapeMany: async ({ query, links, onGetHighlights, }) => {
|
|
227
|
-
|
|
264
|
+
logger_.debug(`Scraping ${links.length} links with Firecrawl`);
|
|
228
265
|
const promises = [];
|
|
229
266
|
try {
|
|
230
267
|
for (let i = 0; i < links.length; i++) {
|
|
@@ -232,7 +269,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
232
269
|
const promise = firecrawlScraper
|
|
233
270
|
.scrapeUrl(currentLink, {})
|
|
234
271
|
.then(([url, response]) => {
|
|
235
|
-
const attribution = getAttribution(url, response.data?.metadata);
|
|
272
|
+
const attribution = getAttribution(url, response.data?.metadata, logger_);
|
|
236
273
|
if (response.success && response.data) {
|
|
237
274
|
const [content, references] = firecrawlScraper.extractContent(response);
|
|
238
275
|
return {
|
|
@@ -252,7 +289,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
252
289
|
.then(async (result) => {
|
|
253
290
|
try {
|
|
254
291
|
if (result.error != null) {
|
|
255
|
-
|
|
292
|
+
logger_.error(`Error scraping ${result.url}: ${result.content}`, result.error);
|
|
256
293
|
return {
|
|
257
294
|
...result,
|
|
258
295
|
};
|
|
@@ -261,6 +298,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
261
298
|
query,
|
|
262
299
|
reranker,
|
|
263
300
|
content: result.content,
|
|
301
|
+
logger: logger_,
|
|
264
302
|
});
|
|
265
303
|
if (onGetHighlights) {
|
|
266
304
|
onGetHighlights(result.url);
|
|
@@ -271,14 +309,14 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
271
309
|
};
|
|
272
310
|
}
|
|
273
311
|
catch (error) {
|
|
274
|
-
|
|
312
|
+
logger_.error('Error processing scraped content:', error);
|
|
275
313
|
return {
|
|
276
314
|
...result,
|
|
277
315
|
};
|
|
278
316
|
}
|
|
279
317
|
})
|
|
280
318
|
.catch((error) => {
|
|
281
|
-
|
|
319
|
+
logger_.error(`Error scraping ${currentLink}:`, error);
|
|
282
320
|
return {
|
|
283
321
|
url: currentLink,
|
|
284
322
|
error: true,
|
|
@@ -290,7 +328,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
290
328
|
return await Promise.all(promises);
|
|
291
329
|
}
|
|
292
330
|
catch (error) {
|
|
293
|
-
|
|
331
|
+
logger_.error('Error in scrapeMany:', error);
|
|
294
332
|
return [];
|
|
295
333
|
}
|
|
296
334
|
},
|
|
@@ -316,7 +354,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
316
354
|
});
|
|
317
355
|
}
|
|
318
356
|
};
|
|
319
|
-
const processSources = async ({ result, numElements, query, proMode = true, onGetHighlights, }) => {
|
|
357
|
+
const processSources = async ({ result, numElements, query, news, proMode = true, onGetHighlights, }) => {
|
|
320
358
|
try {
|
|
321
359
|
if (!result.data) {
|
|
322
360
|
return {
|
|
@@ -363,7 +401,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
363
401
|
// Collect top story links, excluding any that are already in organic links
|
|
364
402
|
const topStories = result.data.topStories ?? [];
|
|
365
403
|
const topStoryLinks = collectLinks(topStories, sourceMap, organicLinksSet);
|
|
366
|
-
if (organicLinks.length === 0 && topStoryLinks.length === 0) {
|
|
404
|
+
if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
|
|
367
405
|
return result.data;
|
|
368
406
|
}
|
|
369
407
|
const onContentScraped = createSourceUpdateCallback(sourceMap);
|
|
@@ -379,7 +417,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
379
417
|
}));
|
|
380
418
|
}
|
|
381
419
|
// Process top story links
|
|
382
|
-
if (topStoryLinks.length > 0) {
|
|
420
|
+
if (news && topStoryLinks.length > 0) {
|
|
383
421
|
promises.push(fetchContents({
|
|
384
422
|
query,
|
|
385
423
|
onGetHighlights,
|
|
@@ -389,17 +427,16 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
389
427
|
}));
|
|
390
428
|
}
|
|
391
429
|
await Promise.all(promises);
|
|
392
|
-
// Update sources with scraped content
|
|
393
430
|
if (result.data.organic.length > 0) {
|
|
394
431
|
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
395
432
|
}
|
|
396
|
-
if (topStories.length > 0) {
|
|
433
|
+
if (news && topStories.length > 0) {
|
|
397
434
|
updateSourcesWithContent(topStories, sourceMap);
|
|
398
435
|
}
|
|
399
436
|
return result.data;
|
|
400
437
|
}
|
|
401
438
|
catch (error) {
|
|
402
|
-
|
|
439
|
+
logger_.error('Error in processSources:', error);
|
|
403
440
|
return {
|
|
404
441
|
organic: [],
|
|
405
442
|
topStories: [],
|