@librechat/agents 2.4.320 → 2.4.322
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/firecrawl.cjs +6 -4
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +117 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +125 -52
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -47
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +34 -5
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +6 -4
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +118 -81
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +126 -53
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +161 -46
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +33 -6
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +1 -0
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/tool.d.ts +13 -0
- package/dist/types/tools/search/types.d.ts +36 -0
- package/dist/types/tools/search/utils.d.ts +9 -2
- package/package.json +3 -2
- package/src/scripts/search.ts +3 -0
- package/src/tools/search/firecrawl.ts +9 -4
- package/src/tools/search/format.ts +157 -87
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +165 -52
- package/src/tools/search/tool.ts +217 -44
- package/src/tools/search/types.ts +37 -0
- package/src/tools/search/utils.ts +37 -5
- package/src/utils/llmConfig.ts +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rerankers.cjs","sources":["../../../../src/tools/search/rerankers.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport type * as t from './types';\n\nexport abstract class BaseReranker {\n protected apiKey: string | undefined;\n\n constructor() {\n // Each specific reranker will set its API key\n }\n\n abstract rerank(\n query: string,\n documents: string[],\n topK?: number\n ): Promise<t.Highlight[]>;\n\n protected getDefaultRanking(\n documents: string[],\n topK: number\n ): t.Highlight[] {\n return documents\n .slice(0, Math.min(topK, documents.length))\n .map((doc) => ({ text: doc, score: 0 }));\n }\n\n protected logDocumentSamples(documents: string[]): void {\n console.log('Sample documents being sent to API:');\n for (let i = 0; i < Math.min(3, documents.length); i++) {\n console.log(`Document ${i}: ${documents[i].substring(0, 100)}...`);\n }\n }\n}\n\nexport class JinaReranker extends BaseReranker {\n constructor({ apiKey = process.env.JINA_API_KEY }: { apiKey?: string }) {\n super();\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(`Reranking ${documents.length} documents with Jina`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n console.warn('JINA_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'jina-reranker-v2-base-multilingual',\n query: query,\n top_n: topK,\n documents: documents,\n return_documents: true,\n };\n\n const response = await axios.post<t.JinaRerankerResponse | undefined>(\n 'https://api.jina.ai/v1/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n console.log('Jina API response structure:');\n console.log('Model:', response.data?.model);\n console.log('Usage:', response.data?.usage);\n console.log('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n console.log(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n let text = '';\n\n // If return_documents is true, the document field will be present\n if (result.document != null) {\n const doc = result.document;\n if (typeof doc === 'object' && 'text' in doc) {\n text = doc.text;\n } else if (typeof doc === 'string') {\n text = doc;\n }\n } else {\n // Otherwise, use the index to get the document\n text = documents[docIndex];\n }\n\n return { text, score };\n });\n } else {\n console.warn(\n 'Unexpected response format from Jina API. Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n console.error('Error using Jina reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class CohereReranker extends BaseReranker {\n constructor({ apiKey = process.env.COHERE_API_KEY }: { apiKey?: string }) {\n super();\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(`Reranking ${documents.length} documents with Cohere`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n console.warn('COHERE_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'rerank-v3.5',\n query: query,\n top_n: topK,\n documents: documents,\n };\n\n const response = await axios.post<t.CohereRerankerResponse | undefined>(\n 'https://api.cohere.com/v2/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n console.log('Cohere API response structure:');\n console.log('ID:', response.data?.id);\n console.log('Meta:', response.data?.meta);\n console.log('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n console.log(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n const text = documents[docIndex];\n return { text, score };\n });\n } else {\n console.warn(\n 'Unexpected response format from Cohere API. Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n console.error('Error using Cohere reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class InfinityReranker extends BaseReranker {\n constructor() {\n super();\n // No API key needed for the placeholder implementation\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n console.log(\n `Reranking ${documents.length} documents with Infinity (placeholder)`\n );\n // This would be replaced with actual Infinity reranker implementation\n return this.getDefaultRanking(documents, topK);\n }\n}\n\n/**\n * Creates the appropriate reranker based on type and configuration\n */\nexport const createReranker = (config: {\n rerankerType: t.RerankerType;\n jinaApiKey?: string;\n cohereApiKey?: string;\n}): BaseReranker | undefined => {\n const { rerankerType, jinaApiKey, cohereApiKey } = config;\n\n switch (rerankerType.toLowerCase()) {\n case 'jina':\n return new JinaReranker({ apiKey: jinaApiKey });\n case 'cohere':\n return new CohereReranker({ apiKey: cohereApiKey });\n case 'infinity':\n return new InfinityReranker();\n case 'none':\n console.log('Skipping reranking as reranker is set to \"none\"');\n return undefined;\n default:\n console.warn(\n `Unknown reranker type: ${rerankerType}. Defaulting to InfinityReranker.`\n );\n return new JinaReranker({ apiKey: jinaApiKey });\n }\n};\n\n// Example usage:\n// const jinaReranker = new JinaReranker();\n// const cohereReranker = new CohereReranker();\n// const infinityReranker = new InfinityReranker();\n"],"names":[],"mappings":";;;;AAAA;MAIsB,YAAY,CAAA;AACtB,IAAA,MAAM;AAEhB,IAAA,WAAA,GAAA;;;IAUU,iBAAiB,CACzB,SAAmB,EACnB,IAAY,EAAA;AAEZ,QAAA,OAAO;AACJ,aAAA,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,MAAM,CAAC;AACzC,aAAA,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;;AAGlC,IAAA,kBAAkB,CAAC,SAAmB,EAAA;AAC9C,QAAA,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC;QAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE;AACtD,YAAA,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA,EAAA,EAAK,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA,GAAA,CAAK,CAAC;;;AAGvE;AAEK,MAAO,YAAa,SAAQ,YAAY,CAAA;IAC5C,WAAY,CAAA,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,EAAuB,EAAA;AACpE,QAAA,KAAK,EAAE;AACP,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CAAC,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAsB,oBAAA,CAAA,CAAC;AAEhE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,OAAO,CAAC,IAAI,CAAC,iDAAiD,CAAC;gBAC/D,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,oCAAoC;AAC3C,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;AACpB,gBAAA,gBAAgB,EAAE,IAAI;aACvB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,+BAA+B,EAC/B,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AAC3C,YAAA,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAG5D,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,OAAO,CAAC,GAAG,CACT,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;oBACpC,IAAI,IAAI,GAAG,EAAE;;AAGb,oBAAA,IAAI,MAAM,CAAC,QAAQ,IAAI,IAAI,EAAE;AAC3B,wBAAA,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ;wBAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,MAAM,IAAI,GAAG,EAAE;AAC5C,4BAAA,IAAI,GAAG,GAAG,CAAC,IAAI;;AACV,6BAAA,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE;4BAClC,IAAI,GAAG,GAAG;;;yBAEP;;AAEL,wBAAA,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;;AAG5B,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,OAAO,CAAC,IAAI,CACV,kEAAkE,CACnE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC;;YAElD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,cAAe,SAAQ,YAAY,CAAA;IAC9C,WAAY,CAAA,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,EAAuB,EAAA;AACtE,QAAA,KAAK,EAAE;AACP,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CAAC,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAwB,sBAAA,CAAA,CAAC;AAElE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,OAAO,CAAC,IAAI,CAAC,mDAAmD,CAAC;gBACjE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,aAAa;AACpB,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;aACrB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,kCAAkC,EAClC,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;AACzC,YAAA,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAG5D,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,OAAO,CAAC,GAAG,CACT,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;AACpC,oBAAA,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;AAChC,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,OAAO,CAAC,IAAI,CACV,oEAAoE,CACrE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;;YAEpD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,gBAAiB,SAAQ,YAAY,CAAA;AAChD,IAAA,WAAA,GAAA;AACE,QAAA,KAAK,EAAE;;;IAIT,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,OAAO,CAAC,GAAG,CACT,CAAA,UAAA,EAAa,SAAS,CAAC,MAAM,CAAwC,sCAAA,CAAA,CACtE;;QAED,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAEjD;AAED;;AAEG;AACU,MAAA,cAAc,GAAG,CAAC,MAI9B,KAA8B;IAC7B,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,GAAG,MAAM;AAEzD,IAAA,QAAQ,YAAY,CAAC,WAAW,EAAE;AAClC,QAAA,KAAK,MAAM;YACT,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;AACjD,QAAA,KAAK,QAAQ;YACX,OAAO,IAAI,cAAc,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;AACrD,QAAA,KAAK,UAAU;YACb,OAAO,IAAI,gBAAgB,EAAE;AAC/B,QAAA,KAAK,MAAM;AACT,YAAA,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC;AAC9D,YAAA,OAAO,SAAS;AAClB,QAAA;AACE,YAAA,OAAO,CAAC,IAAI,CACV,0BAA0B,YAAY,CAAA,iCAAA,CAAmC,CAC1E;YACD,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;;AAEnD;AAEA;AACA;AACA;AACA;;;;;;;;"}
|
|
1
|
+
{"version":3,"file":"rerankers.cjs","sources":["../../../../src/tools/search/rerankers.ts"],"sourcesContent":["import axios from 'axios';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\nexport abstract class BaseReranker {\n protected apiKey: string | undefined;\n protected logger: t.Logger;\n\n constructor(logger?: t.Logger) {\n // Each specific reranker will set its API key\n this.logger = logger || createDefaultLogger();\n }\n\n abstract rerank(\n query: string,\n documents: string[],\n topK?: number\n ): Promise<t.Highlight[]>;\n\n protected getDefaultRanking(\n documents: string[],\n topK: number\n ): t.Highlight[] {\n return documents\n .slice(0, Math.min(topK, documents.length))\n .map((doc) => ({ text: doc, score: 0 }));\n }\n\n protected logDocumentSamples(documents: string[]): void {\n this.logger.debug('Sample documents being sent to API:');\n for (let i = 0; i < Math.min(3, documents.length); i++) {\n this.logger.debug(`Document ${i}: ${documents[i].substring(0, 100)}...`);\n }\n }\n}\n\nexport class JinaReranker extends BaseReranker {\n constructor({\n apiKey = process.env.JINA_API_KEY,\n logger,\n }: {\n apiKey?: string;\n logger?: t.Logger;\n }) {\n super(logger);\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(`Reranking ${documents.length} documents with Jina`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n this.logger.warn('JINA_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'jina-reranker-v2-base-multilingual',\n query: query,\n top_n: topK,\n documents: documents,\n return_documents: true,\n };\n\n const response = await axios.post<t.JinaRerankerResponse | undefined>(\n 'https://api.jina.ai/v1/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n this.logger.debug('Jina API response structure:');\n this.logger.debug('Model:', response.data?.model);\n this.logger.debug('Usage:', response.data?.usage);\n this.logger.debug('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n this.logger.debug(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n let text = '';\n\n // If return_documents is true, the document field will be present\n if (result.document != null) {\n const doc = result.document;\n if (typeof doc === 'object' && 'text' in doc) {\n text = doc.text;\n } else if (typeof doc === 'string') {\n text = doc;\n }\n } else {\n // Otherwise, use the index to get the document\n text = documents[docIndex];\n }\n\n return { text, score };\n });\n } else {\n this.logger.warn(\n 'Unexpected response format from Jina API. Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n this.logger.error('Error using Jina reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class CohereReranker extends BaseReranker {\n constructor({\n apiKey = process.env.COHERE_API_KEY,\n logger,\n }: {\n apiKey?: string;\n logger?: t.Logger;\n }) {\n super(logger);\n this.apiKey = apiKey;\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(`Reranking ${documents.length} documents with Cohere`);\n\n try {\n if (this.apiKey == null || this.apiKey === '') {\n this.logger.warn('COHERE_API_KEY is not set. Using default ranking.');\n return this.getDefaultRanking(documents, topK);\n }\n\n this.logDocumentSamples(documents);\n\n const requestData = {\n model: 'rerank-v3.5',\n query: query,\n top_n: topK,\n documents: documents,\n };\n\n const response = await axios.post<t.CohereRerankerResponse | undefined>(\n 'https://api.cohere.com/v2/rerank',\n requestData,\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n }\n );\n\n // Log the response data structure\n this.logger.debug('Cohere API response structure:');\n this.logger.debug('ID:', response.data?.id);\n this.logger.debug('Meta:', response.data?.meta);\n this.logger.debug('Results count:', response.data?.results.length);\n\n // Log a sample of the results\n if ((response.data?.results.length ?? 0) > 0) {\n this.logger.debug(\n 'Sample result:',\n JSON.stringify(response.data?.results[0], null, 2)\n );\n }\n\n if (response.data && response.data.results.length) {\n return response.data.results.map((result) => {\n const docIndex = result.index;\n const score = result.relevance_score;\n const text = documents[docIndex];\n return { text, score };\n });\n } else {\n this.logger.warn(\n 'Unexpected response format from Cohere API. Using default ranking.'\n );\n return this.getDefaultRanking(documents, topK);\n }\n } catch (error) {\n this.logger.error('Error using Cohere reranker:', error);\n // Fallback to default ranking on error\n return this.getDefaultRanking(documents, topK);\n }\n }\n}\n\nexport class InfinityReranker extends BaseReranker {\n constructor(logger?: t.Logger) {\n super(logger);\n // No API key needed for the placeholder implementation\n }\n\n async rerank(\n query: string,\n documents: string[],\n topK: number = 5\n ): Promise<t.Highlight[]> {\n this.logger.debug(\n `Reranking ${documents.length} documents with Infinity (placeholder)`\n );\n // This would be replaced with actual Infinity reranker implementation\n return this.getDefaultRanking(documents, topK);\n }\n}\n\n/**\n * Creates the appropriate reranker based on type and configuration\n */\nexport const createReranker = (config: {\n rerankerType: t.RerankerType;\n jinaApiKey?: string;\n cohereApiKey?: string;\n logger?: t.Logger;\n}): BaseReranker | undefined => {\n const { rerankerType, jinaApiKey, cohereApiKey, logger } = config;\n\n // Create a default logger if none is provided\n const defaultLogger = logger || createDefaultLogger();\n\n switch (rerankerType.toLowerCase()) {\n case 'jina':\n return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });\n case 'cohere':\n return new CohereReranker({\n apiKey: cohereApiKey,\n logger: defaultLogger,\n });\n case 'infinity':\n return new InfinityReranker(defaultLogger);\n case 'none':\n defaultLogger.debug('Skipping reranking as reranker is set to \"none\"');\n return undefined;\n default:\n defaultLogger.warn(\n `Unknown reranker type: ${rerankerType}. Defaulting to InfinityReranker.`\n );\n return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });\n }\n};\n\n// Example usage:\n// const jinaReranker = new JinaReranker();\n// const cohereReranker = new CohereReranker();\n// const infinityReranker = new InfinityReranker();\n"],"names":["createDefaultLogger"],"mappings":";;;;;MAIsB,YAAY,CAAA;AACtB,IAAA,MAAM;AACN,IAAA,MAAM;AAEhB,IAAA,WAAA,CAAY,MAAiB,EAAA;;AAE3B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,IAAIA,yBAAmB,EAAE;;IASrC,iBAAiB,CACzB,SAAmB,EACnB,IAAY,EAAA;AAEZ,QAAA,OAAO;AACJ,aAAA,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,MAAM,CAAC;AACzC,aAAA,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;;AAGlC,IAAA,kBAAkB,CAAC,SAAmB,EAAA;AAC9C,QAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,qCAAqC,CAAC;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE;YACtD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAY,SAAA,EAAA,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAK,GAAA,CAAA,CAAC;;;AAG7E;AAEK,MAAO,YAAa,SAAQ,YAAY,CAAA;IAC5C,WAAY,CAAA,EACV,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,EACjC,MAAM,GAIP,EAAA;QACC,KAAK,CAAC,MAAM,CAAC;AACb,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAsB,oBAAA,CAAA,CAAC;AAEtE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC;gBACnE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,oCAAoC;AAC3C,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;AACpB,gBAAA,gBAAgB,EAAE,IAAI;aACvB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,+BAA+B,EAC/B,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;AACjD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAGlE,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;oBACpC,IAAI,IAAI,GAAG,EAAE;;AAGb,oBAAA,IAAI,MAAM,CAAC,QAAQ,IAAI,IAAI,EAAE;AAC3B,wBAAA,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ;wBAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,MAAM,IAAI,GAAG,EAAE;AAC5C,4BAAA,IAAI,GAAG,GAAG,CAAC,IAAI;;AACV,6BAAA,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE;4BAClC,IAAI,GAAG,GAAG;;;yBAEP;;AAEL,wBAAA,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;;AAG5B,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,kEAAkE,CACnE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC;;YAEtD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,cAAe,SAAQ,YAAY,CAAA;IAC9C,WAAY,CAAA,EACV,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,EACnC,MAAM,GAIP,EAAA;QACC,KAAK,CAAC,MAAM,CAAC;AACb,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;;IAGtB,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAwB,sBAAA,CAAA,CAAC;AAExE,QAAA,IAAI;AACF,YAAA,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,EAAE,EAAE;AAC7C,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,mDAAmD,CAAC;gBACrE,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAGhD,YAAA,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC;AAElC,YAAA,MAAM,WAAW,GAAG;AAClB,gBAAA,KAAK,EAAE,aAAa;AACpB,gBAAA,KAAK,EAAE,KAAK;AACZ,gBAAA,KAAK,EAAE,IAAI;AACX,gBAAA,SAAS,EAAE,SAAS;aACrB;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,kCAAkC,EAClC,WAAW,EACX;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;AACF,aAAA,CACF;;AAGD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gCAAgC,CAAC;AACnD,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;AAC3C,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC;AAC/C,YAAA,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC;;AAGlE,YAAA,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;gBAC5C,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,gBAAgB,EAChB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CACnD;;AAGH,YAAA,IAAI,QAAQ,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;gBACjD,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,KAAI;AAC1C,oBAAA,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;AAC7B,oBAAA,MAAM,KAAK,GAAG,MAAM,CAAC,eAAe;AACpC,oBAAA,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC;AAChC,oBAAA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE;AACxB,iBAAC,CAAC;;iBACG;AACL,gBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,oEAAoE,CACrE;gBACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;QAEhD,OAAO,KAAK,EAAE;YACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;;YAExD,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;;AAGnD;AAEK,MAAO,gBAAiB,SAAQ,YAAY,CAAA;AAChD,IAAA,WAAA,CAAY,MAAiB,EAAA;QAC3B,KAAK,CAAC,MAAM,CAAC;;;IAIf,MAAM,MAAM,CACV,KAAa,EACb,SAAmB,EACnB,OAAe,CAAC,EAAA;QAEhB,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAAa,UAAA,EAAA,SAAS,CAAC,MAAM,CAAwC,sCAAA,CAAA,CACtE;;QAED,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC;;AAEjD;AAED;;AAEG;AACU,MAAA,cAAc,GAAG,CAAC,MAK9B,KAA8B;IAC7B,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,MAAM;;AAGjE,IAAA,MAAM,aAAa,GAAG,MAAM,IAAIA,yBAAmB,EAAE;AAErD,IAAA,QAAQ,YAAY,CAAC,WAAW,EAAE;AAClC,QAAA,KAAK,MAAM;AACT,YAAA,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AACxE,QAAA,KAAK,QAAQ;YACX,OAAO,IAAI,cAAc,CAAC;AACxB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,MAAM,EAAE,aAAa;AACtB,aAAA,CAAC;AACJ,QAAA,KAAK,UAAU;AACb,YAAA,OAAO,IAAI,gBAAgB,CAAC,aAAa,CAAC;AAC5C,QAAA,KAAK,MAAM;AACT,YAAA,aAAa,CAAC,KAAK,CAAC,iDAAiD,CAAC;AACtE,YAAA,OAAO,SAAS;AAClB,QAAA;AACE,YAAA,aAAa,CAAC,IAAI,CAChB,0BAA0B,YAAY,CAAA,iCAAA,CAAmC,CAC1E;AACD,YAAA,OAAO,IAAI,YAAY,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;;AAE1E;AAEA;AACA;AACA;AACA;;;;;;;;"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var zod = require('zod');
|
|
4
|
+
|
|
5
|
+
exports.DATE_RANGE = void 0;
|
|
6
|
+
(function (DATE_RANGE) {
|
|
7
|
+
DATE_RANGE["PAST_HOUR"] = "h";
|
|
8
|
+
DATE_RANGE["PAST_24_HOURS"] = "d";
|
|
9
|
+
DATE_RANGE["PAST_WEEK"] = "w";
|
|
10
|
+
DATE_RANGE["PAST_MONTH"] = "m";
|
|
11
|
+
DATE_RANGE["PAST_YEAR"] = "y";
|
|
12
|
+
})(exports.DATE_RANGE || (exports.DATE_RANGE = {}));
|
|
13
|
+
const DEFAULT_QUERY_DESCRIPTION = `
|
|
14
|
+
GUIDELINES:
|
|
15
|
+
- Start broad, then narrow: Begin with key concepts, then refine with specifics
|
|
16
|
+
- Think like sources: Use terminology experts would use in the field
|
|
17
|
+
- Consider perspective: Frame queries from different viewpoints for better results
|
|
18
|
+
- Quality over quantity: A precise 3-4 word query often beats lengthy sentences
|
|
19
|
+
|
|
20
|
+
TECHNIQUES (combine for power searches):
|
|
21
|
+
- EXACT PHRASES: Use quotes ("climate change report")
|
|
22
|
+
- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)
|
|
23
|
+
- SITE-SPECIFIC: Restrict to websites (site:edu research)
|
|
24
|
+
- FILETYPE: Find specific documents (filetype:pdf study)
|
|
25
|
+
- OR OPERATOR: Find alternatives (electric OR hybrid cars)
|
|
26
|
+
- DATE RANGE: Recent information (data after:2020)
|
|
27
|
+
- WILDCARDS: Use * for unknown terms (how to * bread)
|
|
28
|
+
- SPECIFIC QUESTIONS: Use who/what/when/where/why/how
|
|
29
|
+
- DOMAIN TERMS: Include technical terminology for specialized topics
|
|
30
|
+
- CONCISE TERMS: Prioritize keywords over sentences
|
|
31
|
+
`.trim();
|
|
32
|
+
const DEFAULT_COUNTRY_DESCRIPTION = `Country code to localize search results.
|
|
33
|
+
Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.
|
|
34
|
+
Provide this when the search should return results specific to a particular country.
|
|
35
|
+
Examples:
|
|
36
|
+
- "us" for United States (default)
|
|
37
|
+
- "de" for Germany
|
|
38
|
+
- "in" for India
|
|
39
|
+
`.trim();
|
|
40
|
+
const querySchema = zod.z.string().describe(DEFAULT_QUERY_DESCRIPTION);
|
|
41
|
+
const dateSchema = zod.z
|
|
42
|
+
.nativeEnum(exports.DATE_RANGE)
|
|
43
|
+
.optional()
|
|
44
|
+
.describe('Date range for search results.');
|
|
45
|
+
const countrySchema = zod.z
|
|
46
|
+
.string()
|
|
47
|
+
.optional()
|
|
48
|
+
.describe(DEFAULT_COUNTRY_DESCRIPTION);
|
|
49
|
+
const imagesSchema = zod.z
|
|
50
|
+
.boolean()
|
|
51
|
+
.optional()
|
|
52
|
+
.describe('Whether to also run an image search.');
|
|
53
|
+
const videosSchema = zod.z
|
|
54
|
+
.boolean()
|
|
55
|
+
.optional()
|
|
56
|
+
.describe('Whether to also run a video search.');
|
|
57
|
+
const newsSchema = zod.z
|
|
58
|
+
.boolean()
|
|
59
|
+
.optional()
|
|
60
|
+
.describe('Whether to also run a news search.');
|
|
61
|
+
|
|
62
|
+
exports.DEFAULT_COUNTRY_DESCRIPTION = DEFAULT_COUNTRY_DESCRIPTION;
|
|
63
|
+
exports.DEFAULT_QUERY_DESCRIPTION = DEFAULT_QUERY_DESCRIPTION;
|
|
64
|
+
exports.countrySchema = countrySchema;
|
|
65
|
+
exports.dateSchema = dateSchema;
|
|
66
|
+
exports.imagesSchema = imagesSchema;
|
|
67
|
+
exports.newsSchema = newsSchema;
|
|
68
|
+
exports.querySchema = querySchema;
|
|
69
|
+
exports.videosSchema = videosSchema;
|
|
70
|
+
//# sourceMappingURL=schema.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.cjs","sources":["../../../../src/tools/search/schema.ts"],"sourcesContent":["import { z } from 'zod';\n\nexport enum DATE_RANGE {\n PAST_HOUR = 'h',\n PAST_24_HOURS = 'd',\n PAST_WEEK = 'w',\n PAST_MONTH = 'm',\n PAST_YEAR = 'y',\n}\n\nexport const DEFAULT_QUERY_DESCRIPTION = `\nGUIDELINES:\n- Start broad, then narrow: Begin with key concepts, then refine with specifics\n- Think like sources: Use terminology experts would use in the field\n- Consider perspective: Frame queries from different viewpoints for better results\n- Quality over quantity: A precise 3-4 word query often beats lengthy sentences\n\nTECHNIQUES (combine for power searches):\n- EXACT PHRASES: Use quotes (\"climate change report\")\n- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)\n- SITE-SPECIFIC: Restrict to websites (site:edu research)\n- FILETYPE: Find specific documents (filetype:pdf study)\n- OR OPERATOR: Find alternatives (electric OR hybrid cars)\n- DATE RANGE: Recent information (data after:2020)\n- WILDCARDS: Use * for unknown terms (how to * bread)\n- SPECIFIC QUESTIONS: Use who/what/when/where/why/how\n- DOMAIN TERMS: Include technical terminology for specialized topics\n- CONCISE TERMS: Prioritize keywords over sentences\n`.trim();\n\nexport const DEFAULT_COUNTRY_DESCRIPTION =\n `Country code to localize search results.\nUse standard 2-letter country codes: \"us\", \"uk\", \"ca\", \"de\", \"fr\", \"jp\", \"br\", etc.\nProvide this when the search should return results specific to a particular country.\nExamples:\n- \"us\" for United States (default)\n- \"de\" for Germany\n- \"in\" for India\n`.trim();\n\nexport const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);\nexport const dateSchema = z\n .nativeEnum(DATE_RANGE)\n .optional()\n .describe('Date range for search results.');\nexport const countrySchema = z\n .string()\n .optional()\n .describe(DEFAULT_COUNTRY_DESCRIPTION);\nexport const imagesSchema = z\n .boolean()\n .optional()\n .describe('Whether to also run an image search.');\n\nexport const videosSchema = z\n .boolean()\n .optional()\n .describe('Whether to also run a video search.');\n\nexport const newsSchema = z\n .boolean()\n .optional()\n .describe('Whether to also run a news search.');\n"],"names":["DATE_RANGE","z"],"mappings":";;;;AAEYA;AAAZ,CAAA,UAAY,UAAU,EAAA;AACpB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACf,IAAA,UAAA,CAAA,eAAA,CAAA,GAAA,GAAmB;AACnB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACf,IAAA,UAAA,CAAA,YAAA,CAAA,GAAA,GAAgB;AAChB,IAAA,UAAA,CAAA,WAAA,CAAA,GAAA,GAAe;AACjB,CAAC,EANWA,kBAAU,KAAVA,kBAAU,GAMrB,EAAA,CAAA,CAAA;AAEY,MAAA,yBAAyB,GAAG;;;;;;;;;;;;;;;;;;CAkBxC,CAAC,IAAI;AAEO,MAAA,2BAA2B,GACtC,CAAA;;;;;;;CAOD,CAAC,IAAI;AAEC,MAAM,WAAW,GAAGC,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB;AACjE,MAAM,UAAU,GAAGA;KACvB,UAAU,CAACD,kBAAU;AACrB,KAAA,QAAQ;KACR,QAAQ,CAAC,gCAAgC;AACrC,MAAM,aAAa,GAAGC;AAC1B,KAAA,MAAM;AACN,KAAA,QAAQ;KACR,QAAQ,CAAC,2BAA2B;AAChC,MAAM,YAAY,GAAGA;AACzB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,sCAAsC;AAE3C,MAAM,YAAY,GAAGA;AACzB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,qCAAqC;AAE1C,MAAM,UAAU,GAAGA;AACvB,KAAA,OAAO;AACP,KAAA,QAAQ;KACR,QAAQ,CAAC,oCAAoC;;;;;;;;;;;"}
|
|
@@ -4,7 +4,6 @@ var axios = require('axios');
|
|
|
4
4
|
var textsplitters = require('@langchain/textsplitters');
|
|
5
5
|
var utils = require('./utils.cjs');
|
|
6
6
|
|
|
7
|
-
/* eslint-disable no-console */
|
|
8
7
|
const chunker = {
|
|
9
8
|
cleanText: (text) => {
|
|
10
9
|
if (!text)
|
|
@@ -31,10 +30,11 @@ const chunker = {
|
|
|
31
30
|
});
|
|
32
31
|
return await splitter.splitText(text);
|
|
33
32
|
},
|
|
34
|
-
splitTexts: async (texts, options) => {
|
|
33
|
+
splitTexts: async (texts, options, logger) => {
|
|
35
34
|
// Split multiple texts
|
|
35
|
+
const logger_ = logger || utils.createDefaultLogger();
|
|
36
36
|
const promises = texts.map((text) => chunker.splitText(text, options).catch((error) => {
|
|
37
|
-
|
|
37
|
+
logger_.error('Error splitting text:', error);
|
|
38
38
|
return [text];
|
|
39
39
|
}));
|
|
40
40
|
return Promise.all(promises);
|
|
@@ -51,13 +51,14 @@ function createSourceUpdateCallback(sourceMap) {
|
|
|
51
51
|
}
|
|
52
52
|
};
|
|
53
53
|
}
|
|
54
|
-
const getHighlights = async ({ query, content, reranker, topResults = 5, }) => {
|
|
54
|
+
const getHighlights = async ({ query, content, reranker, topResults = 5, logger, }) => {
|
|
55
|
+
const logger_ = logger || utils.createDefaultLogger();
|
|
55
56
|
if (!content) {
|
|
56
|
-
|
|
57
|
+
logger_.warn('No content provided for highlights');
|
|
57
58
|
return;
|
|
58
59
|
}
|
|
59
60
|
if (!reranker) {
|
|
60
|
-
|
|
61
|
+
logger_.warn('No reranker provided for highlights');
|
|
61
62
|
return;
|
|
62
63
|
}
|
|
63
64
|
try {
|
|
@@ -66,12 +67,12 @@ const getHighlights = async ({ query, content, reranker, topResults = 5, }) => {
|
|
|
66
67
|
return await reranker.rerank(query, documents, topResults);
|
|
67
68
|
}
|
|
68
69
|
else {
|
|
69
|
-
|
|
70
|
+
logger_.error('Expected documents to be an array, got:', typeof documents);
|
|
70
71
|
return;
|
|
71
72
|
}
|
|
72
73
|
}
|
|
73
74
|
catch (error) {
|
|
74
|
-
|
|
75
|
+
logger_.error('Error in content processing:', error);
|
|
75
76
|
return;
|
|
76
77
|
}
|
|
77
78
|
};
|
|
@@ -84,19 +85,39 @@ const createSerperAPI = (apiKey) => {
|
|
|
84
85
|
if (config.apiKey == null || config.apiKey === '') {
|
|
85
86
|
throw new Error('SERPER_API_KEY is required for SerperAPI');
|
|
86
87
|
}
|
|
87
|
-
const getSources = async ({ query, country, numResults = 8, }) => {
|
|
88
|
+
const getSources = async ({ query, date, country, safeSearch, numResults = 8, type, }) => {
|
|
88
89
|
if (!query.trim()) {
|
|
89
90
|
return { success: false, error: 'Query cannot be empty' };
|
|
90
91
|
}
|
|
91
92
|
try {
|
|
93
|
+
const safe = ['off', 'moderate', 'active'];
|
|
92
94
|
const payload = {
|
|
93
95
|
q: query,
|
|
96
|
+
safe: safe[safeSearch ?? 1],
|
|
94
97
|
num: Math.min(Math.max(1, numResults), 10),
|
|
95
98
|
};
|
|
99
|
+
// Set the search type if provided
|
|
100
|
+
if (type) {
|
|
101
|
+
payload.type = type;
|
|
102
|
+
}
|
|
103
|
+
if (date != null) {
|
|
104
|
+
payload.tbs = `qdr:${date}`;
|
|
105
|
+
}
|
|
96
106
|
if (country != null && country !== '') {
|
|
97
107
|
payload['gl'] = country.toLowerCase();
|
|
98
108
|
}
|
|
99
|
-
|
|
109
|
+
// Determine the API endpoint based on the search type
|
|
110
|
+
let apiEndpoint = config.apiUrl;
|
|
111
|
+
if (type === 'images') {
|
|
112
|
+
apiEndpoint = 'https://google.serper.dev/images';
|
|
113
|
+
}
|
|
114
|
+
else if (type === 'videos') {
|
|
115
|
+
apiEndpoint = 'https://google.serper.dev/videos';
|
|
116
|
+
}
|
|
117
|
+
else if (type === 'news') {
|
|
118
|
+
apiEndpoint = 'https://google.serper.dev/news';
|
|
119
|
+
}
|
|
120
|
+
const response = await axios.post(apiEndpoint, payload, {
|
|
100
121
|
headers: {
|
|
101
122
|
'X-API-KEY': config.apiKey,
|
|
102
123
|
'Content-Type': 'application/json',
|
|
@@ -112,6 +133,8 @@ const createSerperAPI = (apiKey) => {
|
|
|
112
133
|
peopleAlsoAsk: data.peopleAlsoAsk,
|
|
113
134
|
knowledgeGraph: data.knowledgeGraph,
|
|
114
135
|
relatedSearches: data.relatedSearches,
|
|
136
|
+
videos: data.videos ?? [],
|
|
137
|
+
news: data.news ?? [],
|
|
115
138
|
};
|
|
116
139
|
return { success: true, data: results };
|
|
117
140
|
}
|
|
@@ -131,7 +154,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
131
154
|
if (config.instanceUrl == null || config.instanceUrl === '') {
|
|
132
155
|
throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
|
|
133
156
|
}
|
|
134
|
-
const getSources = async ({ query, numResults = 8, }) => {
|
|
157
|
+
const getSources = async ({ query, numResults = 8, type, }) => {
|
|
135
158
|
if (!query.trim()) {
|
|
136
159
|
return { success: false, error: 'Query cannot be empty' };
|
|
137
160
|
}
|
|
@@ -144,12 +167,23 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
144
167
|
if (!searchUrl.endsWith('/search')) {
|
|
145
168
|
searchUrl = searchUrl.replace(/\/$/, '') + '/search';
|
|
146
169
|
}
|
|
170
|
+
// Determine the search category based on the type
|
|
171
|
+
let category = 'general';
|
|
172
|
+
if (type === 'images') {
|
|
173
|
+
category = 'images';
|
|
174
|
+
}
|
|
175
|
+
else if (type === 'videos') {
|
|
176
|
+
category = 'videos';
|
|
177
|
+
}
|
|
178
|
+
else if (type === 'news') {
|
|
179
|
+
category = 'news';
|
|
180
|
+
}
|
|
147
181
|
// Prepare parameters for SearXNG
|
|
148
182
|
const params = {
|
|
149
183
|
q: query,
|
|
150
184
|
format: 'json',
|
|
151
185
|
pageno: 1,
|
|
152
|
-
categories:
|
|
186
|
+
categories: category,
|
|
153
187
|
language: 'all',
|
|
154
188
|
safesearch: 0,
|
|
155
189
|
engines: 'google,bing,duckduckgo',
|
|
@@ -190,6 +224,8 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
190
224
|
topStories: [],
|
|
191
225
|
// Use undefined instead of null for optional properties
|
|
192
226
|
relatedSearches: data.suggestions ?? [],
|
|
227
|
+
videos: [],
|
|
228
|
+
news: [],
|
|
193
229
|
};
|
|
194
230
|
return { success: true, data: results };
|
|
195
231
|
}
|
|
@@ -222,11 +258,12 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
222
258
|
const { topResults = 5,
|
|
223
259
|
// strategies = ['no_extraction'],
|
|
224
260
|
// filterContent = true,
|
|
225
|
-
reranker, } = config;
|
|
261
|
+
reranker, logger, } = config;
|
|
262
|
+
const logger_ = logger || utils.createDefaultLogger();
|
|
226
263
|
const firecrawlScraper = scraperInstance;
|
|
227
264
|
const webScraper = {
|
|
228
265
|
scrapeMany: async ({ query, links, onGetHighlights, }) => {
|
|
229
|
-
|
|
266
|
+
logger_.debug(`Scraping ${links.length} links with Firecrawl`);
|
|
230
267
|
const promises = [];
|
|
231
268
|
try {
|
|
232
269
|
for (let i = 0; i < links.length; i++) {
|
|
@@ -234,7 +271,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
234
271
|
const promise = firecrawlScraper
|
|
235
272
|
.scrapeUrl(currentLink, {})
|
|
236
273
|
.then(([url, response]) => {
|
|
237
|
-
const attribution = utils.getAttribution(url, response.data?.metadata);
|
|
274
|
+
const attribution = utils.getAttribution(url, response.data?.metadata, logger_);
|
|
238
275
|
if (response.success && response.data) {
|
|
239
276
|
const [content, references] = firecrawlScraper.extractContent(response);
|
|
240
277
|
return {
|
|
@@ -248,13 +285,13 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
248
285
|
url,
|
|
249
286
|
attribution,
|
|
250
287
|
error: true,
|
|
251
|
-
content:
|
|
288
|
+
content: '',
|
|
252
289
|
};
|
|
253
290
|
})
|
|
254
291
|
.then(async (result) => {
|
|
255
292
|
try {
|
|
256
293
|
if (result.error != null) {
|
|
257
|
-
|
|
294
|
+
logger_.error(`Error scraping ${result.url}: ${result.content}`, result.error);
|
|
258
295
|
return {
|
|
259
296
|
...result,
|
|
260
297
|
};
|
|
@@ -263,6 +300,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
263
300
|
query,
|
|
264
301
|
reranker,
|
|
265
302
|
content: result.content,
|
|
303
|
+
logger: logger_,
|
|
266
304
|
});
|
|
267
305
|
if (onGetHighlights) {
|
|
268
306
|
onGetHighlights(result.url);
|
|
@@ -273,18 +311,18 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
273
311
|
};
|
|
274
312
|
}
|
|
275
313
|
catch (error) {
|
|
276
|
-
|
|
314
|
+
logger_.error('Error processing scraped content:', error);
|
|
277
315
|
return {
|
|
278
316
|
...result,
|
|
279
317
|
};
|
|
280
318
|
}
|
|
281
319
|
})
|
|
282
320
|
.catch((error) => {
|
|
283
|
-
|
|
321
|
+
logger_.error(`Error scraping ${currentLink}:`, error);
|
|
284
322
|
return {
|
|
285
323
|
url: currentLink,
|
|
286
324
|
error: true,
|
|
287
|
-
content:
|
|
325
|
+
content: '',
|
|
288
326
|
};
|
|
289
327
|
});
|
|
290
328
|
promises.push(promise);
|
|
@@ -292,7 +330,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
292
330
|
return await Promise.all(promises);
|
|
293
331
|
}
|
|
294
332
|
catch (error) {
|
|
295
|
-
|
|
333
|
+
logger_.error('Error in scrapeMany:', error);
|
|
296
334
|
return [];
|
|
297
335
|
}
|
|
298
336
|
},
|
|
@@ -318,7 +356,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
318
356
|
});
|
|
319
357
|
}
|
|
320
358
|
};
|
|
321
|
-
const processSources = async ({ result, numElements, query, proMode = true, onGetHighlights, }) => {
|
|
359
|
+
const processSources = async ({ result, numElements, query, news, proMode = true, onGetHighlights, }) => {
|
|
322
360
|
try {
|
|
323
361
|
if (!result.data) {
|
|
324
362
|
return {
|
|
@@ -359,44 +397,48 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
359
397
|
return result.data;
|
|
360
398
|
}
|
|
361
399
|
const sourceMap = new Map();
|
|
362
|
-
const
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
if (allLinks.length === 0) {
|
|
400
|
+
const organicLinksSet = new Set();
|
|
401
|
+
// Collect organic links
|
|
402
|
+
const organicLinks = collectLinks(result.data.organic, sourceMap, organicLinksSet);
|
|
403
|
+
// Collect top story links, excluding any that are already in organic links
|
|
404
|
+
const topStories = result.data.topStories ?? [];
|
|
405
|
+
const topStoryLinks = collectLinks(topStories, sourceMap, organicLinksSet);
|
|
406
|
+
if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
|
|
370
407
|
return result.data;
|
|
371
408
|
}
|
|
372
409
|
const onContentScraped = createSourceUpdateCallback(sourceMap);
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (updatedSource) {
|
|
384
|
-
result.data.organic[i] = {
|
|
385
|
-
...source,
|
|
386
|
-
...updatedSource,
|
|
387
|
-
};
|
|
388
|
-
}
|
|
410
|
+
const promises = [];
|
|
411
|
+
// Process organic links
|
|
412
|
+
if (organicLinks.length > 0) {
|
|
413
|
+
promises.push(fetchContents({
|
|
414
|
+
query,
|
|
415
|
+
onGetHighlights,
|
|
416
|
+
onContentScraped,
|
|
417
|
+
links: organicLinks,
|
|
418
|
+
target: numElements,
|
|
419
|
+
}));
|
|
389
420
|
}
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
.
|
|
393
|
-
|
|
394
|
-
|
|
421
|
+
// Process top story links
|
|
422
|
+
if (news && topStoryLinks.length > 0) {
|
|
423
|
+
promises.push(fetchContents({
|
|
424
|
+
query,
|
|
425
|
+
onGetHighlights,
|
|
426
|
+
onContentScraped,
|
|
427
|
+
links: topStoryLinks,
|
|
428
|
+
target: numElements,
|
|
429
|
+
}));
|
|
430
|
+
}
|
|
431
|
+
await Promise.all(promises);
|
|
432
|
+
if (result.data.organic.length > 0) {
|
|
433
|
+
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
434
|
+
}
|
|
435
|
+
if (news && topStories.length > 0) {
|
|
436
|
+
updateSourcesWithContent(topStories, sourceMap);
|
|
395
437
|
}
|
|
396
438
|
return result.data;
|
|
397
439
|
}
|
|
398
440
|
catch (error) {
|
|
399
|
-
|
|
441
|
+
logger_.error('Error in processSources:', error);
|
|
400
442
|
return {
|
|
401
443
|
organic: [],
|
|
402
444
|
topStories: [],
|
|
@@ -412,6 +454,37 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
412
454
|
topResults,
|
|
413
455
|
};
|
|
414
456
|
};
|
|
457
|
+
/** Helper function to collect links and update sourceMap */
|
|
458
|
+
function collectLinks(sources, sourceMap, existingLinksSet) {
|
|
459
|
+
const links = [];
|
|
460
|
+
for (const source of sources) {
|
|
461
|
+
if (source.link) {
|
|
462
|
+
// For topStories, only add if not already in organic links
|
|
463
|
+
if (existingLinksSet && existingLinksSet.has(source.link)) {
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
466
|
+
links.push(source.link);
|
|
467
|
+
if (existingLinksSet) {
|
|
468
|
+
existingLinksSet.add(source.link);
|
|
469
|
+
}
|
|
470
|
+
sourceMap.set(source.link, source);
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
return links;
|
|
474
|
+
}
|
|
475
|
+
/** Helper function to update sources with scraped content */
|
|
476
|
+
function updateSourcesWithContent(sources, sourceMap) {
|
|
477
|
+
for (let i = 0; i < sources.length; i++) {
|
|
478
|
+
const source = sources[i];
|
|
479
|
+
const updatedSource = sourceMap.get(source.link);
|
|
480
|
+
if (updatedSource) {
|
|
481
|
+
sources[i] = {
|
|
482
|
+
...source,
|
|
483
|
+
...updatedSource,
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
}
|
|
415
488
|
|
|
416
489
|
exports.createSearchAPI = createSearchAPI;
|
|
417
490
|
exports.createSourceProcessor = createSourceProcessor;
|