@lobehub/chat 1.94.17 → 1.95.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
 
  # Changelog
 
+ ## [Version 1.95.0](https://github.com/lobehub/lobe-chat/compare/v1.94.17...v1.95.0)
+
+ <sup>Released on **2025-06-20**</sup>
+
+ #### ✨ Features
+
+ - **misc**: Add Brave & Google PSE & Kagi as built-in Search Provider.
+
+ <br/>
+
+ <details>
+ <summary><kbd>Improvements and Fixes</kbd></summary>
+
+ #### What's improved
+
+ - **misc**: Add Brave & Google PSE & Kagi as built-in Search Provider, closes [#8172](https://github.com/lobehub/lobe-chat/issues/8172) ([16ae521](https://github.com/lobehub/lobe-chat/commit/16ae521))
+
+ </details>
+
+ <div align="right">
+
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+ </div>
+
  ### [Version 1.94.17](https://github.com/lobehub/lobe-chat/compare/v1.94.16...v1.94.17)
 
  <sup>Released on **2025-06-20**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
  [
+   {
+     "children": {
+       "features": [
+         "Add Brave & Google PSE & Kagi as built-in Search Provider."
+       ]
+     },
+     "date": "2025-06-20",
+     "version": "1.95.0"
+   },
    {
      "children": {
        "improvements": [
@@ -14,16 +14,134 @@ tags:
 
  # Configuring Online Search Functionality
 
- LobeChat supports configuring online search functionality for AI, allowing it to access the latest web information and provide more accurate and timely responses. The online search feature is based on the [SearXNG](https://github.com/searxng/searxng) search engine, which is a privacy-respecting metasearch engine that aggregates results from multiple search engines.
+ LobeChat supports configuring **web search functionality** for AI, enabling it to retrieve real-time information from the internet and provide more accurate, up-to-date responses. Web search supports multiple search engine providers, including [SearXNG](https://github.com/searxng/searxng), [Search1API](https://www.search1api.com), [Google](https://programmablesearchengine.google.com), and [Brave](https://brave.com/search/api), among others.
 
- <Callout type={'info'}>
-   SearXNG is an open-source metasearch engine that can be self-hosted or accessed via public
-   instances. By configuring SearXNG, LobeChat enables AI to retrieve the latest internet
-   information, allowing it to answer time-sensitive questions and provide up-to-date news.
+ <Callout type="info">
+   Web search allows AI to access time-sensitive content, such as the latest news, technology trends, or product information. You can deploy the open-source SearXNG yourself, or integrate mainstream search services such as Search1API, Google, or Brave, combining them freely to suit your use case.
  </Callout>
 
+ By setting the search service environment variable `SEARCH_PROVIDERS` and the corresponding API keys, LobeChat will query multiple sources and return the results. You can also configure crawler service environment variables such as `CRAWLER_IMPLS` (e.g., `browserless`, `firecrawl`, `tavily`) to extract webpage content, enhancing the combined search-and-read capability.
+
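For example, a minimal combined setup might look like the sketch below. The key values are placeholders, and the particular provider/crawler mix is just one reasonable choice, not a recommendation from the docs themselves:

```env
# Search: query Brave and a self-hosted SearXNG instance
SEARCH_PROVIDERS="brave,searxng"
BRAVE_API_KEY=your-brave-api-key
SEARXNG_URL=https://searxng.example.com

# Crawling: enable the built-in crawler plus Firecrawl as an alternative
CRAWLER_IMPLS="native,firecrawl"
FIRECRAWL_API_KEY=your-firecrawl-api-key
```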
  # Core Environment Variables
 
+ ## `CRAWLER_IMPLS`
+
+ Configure available web crawlers for structured extraction of webpage content.
+
+ ```env
+ CRAWLER_IMPLS="native,search1api"
+ ```
+
+ Supported crawler types are listed below:
+
+ | Value | Description | Environment Variable |
+ | ------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------- |
+ | `browserless` | Headless browser crawler based on [Browserless](https://www.browserless.io/), suitable for rendering complex pages. | `BROWSERLESS_TOKEN` |
+ | `exa` | Crawler capabilities provided by [Exa](https://exa.ai/); an API key is required. | `EXA_API_KEY` |
+ | `firecrawl` | [Firecrawl](https://firecrawl.dev/) headless browser API, ideal for modern websites. | `FIRECRAWL_API_KEY` |
+ | `jina` | Crawler service from [Jina AI](https://jina.ai/), supports fast content summarization. | `JINA_READER_API_KEY` |
+ | `native` | Built-in general-purpose crawler for standard web structures. | |
+ | `search1api` | Page crawling capabilities from [Search1API](https://www.search1api.com), great for structured content extraction. | `SEARCH1API_CRAWL_API_KEY` |
+ | `tavily` | Web scraping and summarization API from [Tavily](https://www.tavily.com/). | `TAVILY_API_KEY` |
+
+ > 💡 Configuring multiple crawlers increases the success rate; the system tries them in order of priority.
+
+ ---
+
+ ## `SEARCH_PROVIDERS`
+
+ Configure which search engine providers to use for web search.
+
+ ```env
+ SEARCH_PROVIDERS="searxng"
+ ```
+
+ Supported search engines include:
+
+ | Value | Description | Environment Variable |
+ | ------------ | ---------------------------------------------------------------------------------------- | ------------------------------------------- |
+ | `anspire` | Search service provided by [Anspire](https://anspire.ai/). | `ANSPIRE_API_KEY` |
+ | `bocha` | Search service from [Bocha](https://open.bochaai.com/). | `BOCHA_API_KEY` |
+ | `brave` | [Brave](https://search.brave.com/help/api), a privacy-friendly search source. | `BRAVE_API_KEY` |
+ | `exa` | [Exa](https://exa.ai/), a search API designed for AI. | `EXA_API_KEY` |
+ | `firecrawl` | Search capabilities via [Firecrawl](https://firecrawl.dev/). | `FIRECRAWL_API_KEY` |
+ | `google` | Uses [Google Programmable Search Engine](https://programmablesearchengine.google.com/). | `GOOGLE_PSE_API_KEY` `GOOGLE_PSE_ENGINE_ID` |
+ | `jina` | Semantic search provided by [Jina AI](https://jina.ai/). | `JINA_READER_API_KEY` |
+ | `kagi` | Premium search API by [Kagi](https://kagi.com/); requires a subscription key. | `KAGI_API_KEY` |
+ | `search1api` | Aggregated search capabilities from [Search1API](https://www.search1api.com). | `SEARCH1API_CRAWL_API_KEY` |
+ | `searxng` | Use a self-hosted or public [SearXNG](https://searx.space/) instance. | `SEARXNG_URL` |
+ | `tavily` | [Tavily](https://www.tavily.com/), offers fast web summaries and answers. | `TAVILY_API_KEY` |
+
+ > ⚠️ Some search providers require you to apply for an API key and configure it in your `.env` file.
+
+ ---
+
+ ## `BROWSERLESS_URL`
+
+ Specifies the API endpoint for [Browserless](https://www.browserless.io/), used for web crawling tasks. Browserless is a browser automation platform based on headless Chrome, well suited to rendering dynamic pages.
+
+ ```env
+ BROWSERLESS_URL=https://chrome.browserless.io
+ ```
+
+ > 📌 Usually used together with `CRAWLER_IMPLS=browserless`.
+
+ ---
+
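As a concrete sketch of the Browserless pairing noted above (the token value is a placeholder; `BROWSERLESS_TOKEN` comes from the crawler table earlier):

```env
CRAWLER_IMPLS="browserless"
BROWSERLESS_URL=https://chrome.browserless.io
BROWSERLESS_TOKEN=your-browserless-token
```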
+ ## `GOOGLE_PSE_ENGINE_ID`
+
+ Configure the Search Engine ID for Google Programmable Search Engine (Google PSE), used to restrict the search scope. Must be used alongside `GOOGLE_PSE_API_KEY`.
+
+ ```env
+ GOOGLE_PSE_ENGINE_ID=your-google-cx-id
+ ```
+
+ > 🔑 How to get it: Visit [programmablesearchengine.google.com](https://programmablesearchengine.google.com/), create a search engine, and obtain the `cx` parameter.
+
+ ---
+
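Combining the two Google PSE variables with the provider switch, a complete Google setup might look like this sketch (both key values are placeholders):

```env
SEARCH_PROVIDERS="google"
GOOGLE_PSE_API_KEY=your-google-api-key
GOOGLE_PSE_ENGINE_ID=your-google-cx-id
```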
+ ## `FIRECRAWL_URL`
+
+ Sets the access URL for the [Firecrawl](https://firecrawl.dev/) API, used for web content scraping. Default value:
+
+ ```env
+ FIRECRAWL_URL=https://api.firecrawl.dev/v1
+ ```
+
+ > ⚙️ Usually does not need to be changed unless you're using a self-hosted version or a proxy service.
+
+ ---
+
+ ## `TAVILY_SEARCH_DEPTH`
+
+ Configure the result depth for [Tavily](https://www.tavily.com/) searches.
+
+ ```env
+ TAVILY_SEARCH_DEPTH=basic
+ ```
+
+ Supported values:
+
+ * `basic`: fast search that returns brief results;
+ * `advanced`: deep search that returns more context and page details.
+
+ ---
+
+ ## `TAVILY_EXTRACT_DEPTH`
+
+ Configure how deeply Tavily extracts content from web pages.
+
+ ```env
+ TAVILY_EXTRACT_DEPTH=basic
+ ```
+
+ Supported values:
+
+ * `basic`: extracts basic info such as title and content summary;
+ * `advanced`: extracts structured data, lists, charts, and more from web pages.
+
+ ---
+
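Tying the Tavily variables together, an all-Tavily search-plus-extraction setup might look like the following sketch (the API key is a placeholder):

```env
SEARCH_PROVIDERS="tavily"
CRAWLER_IMPLS="tavily"
TAVILY_API_KEY=your-tavily-api-key
TAVILY_SEARCH_DEPTH=advanced
TAVILY_EXTRACT_DEPTH=advanced
```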
  ## `SEARXNG_URL`
 
  The URL of the SearXNG instance, which is a necessary configuration to enable the online search functionality. For example:
@@ -10,15 +10,134 @@ tags:
 
  # Configuring Online Search Functionality
 
- LobeChat supports configuring web search for AI, which enables the AI to obtain the latest information from the internet and provide more accurate, timely answers. The web search feature is based on the [SearXNG](https://github.com/searxng/searxng) search engine, a privacy-respecting metasearch engine that aggregates results from multiple search engines.
+ LobeChat supports configuring **web search functionality** for AI, enabling it to retrieve real-time information from the internet and provide more accurate, up-to-date answers. Web search supports multiple search engine providers, including [SearXNG](https://github.com/searxng/searxng), [Search1API](https://www.search1api.com), [Google](https://programmablesearchengine.google.com), and [Brave](https://brave.com/search/api), among others.
 
- <Callout type={'info'}>
-   SearXNG is an open-source metasearch engine that can be self-hosted or used via public instances. By configuring SearXNG, LobeChat enables the AI to obtain the latest internet information, so it can answer time-sensitive questions and provide the latest news.
+ <Callout type="info">
+   Web search lets AI access time-sensitive content, such as the latest news, technology trends, or product information. You can self-host the open-source SearXNG, or integrate mainstream search services such as Search1API, Google, or Brave, combining them freely to fit your use case.
  </Callout>
 
+ By setting the search service environment variable `SEARCH_PROVIDERS` and the corresponding API keys, LobeChat will query multiple search sources and return the results. You can also configure crawler service environment variables such as `CRAWLER_IMPLS` (e.g., `browserless`, `firecrawl`, `tavily`) to extract webpage content, enabling enhanced search-and-read capability.
+
  # Core Environment Variables
 
+ ## `CRAWLER_IMPLS`
+
+ Configure the available web crawlers, used for structured content extraction from web pages.
+
+ ```env
+ CRAWLER_IMPLS="native,search1api"
+ ```
+
+ The supported crawler types are as follows:
+
+ | Value | Description | Environment Variable |
+ | ------------- | -------------------------------------------------------------------------------------------------------------------- | -------------------------- |
+ | `browserless` | Headless browser crawler based on [Browserless](https://www.browserless.io/), suitable for rendering complex pages. | `BROWSERLESS_TOKEN` |
+ | `exa` | Crawler capabilities provided by [Exa](https://exa.ai/); an API key is required. | `EXA_API_KEY` |
+ | `firecrawl` | [Firecrawl](https://firecrawl.dev/) headless browser API, suited to scraping modern websites. | `FIRECRAWL_API_KEY` |
+ | `jina` | Crawler service from [Jina AI](https://jina.ai/), supports fast summary extraction. | `JINA_READER_API_KEY` |
+ | `native` | Built-in general-purpose crawler for standard web page structures. | |
+ | `search1api` | Page scraping capabilities provided by [Search1API](https://www.search1api.com), suited to structured content extraction. | `SEARCH1API_CRAWL_API_KEY` |
+ | `tavily` | Web scraping and summarization API from [Tavily](https://www.tavily.com/). | `TAVILY_API_KEY` |
+
+ > 💡 Configuring multiple crawlers improves the success rate; the system tries them in order of priority.
+
+ ---
+
+ ## `SEARCH_PROVIDERS`
+
+ Configure the search engine providers used for web search.
+
+ ```env
+ SEARCH_PROVIDERS="searxng"
+ ```
+
+ The supported search engines are as follows:
+
+ | Value | Description | Environment Variable |
+ | ------------ | ------------------------------------------------------------------------------------------ | ------------------------------------------- |
+ | `anspire` | Search service provided by [Anspire](https://anspire.ai/). | `ANSPIRE_API_KEY` |
+ | `bocha` | Search service provided by [Bocha](https://open.bochaai.com/). | `BOCHA_API_KEY` |
+ | `brave` | [Brave](https://search.brave.com/help/api), a privacy-friendly search source. | `BRAVE_API_KEY` |
+ | `exa` | [Exa](https://exa.ai/), a search API designed for AI. | `EXA_API_KEY` |
+ | `firecrawl` | Search capabilities provided by [Firecrawl](https://firecrawl.dev/). | `FIRECRAWL_API_KEY` |
+ | `google` | Uses [Google Programmable Search Engine](https://programmablesearchengine.google.com/). | `GOOGLE_PSE_API_KEY` `GOOGLE_PSE_ENGINE_ID` |
+ | `jina` | Semantic search service provided by [Jina AI](https://jina.ai/). | `JINA_READER_API_KEY` |
+ | `kagi` | Premium search API provided by [Kagi](https://kagi.com/); a subscription key is required. | `KAGI_API_KEY` |
+ | `search1api` | Aggregated search capabilities from [Search1API](https://www.search1api.com). | `SEARCH1API_CRAWL_API_KEY` |
+ | `searxng` | Use a self-hosted or public [SearXNG](https://searx.space/) instance. | `SEARXNG_URL` |
+ | `tavily` | [Tavily](https://www.tavily.com/), returns fast web summaries and answers. | `TAVILY_API_KEY` |
+
+ > ⚠️ Some search providers require you to apply for an API key separately and configure the credentials in `.env`.
+
+ ---
+
+ ## `BROWSERLESS_URL`
+
+ Specifies the API endpoint of the [Browserless](https://www.browserless.io/) service, used for web crawling tasks. Browserless is a browser automation platform based on headless Chrome, suitable for dynamic pages that require rendering.
+
+ ```env
+ BROWSERLESS_URL=https://chrome.browserless.io
+ ```
+
+ > 📌 Usually needs to be enabled together with `CRAWLER_IMPLS=browserless`.
+
+ ---
+
+ ## `GOOGLE_PSE_ENGINE_ID`
+
+ Configures the search engine ID of Google Programmable Search Engine (Google PSE), used to restrict the search scope. Must be used together with `GOOGLE_PSE_API_KEY`.
+
+ ```env
+ GOOGLE_PSE_ENGINE_ID=your-google-cx-id
+ ```
+
+ > 🔑 How to obtain: visit [programmablesearchengine.google.com](https://programmablesearchengine.google.com/), create a search engine, and get the `cx` parameter value.
+
+ ---
+
+ ## `FIRECRAWL_URL`
+
+ Sets the access URL of the [Firecrawl](https://firecrawl.dev/) API, used for web content scraping. The default value is:
+
+ ```env
+ FIRECRAWL_URL=https://api.firecrawl.dev/v1
+ ```
+
+ > ⚙️ Usually no change is needed, unless you are using a self-hosted version or a proxy service.
+
+ ---
+
+ ## `TAVILY_SEARCH_DEPTH`
+
+ Configures the result depth of [Tavily](https://www.tavily.com/) searches.
+
+ ```env
+ TAVILY_SEARCH_DEPTH=basic
+ ```
+
+ Supported values:
+
+ * `basic`: fast search, returns brief results;
+ * `advanced`: deep search, returns more context and page information.
+
+ ---
+
+ ## `TAVILY_EXTRACT_DEPTH`
+
+ Configures the extraction depth Tavily uses when scraping web page content.
+
+ ```env
+ TAVILY_EXTRACT_DEPTH=basic
+ ```
+
+ Supported values:
+
+ * `basic`: extracts basic information such as the title and body summary;
+ * `advanced`: extracts structured information, lists, charts, and more from the page.
+
+ ---
+
  ## `SEARXNG_URL`
 
  The URL of the SearXNG instance; this is a required setting for enabling the web search feature. For example:
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@lobehub/chat",
-   "version": "1.94.17",
+   "version": "1.95.0",
    "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
    "keywords": [
      "framework",
@@ -0,0 +1,132 @@
+ import { TRPCError } from '@trpc/server';
+ import debug from 'debug';
+ import urlJoin from 'url-join';
+
+ import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
+
+ import { SearchServiceImpl } from '../type';
+ import { AnspireSearchParameters, AnspireResponse } from './type';
+
+ const log = debug('lobe-search:Anspire');
+
+ /**
+  * Anspire implementation of the search service
+  * Primarily used for web search
+  */
+ export class AnspireImpl implements SearchServiceImpl {
+   private get apiKey(): string | undefined {
+     return process.env.ANSPIRE_API_KEY;
+   }
+
+   private get baseUrl(): string {
+     // Anspire search API base URL
+     return 'https://plugin.anspire.cn/api';
+   }
+
+   async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
+     log('Starting Anspire query with query: "%s", params: %o', query, params);
+     const endpoint = urlJoin(this.baseUrl, '/ntsearch/search');
+
+     const defaultQueryParams: AnspireSearchParameters = {
+       mode: 0,
+       query,
+       top_k: 20,
+     };
+
+     const body: AnspireSearchParameters = {
+       ...defaultQueryParams,
+       ...(params?.searchTimeRange && params.searchTimeRange !== 'anytime'
+         ? (() => {
+             const now = Date.now();
+             const days = { day: 1, month: 30, week: 7, year: 365 }[params.searchTimeRange!];
+
+             if (days === undefined) return {};
+
+             return {
+               FromTime: new Date(now - days * 86_400 * 1000).toISOString().slice(0, 19).replace('T', ' '),
+               ToTime: new Date(now).toISOString().slice(0, 19).replace('T', ' '),
+             };
+           })()
+         : {}),
+     };
+
+     log('Constructed request body: %o', body);
+
+     const searchParams = new URLSearchParams();
+     for (const [key, value] of Object.entries(body)) {
+       searchParams.append(key, String(value));
+     }
+
+     let response: Response;
+     const startAt = Date.now();
+     let costTime = 0;
+     try {
+       log('Sending request to endpoint: %s', endpoint);
+       response = await fetch(`${endpoint}?${searchParams.toString()}`, {
+         headers: {
+           'Accept': '*/*',
+           'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
+           'Connection': 'keep-alive',
+           'Content-Type': 'application/json',
+         },
+         method: 'GET',
+       });
+       log('Received response with status: %d', response.status);
+       costTime = Date.now() - startAt;
+     } catch (error) {
+       log.extend('error')('Anspire fetch error: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'SERVICE_UNAVAILABLE',
+         message: 'Failed to connect to Anspire.',
+       });
+     }
+
+     if (!response.ok) {
+       const errorBody = await response.text();
+       log.extend('error')(
+         `Anspire request failed with status ${response.status}: %s`,
+         errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
+       );
+       throw new TRPCError({
+         cause: errorBody,
+         code: 'SERVICE_UNAVAILABLE',
+         message: `Anspire request failed: ${response.statusText}`,
+       });
+     }
+
+     try {
+       const anspireResponse = (await response.json()) as AnspireResponse;
+
+       log('Parsed Anspire response: %o', anspireResponse);
+
+       const mappedResults = (anspireResponse.results || []).map(
+         (result): UniformSearchResult => ({
+           category: 'general', // Default category
+           content: result.content || '', // Use the page content snippet
+           engines: ['anspire'], // Use 'anspire' as the engine name
+           parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
+           score: result.score || 0, // Default score to 0 if undefined
+           title: result.title || '',
+           url: result.url,
+         }),
+       );
+
+       log('Mapped %d results to SearchResult format', mappedResults.length);
+
+       return {
+         costTime,
+         query: query,
+         resultNumbers: mappedResults.length,
+         results: mappedResults,
+       };
+     } catch (error) {
+       log.extend('error')('Error parsing Anspire response: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'INTERNAL_SERVER_ERROR',
+         message: 'Failed to parse Anspire response.',
+       });
+     }
+   }
+ }
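The new search implementations log through the [debug](https://github.com/debug-js/debug) package under the `lobe-search:*` namespace (`lobe-search:Anspire` above; Brave, Google, and Kagi follow the same pattern below). Assuming a standard Node.js setup, their log output can be switched on via the `DEBUG` environment variable:

```env
DEBUG=lobe-search:*
```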
@@ -0,0 +1,21 @@
+ export interface AnspireSearchParameters {
+   FromTime?: string;
+   Insite?: string;
+   ToTime?: string;
+   mode?: number;
+   query: string;
+   top_k?: number;
+ }
+
+ interface AnspireResults {
+   content?: string;
+   score?: number;
+   title: string;
+   url: string;
+ }
+
+ export interface AnspireResponse {
+   Uuid?: string;
+   query?: string;
+   results?: AnspireResults[];
+ }
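As a worked example of the time-window logic in the Anspire implementation above: with `searchTimeRange: 'week'` and a current time of `2025-06-20T12:00:00Z`, the IIFE computes a seven-day window, producing request parameters like the following sketch (the query string is made up):

```ts
// days = 7, so FromTime = now - 7 * 86_400 * 1000, formatted by
// .toISOString().slice(0, 19).replace('T', ' ')
const exampleParams: AnspireSearchParameters = {
  FromTime: '2025-06-13 12:00:00',
  ToTime: '2025-06-20 12:00:00',
  mode: 0,
  query: 'example query',
  top_k: 20,
};
```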
@@ -0,0 +1,129 @@
+ import { TRPCError } from '@trpc/server';
+ import debug from 'debug';
+ import urlJoin from 'url-join';
+
+ import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
+
+ import { SearchServiceImpl } from '../type';
+ import { BraveSearchParameters, BraveResponse } from './type';
+
+ const log = debug('lobe-search:Brave');
+
+ const timeRangeMapping = {
+   day: 'pd',
+   month: 'pm',
+   week: 'pw',
+   year: 'py',
+ };
+
+ /**
+  * Brave implementation of the search service
+  * Primarily used for web search
+  */
+ export class BraveImpl implements SearchServiceImpl {
+   private get apiKey(): string | undefined {
+     return process.env.BRAVE_API_KEY;
+   }
+
+   private get baseUrl(): string {
+     // Brave Search API base URL
+     return 'https://api.search.brave.com/res/v1';
+   }
+
+   async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
+     log('Starting Brave query with query: "%s", params: %o', query, params);
+     const endpoint = urlJoin(this.baseUrl, '/web/search');
+
+     const defaultQueryParams: BraveSearchParameters = {
+       count: 15,
+       q: query,
+       result_filter: 'web',
+     };
+
+     const body: BraveSearchParameters = {
+       ...defaultQueryParams,
+       freshness:
+         params?.searchTimeRange && params.searchTimeRange !== 'anytime'
+           ? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping] ?? undefined
+           : undefined,
+     };
+
+     log('Constructed request body: %o', body);
+
+     const searchParams = new URLSearchParams();
+     for (const [key, value] of Object.entries(body)) {
+       if (value !== undefined) searchParams.append(key, String(value)); // skip unset optional params such as `freshness`
+     }
+
+     let response: Response;
+     const startAt = Date.now();
+     let costTime = 0;
+     try {
+       log('Sending request to endpoint: %s', endpoint);
+       response = await fetch(`${endpoint}?${searchParams.toString()}`, {
+         headers: {
+           'Accept': 'application/json',
+           'Accept-Encoding': 'gzip',
+           'X-Subscription-Token': this.apiKey ? this.apiKey : '',
+         },
+         method: 'GET',
+       });
+       log('Received response with status: %d', response.status);
+       costTime = Date.now() - startAt;
+     } catch (error) {
+       log.extend('error')('Brave fetch error: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'SERVICE_UNAVAILABLE',
+         message: 'Failed to connect to Brave.',
+       });
+     }
+
+     if (!response.ok) {
+       const errorBody = await response.text();
+       log.extend('error')(
+         `Brave request failed with status ${response.status}: %s`,
+         errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
+       );
+       throw new TRPCError({
+         cause: errorBody,
+         code: 'SERVICE_UNAVAILABLE',
+         message: `Brave request failed: ${response.statusText}`,
+       });
+     }
+
+     try {
+       const braveResponse = (await response.json()) as BraveResponse;
+
+       log('Parsed Brave response: %o', braveResponse);
+
+       const mappedResults = (braveResponse.web?.results || []).map(
+         (result): UniformSearchResult => ({
+           category: 'general', // Default category
+           content: result.description || '', // Use the result description as content
+           engines: ['brave'], // Use 'brave' as the engine name
+           parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
+           score: 1, // Default score to 1
+           title: result.title || '',
+           url: result.url,
+         }),
+       );
+
+       log('Mapped %d results to SearchResult format', mappedResults.length);
+
+       return {
+         costTime,
+         query: query,
+         resultNumbers: mappedResults.length,
+         results: mappedResults,
+       };
+     } catch (error) {
+       log.extend('error')('Error parsing Brave response: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'INTERNAL_SERVER_ERROR',
+         message: 'Failed to parse Brave response.',
+       });
+     }
+   }
+ }
@@ -0,0 +1,58 @@
+ export interface BraveSearchParameters {
+   count?: number;
+   country?: string;
+   enable_rich_callback?: boolean;
+   extra_snippets?: boolean;
+   freshness?: string;
+   goggles?: string[];
+   goggles_id?: string;
+   offset?: number;
+   q: string;
+   result_filter?: string;
+   safesearch?: string;
+   search_lang?: string;
+   spellcheck?: boolean;
+   summary?: boolean;
+   text_decorations?: boolean;
+   ui_lang?: string;
+   units?: string;
+ }
+
+ interface BraveResults {
+   age?: string;
+   description: string;
+   family_friendly?: boolean;
+   is_live?: boolean;
+   is_source_both?: boolean;
+   is_source_local?: boolean;
+   language?: string;
+   meta_url?: any;
+   page_age?: string;
+   profile?: any;
+   subtype?: string;
+   thumbnail?: any;
+   title: string;
+   type: string;
+   url: string;
+   video?: any;
+ }
+
+ interface BraveVideos {
+   mutated_by_goggles?: boolean;
+   results: BraveResults[];
+   type: string;
+ }
+
+ interface BraveWeb {
+   family_friendly?: boolean;
+   results: BraveResults[];
+   type: string;
+ }
+
+ export interface BraveResponse {
+   mixed: any;
+   query?: any;
+   type: string;
+   videos?: BraveVideos;
+   web: BraveWeb;
+ }
@@ -0,0 +1,129 @@
+ import { TRPCError } from '@trpc/server';
+ import debug from 'debug';
+ import urlJoin from 'url-join';
+
+ import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
+
+ import { SearchServiceImpl } from '../type';
+ import { GoogleSearchParameters, GoogleResponse } from './type';
+
+ const log = debug('lobe-search:Google');
+
+ const timeRangeMapping = {
+   day: 'd1',
+   month: 'm1',
+   week: 'w1',
+   year: 'y1',
+ };
+
+ /**
+  * Google implementation of the search service
+  * Primarily used for web search
+  */
+ export class GoogleImpl implements SearchServiceImpl {
+   private get apiKey(): string | undefined {
+     return process.env.GOOGLE_PSE_API_KEY;
+   }
+
+   private get engineId(): string | undefined {
+     return process.env.GOOGLE_PSE_ENGINE_ID;
+   }
+
+   private get baseUrl(): string {
+     // Google Custom Search JSON API base URL
+     return 'https://www.googleapis.com';
+   }
+
+   async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
+     log('Starting Google query with query: "%s", params: %o', query, params);
+     const endpoint = urlJoin(this.baseUrl, '/customsearch/v1');
+
+     const defaultQueryParams: GoogleSearchParameters = {
+       cx: this.engineId || '',
+       key: this.apiKey || '',
+       num: 10,
+       q: query,
+     };
+
+     const body: GoogleSearchParameters = {
+       ...defaultQueryParams,
+       dateRestrict:
+         params?.searchTimeRange && params.searchTimeRange !== 'anytime'
+           ? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping] ?? undefined
+           : undefined,
+     };
+
+     log('Constructed request body: %o', body);
+
+     const searchParams = new URLSearchParams();
+     for (const [key, value] of Object.entries(body)) {
+       if (value !== undefined) searchParams.append(key, String(value)); // skip unset optional params such as `dateRestrict`
+     }
+
+     let response: Response;
+     const startAt = Date.now();
+     let costTime = 0;
+     try {
+       log('Sending request to endpoint: %s', endpoint);
+       response = await fetch(`${endpoint}?${searchParams.toString()}`, {
+         method: 'GET',
+       });
+       log('Received response with status: %d', response.status);
+       costTime = Date.now() - startAt;
+     } catch (error) {
+       log.extend('error')('Google fetch error: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'SERVICE_UNAVAILABLE',
+         message: 'Failed to connect to Google.',
+       });
+     }
+
+     if (!response.ok) {
+       const errorBody = await response.text();
+       log.extend('error')(
+         `Google request failed with status ${response.status}: %s`,
+         errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
+       );
+       throw new TRPCError({
+         cause: errorBody,
+         code: 'SERVICE_UNAVAILABLE',
+         message: `Google request failed: ${response.statusText}`,
+       });
+     }
+
+     try {
+       const googleResponse = (await response.json()) as GoogleResponse;
+
+       log('Parsed Google response: %o', googleResponse);
+
+       const mappedResults = (googleResponse.items || []).map(
+         (result): UniformSearchResult => ({
+           category: 'general', // Default category
+           content: result.snippet || '', // Use the result snippet as content
+           engines: ['google'], // Use 'google' as the engine name
+           parsedUrl: result.link ? new URL(result.link).hostname : '', // Basic URL parsing
+           score: 1, // Default score to 1
+           title: result.title || '',
+           url: result.link,
+         }),
+       );
+
+       log('Mapped %d results to SearchResult format', mappedResults.length);
+
+       return {
+         costTime,
+         query: query,
+         resultNumbers: mappedResults.length,
+         results: mappedResults,
+       };
+     } catch (error) {
+       log.extend('error')('Error parsing Google response: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'INTERNAL_SERVER_ERROR',
+         message: 'Failed to parse Google response.',
+       });
+     }
+   }
+ }
@@ -0,0 +1,53 @@
+ export interface GoogleSearchParameters {
+   c2coff?: number;
+   cx: string;
+   dateRestrict?: string;
+   exactTerms?: string;
+   excludeTerms?: string;
+   fileType?: string;
+   filter?: string;
+   gl?: string;
+   highRange?: string;
+   hl?: string;
+   hq?: string;
+   imgColorType?: string;
+   imgDominantColor?: string;
+   imgSize?: string;
+   imgType?: string;
+   key: string;
+   linkSite?: string;
+   lowRange?: string;
+   lr?: string;
+   num?: number;
+   orTerms?: string;
+   q: string;
+   rights?: string;
+   safe?: string;
+   searchType?: string;
+   siteSearch?: string;
+   siteSearchFilter?: string;
+   sort?: string;
+   start?: string;
+ }
+
+ interface GoogleItems {
+   displayLink?: string;
+   formattedUrl?: string;
+   htmlFormattedUrl?: string;
+   htmlSnippet?: string;
+   htmlTitle?: string;
+   kind?: string;
+   link: string;
+   pagemap?: any;
+   snippet: string;
+   title: string;
+ }
+
+ export interface GoogleResponse {
+   context?: any;
+   items: GoogleItems[];
+   kind?: string;
+   queries?: any;
+   searchInformation?: any;
+   url?: any;
+ }
@@ -1,7 +1,11 @@
+ import { AnspireImpl } from './anspire';
  import { BochaImpl } from './bocha';
+ import { BraveImpl } from './brave';
  import { ExaImpl } from './exa';
  import { FirecrawlImpl } from './firecrawl';
+ import { GoogleImpl } from './google';
  import { JinaImpl } from './jina';
+ import { KagiImpl } from './kagi';
  import { Search1APIImpl } from './search1api';
  import { SearXNGImpl } from './searxng';
  import { TavilyImpl } from './tavily';
@@ -12,10 +16,14 @@ import { SearchServiceImpl } from './type';
   * Available search service implementations
   */
  export enum SearchImplType {
+   Anspire = 'anspire',
    Bocha = 'bocha',
+   Brave = 'brave',
    Exa = 'exa',
    Firecrawl = 'firecrawl',
+   Google = 'google',
    Jina = 'jina',
+   Kagi = 'kagi',
    SearXNG = 'searxng',
    Search1API = 'search1api',
    Tavily = 'tavily',
@@ -28,10 +36,18 @@ export const createSearchServiceImpl = (
    type: SearchImplType = SearchImplType.SearXNG,
  ): SearchServiceImpl => {
    switch (type) {
+     case SearchImplType.Anspire: {
+       return new AnspireImpl();
+     }
+
      case SearchImplType.Bocha: {
        return new BochaImpl();
      }
 
+     case SearchImplType.Brave: {
+       return new BraveImpl();
+     }
+
      case SearchImplType.Exa: {
        return new ExaImpl();
      }
@@ -40,10 +56,18 @@ export const createSearchServiceImpl = (
        return new FirecrawlImpl();
      }
 
+     case SearchImplType.Google: {
+       return new GoogleImpl();
+     }
+
      case SearchImplType.Jina: {
        return new JinaImpl();
      }
 
+     case SearchImplType.Kagi: {
+       return new KagiImpl();
+     }
+
      case SearchImplType.SearXNG: {
        return new SearXNGImpl();
      }
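For orientation, here is a hypothetical usage sketch of the factory above. The import path and the exact `searchTimeRange` values are assumptions based on this diff, not a documented API:

```ts
import { SearchImplType, createSearchServiceImpl } from './impls'; // hypothetical path

async function demo() {
  // Requires BRAVE_API_KEY in the environment (see the Brave implementation above).
  const search = createSearchServiceImpl(SearchImplType.Brave);

  // Each implementation returns a UniformSearchResponse with provider-agnostic results.
  const { results, costTime } = await search.query('lobe-chat web search', {
    searchTimeRange: 'week',
  });

  console.log(`Got ${results.length} results in ${costTime}ms:`, results[0]?.title);
}
```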
@@ -0,0 +1,111 @@
+ import { TRPCError } from '@trpc/server';
+ import debug from 'debug';
+ import urlJoin from 'url-join';
+
+ import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
+
+ import { SearchServiceImpl } from '../type';
+ import { KagiSearchParameters, KagiResponse } from './type';
+
+ const log = debug('lobe-search:Kagi');
+
+ /**
+  * Kagi implementation of the search service
+  * Primarily used for web search
+  */
+ export class KagiImpl implements SearchServiceImpl {
+   private get apiKey(): string | undefined {
+     return process.env.KAGI_API_KEY;
+   }
+
+   private get baseUrl(): string {
+     // Kagi search API base URL
+     return 'https://kagi.com/api/v0';
+   }
+
+   async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
+     log('Starting Kagi query with query: "%s", params: %o', query, params);
+     const endpoint = urlJoin(this.baseUrl, '/search');
+
+     const body: KagiSearchParameters = {
+       limit: 15,
+       q: query,
+     };
+
+     log('Constructed request body: %o', body);
+
+     const searchParams = new URLSearchParams();
+     for (const [key, value] of Object.entries(body)) {
+       searchParams.append(key, String(value));
+     }
+
+     let response: Response;
+     const startAt = Date.now();
+     let costTime = 0;
+     try {
+       log('Sending request to endpoint: %s', endpoint);
+       response = await fetch(`${endpoint}?${searchParams.toString()}`, {
+         headers: {
+           'Authorization': this.apiKey ? `Bot ${this.apiKey}` : '',
+         },
+         method: 'GET',
+       });
+       log('Received response with status: %d', response.status);
+       costTime = Date.now() - startAt;
+     } catch (error) {
+       log.extend('error')('Kagi fetch error: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'SERVICE_UNAVAILABLE',
+         message: 'Failed to connect to Kagi.',
+       });
+     }
+
+     if (!response.ok) {
+       const errorBody = await response.text();
+       log.extend('error')(
+         `Kagi request failed with status ${response.status}: %s`,
+         errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
+       );
+       throw new TRPCError({
+         cause: errorBody,
+         code: 'SERVICE_UNAVAILABLE',
+         message: `Kagi request failed: ${response.statusText}`,
+       });
+     }
+
+     try {
+       const kagiResponse = (await response.json()) as KagiResponse;
+
+       log('Parsed Kagi response: %o', kagiResponse);
+
+       const mappedResults = (kagiResponse.data || []).map(
+         (result): UniformSearchResult => ({
+           category: 'general', // Default category
+           content: result.snippet || '', // Use the result snippet as content
+           engines: ['kagi'], // Use 'kagi' as the engine name
+           parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
+           score: 1, // Default score to 1
+           title: result.title || '',
+           url: result.url,
+         }),
+       );
+
+       log('Mapped %d results to SearchResult format', mappedResults.length);
+
+       return {
+         costTime,
+         query: query,
+         resultNumbers: mappedResults.length,
+         results: mappedResults,
+       };
+     } catch (error) {
+       log.extend('error')('Error parsing Kagi response: %o', error);
+       throw new TRPCError({
+         cause: error,
+         code: 'INTERNAL_SERVER_ERROR',
+         message: 'Failed to parse Kagi response.',
+       });
+     }
+   }
+ }
@@ -0,0 +1,24 @@
+ export interface KagiSearchParameters {
+   limit?: number;
+   q: string;
+ }
+
+ interface KagiThumbnail {
+   height?: number | null;
+   url: string;
+   width?: number | null;
+ }
+
+ interface KagiData {
+   published?: number;
+   snippet?: string;
+   t: number;
+   thumbnail?: KagiThumbnail;
+   title: string;
+   url: string;
+ }
+
+ export interface KagiResponse {
+   data: KagiData[];
+   meta?: any;
+ }