@lobehub/chat 1.90.4 → 1.91.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/changelog/v1.json +12 -0
- package/package.json +1 -1
- package/packages/web-crawler/src/crawImpl/exa.ts +93 -0
- package/packages/web-crawler/src/crawImpl/firecrawl.ts +97 -0
- package/packages/web-crawler/src/crawImpl/index.ts +6 -0
- package/packages/web-crawler/src/crawImpl/tavily.ts +94 -0
- package/src/config/aiModels/modelscope.ts +3 -3
- package/src/config/modelProviders/modelscope.ts +3 -3
- package/src/database/client/migrations.json +10 -0
- package/src/database/migrations/0023_remove_param_and_doubao.sql +6 -0
- package/src/database/migrations/meta/0023_snapshot.json +5340 -0
- package/src/database/migrations/meta/_journal.json +7 -0
- package/src/server/services/search/impls/bocha/index.ts +124 -0
- package/src/server/services/search/impls/bocha/type.ts +47 -0
- package/src/server/services/search/impls/exa/index.ts +129 -0
- package/src/server/services/search/impls/exa/type.ts +39 -0
- package/src/server/services/search/impls/firecrawl/index.ts +128 -0
- package/src/server/services/search/impls/firecrawl/type.ts +35 -0
- package/src/server/services/search/impls/index.ts +31 -0
- package/src/server/services/search/impls/jina/index.ts +109 -0
- package/src/server/services/search/impls/jina/type.ts +26 -0
- package/src/server/services/search/impls/tavily/index.ts +124 -0
- package/src/server/services/search/impls/tavily/type.ts +36 -0
@@ -161,6 +161,13 @@
|
|
161
161
|
"when": 1746724476380,
|
162
162
|
"tag": "0022_add_documents",
|
163
163
|
"breakpoints": true
|
164
|
+
},
|
165
|
+
{
|
166
|
+
"idx": 23,
|
167
|
+
"version": "7",
|
168
|
+
"when": 1748925630721,
|
169
|
+
"tag": "0023_remove_param_and_doubao",
|
170
|
+
"breakpoints": true
|
164
171
|
}
|
165
172
|
],
|
166
173
|
"version": "6"
|
@@ -0,0 +1,124 @@
|
|
1
|
+
import { TRPCError } from '@trpc/server';
|
2
|
+
import debug from 'debug';
|
3
|
+
import urlJoin from 'url-join';
|
4
|
+
|
5
|
+
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
|
6
|
+
|
7
|
+
import { SearchServiceImpl } from '../type';
|
8
|
+
import { BochaSearchParameters, BochaResponse } from './type';
|
9
|
+
|
10
|
+
const log = debug('lobe-search:Bocha');
|
11
|
+
|
12
|
+
const timeRangeMapping = {
|
13
|
+
day: 'oneDay',
|
14
|
+
month: 'oneMonth',
|
15
|
+
week: 'oneWeek',
|
16
|
+
year: 'oneYear',
|
17
|
+
};
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Bocha implementation of the search service
|
21
|
+
* Primarily used for web crawling
|
22
|
+
*/
|
23
|
+
export class BochaImpl implements SearchServiceImpl {
|
24
|
+
private get apiKey(): string | undefined {
|
25
|
+
return process.env.BOCHA_API_KEY;
|
26
|
+
}
|
27
|
+
|
28
|
+
private get baseUrl(): string {
|
29
|
+
// Assuming the base URL is consistent with the crawl endpoint
|
30
|
+
return 'https://api.bochaai.com/v1';
|
31
|
+
}
|
32
|
+
|
33
|
+
async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
|
34
|
+
log('Starting Bocha query with query: "%s", params: %o', query, params);
|
35
|
+
const endpoint = urlJoin(this.baseUrl, '/web-search');
|
36
|
+
|
37
|
+
const defaultQueryParams: BochaSearchParameters = {
|
38
|
+
count: 15,
|
39
|
+
query,
|
40
|
+
summary: true,
|
41
|
+
};
|
42
|
+
|
43
|
+
let body: BochaSearchParameters = {
|
44
|
+
...defaultQueryParams,
|
45
|
+
freshness:
|
46
|
+
params?.searchTimeRange && params.searchTimeRange !== 'anytime'
|
47
|
+
? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping] ?? undefined
|
48
|
+
: undefined,
|
49
|
+
};
|
50
|
+
|
51
|
+
log('Constructed request body: %o', body);
|
52
|
+
|
53
|
+
let response: Response;
|
54
|
+
const startAt = Date.now();
|
55
|
+
let costTime = 0;
|
56
|
+
try {
|
57
|
+
log('Sending request to endpoint: %s', endpoint);
|
58
|
+
response = await fetch(endpoint, {
|
59
|
+
body: JSON.stringify(body),
|
60
|
+
headers: {
|
61
|
+
'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
|
62
|
+
'Content-Type': 'application/json',
|
63
|
+
},
|
64
|
+
method: 'POST',
|
65
|
+
});
|
66
|
+
log('Received response with status: %d', response.status);
|
67
|
+
costTime = Date.now() - startAt;
|
68
|
+
} catch (error) {
|
69
|
+
log.extend('error')('Bocha fetch error: %o', error);
|
70
|
+
throw new TRPCError({
|
71
|
+
cause: error,
|
72
|
+
code: 'SERVICE_UNAVAILABLE',
|
73
|
+
message: 'Failed to connect to Bocha.',
|
74
|
+
});
|
75
|
+
}
|
76
|
+
|
77
|
+
if (!response.ok) {
|
78
|
+
const errorBody = await response.text();
|
79
|
+
log.extend('error')(
|
80
|
+
`Bocha request failed with status ${response.status}: %s`,
|
81
|
+
errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
|
82
|
+
);
|
83
|
+
throw new TRPCError({
|
84
|
+
cause: errorBody,
|
85
|
+
code: 'SERVICE_UNAVAILABLE',
|
86
|
+
message: `Bocha request failed: ${response.statusText}`,
|
87
|
+
});
|
88
|
+
}
|
89
|
+
|
90
|
+
try {
|
91
|
+
const bochaResponse = (await response.json()) as BochaResponse;
|
92
|
+
|
93
|
+
log('Parsed Bocha response: %o', bochaResponse);
|
94
|
+
|
95
|
+
const mappedResults = (bochaResponse.data.webPages.value || []).map(
|
96
|
+
(result): UniformSearchResult => ({
|
97
|
+
category: 'general', // Default category
|
98
|
+
content: result.summary || result.snippet || '', // Prioritize content, fallback to snippet
|
99
|
+
engines: ['bocha'], // Use 'bocha' as the engine name
|
100
|
+
parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
|
101
|
+
score: 1, // Default score to 1
|
102
|
+
title: result.name || '',
|
103
|
+
url: result.url,
|
104
|
+
}),
|
105
|
+
);
|
106
|
+
|
107
|
+
log('Mapped %d results to SearchResult format', mappedResults.length);
|
108
|
+
|
109
|
+
return {
|
110
|
+
costTime,
|
111
|
+
query: query,
|
112
|
+
resultNumbers: mappedResults.length,
|
113
|
+
results: mappedResults,
|
114
|
+
};
|
115
|
+
} catch (error) {
|
116
|
+
log.extend('error')('Error parsing Bocha response: %o', error);
|
117
|
+
throw new TRPCError({
|
118
|
+
cause: error,
|
119
|
+
code: 'INTERNAL_SERVER_ERROR',
|
120
|
+
message: 'Failed to parse Bocha response.',
|
121
|
+
});
|
122
|
+
}
|
123
|
+
}
|
124
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
/**
 * Request body for Bocha's `POST /v1/web-search` endpoint.
 */
export interface BochaSearchParameters {
  // Maximum number of results to return (the caller in index.ts sends 15).
  count?: number;
  // Presumably sites/domains to exclude from results — TODO confirm against Bocha docs.
  exclude?: string;
  // Time-range filter; callers send 'oneDay' | 'oneWeek' | 'oneMonth' | 'oneYear'.
  freshness?: string;
  // Counterpart of `exclude` — presumably domains to restrict results to; verify.
  include?: string;
  // Free-text search query (required).
  query: string;
  // When true, asks Bocha to include a long-form `summary` on each result.
  summary?: boolean;
}

/** Echo of the query as interpreted by Bocha. */
interface BochaQueryContext {
  originalQuery: string;
}

/** A single web-page hit in the Bocha response. */
interface BochaValue {
  cachedPageUrl?: string;
  dateLastCrawled?: string;
  displayUrl?: string;
  id?: string | null;
  isFamilyFriendly?: boolean;
  isNavigational?: boolean;
  language?: string;
  // Page title; mapped to `UniformSearchResult.title`.
  name: string;
  siteName?: string;
  // Short excerpt; used as the content fallback when `summary` is absent.
  snippet?: string;
  // Long-form summary, present when the request set `summary: true`.
  summary?: string;
  url: string;
}

/** Container for the web-page results section of the response. */
interface BochaWebPages {
  totalEstimatedMatches?: number;
  value?: BochaValue[];
  webSearchUrl?: string;
}

/** `data` payload of a Bocha response; only `webPages` is consumed here. */
interface BochaData {
  images?: any;
  queryContext?: BochaQueryContext;
  videos?: any;
  webPages: BochaWebPages;
}

/** Top-level Bocha web-search response envelope. */
export interface BochaResponse {
  code?: number;
  data: BochaData;
  log_id?: string;
  msg?: string | null;
}
|
@@ -0,0 +1,129 @@
|
|
1
|
+
import { TRPCError } from '@trpc/server';
|
2
|
+
import debug from 'debug';
|
3
|
+
import urlJoin from 'url-join';
|
4
|
+
|
5
|
+
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
|
6
|
+
|
7
|
+
import { SearchServiceImpl } from '../type';
|
8
|
+
import { ExaSearchParameters, ExaResponse } from './type';
|
9
|
+
|
10
|
+
const log = debug('lobe-search:Exa');
|
11
|
+
|
12
|
+
/**
|
13
|
+
* Exa implementation of the search service
|
14
|
+
* Primarily used for web crawling
|
15
|
+
*/
|
16
|
+
export class ExaImpl implements SearchServiceImpl {
|
17
|
+
private get apiKey(): string | undefined {
|
18
|
+
return process.env.EXA_API_KEY;
|
19
|
+
}
|
20
|
+
|
21
|
+
private get baseUrl(): string {
|
22
|
+
// Assuming the base URL is consistent with the crawl endpoint
|
23
|
+
return 'https://api.exa.ai';
|
24
|
+
}
|
25
|
+
|
26
|
+
async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
|
27
|
+
log('Starting Exa query with query: "%s", params: %o', query, params);
|
28
|
+
const endpoint = urlJoin(this.baseUrl, '/search');
|
29
|
+
|
30
|
+
const defaultQueryParams: ExaSearchParameters = {
|
31
|
+
numResults: 15,
|
32
|
+
query,
|
33
|
+
type: 'auto',
|
34
|
+
};
|
35
|
+
|
36
|
+
let body: ExaSearchParameters = {
|
37
|
+
...defaultQueryParams,
|
38
|
+
...(params?.searchTimeRange && params.searchTimeRange !== 'anytime'
|
39
|
+
? (() => {
|
40
|
+
const now = Date.now();
|
41
|
+
const days = { day: 1, month: 30, week: 7, year: 365 }[params.searchTimeRange!];
|
42
|
+
|
43
|
+
if (days === undefined) return {};
|
44
|
+
|
45
|
+
return {
|
46
|
+
endPublishedDate: new Date(now).toISOString(),
|
47
|
+
startPublishedDate: new Date(now - days * 86_400 * 1000).toISOString(),
|
48
|
+
};
|
49
|
+
})()
|
50
|
+
: {}),
|
51
|
+
category:
|
52
|
+
// Exa 只支持 news 类型
|
53
|
+
params?.searchCategories?.filter(cat => ['news'].includes(cat))?.[0],
|
54
|
+
};
|
55
|
+
|
56
|
+
log('Constructed request body: %o', body);
|
57
|
+
|
58
|
+
let response: Response;
|
59
|
+
const startAt = Date.now();
|
60
|
+
let costTime = 0;
|
61
|
+
try {
|
62
|
+
log('Sending request to endpoint: %s', endpoint);
|
63
|
+
response = await fetch(endpoint, {
|
64
|
+
body: JSON.stringify(body),
|
65
|
+
headers: {
|
66
|
+
'Content-Type': 'application/json',
|
67
|
+
'x-api-key': this.apiKey ? this.apiKey : '',
|
68
|
+
},
|
69
|
+
method: 'POST',
|
70
|
+
});
|
71
|
+
log('Received response with status: %d', response.status);
|
72
|
+
costTime = Date.now() - startAt;
|
73
|
+
} catch (error) {
|
74
|
+
log.extend('error')('Exa fetch error: %o', error);
|
75
|
+
throw new TRPCError({
|
76
|
+
cause: error,
|
77
|
+
code: 'SERVICE_UNAVAILABLE',
|
78
|
+
message: 'Failed to connect to Exa.',
|
79
|
+
});
|
80
|
+
}
|
81
|
+
|
82
|
+
if (!response.ok) {
|
83
|
+
const errorBody = await response.text();
|
84
|
+
log.extend('error')(
|
85
|
+
`Exa request failed with status ${response.status}: %s`,
|
86
|
+
errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
|
87
|
+
);
|
88
|
+
throw new TRPCError({
|
89
|
+
cause: errorBody,
|
90
|
+
code: 'SERVICE_UNAVAILABLE',
|
91
|
+
message: `Exa request failed: ${response.statusText}`,
|
92
|
+
});
|
93
|
+
}
|
94
|
+
|
95
|
+
try {
|
96
|
+
const exaResponse = (await response.json()) as ExaResponse;
|
97
|
+
|
98
|
+
log('Parsed Exa response: %o', exaResponse);
|
99
|
+
|
100
|
+
const mappedResults = (exaResponse.results || []).map(
|
101
|
+
(result): UniformSearchResult => ({
|
102
|
+
category: body.category || 'general', // Default category
|
103
|
+
content: result.text || '', // Prioritize content, fallback to snippet
|
104
|
+
engines: ['exa'], // Use 'exa' as the engine name
|
105
|
+
parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
|
106
|
+
score: result.score || 0, // Default score to 0 if undefined
|
107
|
+
title: result.title || '',
|
108
|
+
url: result.url,
|
109
|
+
}),
|
110
|
+
);
|
111
|
+
|
112
|
+
log('Mapped %d results to SearchResult format', mappedResults.length);
|
113
|
+
|
114
|
+
return {
|
115
|
+
costTime,
|
116
|
+
query: query,
|
117
|
+
resultNumbers: mappedResults.length,
|
118
|
+
results: mappedResults,
|
119
|
+
};
|
120
|
+
} catch (error) {
|
121
|
+
log.extend('error')('Error parsing Exa response: %o', error);
|
122
|
+
throw new TRPCError({
|
123
|
+
cause: error,
|
124
|
+
code: 'INTERNAL_SERVER_ERROR',
|
125
|
+
message: 'Failed to parse Exa response.',
|
126
|
+
});
|
127
|
+
}
|
128
|
+
}
|
129
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
export interface ExaSearchParameters {
|
2
|
+
category?: string;
|
3
|
+
endCrawlDate?: string;
|
4
|
+
endPublishedDate?: string;
|
5
|
+
excludeDomains?: string[];
|
6
|
+
excludeText?: string[];
|
7
|
+
includeDomains?: string[];
|
8
|
+
includeText?: string[];
|
9
|
+
numResults?: number;
|
10
|
+
query: string;
|
11
|
+
startCrawlDate?: string;
|
12
|
+
startPublishedDate?: string;
|
13
|
+
type?: string;
|
14
|
+
}
|
15
|
+
|
16
|
+
interface ExaCostDollars {
|
17
|
+
total: number;
|
18
|
+
}
|
19
|
+
|
20
|
+
interface ExaResults {
|
21
|
+
author?: string | null;
|
22
|
+
favicon?: string;
|
23
|
+
id?: string;
|
24
|
+
image?: string;
|
25
|
+
publishedDate?: string | null;
|
26
|
+
score?: number | null;
|
27
|
+
summery?: string;
|
28
|
+
text: string;
|
29
|
+
title: string;
|
30
|
+
url: string;
|
31
|
+
}
|
32
|
+
|
33
|
+
export interface ExaResponse {
|
34
|
+
costDollars?: ExaCostDollars;
|
35
|
+
requestId?: string;
|
36
|
+
resolvedSearchType?: string;
|
37
|
+
results: ExaResults[];
|
38
|
+
searchType?: string;
|
39
|
+
}
|
@@ -0,0 +1,128 @@
|
|
1
|
+
import { TRPCError } from '@trpc/server';
|
2
|
+
import debug from 'debug';
|
3
|
+
import urlJoin from 'url-join';
|
4
|
+
|
5
|
+
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
|
6
|
+
|
7
|
+
import { SearchServiceImpl } from '../type';
|
8
|
+
import { FirecrawlSearchParameters, FirecrawlResponse } from './type';
|
9
|
+
|
10
|
+
const log = debug('lobe-search:Firecrawl');
|
11
|
+
|
12
|
+
const timeRangeMapping = {
|
13
|
+
day: 'qdr:d',
|
14
|
+
month: 'qdr:m',
|
15
|
+
week: 'qdr:w',
|
16
|
+
year: 'qdr:y',
|
17
|
+
};
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Firecrawl implementation of the search service
|
21
|
+
* Primarily used for web crawling
|
22
|
+
*/
|
23
|
+
export class FirecrawlImpl implements SearchServiceImpl {
|
24
|
+
private get apiKey(): string | undefined {
|
25
|
+
return process.env.FIRECRAWL_API_KEY;
|
26
|
+
}
|
27
|
+
|
28
|
+
private get baseUrl(): string {
|
29
|
+
// Assuming the base URL is consistent with the crawl endpoint
|
30
|
+
return process.env.FIRECRAWL_URL || 'https://api.firecrawl.dev/v1';
|
31
|
+
}
|
32
|
+
|
33
|
+
async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
|
34
|
+
log('Starting Firecrawl query with query: "%s", params: %o', query, params);
|
35
|
+
const endpoint = urlJoin(this.baseUrl, '/search');
|
36
|
+
|
37
|
+
const defaultQueryParams: FirecrawlSearchParameters = {
|
38
|
+
limit: 15,
|
39
|
+
query,
|
40
|
+
/*
|
41
|
+
scrapeOptions: {
|
42
|
+
formats: ["markdown"]
|
43
|
+
},
|
44
|
+
*/
|
45
|
+
};
|
46
|
+
|
47
|
+
let body: FirecrawlSearchParameters = {
|
48
|
+
...defaultQueryParams,
|
49
|
+
tbs:
|
50
|
+
params?.searchTimeRange && params.searchTimeRange !== 'anytime'
|
51
|
+
? timeRangeMapping[params.searchTimeRange as keyof typeof timeRangeMapping] ?? undefined
|
52
|
+
: undefined,
|
53
|
+
};
|
54
|
+
|
55
|
+
log('Constructed request body: %o', body);
|
56
|
+
|
57
|
+
let response: Response;
|
58
|
+
const startAt = Date.now();
|
59
|
+
let costTime = 0;
|
60
|
+
try {
|
61
|
+
log('Sending request to endpoint: %s', endpoint);
|
62
|
+
response = await fetch(endpoint, {
|
63
|
+
body: JSON.stringify(body),
|
64
|
+
headers: {
|
65
|
+
'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
|
66
|
+
'Content-Type': 'application/json',
|
67
|
+
},
|
68
|
+
method: 'POST',
|
69
|
+
});
|
70
|
+
log('Received response with status: %d', response.status);
|
71
|
+
costTime = Date.now() - startAt;
|
72
|
+
} catch (error) {
|
73
|
+
log.extend('error')('Firecrawl fetch error: %o', error);
|
74
|
+
throw new TRPCError({
|
75
|
+
cause: error,
|
76
|
+
code: 'SERVICE_UNAVAILABLE',
|
77
|
+
message: 'Failed to connect to Firecrawl.',
|
78
|
+
});
|
79
|
+
}
|
80
|
+
|
81
|
+
if (!response.ok) {
|
82
|
+
const errorBody = await response.text();
|
83
|
+
log.extend('error')(
|
84
|
+
`Firecrawl request failed with status ${response.status}: %s`,
|
85
|
+
errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
|
86
|
+
);
|
87
|
+
throw new TRPCError({
|
88
|
+
cause: errorBody,
|
89
|
+
code: 'SERVICE_UNAVAILABLE',
|
90
|
+
message: `Firecrawl request failed: ${response.statusText}`,
|
91
|
+
});
|
92
|
+
}
|
93
|
+
|
94
|
+
try {
|
95
|
+
const firecrawlResponse = (await response.json()) as FirecrawlResponse;
|
96
|
+
|
97
|
+
log('Parsed Firecrawl response: %o', firecrawlResponse);
|
98
|
+
|
99
|
+
const mappedResults = (firecrawlResponse.data || []).map(
|
100
|
+
(result): UniformSearchResult => ({
|
101
|
+
category: 'general', // Default category
|
102
|
+
content: result.description || '', // Prioritize content, fallback to snippet
|
103
|
+
engines: ['firecrawl'], // Use 'firecrawl' as the engine name
|
104
|
+
parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
|
105
|
+
score: 1, // Default score to 1
|
106
|
+
title: result.title || '',
|
107
|
+
url: result.url,
|
108
|
+
}),
|
109
|
+
);
|
110
|
+
|
111
|
+
log('Mapped %d results to SearchResult format', mappedResults.length);
|
112
|
+
|
113
|
+
return {
|
114
|
+
costTime,
|
115
|
+
query: query,
|
116
|
+
resultNumbers: mappedResults.length,
|
117
|
+
results: mappedResults,
|
118
|
+
};
|
119
|
+
} catch (error) {
|
120
|
+
log.extend('error')('Error parsing Firecrawl response: %o', error);
|
121
|
+
throw new TRPCError({
|
122
|
+
cause: error,
|
123
|
+
code: 'INTERNAL_SERVER_ERROR',
|
124
|
+
message: 'Failed to parse Firecrawl response.',
|
125
|
+
});
|
126
|
+
}
|
127
|
+
}
|
128
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
/** Scrape configuration attached to a search request. */
interface FirecrawlScrapeOptions {
  // Output formats, e.g. ["markdown"] (see the commented-out usage in index.ts).
  formats: string[];
}

/**
 * Request body for Firecrawl's `POST /v1/search` endpoint.
 */
export interface FirecrawlSearchParameters {
  country?: string;
  lang?: string;
  // Maximum number of results (the caller in index.ts sends 15).
  limit?: number;
  // Free-text search query (required).
  query: string;
  // Optional page-scraping settings; currently unused by the caller.
  scrapeOptions?: FirecrawlScrapeOptions;
  // Google-style time filter; callers send 'qdr:d' | 'qdr:w' | 'qdr:m' | 'qdr:y'.
  tbs?: string;
  timeout?: number;
}

/** Metadata block returned alongside scraped content. */
interface FirecrawlMetadata {
  description?: string;
  sourceURL?: string;
  statusCode?: number;
  title: string;
}

/** A single search result item. */
interface FirecrawlData {
  // Short description; mapped to `UniformSearchResult.content`.
  description?: string;
  html?: string;
  links?: string[];
  markdown?: string;
  metadata?: FirecrawlMetadata;
  title?: string;
  url: string;
}

/** Top-level Firecrawl search response. */
export interface FirecrawlResponse {
  data: FirecrawlData[];
  success?: boolean;
}
|
@@ -1,13 +1,24 @@
|
|
1
|
+
import { BochaImpl } from './bocha';
|
2
|
+
import { ExaImpl } from './exa';
|
3
|
+
import { FirecrawlImpl } from './firecrawl';
|
4
|
+
import { JinaImpl } from './jina';
|
1
5
|
import { Search1APIImpl } from './search1api';
|
2
6
|
import { SearXNGImpl } from './searxng';
|
7
|
+
import { TavilyImpl } from './tavily';
|
8
|
+
|
3
9
|
import { SearchServiceImpl } from './type';
|
4
10
|
|
5
11
|
/**
 * Available search service implementations
 *
 * Each member's string value is the identifier used by
 * `createSearchServiceImpl` to select the corresponding implementation.
 */
export enum SearchImplType {
  Bocha = 'bocha',
  Exa = 'exa',
  Firecrawl = 'firecrawl',
  Jina = 'jina',
  SearXNG = 'searxng',
  Search1API = 'search1api',
  Tavily = 'tavily',
}
|
12
23
|
|
13
24
|
/**
|
@@ -17,10 +28,30 @@ export const createSearchServiceImpl = (
|
|
17
28
|
type: SearchImplType = SearchImplType.SearXNG,
|
18
29
|
): SearchServiceImpl => {
|
19
30
|
switch (type) {
|
31
|
+
case SearchImplType.Bocha: {
|
32
|
+
return new BochaImpl();
|
33
|
+
}
|
34
|
+
|
35
|
+
case SearchImplType.Exa: {
|
36
|
+
return new ExaImpl();
|
37
|
+
}
|
38
|
+
|
39
|
+
case SearchImplType.Firecrawl: {
|
40
|
+
return new FirecrawlImpl();
|
41
|
+
}
|
42
|
+
|
43
|
+
case SearchImplType.Jina: {
|
44
|
+
return new JinaImpl();
|
45
|
+
}
|
46
|
+
|
20
47
|
case SearchImplType.SearXNG: {
|
21
48
|
return new SearXNGImpl();
|
22
49
|
}
|
23
50
|
|
51
|
+
case SearchImplType.Tavily: {
|
52
|
+
return new TavilyImpl();
|
53
|
+
}
|
54
|
+
|
24
55
|
default: {
|
25
56
|
return new Search1APIImpl();
|
26
57
|
}
|
@@ -0,0 +1,109 @@
|
|
1
|
+
import { TRPCError } from '@trpc/server';
|
2
|
+
import debug from 'debug';
|
3
|
+
import urlJoin from 'url-join';
|
4
|
+
|
5
|
+
import { SearchParams, UniformSearchResponse, UniformSearchResult } from '@/types/tool/search';
|
6
|
+
|
7
|
+
import { SearchServiceImpl } from '../type';
|
8
|
+
import { JinaSearchParameters, JinaResponse } from './type';
|
9
|
+
|
10
|
+
const log = debug('lobe-search:Jina');
|
11
|
+
|
12
|
+
/**
|
13
|
+
* Jina implementation of the search service
|
14
|
+
* Primarily used for web crawling
|
15
|
+
*/
|
16
|
+
export class JinaImpl implements SearchServiceImpl {
|
17
|
+
private get apiKey(): string | undefined {
|
18
|
+
return process.env.JINA_READER_API_KEY || process.env.JINA_API_KEY;
|
19
|
+
}
|
20
|
+
|
21
|
+
private get baseUrl(): string {
|
22
|
+
// Assuming the base URL is consistent with the crawl endpoint
|
23
|
+
return 'https://s.jina.ai';
|
24
|
+
}
|
25
|
+
|
26
|
+
async query(query: string, params: SearchParams = {}): Promise<UniformSearchResponse> {
|
27
|
+
log('Starting Jina query with query: "%s", params: %o', query, params);
|
28
|
+
const endpoint = urlJoin(this.baseUrl, '/');
|
29
|
+
|
30
|
+
let body: JinaSearchParameters = {
|
31
|
+
q: query,
|
32
|
+
};
|
33
|
+
|
34
|
+
log('Constructed request body: %o', body);
|
35
|
+
|
36
|
+
let response: Response;
|
37
|
+
const startAt = Date.now();
|
38
|
+
let costTime = 0;
|
39
|
+
try {
|
40
|
+
log('Sending request to endpoint: %s', endpoint);
|
41
|
+
response = await fetch(endpoint, {
|
42
|
+
body: JSON.stringify(body),
|
43
|
+
headers: {
|
44
|
+
'Accept': 'application/json',
|
45
|
+
'Authorization': this.apiKey ? `Bearer ${this.apiKey}` : '',
|
46
|
+
'Content-Type': 'application/json',
|
47
|
+
'X-Respond-With': 'no-content',
|
48
|
+
},
|
49
|
+
method: 'POST',
|
50
|
+
});
|
51
|
+
log('Received response with status: %d', response.status);
|
52
|
+
costTime = Date.now() - startAt;
|
53
|
+
} catch (error) {
|
54
|
+
log.extend('error')('Jina fetch error: %o', error);
|
55
|
+
throw new TRPCError({
|
56
|
+
cause: error,
|
57
|
+
code: 'SERVICE_UNAVAILABLE',
|
58
|
+
message: 'Failed to connect to Jina.',
|
59
|
+
});
|
60
|
+
}
|
61
|
+
|
62
|
+
if (!response.ok) {
|
63
|
+
const errorBody = await response.text();
|
64
|
+
log.extend('error')(
|
65
|
+
`Jina request failed with status ${response.status}: %s`,
|
66
|
+
errorBody.length > 200 ? `${errorBody.slice(0, 200)}...` : errorBody,
|
67
|
+
);
|
68
|
+
throw new TRPCError({
|
69
|
+
cause: errorBody,
|
70
|
+
code: 'SERVICE_UNAVAILABLE',
|
71
|
+
message: `Jina request failed: ${response.statusText}`,
|
72
|
+
});
|
73
|
+
}
|
74
|
+
|
75
|
+
try {
|
76
|
+
const jinaResponse = (await response.json()) as JinaResponse;
|
77
|
+
|
78
|
+
log('Parsed Jina response: %o', jinaResponse);
|
79
|
+
|
80
|
+
const mappedResults = (jinaResponse.data || []).map(
|
81
|
+
(result): UniformSearchResult => ({
|
82
|
+
category: 'general', // Default category
|
83
|
+
content: result.description || '', // Prioritize content, fallback to snippet
|
84
|
+
engines: ['jina'], // Use 'jina' as the engine name
|
85
|
+
parsedUrl: result.url ? new URL(result.url).hostname : '', // Basic URL parsing
|
86
|
+
score: 1, // Default score to 1
|
87
|
+
title: result.title || '',
|
88
|
+
url: result.url,
|
89
|
+
}),
|
90
|
+
);
|
91
|
+
|
92
|
+
log('Mapped %d results to SearchResult format', mappedResults.length);
|
93
|
+
|
94
|
+
return {
|
95
|
+
costTime,
|
96
|
+
query: query,
|
97
|
+
resultNumbers: mappedResults.length,
|
98
|
+
results: mappedResults,
|
99
|
+
};
|
100
|
+
} catch (error) {
|
101
|
+
log.extend('error')('Error parsing Jina response: %o', error);
|
102
|
+
throw new TRPCError({
|
103
|
+
cause: error,
|
104
|
+
code: 'INTERNAL_SERVER_ERROR',
|
105
|
+
message: 'Failed to parse Jina response.',
|
106
|
+
});
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|