termsearch 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -3
- package/frontend/dist/app.js +212 -12
- package/frontend/dist/style.css +126 -3
- package/package.json +1 -1
- package/src/api/routes.js +112 -20
- package/src/search/engine.js +404 -73
- package/src/search/providers/github.js +91 -0
- package/src/search/providers/searxng.js +15 -5
package/src/search/engine.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// Search orchestrator — fan-out to
|
|
1
|
+
// Search orchestrator — fan-out to enabled providers, merge, rank, cache
|
|
2
2
|
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { makeTieredCache, searchCacheKey } from './cache.js';
|
|
@@ -8,74 +8,140 @@ import * as wikipedia from './providers/wikipedia.js';
|
|
|
8
8
|
import * as brave from './providers/brave.js';
|
|
9
9
|
import * as mojeek from './providers/mojeek.js';
|
|
10
10
|
import * as searxng from './providers/searxng.js';
|
|
11
|
+
import * as github from './providers/github.js';
|
|
11
12
|
|
|
12
13
|
let _searchCache = null;
|
|
13
14
|
let _docCache = null;
|
|
14
|
-
let _dataDir = null;
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
_searchCache = makeTieredCache(
|
|
20
|
-
sc.cache_l1_max_search,
|
|
21
|
-
path.join(dataDir, 'cache', 'search'),
|
|
22
|
-
sc.disk_max_search_entries,
|
|
23
|
-
sc.disk_max_search_bytes,
|
|
24
|
-
);
|
|
25
|
-
_docCache = makeTieredCache(
|
|
26
|
-
sc.cache_l1_max_docs,
|
|
27
|
-
path.join(dataDir, 'cache', 'docs'),
|
|
28
|
-
sc.disk_max_doc_entries,
|
|
29
|
-
sc.disk_max_doc_bytes,
|
|
30
|
-
);
|
|
31
|
-
}
|
|
16
|
+
// Maximum number of recent success/failure samples retained per engine;
// recordEngineOutcome() drops the oldest sample once this is exceeded.
const ENGINE_HEALTH_HISTORY_LIMIT = 16;

// Per-engine health records, keyed by normalized (trimmed, lower-cased) engine name.
const ENGINE_HEALTH_STORE = new Map();

// Pending search promises keyed by request key, used to share one in-flight
// search between concurrent identical requests.
const INFLIGHT_SEARCH_STORE = new Map();
|
|
32
19
|
|
|
33
|
-
export
|
|
34
|
-
|
|
35
|
-
|
|
20
|
+
// Exported set of lower-case engine identifiers.
// NOTE(review): presumably used by callers to validate requested engine names —
// confirm at the import sites; nothing in this module reads it.
export const ALLOWED_ENGINES = new Set([
  'brave', 'duckduckgo', 'startpage', 'qwant', 'mojeek',
  'google', 'bing', 'yahoo', 'gigablast', 'yacy',
  'wikipedia', 'wikidata', 'reddit', 'github', 'youtube', 'hackernews',
  'mastodon users', 'mastodon hashtags', 'tootfinder',
  'lemmy communities', 'lemmy users', 'lemmy posts', 'lemmy comments',
  'lobste.rs', 'sepiasearch', 'crossref', 'openalex', 'openlibrary',
  '1337x', 'piratebay', 'nyaa',
  // local aliases for direct providers
  'ddg', 'wiki', 'searxng', 'searx', 'github-api',
]);
|
|
59
|
+
|
|
60
|
+
// Engine list handed to SearXNG when a 'web' search falls back to the default
// provider set and SearXNG is one of those providers.
const CURATED_WEB_ENGINES = [
  'bing',
  'startpage',
  'yahoo',
  'mojeek',
  'github',
  'reddit',
  'youtube',
  'hackernews',
];
|
|
36
61
|
|
|
37
62
|
// Registry of directly-callable search providers, in fan-out order.
// Each entry carries:
//   aliases         – requested engine names that resolve to this provider
//   enabled(cfg)    – whether the provider may run under the given config
//   run(args)       – the provider's search function
//   defaultProvider – false keeps the provider out of the default (no explicit
//                     engines requested) provider set
const PROVIDER_REGISTRY = (() => {
  const define = (aliasList, enabled, run, defaultProvider) => ({
    aliases: new Set(aliasList),
    enabled,
    run,
    defaultProvider,
  });
  const always = (_cfg) => true;
  // Enabled only when the config section is switched on and the named field is set.
  const configured = (section, field) => (cfg) => Boolean(cfg[section]?.enabled && cfg[section]?.[field]);

  return {
    duckduckgo: define(['duckduckgo', 'ddg'], always, ddg.search, true),
    wikipedia: define(['wikipedia', 'wiki'], always, wikipedia.search, true),
    brave: define(['brave'], configured('brave', 'api_key'), brave.search, true),
    mojeek: define(['mojeek'], configured('mojeek', 'api_key'), mojeek.search, true),
    searxng: define(['searxng', 'searx'], configured('searxng', 'url'), searxng.search, true),
    github: define(['github', 'github-api'], always, github.search, false),
  };
})();
|
|
64
100
|
|
|
101
|
+
/**
 * Creates the search-result and document caches under `<dataDir>/cache`.
 *
 * @param {string} dataDir - Base directory; caches live in `cache/search` and `cache/docs` below it.
 * @param {object} cfg - Application config; size limits are read from `cfg.search`.
 */
export function initCaches(dataDir, cfg) {
  const { search: limits } = cfg;

  const searchDir = path.join(dataDir, 'cache', 'search');
  _searchCache = makeTieredCache(
    limits.cache_l1_max_search,
    searchDir,
    limits.disk_max_search_entries,
    limits.disk_max_search_bytes,
  );

  const docsDir = path.join(dataDir, 'cache', 'docs');
  _docCache = makeTieredCache(
    limits.cache_l1_max_docs,
    docsDir,
    limits.disk_max_doc_entries,
    limits.disk_max_doc_bytes,
  );
}
|
|
116
|
+
|
|
117
|
+
/**
 * Accessor for the document cache.
 *
 * @returns {object|null} The cache created by initCaches(), or null if it has not been called yet.
 */
export function getDocCache() {
  return _docCache;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Canonical form of an engine name: trimmed and lower-cased.
 *
 * @param {*} engine - Raw engine name; any falsy value yields ''.
 * @returns {string} Normalized name, possibly empty.
 */
function normalizeEngineName(engine) {
  if (!engine) return '';
  const text = String(engine);
  return text.trim().toLowerCase();
}
|
|
124
|
+
|
|
65
125
|
/**
 * Normalizes a caller-supplied engine list: each entry is canonicalized,
 * empty names are dropped and duplicates removed (first occurrence wins).
 *
 * @param {*} input - Requested engines; anything that is not an array yields [].
 * @returns {string[]} Unique normalized engine names in request order.
 */
function normalizeRequestedEngines(input) {
  const unique = new Set();
  if (Array.isArray(input)) {
    for (const item of input) {
      const name = normalizeEngineName(item);
      if (name) unique.add(name);
    }
  }
  return [...unique];
}
|
|
73
133
|
|
|
74
|
-
function resolveProviderPlan(cfg, requestedEngines = []) {
|
|
134
|
+
function resolveProviderPlan(cfg, requestedEngines = [], category = 'web') {
|
|
75
135
|
const requested = normalizeRequestedEngines(requestedEngines);
|
|
76
136
|
const enabledProviders = Object.keys(PROVIDER_REGISTRY).filter((name) => PROVIDER_REGISTRY[name].enabled(cfg));
|
|
137
|
+
|
|
138
|
+
const defaultProviders = enabledProviders.filter((name) => PROVIDER_REGISTRY[name].defaultProvider !== false);
|
|
139
|
+
|
|
77
140
|
if (requested.length === 0) {
|
|
78
|
-
return {
|
|
141
|
+
return {
|
|
142
|
+
providers: defaultProviders,
|
|
143
|
+
searxEngines: category === 'web' && defaultProviders.includes('searxng') ? CURATED_WEB_ENGINES.slice() : [],
|
|
144
|
+
};
|
|
79
145
|
}
|
|
80
146
|
|
|
81
147
|
const explicitProviders = new Set();
|
|
@@ -96,35 +162,243 @@ function resolveProviderPlan(cfg, requestedEngines = []) {
|
|
|
96
162
|
|
|
97
163
|
const providers = [...explicitProviders].filter((name) => enabledProviders.includes(name));
|
|
98
164
|
if (providers.length === 0) {
|
|
99
|
-
return {
|
|
165
|
+
return {
|
|
166
|
+
providers: defaultProviders,
|
|
167
|
+
searxEngines: category === 'web' && defaultProviders.includes('searxng') ? CURATED_WEB_ENGINES.slice() : [],
|
|
168
|
+
};
|
|
100
169
|
}
|
|
170
|
+
|
|
101
171
|
return { providers, searxEngines };
|
|
102
172
|
}
|
|
103
173
|
|
|
104
|
-
|
|
174
|
+
// Ordered [kind, substrings] table; the first kind with a matching substring wins.
// Besides the generic words it covers Node.js system error codes and the
// "fetch failed" message Node's fetch() uses for connection-level errors,
// which would otherwise all be reported as 'other'.
const ENGINE_FAILURE_PATTERNS = [
  ['captcha', ['captcha']],
  ['too_many_requests', ['429', 'too many']],
  ['access_denied', ['403', 'access denied']],
  ['timeout', ['timeout', 'timed out', 'etimedout', 'aborted']],
  ['network', ['unreachable', 'network', 'econnrefused', 'econnreset', 'enotfound', 'fetch failed']],
];

/**
 * Maps a free-form failure reason to a coarse failure kind.
 *
 * @param {*} reason - Error message or reason string (matched case-insensitively).
 * @returns {string|null} 'captcha', 'too_many_requests', 'access_denied', 'timeout',
 *   'network' or 'other'; null when the reason is empty.
 */
function classifyEngineFailure(reason) {
  const raw = String(reason || '').toLowerCase();
  if (!raw) return null;

  for (const [kind, needles] of ENGINE_FAILURE_PATTERNS) {
    if (needles.some((needle) => raw.includes(needle))) return kind;
  }
  return 'other';
}
|
|
184
|
+
|
|
185
|
+
/**
 * Appends one success/failure sample to an engine's health record.
 *
 * The record keeps at most ENGINE_HEALTH_HISTORY_LIMIT recent samples
 * (1 = success, 0 = failure), a per-kind failure counter and the kind of the
 * most recent failure. Every failure is classified; one without a usable
 * reason counts as 'other', so `lastFailure` never lags behind the history.
 *
 * @param {*} engine - Engine name; ignored when it normalizes to ''.
 * @param {boolean} ok - Whether the engine responded successfully.
 * @param {string|null} [reason=null] - Failure reason, used only when `ok` is falsy.
 */
function recordEngineOutcome(engine, ok, reason = null) {
  const key = normalizeEngineName(engine);
  if (!key) return;

  const entry = ENGINE_HEALTH_STORE.get(key) || {
    history: [],
    failureKinds: {},
    lastFailure: null,
    updatedAt: 0,
  };

  entry.history.push(ok ? 1 : 0);
  if (entry.history.length > ENGINE_HEALTH_HISTORY_LIMIT) entry.history.shift();

  if (!ok) {
    // classifyEngineFailure() returns null for an empty reason.
    const kind = classifyEngineFailure(reason) || 'other';
    entry.failureKinds[kind] = Number(entry.failureKinds[kind] || 0) + 1;
    entry.lastFailure = kind;
  }

  entry.updatedAt = Date.now();
  ENGINE_HEALTH_STORE.set(key, entry);
}
|
|
208
|
+
|
|
209
|
+
/**
 * Summarizes the recorded health of one engine.
 *
 * Status rules, applied to the retained sample history:
 *   - no samples                             → 'unknown'  (penalty 0)
 *   - at least 2 samples, success rate < 0.5 → 'poor'     (penalty 1.2)
 *   - success rate < 0.75                    → 'unstable' (penalty 0.55)
 *   - otherwise                              → 'healthy'  (penalty 0)
 *
 * `failureKinds` is returned as a copy so callers (and cached responses that
 * embed the summary) never alias the live, still-mutating health record.
 *
 * @param {*} engine - Engine name (normalized before lookup).
 * @returns {{status: string, samples: number, successRate: (number|null),
 *   penalty: number, lastFailure: (string|null), failureKinds: object}}
 *   `successRate` is rounded to two decimals.
 */
function getEngineHealth(engine) {
  const entry = ENGINE_HEALTH_STORE.get(normalizeEngineName(engine));
  if (!entry || entry.history.length === 0) {
    return { status: 'unknown', samples: 0, successRate: null, penalty: 0, lastFailure: null, failureKinds: {} };
  }

  const samples = entry.history.length;
  const successes = entry.history.reduce((sum, v) => sum + v, 0);
  const successRate = successes / samples;

  let status = 'healthy';
  let penalty = 0;
  if (samples >= 2 && successRate < 0.5) {
    status = 'poor';
    penalty = 1.2;
  } else if (successRate < 0.75) {
    // A single failed sample also lands here: one data point is not enough for 'poor'.
    status = 'unstable';
    penalty = 0.55;
  }

  return {
    status,
    samples,
    successRate: Number(successRate.toFixed(2)),
    penalty,
    lastFailure: entry.lastFailure,
    failureKinds: { ...entry.failureKinds },
  };
}
|
|
251
|
+
|
|
252
|
+
/**
 * Builds a `{ engineName: health }` map for the given engines.
 * Names are normalized; empty names and names already present are skipped.
 *
 * @param {Iterable<*>} [engines=[]] - Engine names to summarize.
 * @returns {Object<string, object>} Health summary per normalized engine name.
 */
function getEngineHealthSummary(engines = []) {
  return [...engines].reduce((summary, engine) => {
    const name = normalizeEngineName(engine);
    if (name && !summary[name]) {
      summary[name] = getEngineHealth(name);
    }
    return summary;
  }, {});
}
|
|
261
|
+
|
|
262
|
+
/**
 * Shares one pending operation between concurrent callers using the same key.
 *
 * If a promise is already registered for `key` it is returned as-is. Otherwise
 * `factory` is scheduled on a resolved promise, the resulting promise is
 * registered, and the registration is removed once it settles (only if it is
 * still the registered promise for that key).
 *
 * @param {string} key - Identifier of the in-flight operation.
 * @param {Function} factory - Produces the result (may return a promise).
 * @returns {Promise<*>} The shared promise for this key.
 */
function withInflightSearch(key, factory) {
  if (INFLIGHT_SEARCH_STORE.has(key)) {
    return INFLIGHT_SEARCH_STORE.get(key);
  }

  const release = () => {
    if (INFLIGHT_SEARCH_STORE.get(key) === pending) {
      INFLIGHT_SEARCH_STORE.delete(key);
    }
  };
  const pending = Promise.resolve().then(factory).finally(release);

  INFLIGHT_SEARCH_STORE.set(key, pending);
  return pending;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Coerces whatever a provider returned into `{ results, meta }`.
 *
 *   - a bare array     → used as the results, with empty meta
 *   - an object        → `results` (if it is an array, else []) and `_meta` (else {})
 *   - anything else    → no results and `meta.error = 'provider_invalid_payload'`
 *
 * @param {*} payload - Raw provider return value.
 * @returns {{results: Array, meta: object}}
 */
function normalizeProviderPayload(payload) {
  if (Array.isArray(payload)) {
    return { results: payload, meta: {} };
  }

  const isObject = Boolean(payload) && typeof payload === 'object';
  if (!isObject) {
    return { results: [], meta: { error: 'provider_invalid_payload' } };
  }

  const { results, _meta } = payload;
  return {
    results: Array.isArray(results) ? results : [],
    meta: _meta || {},
  };
}
|
|
283
|
+
|
|
284
|
+
/**
 * Runs one registered provider and reports which engines answered or failed.
 *
 * Never rejects: an unknown provider or a thrown error becomes a failure
 * record. Every responded/failed engine is also fed into the engine health
 * history via recordEngineOutcome().
 *
 * For the 'searxng' provider the outcome is tracked per upstream engine: engines
 * named on results count as responded unless listed in `meta.unresponsive`;
 * unresponsive engines count as failed, with the reason taken from the first
 * matching `meta.unresponsiveDetails` entry. The unresponsive list is
 * de-duplicated so a repeated engine is recorded as a single failure.
 *
 * @param {string} name - Key in PROVIDER_REGISTRY.
 * @param {object} args - Arguments forwarded to the provider's `run` function.
 * @returns {Promise<{name: string, results: Array, respondedEngines: string[],
 *   failedEngines: string[], failedDetails: Array<{engine: string, reason: string}>}>}
 */
async function runProviderDetailed(name, args) {
  const provider = PROVIDER_REGISTRY[name];
  if (!provider) {
    return {
      name,
      results: [],
      respondedEngines: [],
      failedEngines: [name],
      failedDetails: [{ engine: name, reason: 'provider_not_found' }],
    };
  }

  try {
    const payload = normalizeProviderPayload(await provider.run(args));
    const results = payload.results;
    const meta = payload.meta || {};

    const responded = new Set();
    const failed = new Set();
    const failedDetails = [];

    if (name === 'searxng') {
      // Sets/Maps give O(1) membership checks and collapse duplicate engine names.
      const unresponsive = new Set(
        (Array.isArray(meta.unresponsive) ? meta.unresponsive : [])
          .map((engine) => normalizeEngineName(engine))
          .filter(Boolean),
      );
      const reasonByEngine = new Map();
      for (const entry of Array.isArray(meta.unresponsiveDetails) ? meta.unresponsiveDetails : []) {
        const engine = normalizeEngineName(entry?.engine);
        // First entry per engine wins.
        if (engine && !reasonByEngine.has(engine)) reasonByEngine.set(engine, entry?.reason);
      }

      for (const item of results) {
        const engine = normalizeEngineName(item?.engine);
        if (engine && !unresponsive.has(engine)) responded.add(engine);
      }

      for (const engine of unresponsive) {
        failed.add(engine);
        failedDetails.push({ engine, reason: String(reasonByEngine.get(engine) || 'unresponsive') });
      }

      if (meta.error) {
        failed.add('searxng');
        failedDetails.push({ engine: 'searxng', reason: String(meta.error) });
      }
    } else if (meta.error) {
      failed.add(name);
      failedDetails.push({ engine: name, reason: String(meta.error) });
    } else {
      responded.add(name);
    }

    for (const engine of responded) recordEngineOutcome(engine, true);
    for (const detail of failedDetails) recordEngineOutcome(detail.engine, false, detail.reason);

    return {
      name,
      results,
      respondedEngines: [...responded],
      failedEngines: [...failed],
      failedDetails,
    };
  } catch (error) {
    const reason = String(error?.message || 'provider_failed');
    recordEngineOutcome(name, false, reason);
    return {
      name,
      results: [],
      respondedEngines: [],
      failedEngines: [name],
      failedDetails: [{ engine: name, reason }],
    };
  }
}
|
|
113
354
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
355
|
+
/**
 * Builds the engine list that goes into the search cache key: the provider
 * names followed by each SearXNG engine prefixed with `searx:`.
 *
 * @param {string[]} providerList - Provider names taking part in the search.
 * @param {string[]} [searxEngines=[]] - Engines requested from SearXNG.
 * @returns {string[]} New array; the inputs are not modified.
 */
function cacheEngineList(providerList, searxEngines = []) {
  const prefixed = searxEngines.map((engine) => `searx:${engine}`);
  const keys = [...providerList];
  keys.push(...prefixed);
  return keys;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Aggregates per-engine outcomes of a search into the `engineStats` object
 * attached to responses.
 *
 * @param {Array} [respondedEngines=[]] - Engines that answered.
 * @param {Array} [failedEngines=[]] - Engines that failed.
 * @param {Array<{engine: *, reason: *}>} [failedDetails=[]] - Failure reasons per engine.
 * @returns {{responded: string[], failed: string[], failedDetails: Array,
 *   degraded: boolean, unstable: string[], health: object}}
 *   Names are normalized and de-duplicated; `degraded` is true when any engine
 *   failed; `unstable` lists (sorted) the engines whose health status is
 *   'unstable' or 'poor'.
 */
function buildEngineStats(respondedEngines = [], failedEngines = [], failedDetails = []) {
  const uniqueNames = (list) => {
    const names = new Set();
    for (const name of list.map((engine) => normalizeEngineName(engine))) {
      if (name) names.add(name);
    }
    return [...names];
  };

  const responded = uniqueNames(respondedEngines);
  const failed = uniqueNames(failedEngines);

  const details = [];
  for (const item of failedDetails) {
    const detail = {
      engine: normalizeEngineName(item?.engine),
      reason: String(item?.reason || ''),
    };
    if (detail.engine) details.push(detail);
  }

  const health = getEngineHealthSummary([...responded, ...failed]);
  const flagged = ['unstable', 'poor'];
  const unstable = Object.keys(health)
    .filter((engine) => flagged.includes(health[engine].status))
    .sort();

  return {
    responded,
    failed,
    failedDetails: details,
    degraded: failed.length > 0,
    unstable,
    health,
  };
}
|
|
117
385
|
|
|
118
|
-
|
|
386
|
+
async function runSearchBatch({ query, lang, safe, page, category, engines, cfg }) {
|
|
387
|
+
const plan = resolveProviderPlan(cfg, engines, category);
|
|
119
388
|
const providerList = plan.providers;
|
|
120
389
|
const timeoutMs = cfg.search.timeout_ms;
|
|
121
|
-
const cacheEngines = providerList.length ? providerList : ['none'];
|
|
122
|
-
const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
|
|
123
|
-
const cached = _searchCache.get(cacheKey);
|
|
124
|
-
if (cached) return cached;
|
|
125
390
|
|
|
126
|
-
|
|
127
|
-
|
|
391
|
+
if (providerList.length === 0) {
|
|
392
|
+
return {
|
|
393
|
+
results: [],
|
|
394
|
+
providers: [],
|
|
395
|
+
engineStats: buildEngineStats([], [], []),
|
|
396
|
+
category,
|
|
397
|
+
};
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
const runs = await Promise.all(providerList.map((providerName) =>
|
|
401
|
+
runProviderDetailed(providerName, {
|
|
128
402
|
query,
|
|
129
403
|
lang,
|
|
130
404
|
safe,
|
|
@@ -134,39 +408,66 @@ export async function search({ query, lang = 'en-US', safe = '1', page = 1, cate
|
|
|
134
408
|
timeoutMs,
|
|
135
409
|
engines: providerName === 'searxng' ? plan.searxEngines : [],
|
|
136
410
|
})
|
|
137
|
-
);
|
|
138
|
-
|
|
139
|
-
const allResults = await Promise.all(tasks);
|
|
411
|
+
));
|
|
140
412
|
|
|
141
|
-
// Merge all provider results
|
|
142
413
|
let merged = [];
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
414
|
+
const responded = [];
|
|
415
|
+
const failed = [];
|
|
416
|
+
const failedDetails = [];
|
|
146
417
|
|
|
147
|
-
|
|
148
|
-
|
|
418
|
+
for (const run of runs) {
|
|
419
|
+
merged = mergeSearchResultSets(merged, run.results);
|
|
420
|
+
responded.push(...run.respondedEngines);
|
|
421
|
+
failed.push(...run.failedEngines);
|
|
422
|
+
failedDetails.push(...run.failedDetails);
|
|
423
|
+
}
|
|
149
424
|
|
|
150
|
-
|
|
151
|
-
results:
|
|
152
|
-
query,
|
|
153
|
-
lang,
|
|
154
|
-
page: Number(page),
|
|
155
|
-
total: ranked.length,
|
|
425
|
+
return {
|
|
426
|
+
results: rankResultsBySourceDiversity(merged),
|
|
156
427
|
providers: providerList,
|
|
428
|
+
engineStats: buildEngineStats(responded, failed, failedDetails),
|
|
157
429
|
category,
|
|
430
|
+
searxEngines: plan.searxEngines,
|
|
158
431
|
};
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
/**
 * Runs a search across the enabled providers and returns merged, ranked results.
 *
 * Results are served from the search cache when present. Otherwise the search
 * is executed once per cache key, with concurrent identical requests sharing
 * the same in-flight promise. The cache entry is written inside that shared
 * operation, so it is stored exactly once no matter how many callers wait.
 *
 * @param {object} params
 * @param {string} params.query - Search query.
 * @param {string} [params.lang='en-US'] - Language code.
 * @param {string} [params.safe='1'] - Safe-search setting.
 * @param {number} [params.page=1] - Result page.
 * @param {string} [params.category='web'] - Search category.
 * @param {string[]} [params.engines=[]] - Explicitly requested engines.
 * @param {object} cfg - Application config (`cfg.search.cache_ttl_search_ms` sets the cache TTL).
 * @returns {Promise<object>} `{ results, query, lang, page, total, providers, category, degraded, engineStats }`.
 * @throws {Error} When initCaches() has not been called.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
  if (!_searchCache) throw new Error('Caches not initialized — call initCaches() first');

  const plan = resolveProviderPlan(cfg, engines, category);
  const cacheEngines = cacheEngineList(plan.providers, plan.searxEngines);
  const cacheKey = searchCacheKey(query, lang, safe, cacheEngines.length ? cacheEngines : ['none'], 'full', category, page);
  const cached = _searchCache.get(cacheKey);
  if (cached) return cached;

  return withInflightSearch(`search:${cacheKey}`, async () => {
    const fresh = await runSearchBatch({ query, lang, safe, page, category, engines, cfg });
    const response = {
      results: fresh.results,
      query,
      lang,
      page: Number(page),
      total: fresh.results.length,
      providers: fresh.providers,
      category,
      degraded: fresh.engineStats.degraded,
      engineStats: fresh.engineStats,
    };

    _searchCache.set(cacheKey, response, cfg.search.cache_ttl_search_ms);
    return response;
  });
}
|
|
163
462
|
|
|
164
|
-
// Streaming search: returns fast results first
|
|
463
|
+
// Streaming search: returns fast results first, then merged full results
|
|
165
464
|
export async function* searchStream({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
|
|
166
|
-
const plan = resolveProviderPlan(cfg, engines);
|
|
465
|
+
const plan = resolveProviderPlan(cfg, engines, category);
|
|
167
466
|
const providerList = plan.providers;
|
|
467
|
+
|
|
168
468
|
if (providerList.length === 0) {
|
|
169
|
-
|
|
469
|
+
const emptyStats = buildEngineStats([], [], []);
|
|
470
|
+
yield { tier: 'full', results: [], providers: [], degraded: emptyStats.degraded, engineStats: emptyStats };
|
|
170
471
|
return;
|
|
171
472
|
}
|
|
172
473
|
|
|
@@ -174,7 +475,8 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
|
|
|
174
475
|
const fastProvider = providerList.includes('duckduckgo')
|
|
175
476
|
? 'duckduckgo'
|
|
176
477
|
: providerList[0];
|
|
177
|
-
|
|
478
|
+
|
|
479
|
+
const fastRun = await runProviderDetailed(fastProvider, {
|
|
178
480
|
query,
|
|
179
481
|
lang,
|
|
180
482
|
safe,
|
|
@@ -184,13 +486,23 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
|
|
|
184
486
|
timeoutMs,
|
|
185
487
|
engines: fastProvider === 'searxng' ? plan.searxEngines : [],
|
|
186
488
|
});
|
|
187
|
-
|
|
188
|
-
|
|
489
|
+
|
|
490
|
+
const fastRanked = rankResultsBySourceDiversity(fastRun.results);
|
|
491
|
+
const fastStats = buildEngineStats(fastRun.respondedEngines, fastRun.failedEngines, fastRun.failedDetails);
|
|
492
|
+
|
|
493
|
+
if (fastRanked.length > 0) {
|
|
494
|
+
yield {
|
|
495
|
+
tier: 'fast',
|
|
496
|
+
results: fastRanked,
|
|
497
|
+
providers: [fastProvider],
|
|
498
|
+
degraded: fastStats.degraded,
|
|
499
|
+
engineStats: fastStats,
|
|
500
|
+
};
|
|
189
501
|
}
|
|
190
502
|
|
|
191
503
|
const remainingProviders = providerList.filter((name) => name !== fastProvider);
|
|
192
|
-
const
|
|
193
|
-
|
|
504
|
+
const additionalRuns = await Promise.all(remainingProviders.map((providerName) =>
|
|
505
|
+
runProviderDetailed(providerName, {
|
|
194
506
|
query,
|
|
195
507
|
lang,
|
|
196
508
|
safe,
|
|
@@ -200,26 +512,44 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
|
|
|
200
512
|
timeoutMs,
|
|
201
513
|
engines: providerName === 'searxng' ? plan.searxEngines : [],
|
|
202
514
|
})
|
|
203
|
-
);
|
|
515
|
+
));
|
|
204
516
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
517
|
+
let full = fastRun.results.slice();
|
|
518
|
+
const responded = [...fastRun.respondedEngines];
|
|
519
|
+
const failed = [...fastRun.failedEngines];
|
|
520
|
+
const failedDetails = [...fastRun.failedDetails];
|
|
521
|
+
|
|
522
|
+
for (const run of additionalRuns) {
|
|
523
|
+
full = mergeSearchResultSets(full, run.results);
|
|
524
|
+
responded.push(...run.respondedEngines);
|
|
525
|
+
failed.push(...run.failedEngines);
|
|
526
|
+
failedDetails.push(...run.failedDetails);
|
|
209
527
|
}
|
|
528
|
+
|
|
210
529
|
const fullRanked = rankResultsBySourceDiversity(full);
|
|
530
|
+
const engineStats = buildEngineStats(responded, failed, failedDetails);
|
|
211
531
|
|
|
212
|
-
|
|
213
|
-
const
|
|
214
|
-
const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
|
|
532
|
+
const cacheEngines = cacheEngineList(providerList, plan.searxEngines);
|
|
533
|
+
const cacheKey = searchCacheKey(query, lang, safe, cacheEngines.length ? cacheEngines : ['none'], 'full', category, page);
|
|
215
534
|
_searchCache?.set(cacheKey, {
|
|
216
535
|
results: fullRanked,
|
|
217
|
-
query,
|
|
536
|
+
query,
|
|
537
|
+
lang,
|
|
538
|
+
page: Number(page),
|
|
539
|
+
total: fullRanked.length,
|
|
218
540
|
providers: providerList,
|
|
219
541
|
category,
|
|
542
|
+
degraded: engineStats.degraded,
|
|
543
|
+
engineStats,
|
|
220
544
|
}, cfg.search.cache_ttl_search_ms);
|
|
221
545
|
|
|
222
|
-
yield {
|
|
546
|
+
yield {
|
|
547
|
+
tier: 'full',
|
|
548
|
+
results: fullRanked,
|
|
549
|
+
providers: providerList,
|
|
550
|
+
degraded: engineStats.degraded,
|
|
551
|
+
engineStats,
|
|
552
|
+
};
|
|
223
553
|
}
|
|
224
554
|
|
|
225
555
|
export function getEnabledProviders(cfg) {
|
|
@@ -227,5 +557,6 @@ export function getEnabledProviders(cfg) {
|
|
|
227
557
|
if (cfg.brave?.enabled && cfg.brave?.api_key) providers.push('brave');
|
|
228
558
|
if (cfg.mojeek?.enabled && cfg.mojeek?.api_key) providers.push('mojeek');
|
|
229
559
|
if (cfg.searxng?.enabled && cfg.searxng?.url) providers.push('searxng');
|
|
560
|
+
providers.push('github-api');
|
|
230
561
|
return providers;
|
|
231
562
|
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// GitHub Search API provider — optional fallback when SearXNG is unavailable.
|
|
2
|
+
// Works without token (rate-limited by GitHub); token can be set with TERMSEARCH_GITHUB_TOKEN.
|
|
3
|
+
|
|
4
|
+
const GITHUB_API = 'https://api.github.com';
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the request headers for the GitHub API.
 *
 * The token is taken from the TERMSEARCH_GITHUB_TOKEN environment variable
 * first, then from `config.github.api_key`; without a token no Authorization
 * header is sent.
 *
 * @param {object} [config={}] - Application config.
 * @returns {object} Header name → value map.
 */
function buildHeaders(config = {}) {
  const base = {
    Accept: 'application/vnd.github+json',
    'User-Agent': 'TermSearch/1.0',
  };

  const token = process.env.TERMSEARCH_GITHUB_TOKEN || config?.github?.api_key || '';
  if (!token) return base;

  return { ...base, Authorization: `Bearer ${token}` };
}
|
|
15
|
+
|
|
16
|
+
/**
 * Fetches a URL and parses the response body as JSON, aborting after `timeoutMs`.
 *
 * @param {string} url - Request URL.
 * @param {object} [options]
 * @param {object} [options.headers] - Request headers.
 * @param {number} [options.timeoutMs=12000] - Abort the request after this many milliseconds.
 * @returns {Promise<*>} Parsed JSON body.
 * @throws {Error} `github_http_<status>:<first 140 chars of body>` on a non-2xx
 *   response; fetch/abort/parse errors propagate unchanged.
 */
async function fetchJson(url, { headers, timeoutMs = 12000 } = {}) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, { headers, signal: controller.signal });
    if (!response.ok) {
      const body = await response.text().catch(() => '');
      throw new Error(`github_http_${response.status}:${body.slice(0, 140)}`);
    }
    // Await here (not a bare `return response.json()`): otherwise `finally`
    // clears the timer before the body has been read and a stalled body
    // download would never be aborted.
    return await response.json();
  } finally {
    clearTimeout(timer);
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Converts a GitHub repository search item into a search result.
 *
 * Star and fork counts are coerced to finite, non-negative numbers so a
 * malformed API value cannot produce `NaN` in the snippet or the score.
 * The score is 1.0 plus a star bonus that saturates at 10,000 stars (range 1.0–2.0).
 *
 * @param {object} item - Repository item from the GitHub search API.
 * @returns {{title: string, url: string, snippet: string, engine: string,
 *   score: number, publishedDate: (string|null)}}
 */
function mapRepo(item) {
  const toCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
  };

  const stars = toCount(item?.stargazers_count);
  const forks = toCount(item?.forks_count);
  const lang = String(item?.language || '').trim();
  const desc = String(item?.description || '').trim();

  const metaParts = [];
  if (lang) metaParts.push(lang);
  metaParts.push(`★ ${stars}`);
  metaParts.push(`forks ${forks}`);
  // Never empty: the star and fork parts are always present.
  const meta = metaParts.join(' · ');

  return {
    title: item?.full_name || item?.name || 'GitHub repository',
    url: item?.html_url || '',
    snippet: desc ? `${desc} — ${meta}` : meta,
    engine: 'github-api',
    score: 1.0 + Math.min(stars / 10000, 1.0),
    publishedDate: item?.updated_at || item?.pushed_at || null,
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Converts a GitHub user search item into a search result with a fixed score of 0.8.
 *
 * @param {object} item - User item from the GitHub search API.
 * @returns {{title: string, url: string, snippet: string, engine: string,
 *   score: number, publishedDate: null}}
 */
function mapUser(item) {
  const login = item?.login;
  const accountType = item?.type;

  const title = login ? `@${login} · GitHub` : 'GitHub user';
  const snippet = accountType ? `${accountType} profile on GitHub` : 'GitHub profile';

  return {
    title,
    url: item?.html_url || '',
    snippet,
    engine: 'github-api',
    score: 0.8,
    publishedDate: null,
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Searches GitHub repositories (top 8 by stars) and users (top 4 by followers)
 * and returns them as one result list, repositories first.
 *
 * The two requests run in parallel and are settled independently, so a failure
 * of one does not discard the other's results. The call only fails when it
 * would otherwise return nothing although a request failed; the first failure
 * is rethrown in that case.
 *
 * @param {object} params
 * @param {string} params.query - Search query; blank queries return [] without any request.
 * @param {number} [params.page=1] - 1-based result page; non-integer or invalid values fall back sensibly.
 * @param {object} [params.config] - Application config (used for the optional API token).
 * @param {number} [params.timeoutMs=12000] - Per-request timeout in milliseconds.
 * @returns {Promise<Array<object>>} Mapped results; items without a URL are dropped.
 * @throws {Error} The first request error when no results could be obtained.
 */
export async function search({ query, page = 1, config, timeoutMs = 12000 }) {
  const q = String(query || '').trim();
  if (!q) return [];

  const headers = buildHeaders(config);
  // Whole, finite page number >= 1.
  const requestedPage = Math.trunc(Number(page));
  const pageNo = Number.isFinite(requestedPage) && requestedPage >= 1 ? requestedPage : 1;

  const repoParams = new URLSearchParams({
    q,
    per_page: '8',
    page: String(pageNo),
    sort: 'stars',
    order: 'desc',
  });
  const usersParams = new URLSearchParams({
    q,
    per_page: '4',
    page: String(pageNo),
    sort: 'followers',
    order: 'desc',
  });

  const outcomes = await Promise.allSettled([
    fetchJson(`${GITHUB_API}/search/repositories?${repoParams.toString()}`, { headers, timeoutMs }),
    fetchJson(`${GITHUB_API}/search/users?${usersParams.toString()}`, { headers, timeoutMs }),
  ]);
  const [reposOutcome, usersOutcome] = outcomes;

  const itemsOf = (outcome) => (
    outcome.status === 'fulfilled' && Array.isArray(outcome.value?.items) ? outcome.value.items : []
  );
  const repos = itemsOf(reposOutcome).map(mapRepo).filter((r) => r.url);
  const users = itemsOf(usersOutcome).map(mapUser).filter((u) => u.url);
  const results = [...repos, ...users];

  // Never report an empty success when a request actually failed.
  const failure = outcomes.find((outcome) => outcome.status === 'rejected');
  if (results.length === 0 && failure) throw failure.reason;

  return results;
}
|