termsearch 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- // Search orchestrator — fan-out to all enabled providers, merge, rank, cache
1
+ // Search orchestrator — fan-out to enabled providers, merge, rank, cache
2
2
 
3
3
  import path from 'path';
4
4
  import { makeTieredCache, searchCacheKey } from './cache.js';
@@ -8,74 +8,140 @@ import * as wikipedia from './providers/wikipedia.js';
8
8
  import * as brave from './providers/brave.js';
9
9
  import * as mojeek from './providers/mojeek.js';
10
10
  import * as searxng from './providers/searxng.js';
11
+ import * as github from './providers/github.js';
11
12
 
12
13
  let _searchCache = null;
13
14
  let _docCache = null;
14
- let _dataDir = null;
15
15
 
16
- export function initCaches(dataDir, cfg) {
17
- _dataDir = dataDir;
18
- const sc = cfg.search;
19
- _searchCache = makeTieredCache(
20
- sc.cache_l1_max_search,
21
- path.join(dataDir, 'cache', 'search'),
22
- sc.disk_max_search_entries,
23
- sc.disk_max_search_bytes,
24
- );
25
- _docCache = makeTieredCache(
26
- sc.cache_l1_max_docs,
27
- path.join(dataDir, 'cache', 'docs'),
28
- sc.disk_max_doc_entries,
29
- sc.disk_max_doc_bytes,
30
- );
31
- }
16
// Number of most recent success/failure samples retained per engine.
const ENGINE_HEALTH_HISTORY_LIMIT = 16;
// Rolling health records, keyed by normalized engine name.
const ENGINE_HEALTH_STORE = new Map();
// Promises of searches still in progress, keyed by search key.
const INFLIGHT_SEARCH_STORE = new Map();
32
19
 
33
- export function getDocCache() {
34
- return _docCache;
35
- }
20
/**
 * Lower-case engine names recognised by this module, followed by the local
 * aliases used for the direct (non-SearXNG) providers.
 */
export const ALLOWED_ENGINES = new Set([
  'brave',
  'duckduckgo',
  'startpage',
  'qwant',
  'mojeek',
  'google',
  'bing',
  'yahoo',
  'gigablast',
  'yacy',
  'wikipedia',
  'wikidata',
  'reddit',
  'github',
  'youtube',
  'hackernews',
  'mastodon users',
  'mastodon hashtags',
  'tootfinder',
  'lemmy communities',
  'lemmy users',
  'lemmy posts',
  'lemmy comments',
  'lobste.rs',
  'sepiasearch',
  'crossref',
  'openalex',
  'openlibrary',
  '1337x',
  'piratebay',
  'nyaa',
  // local aliases for direct providers
  'ddg',
  'wiki',
  'searxng',
  'searx',
  'github-api',
]);

// SearXNG engines requested by default for the 'web' category when the
// caller does not name any engines explicitly.
const CURATED_WEB_ENGINES = ['bing', 'startpage', 'yahoo', 'mojeek', 'github', 'reddit', 'youtube', 'hackernews'];
36
61
 
37
62
  const PROVIDER_REGISTRY = {
38
63
  duckduckgo: {
39
64
  aliases: new Set(['duckduckgo', 'ddg']),
40
65
  enabled: (_cfg) => true,
41
66
  run: ddg.search,
67
+ defaultProvider: true,
42
68
  },
43
69
  wikipedia: {
44
70
  aliases: new Set(['wikipedia', 'wiki']),
45
71
  enabled: (_cfg) => true,
46
72
  run: wikipedia.search,
73
+ defaultProvider: true,
47
74
  },
48
75
  brave: {
49
76
  aliases: new Set(['brave']),
50
77
  enabled: (cfg) => Boolean(cfg.brave?.enabled && cfg.brave?.api_key),
51
78
  run: brave.search,
79
+ defaultProvider: true,
52
80
  },
53
81
  mojeek: {
54
82
  aliases: new Set(['mojeek']),
55
83
  enabled: (cfg) => Boolean(cfg.mojeek?.enabled && cfg.mojeek?.api_key),
56
84
  run: mojeek.search,
85
+ defaultProvider: true,
57
86
  },
58
87
  searxng: {
59
88
  aliases: new Set(['searxng', 'searx']),
60
89
  enabled: (cfg) => Boolean(cfg.searxng?.enabled && cfg.searxng?.url),
61
90
  run: searxng.search,
91
+ defaultProvider: true,
92
+ },
93
+ github: {
94
+ aliases: new Set(['github', 'github-api']),
95
+ enabled: (_cfg) => true,
96
+ run: github.search,
97
+ defaultProvider: false,
62
98
  },
63
99
  };
64
100
 
101
/**
 * Create the module-level search and document caches.
 *
 * Must run before search(), which throws while the search cache is unset.
 *
 * @param {string} dataDir - Base directory; caches live under
 *   `<dataDir>/cache/search` and `<dataDir>/cache/docs`.
 * @param {object} cfg - Configuration; the sizing values are read from
 *   `cfg.search` and handed to makeTieredCache in the order shown below.
 */
export function initCaches(dataDir, cfg) {
  const sc = cfg.search;
  _searchCache = makeTieredCache(
    sc.cache_l1_max_search,
    path.join(dataDir, 'cache', 'search'),
    sc.disk_max_search_entries,
    sc.disk_max_search_bytes,
  );
  _docCache = makeTieredCache(
    sc.cache_l1_max_docs,
    path.join(dataDir, 'cache', 'docs'),
    sc.disk_max_doc_entries,
    sc.disk_max_doc_bytes,
  );
}
116
+
117
/**
 * Return the module-level document cache.
 *
 * @returns {object|null} The cache created by initCaches(), or null when
 *   initCaches() has not been called yet.
 */
export function getDocCache() {
  return _docCache;
}
120
+
121
/**
 * Canonicalise an engine name: stringify, trim, lower-case.
 *
 * @param {*} engine - Raw engine name; any falsy value yields ''.
 * @returns {string} Normalized name, possibly empty.
 */
function normalizeEngineName(engine) {
  return String(engine || '').trim().toLowerCase();
}
124
+
65
125
  function normalizeRequestedEngines(input) {
66
126
  if (!Array.isArray(input)) return [];
67
127
  return [...new Set(
68
128
  input
69
- .map((item) => String(item || '').trim().toLowerCase())
129
+ .map((item) => normalizeEngineName(item))
70
130
  .filter(Boolean)
71
131
  )];
72
132
  }
73
133
 
74
- function resolveProviderPlan(cfg, requestedEngines = []) {
134
+ function resolveProviderPlan(cfg, requestedEngines = [], category = 'web') {
75
135
  const requested = normalizeRequestedEngines(requestedEngines);
76
136
  const enabledProviders = Object.keys(PROVIDER_REGISTRY).filter((name) => PROVIDER_REGISTRY[name].enabled(cfg));
137
+
138
+ const defaultProviders = enabledProviders.filter((name) => PROVIDER_REGISTRY[name].defaultProvider !== false);
139
+
77
140
  if (requested.length === 0) {
78
- return { providers: enabledProviders, searxEngines: [] };
141
+ return {
142
+ providers: defaultProviders,
143
+ searxEngines: category === 'web' && defaultProviders.includes('searxng') ? CURATED_WEB_ENGINES.slice() : [],
144
+ };
79
145
  }
80
146
 
81
147
  const explicitProviders = new Set();
@@ -96,35 +162,243 @@ function resolveProviderPlan(cfg, requestedEngines = []) {
96
162
 
97
163
  const providers = [...explicitProviders].filter((name) => enabledProviders.includes(name));
98
164
  if (providers.length === 0) {
99
- return { providers: enabledProviders, searxEngines: [] };
165
+ return {
166
+ providers: defaultProviders,
167
+ searxEngines: category === 'web' && defaultProviders.includes('searxng') ? CURATED_WEB_ENGINES.slice() : [],
168
+ };
100
169
  }
170
+
101
171
  return { providers, searxEngines };
102
172
  }
103
173
 
104
- async function runProvider(name, args) {
174
/**
 * Map a free-form failure reason onto a coarse failure kind.
 *
 * Matching is case-insensitive substring search, checked in the order below;
 * the first hit wins.
 *
 * @param {*} reason - Error message or reason text.
 * @returns {('captcha'|'too_many_requests'|'access_denied'|'timeout'|'network'|'other'|null)}
 *   null when the reason is empty, otherwise the matching kind.
 */
function classifyEngineFailure(reason) {
  const raw = String(reason || '').toLowerCase();
  if (!raw) return null;

  const mentions = (...needles) => needles.some((needle) => raw.includes(needle));

  if (mentions('captcha')) return 'captcha';
  // Rate limiting is tested before 403: some APIs (GitHub among them) answer
  // a rate-limited request with HTTP 403 and a "rate limit exceeded" body.
  if (mentions('429', 'too many', 'rate limit')) return 'too_many_requests';
  if (mentions('403', 'access denied')) return 'access_denied';
  // "timed out" / ETIMEDOUT do not contain the substring "timeout".
  if (mentions('timeout', 'timed out', 'etimedout', 'aborted')) return 'timeout';
  if (mentions('unreachable', 'network', 'fetch failed', 'econnrefused', 'econnreset', 'enotfound')) return 'network';
  return 'other';
}
184
+
185
/**
 * Append one success/failure sample to an engine's rolling health record.
 *
 * @param {*} engine - Engine name; normalized before use, ignored when empty.
 * @param {boolean} ok - Whether the engine answered successfully.
 * @param {string|null} [reason=null] - Failure reason; only used when ok is
 *   falsy, to update the per-kind counters and the last failure kind.
 */
function recordEngineOutcome(engine, ok, reason = null) {
  const key = normalizeEngineName(engine);
  if (!key) return;

  const entry = ENGINE_HEALTH_STORE.get(key) || {
    history: [],
    failureKinds: {},
    lastFailure: null,
    updatedAt: 0,
  };

  // history is a sliding window of 1 (success) / 0 (failure) samples.
  entry.history.push(ok ? 1 : 0);
  if (entry.history.length > ENGINE_HEALTH_HISTORY_LIMIT) entry.history.shift();

  if (!ok && reason) {
    const kind = classifyEngineFailure(reason) || 'other';
    entry.failureKinds[kind] = Number(entry.failureKinds[kind] || 0) + 1;
    entry.lastFailure = kind;
  }

  entry.updatedAt = Date.now();
  ENGINE_HEALTH_STORE.set(key, entry);
}
208
+
209
/**
 * Summarise the recorded health of one engine.
 *
 * Status rules, applied to the retained sample window:
 *   - no samples                               -> 'unknown',  penalty 0
 *   - at least 2 samples and success rate < 0.5 -> 'poor',     penalty 1.2
 *   - success rate < 0.75                       -> 'unstable', penalty 0.55
 *   - otherwise                                 -> 'healthy',  penalty 0
 *
 * @param {*} engine - Engine name; normalized before lookup.
 * @returns {{status: string, samples: number, successRate: (number|null),
 *   penalty: number, lastFailure: (string|null), failureKinds: object}}
 *   successRate is rounded to two decimals. failureKinds is a copy, so
 *   callers (and cached responses) never alias the live health record.
 */
function getEngineHealth(engine) {
  const key = normalizeEngineName(engine);
  const entry = ENGINE_HEALTH_STORE.get(key);
  if (!entry || entry.history.length === 0) {
    return { status: 'unknown', samples: 0, successRate: null, penalty: 0, lastFailure: null, failureKinds: {} };
  }

  const samples = entry.history.length;
  const successes = entry.history.reduce((sum, v) => sum + v, 0);
  const successRate = successes / samples;

  let status = 'healthy';
  let penalty = 0;
  if (samples >= 2 && successRate < 0.5) {
    status = 'poor';
    penalty = 1.2;
  } else if (successRate < 0.75) {
    status = 'unstable';
    penalty = 0.55;
  }

  return {
    status,
    samples,
    successRate: Number(successRate.toFixed(2)),
    penalty,
    lastFailure: entry.lastFailure,
    failureKinds: { ...entry.failureKinds },
  };
}
251
+
252
/**
 * Build a health report for a list of engines.
 *
 * @param {Array} [engines=[]] - Engine names; normalized, blanks skipped,
 *   duplicates reported once.
 * @returns {Object<string, object>} Map of normalized engine name to the
 *   object returned by getEngineHealth().
 */
function getEngineHealthSummary(engines = []) {
  const out = {};
  for (const engine of engines) {
    const key = normalizeEngineName(engine);
    // Own-property check: a plain truthiness test on out[key] would treat
    // inherited members such as 'constructor' as already reported.
    if (!key || Object.prototype.hasOwnProperty.call(out, key)) continue;
    out[key] = getEngineHealth(key);
  }
  return out;
}
261
+
262
/**
 * Share one promise between concurrent callers that use the same key.
 *
 * If a promise is already registered under `key` it is returned as is and
 * `factory` is not called. Otherwise `factory` is started on a later
 * microtask, its promise is registered, and the registration is removed once
 * the promise settles (fulfilled or rejected).
 *
 * @param {string} key - Identity of the work being performed.
 * @param {Function} factory - Produces the result (may be async).
 * @returns {Promise<*>} Promise for the factory's result.
 */
function withInflightSearch(key, factory) {
  if (INFLIGHT_SEARCH_STORE.has(key)) return INFLIGHT_SEARCH_STORE.get(key);
  const promise = Promise.resolve()
    .then(factory)
    .finally(() => {
      // Only remove our own registration, never a newer one under the same key.
      if (INFLIGHT_SEARCH_STORE.get(key) === promise) INFLIGHT_SEARCH_STORE.delete(key);
    });
  INFLIGHT_SEARCH_STORE.set(key, promise);
  return promise;
}
272
+
273
/**
 * Coerce whatever a provider returned into `{ results, meta }`.
 *
 * Accepted shapes:
 *   - an array             -> used as results, meta is empty
 *   - an object            -> `results` (if it is an array) and `_meta`
 *   - anything else        -> no results, meta.error = 'provider_invalid_payload'
 *
 * @param {*} payload - Raw provider return value.
 * @returns {{results: Array, meta: object}} Normalized payload.
 */
function normalizeProviderPayload(payload) {
  if (Array.isArray(payload)) return { results: payload, meta: {} };
  if (payload && typeof payload === 'object') {
    return {
      results: Array.isArray(payload.results) ? payload.results : [],
      meta: payload._meta || {},
    };
  }
  return { results: [], meta: { error: 'provider_invalid_payload' } };
}
283
+
284
/**
 * Run one registered provider and report which engines answered or failed.
 *
 * Never rejects: an unknown provider or a thrown error is reported through
 * the failed* fields with an empty result list. Every responding or failing
 * engine is also fed into the health history via recordEngineOutcome().
 *
 * For 'searxng' the engines are taken from the payload: engines named on the
 * results count as responded unless listed in `_meta.unresponsive`, each
 * unresponsive engine counts as failed, and `_meta.error` marks 'searxng'
 * itself as failed. For any other provider the provider name is the engine.
 *
 * @param {string} name - Key in PROVIDER_REGISTRY.
 * @param {object} args - Arguments forwarded to the provider's run().
 * @returns {Promise<{name: string, results: Array, respondedEngines: string[],
 *   failedEngines: string[], failedDetails: Array<{engine: string, reason: string}>}>}
 */
async function runProviderDetailed(name, args) {
  const provider = PROVIDER_REGISTRY[name];
  if (!provider) {
    return {
      name,
      results: [],
      respondedEngines: [],
      failedEngines: [name],
      failedDetails: [{ engine: name, reason: 'provider_not_found' }],
    };
  }

  try {
    const payload = normalizeProviderPayload(await provider.run(args));
    const results = payload.results;
    const meta = payload.meta || {};

    const responded = new Set();
    const failed = new Set();
    const failedDetails = [];

    if (name === 'searxng') {
      // A Set removes repeated names, so one unresponsive engine yields one
      // failure detail and one health sample.
      const unresponsive = new Set(
        Array.isArray(meta.unresponsive)
          ? meta.unresponsive.map((engine) => normalizeEngineName(engine)).filter(Boolean)
          : [],
      );
      const unresponsiveDetails = Array.isArray(meta.unresponsiveDetails) ? meta.unresponsiveDetails : [];

      for (const item of results) {
        const eng = normalizeEngineName(item?.engine);
        if (!eng || unresponsive.has(eng)) continue;
        responded.add(eng);
      }

      for (const engine of unresponsive) {
        failed.add(engine);
        const detail = unresponsiveDetails.find((entry) => normalizeEngineName(entry?.engine) === engine);
        failedDetails.push({ engine, reason: String(detail?.reason || 'unresponsive') });
      }

      if (meta.error) {
        failed.add('searxng');
        failedDetails.push({ engine: 'searxng', reason: String(meta.error) });
      }
    } else if (meta.error) {
      failed.add(name);
      failedDetails.push({ engine: name, reason: String(meta.error) });
    } else {
      responded.add(name);
    }

    for (const engine of responded) recordEngineOutcome(engine, true);
    for (const detail of failedDetails) recordEngineOutcome(detail.engine, false, detail.reason);

    return {
      name,
      results,
      respondedEngines: [...responded],
      failedEngines: [...failed],
      failedDetails,
    };
  } catch (error) {
    const reason = String(error?.message || 'provider_failed');
    recordEngineOutcome(name, false, reason);
    return {
      name,
      results: [],
      respondedEngines: [],
      failedEngines: [name],
      failedDetails: [{ engine: name, reason }],
    };
  }
}
113
354
 
114
- // Run a search across all enabled providers and return merged, ranked results
115
- export async function search({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
116
- if (!_searchCache) throw new Error('Caches not initialized — call initCaches() first');
355
/**
 * Build the engine list that goes into a search cache key.
 *
 * @param {string[]} providerList - Provider names in the plan.
 * @param {string[]} [searxEngines=[]] - SearXNG engines in the plan; each is
 *   prefixed with 'searx:' so it cannot collide with a provider name.
 * @returns {string[]} Providers followed by the prefixed SearXNG engines.
 */
function cacheEngineList(providerList, searxEngines = []) {
  const searx = searxEngines.map((engine) => `searx:${engine}`);
  return [...providerList, ...searx];
}
359
+
360
/**
 * Assemble the engine statistics attached to a search response.
 *
 * @param {Array} [respondedEngines=[]] - Engines that answered.
 * @param {Array} [failedEngines=[]] - Engines that failed.
 * @param {Array<{engine: *, reason: *}>} [failedDetails=[]] - Failure reasons.
 * @returns {{responded: string[], failed: string[],
 *   failedDetails: Array<{engine: string, reason: string}>, degraded: boolean,
 *   unstable: string[], health: object}}
 *   Names are normalized and de-duplicated; `degraded` is true when at least
 *   one engine failed; `unstable` is the sorted list of engines whose health
 *   status is 'unstable' or 'poor'.
 */
function buildEngineStats(respondedEngines = [], failedEngines = [], failedDetails = []) {
  const responded = [...new Set(respondedEngines.map((engine) => normalizeEngineName(engine)).filter(Boolean))];
  const failed = [...new Set(failedEngines.map((engine) => normalizeEngineName(engine)).filter(Boolean))];
  const details = failedDetails
    .map((item) => ({
      engine: normalizeEngineName(item?.engine),
      reason: String(item?.reason || ''),
    }))
    .filter((item) => item.engine);

  const health = getEngineHealthSummary([...responded, ...failed]);
  const unstable = Object.entries(health)
    .filter(([, meta]) => meta.status === 'unstable' || meta.status === 'poor')
    .map(([engine]) => engine)
    .sort();

  return {
    responded,
    failed,
    failedDetails: details,
    degraded: failed.length > 0,
    unstable,
    health,
  };
}
117
385
 
118
- const plan = resolveProviderPlan(cfg, engines);
386
+ async function runSearchBatch({ query, lang, safe, page, category, engines, cfg }) {
387
+ const plan = resolveProviderPlan(cfg, engines, category);
119
388
  const providerList = plan.providers;
120
389
  const timeoutMs = cfg.search.timeout_ms;
121
- const cacheEngines = providerList.length ? providerList : ['none'];
122
- const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
123
- const cached = _searchCache.get(cacheKey);
124
- if (cached) return cached;
125
390
 
126
- const tasks = providerList.map((providerName) =>
127
- runProvider(providerName, {
391
+ if (providerList.length === 0) {
392
+ return {
393
+ results: [],
394
+ providers: [],
395
+ engineStats: buildEngineStats([], [], []),
396
+ category,
397
+ };
398
+ }
399
+
400
+ const runs = await Promise.all(providerList.map((providerName) =>
401
+ runProviderDetailed(providerName, {
128
402
  query,
129
403
  lang,
130
404
  safe,
@@ -134,39 +408,66 @@ export async function search({ query, lang = 'en-US', safe = '1', page = 1, cate
134
408
  timeoutMs,
135
409
  engines: providerName === 'searxng' ? plan.searxEngines : [],
136
410
  })
137
- );
138
-
139
- const allResults = await Promise.all(tasks);
411
+ ));
140
412
 
141
- // Merge all provider results
142
413
  let merged = [];
143
- for (const provResults of allResults) {
144
- merged = mergeSearchResultSets(merged, provResults);
145
- }
414
+ const responded = [];
415
+ const failed = [];
416
+ const failedDetails = [];
146
417
 
147
- // Rank by source diversity
148
- const ranked = rankResultsBySourceDiversity(merged);
418
+ for (const run of runs) {
419
+ merged = mergeSearchResultSets(merged, run.results);
420
+ responded.push(...run.respondedEngines);
421
+ failed.push(...run.failedEngines);
422
+ failedDetails.push(...run.failedDetails);
423
+ }
149
424
 
150
- const response = {
151
- results: ranked,
152
- query,
153
- lang,
154
- page: Number(page),
155
- total: ranked.length,
425
+ return {
426
+ results: rankResultsBySourceDiversity(merged),
156
427
  providers: providerList,
428
+ engineStats: buildEngineStats(responded, failed, failedDetails),
157
429
  category,
430
+ searxEngines: plan.searxEngines,
158
431
  };
432
+ }
433
+
434
/**
 * Run a search across enabled providers and return merged, ranked results.
 *
 * A cached response is returned when present. Otherwise the search runs
 * through withInflightSearch(), so concurrent identical requests share one
 * provider fan-out, and the response is written to the cache once, by the
 * call that actually performed the search.
 *
 * @param {object} params
 * @param {string} params.query - Search text.
 * @param {string} [params.lang='en-US'] - Language code.
 * @param {string} [params.safe='1'] - Safe-search setting.
 * @param {number} [params.page=1] - Result page.
 * @param {string} [params.category='web'] - Search category.
 * @param {string[]} [params.engines=[]] - Explicitly requested engines.
 * @param {object} cfg - Configuration; `cfg.search.cache_ttl_search_ms` is
 *   the cache lifetime of the response.
 * @returns {Promise<object>} `{ results, query, lang, page, total, providers,
 *   category, degraded, engineStats }`.
 * @throws {Error} When initCaches() has not been called.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
  if (!_searchCache) throw new Error('Caches not initialized — call initCaches() first');

  const plan = resolveProviderPlan(cfg, engines, category);
  const cacheEngines = cacheEngineList(plan.providers, plan.searxEngines);
  const cacheKey = searchCacheKey(query, lang, safe, cacheEngines.length ? cacheEngines : ['none'], 'full', category, page);
  const cached = _searchCache.get(cacheKey);
  if (cached) return cached;

  return withInflightSearch(`search:${cacheKey}`, async () => {
    const fresh = await runSearchBatch({ query, lang, safe, page, category, engines, cfg });
    const response = {
      results: fresh.results,
      query,
      lang,
      page: Number(page),
      total: fresh.results.length,
      providers: fresh.providers,
      category,
      degraded: fresh.engineStats.degraded,
      engineStats: fresh.engineStats,
    };
    // Stored here rather than after the await in each caller, so callers that
    // merely joined the in-flight promise do not rewrite the same entry.
    _searchCache.set(cacheKey, response, cfg.search.cache_ttl_search_ms);
    return response;
  });
}
163
462
 
164
- // Streaming search: returns fast results first (DDG), then merges full results
463
+ // Streaming search: returns fast results first, then merged full results
165
464
  export async function* searchStream({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
166
- const plan = resolveProviderPlan(cfg, engines);
465
+ const plan = resolveProviderPlan(cfg, engines, category);
167
466
  const providerList = plan.providers;
467
+
168
468
  if (providerList.length === 0) {
169
- yield { tier: 'full', results: [], providers: [] };
469
+ const emptyStats = buildEngineStats([], [], []);
470
+ yield { tier: 'full', results: [], providers: [], degraded: emptyStats.degraded, engineStats: emptyStats };
170
471
  return;
171
472
  }
172
473
 
@@ -174,7 +475,8 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
174
475
  const fastProvider = providerList.includes('duckduckgo')
175
476
  ? 'duckduckgo'
176
477
  : providerList[0];
177
- const fastResults = await runProvider(fastProvider, {
478
+
479
+ const fastRun = await runProviderDetailed(fastProvider, {
178
480
  query,
179
481
  lang,
180
482
  safe,
@@ -184,13 +486,23 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
184
486
  timeoutMs,
185
487
  engines: fastProvider === 'searxng' ? plan.searxEngines : [],
186
488
  });
187
- if (fastResults.length > 0) {
188
- yield { tier: 'fast', results: rankResultsBySourceDiversity(fastResults), providers: [fastProvider] };
489
+
490
+ const fastRanked = rankResultsBySourceDiversity(fastRun.results);
491
+ const fastStats = buildEngineStats(fastRun.respondedEngines, fastRun.failedEngines, fastRun.failedDetails);
492
+
493
+ if (fastRanked.length > 0) {
494
+ yield {
495
+ tier: 'fast',
496
+ results: fastRanked,
497
+ providers: [fastProvider],
498
+ degraded: fastStats.degraded,
499
+ engineStats: fastStats,
500
+ };
189
501
  }
190
502
 
191
503
  const remainingProviders = providerList.filter((name) => name !== fastProvider);
192
- const tasks = remainingProviders.map((providerName) =>
193
- runProvider(providerName, {
504
+ const additionalRuns = await Promise.all(remainingProviders.map((providerName) =>
505
+ runProviderDetailed(providerName, {
194
506
  query,
195
507
  lang,
196
508
  safe,
@@ -200,26 +512,44 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
200
512
  timeoutMs,
201
513
  engines: providerName === 'searxng' ? plan.searxEngines : [],
202
514
  })
203
- );
515
+ ));
204
516
 
205
- const additional = await Promise.all(tasks);
206
- let full = fastResults.slice();
207
- for (const r of additional) {
208
- full = mergeSearchResultSets(full, r);
517
+ let full = fastRun.results.slice();
518
+ const responded = [...fastRun.respondedEngines];
519
+ const failed = [...fastRun.failedEngines];
520
+ const failedDetails = [...fastRun.failedDetails];
521
+
522
+ for (const run of additionalRuns) {
523
+ full = mergeSearchResultSets(full, run.results);
524
+ responded.push(...run.respondedEngines);
525
+ failed.push(...run.failedEngines);
526
+ failedDetails.push(...run.failedDetails);
209
527
  }
528
+
210
529
  const fullRanked = rankResultsBySourceDiversity(full);
530
+ const engineStats = buildEngineStats(responded, failed, failedDetails);
211
531
 
212
- // Cache the full result
213
- const cacheEngines = providerList.length ? providerList : ['none'];
214
- const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
532
+ const cacheEngines = cacheEngineList(providerList, plan.searxEngines);
533
+ const cacheKey = searchCacheKey(query, lang, safe, cacheEngines.length ? cacheEngines : ['none'], 'full', category, page);
215
534
  _searchCache?.set(cacheKey, {
216
535
  results: fullRanked,
217
- query, lang, page: Number(page), total: fullRanked.length,
536
+ query,
537
+ lang,
538
+ page: Number(page),
539
+ total: fullRanked.length,
218
540
  providers: providerList,
219
541
  category,
542
+ degraded: engineStats.degraded,
543
+ engineStats,
220
544
  }, cfg.search.cache_ttl_search_ms);
221
545
 
222
- yield { tier: 'full', results: fullRanked, providers: providerList };
546
+ yield {
547
+ tier: 'full',
548
+ results: fullRanked,
549
+ providers: providerList,
550
+ degraded: engineStats.degraded,
551
+ engineStats,
552
+ };
223
553
  }
224
554
 
225
555
  export function getEnabledProviders(cfg) {
@@ -227,5 +557,6 @@ export function getEnabledProviders(cfg) {
227
557
  if (cfg.brave?.enabled && cfg.brave?.api_key) providers.push('brave');
228
558
  if (cfg.mojeek?.enabled && cfg.mojeek?.api_key) providers.push('mojeek');
229
559
  if (cfg.searxng?.enabled && cfg.searxng?.url) providers.push('searxng');
560
+ providers.push('github-api');
230
561
  return providers;
231
562
  }
@@ -0,0 +1,91 @@
1
// GitHub Search API provider — optional fallback when SearXNG is unavailable.
// Works without a token (rate-limited by GitHub). A token can be supplied via
// the TERMSEARCH_GITHUB_TOKEN environment variable or config.github.api_key.

// Base URL of the GitHub REST API.
const GITHUB_API = 'https://api.github.com';
5
+
6
/**
 * Build the request headers for the GitHub API.
 *
 * The token is taken from the TERMSEARCH_GITHUB_TOKEN environment variable,
 * falling back to `config.github.api_key`. Both are trimmed, because stray
 * whitespace or a trailing newline would make the Authorization header
 * invalid. Without a token no Authorization header is sent.
 *
 * @param {object} [config={}] - Application configuration.
 * @returns {Object<string, string>} Header map.
 */
function buildHeaders(config = {}) {
  const envToken = String(process.env.TERMSEARCH_GITHUB_TOKEN || '').trim();
  const configToken = String(config?.github?.api_key || '').trim();
  const token = envToken || configToken;
  const headers = {
    Accept: 'application/vnd.github+json',
    'User-Agent': 'TermSearch/1.0',
  };
  if (token) headers.Authorization = `Bearer ${token}`;
  return headers;
}
15
+
16
/**
 * GET a URL and parse the response body as JSON, with a hard time limit.
 *
 * @param {string} url - Request URL.
 * @param {object} options
 * @param {object} options.headers - Request headers.
 * @param {number} [options.timeoutMs=12000] - Abort the request after this
 *   many milliseconds.
 * @returns {Promise<*>} Parsed JSON body.
 * @throws {Error} `github_http_<status>:<first 140 chars of body>` for a
 *   non-2xx response; the fetch/abort error otherwise.
 */
async function fetchJson(url, { headers, timeoutMs = 12000 }) {
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const response = await fetch(url, { headers, signal: ac.signal });
    if (!response.ok) {
      const body = await response.text().catch(() => '');
      throw new Error(`github_http_${response.status}:${body.slice(0, 140)}`);
    }
    // Awaited inside the try so the abort timer stays armed until the body
    // has been read; returning the bare promise would run `finally` (and
    // clear the timer) before the body download finishes.
    return await response.json();
  } finally {
    clearTimeout(timer);
  }
}
30
+
31
/**
 * Convert a GitHub repository search item into a search result.
 *
 * The snippet is "<description> — <language> · ★ <stars> · forks <forks>",
 * with the description and language left out when empty. The score starts at
 * 1.0 and grows linearly with stars up to 2.0 at 10,000 stars. Missing or
 * non-numeric counts are treated as 0 so the score is never NaN.
 *
 * @param {object} item - Repository item from the API response.
 * @returns {{title: string, url: string, snippet: string, engine: string,
 *   score: number, publishedDate: (string|null)}}
 */
function mapRepo(item) {
  const stars = Number(item?.stargazers_count) || 0;
  const forks = Number(item?.forks_count) || 0;
  const lang = String(item?.language || '').trim();
  const desc = String(item?.description || '').trim();

  const metaParts = [`★ ${stars}`, `forks ${forks}`];
  if (lang) metaParts.unshift(lang);
  const meta = metaParts.join(' · ');

  return {
    title: item?.full_name || item?.name || 'GitHub repository',
    url: item?.html_url || '',
    snippet: desc ? `${desc} — ${meta}` : meta,
    engine: 'github-api',
    score: 1.0 + Math.min(stars / 10000, 1.0),
    publishedDate: item?.updated_at || item?.pushed_at || null,
  };
}
50
+
51
/**
 * Convert a GitHub user search item into a search result.
 *
 * @param {object} item - User item from the API response.
 * @returns {{title: string, url: string, snippet: string, engine: string,
 *   score: number, publishedDate: null}} Result with a fixed score of 0.8.
 */
function mapUser(item) {
  return {
    title: item?.login ? `@${item.login} · GitHub` : 'GitHub user',
    url: item?.html_url || '',
    snippet: item?.type ? `${item.type} profile on GitHub` : 'GitHub profile',
    engine: 'github-api',
    score: 0.8,
    publishedDate: null,
  };
}
61
+
62
/**
 * Search GitHub repositories and users for a query.
 *
 * Both endpoints are queried in parallel: up to 8 repositories ordered by
 * stars and up to 4 users ordered by followers. Items without a URL are
 * dropped. If either request fails, the whole search rejects.
 *
 * @param {object} params
 * @param {string} params.query - Search text; blank input returns [].
 * @param {number} [params.page=1] - Result page; values below 1 or
 *   non-numeric values are treated as page 1.
 * @param {object} [params.config] - Configuration used to build the headers.
 * @param {number} [params.timeoutMs=12000] - Per-request timeout.
 * @returns {Promise<Array>} Repository results followed by user results.
 */
export async function search({ query, page = 1, config, timeoutMs = 12000 }) {
  const q = String(query || '').trim();
  if (!q) return [];

  const headers = buildHeaders(config);
  const pageNo = Math.max(1, Number(page) || 1);
  const repoParams = new URLSearchParams({
    q,
    per_page: '8',
    page: String(pageNo),
    sort: 'stars',
    order: 'desc',
  });
  const usersParams = new URLSearchParams({
    q,
    per_page: '4',
    page: String(pageNo),
    sort: 'followers',
    order: 'desc',
  });

  const [reposData, usersData] = await Promise.all([
    fetchJson(`${GITHUB_API}/search/repositories?${repoParams.toString()}`, { headers, timeoutMs }),
    fetchJson(`${GITHUB_API}/search/users?${usersParams.toString()}`, { headers, timeoutMs }),
  ]);

  const repos = Array.isArray(reposData?.items) ? reposData.items.map(mapRepo).filter((r) => r.url) : [];
  const users = Array.isArray(usersData?.items) ? usersData.items.map(mapUser).filter((u) => u.url) : [];
  return [...repos, ...users];
}