termsearch 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +205 -0
- package/bin/termsearch.js +433 -0
- package/config.example.json +31 -0
- package/frontend/dist/app.js +1051 -0
- package/frontend/dist/icon-192.png +0 -0
- package/frontend/dist/icon-512.png +0 -0
- package/frontend/dist/icon.svg +8 -0
- package/frontend/dist/index.html +28 -0
- package/frontend/dist/manifest.json +40 -0
- package/frontend/dist/opensearch.xml +8 -0
- package/frontend/dist/style.css +756 -0
- package/package.json +48 -0
- package/scripts/postinstall.js +84 -0
- package/src/ai/orchestrator.js +163 -0
- package/src/ai/providers/openai-compat.js +255 -0
- package/src/ai/query.js +54 -0
- package/src/ai/summary.js +120 -0
- package/src/api/middleware.js +91 -0
- package/src/api/routes.js +461 -0
- package/src/autostart/manager.js +207 -0
- package/src/config/defaults.js +62 -0
- package/src/config/manager.js +188 -0
- package/src/fetch/document.js +297 -0
- package/src/fetch/ssrf-guard.js +40 -0
- package/src/profiler/scanner.js +212 -0
- package/src/search/cache.js +119 -0
- package/src/search/engine.js +231 -0
- package/src/search/providers/brave.js +57 -0
- package/src/search/providers/duckduckgo.js +148 -0
- package/src/search/providers/mojeek.js +56 -0
- package/src/search/providers/searxng.js +53 -0
- package/src/search/providers/wikipedia.js +70 -0
- package/src/search/ranking.js +155 -0
- package/src/server.js +68 -0
- package/src/social/scrapers.js +356 -0
- package/src/social/search.js +77 -0
- package/src/torrent/scrapers.js +125 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
// Search orchestrator — fan-out to all enabled providers, merge, rank, cache
|
|
2
|
+
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { makeTieredCache, searchCacheKey } from './cache.js';
|
|
5
|
+
import { mergeSearchResultSets, rankResultsBySourceDiversity } from './ranking.js';
|
|
6
|
+
import * as ddg from './providers/duckduckgo.js';
|
|
7
|
+
import * as wikipedia from './providers/wikipedia.js';
|
|
8
|
+
import * as brave from './providers/brave.js';
|
|
9
|
+
import * as mojeek from './providers/mojeek.js';
|
|
10
|
+
import * as searxng from './providers/searxng.js';
|
|
11
|
+
|
|
12
|
+
// Module-level cache handles. They stay null until initCaches() has run.
let _searchCache = null;
let _docCache = null;
let _dataDir = null;

/**
 * Create the search-result cache and the document cache.
 *
 * Both caches are built with makeTieredCache(); their directories are
 * `<dataDir>/cache/search` and `<dataDir>/cache/docs`, and their limits are
 * read from `cfg.search`.
 *
 * @param {string} dataDir - Base directory under which the cache folders live.
 * @param {object} cfg - Application config; only `cfg.search` is read here.
 */
export function initCaches(dataDir, cfg) {
  _dataDir = dataDir;

  const limits = cfg.search;
  const cacheDir = (leaf) => path.join(dataDir, 'cache', leaf);

  _searchCache = makeTieredCache(
    limits.cache_l1_max_search,
    cacheDir('search'),
    limits.disk_max_search_entries,
    limits.disk_max_search_bytes,
  );

  _docCache = makeTieredCache(
    limits.cache_l1_max_docs,
    cacheDir('docs'),
    limits.disk_max_doc_entries,
    limits.disk_max_doc_bytes,
  );
}
|
|
32
|
+
|
|
33
|
+
/**
 * Accessor for the document cache handle.
 *
 * @returns {object|null} Whatever `_docCache` currently holds.
 */
export function getDocCache() {
  const docCache = _docCache;
  return docCache;
}
|
|
36
|
+
|
|
37
|
+
// Table of search backends. Each entry carries:
//   aliases - engine names (lower-case) that select this provider,
//   enabled - predicate over the config deciding whether the provider may run,
//   run     - the provider module's search function.
const PROVIDER_REGISTRY = {
  // DuckDuckGo and Wikipedia are enabled regardless of config.
  duckduckgo: {
    aliases: new Set(['duckduckgo', 'ddg']),
    enabled() {
      return true;
    },
    run: ddg.search,
  },
  wikipedia: {
    aliases: new Set(['wikipedia', 'wiki']),
    enabled() {
      return true;
    },
    run: wikipedia.search,
  },
  // Brave and Mojeek need both the enabled flag and an API key.
  brave: {
    aliases: new Set(['brave']),
    enabled(cfg) {
      return !!(cfg.brave?.enabled && cfg.brave?.api_key);
    },
    run: brave.search,
  },
  mojeek: {
    aliases: new Set(['mojeek']),
    enabled(cfg) {
      return !!(cfg.mojeek?.enabled && cfg.mojeek?.api_key);
    },
    run: mojeek.search,
  },
  // SearXNG needs the enabled flag and an instance URL.
  searxng: {
    aliases: new Set(['searxng', 'searx']),
    enabled(cfg) {
      return !!(cfg.searxng?.enabled && cfg.searxng?.url);
    },
    run: searxng.search,
  },
};
|
|
64
|
+
|
|
65
|
+
/**
 * Clean up a caller-supplied engine list.
 *
 * Each entry is stringified, trimmed and lower-cased; empty entries are
 * dropped and duplicates removed, keeping first-seen order.
 *
 * @param {*} input - Expected to be an array; anything else yields [].
 * @returns {string[]} Unique, normalized engine names.
 */
function normalizeRequestedEngines(input) {
  const unique = new Set();
  if (Array.isArray(input)) {
    for (const raw of input) {
      const name = String(raw || '').trim().toLowerCase();
      if (name) unique.add(name);
    }
  }
  return Array.from(unique);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Decide which providers to query for a request.
 *
 * - No engines requested: every enabled provider runs.
 * - Requested names matching an enabled provider's alias select that provider.
 * - Names matching no enabled provider are collected as `searxEngines`;
 *   if SearXNG is enabled it is added to the plan so they can be forwarded.
 * - If nothing ends up selected, the plan falls back to every enabled provider
 *   with an empty `searxEngines` list.
 *
 * @param {object} cfg - Application config handed to each `enabled` predicate.
 * @param {string[]} [requestedEngines] - Engine names asked for by the caller.
 * @returns {{providers: string[], searxEngines: string[]}}
 */
function resolveProviderPlan(cfg, requestedEngines = []) {
  const wanted = normalizeRequestedEngines(requestedEngines);
  const enabledProviders = Object.entries(PROVIDER_REGISTRY)
    .filter(([, provider]) => provider.enabled(cfg))
    .map(([name]) => name);
  const everythingEnabled = { providers: enabledProviders, searxEngines: [] };

  if (wanted.length === 0) return everythingEnabled;

  const chosen = new Set();
  const searxEngines = [];
  wanted.forEach((engine) => {
    const owner = enabledProviders.find((name) => PROVIDER_REGISTRY[name].aliases.has(engine));
    if (owner) {
      chosen.add(owner);
      return;
    }
    searxEngines.push(engine);
  });

  if (searxEngines.length > 0 && enabledProviders.includes('searxng')) {
    chosen.add('searxng');
  }

  const providers = [...chosen].filter((name) => enabledProviders.includes(name));
  return providers.length === 0 ? everythingEnabled : { providers, searxEngines };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Invoke one provider from PROVIDER_REGISTRY on a best-effort basis.
 *
 * @param {string} name - Registry key of the provider.
 * @param {object} args - Argument object passed straight to the provider's `run`.
 * @returns {Promise<Array>} The provider's results, or [] when the name is
 *   unknown or the provider throws/rejects.
 */
async function runProvider(name, args) {
  const entry = PROVIDER_REGISTRY[name];
  if (entry) {
    try {
      const results = await entry.run(args);
      return results;
    } catch {
      // A failing provider simply contributes nothing to the merged results.
    }
  }
  return [];
}
|
|
113
|
+
|
|
114
|
+
/**
 * Run a search across the planned providers and return merged, ranked results.
 *
 * Results are served from the search cache when present; otherwise every
 * planned provider is queried in parallel, the result sets are merged and
 * ranked, and the response is cached for `cfg.search.cache_ttl_search_ms`.
 *
 * @param {object} request
 * @param {string} request.query - Search terms.
 * @param {string} [request.lang='en-US'] - Locale passed to providers.
 * @param {string} [request.safe='1'] - Safe-search level passed to providers.
 * @param {number|string} [request.page=1] - Result page.
 * @param {string} [request.category='web'] - Result category.
 * @param {string[]} [request.engines=[]] - Optional engine selection.
 * @param {object} cfg - Application config (`cfg.search.timeout_ms` and
 *   `cfg.search.cache_ttl_search_ms` are read here).
 * @returns {Promise<object>} `{ results, query, lang, page, total, providers, category }`.
 * @throws {Error} When initCaches() has not been called.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
  if (!_searchCache) throw new Error('Caches not initialized — call initCaches() first');

  const plan = resolveProviderPlan(cfg, engines);
  const providerList = plan.providers;
  const timeoutMs = cfg.search.timeout_ms;

  // Engine names forwarded to SearXNG change what it returns, so they must be
  // part of the cache key; otherwise two requests that differ only in those
  // names would share one cache entry. The key is unchanged when none are forwarded.
  const forwardedEngines = providerList.includes('searxng') ? plan.searxEngines : [];
  const cacheEngines = providerList.length
    ? [...providerList, ...forwardedEngines.map((engine) => `searxng:${engine}`)]
    : ['none'];
  const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
  const cached = _searchCache.get(cacheKey);
  if (cached) return cached;

  const tasks = providerList.map((providerName) =>
    runProvider(providerName, {
      query,
      lang,
      safe,
      page,
      category,
      config: cfg,
      timeoutMs,
      engines: providerName === 'searxng' ? plan.searxEngines : [],
    })
  );

  const allResults = await Promise.all(tasks);

  // Merge all provider results
  let merged = [];
  for (const provResults of allResults) {
    merged = mergeSearchResultSets(merged, provResults);
  }

  // Rank by source diversity
  const ranked = rankResultsBySourceDiversity(merged);

  const response = {
    results: ranked,
    query,
    lang,
    page: Number(page),
    total: ranked.length,
    providers: providerList,
    category,
  };

  _searchCache.set(cacheKey, response, cfg.search.cache_ttl_search_ms);
  return response;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Streaming search: yields a quick first tier, then the merged full result.
 *
 * The "fast" provider is DuckDuckGo when it is in the plan, otherwise the
 * first planned provider. Its ranked results are yielded as `tier: 'fast'`
 * (skipped when empty). The remaining providers are then merged in, ranked,
 * cached, and yielded as `tier: 'full'`.
 *
 * All providers are started up front so the slower ones run while the fast
 * one is awaited. runProvider() resolves to [] on failure, so the eagerly
 * started tasks cannot produce unhandled rejections.
 *
 * @param {object} request - Same fields as search(): query, lang, safe, page,
 *   category, engines.
 * @param {object} cfg - Application config.
 * @yields {{tier: 'fast'|'full', results: Array, providers: string[]}}
 */
export async function* searchStream({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [] }, cfg) {
  const plan = resolveProviderPlan(cfg, engines);
  const providerList = plan.providers;
  if (providerList.length === 0) {
    yield { tier: 'full', results: [], providers: [] };
    return;
  }

  const timeoutMs = cfg.search.timeout_ms;
  const launch = (providerName) =>
    runProvider(providerName, {
      query,
      lang,
      safe,
      page,
      category,
      config: cfg,
      timeoutMs,
      engines: providerName === 'searxng' ? plan.searxEngines : [],
    });

  const fastProvider = providerList.includes('duckduckgo')
    ? 'duckduckgo'
    : providerList[0];

  // Start everything before awaiting the fast tier so total latency is the
  // slowest provider rather than fast + slowest.
  const fastTask = launch(fastProvider);
  const remainingTasks = providerList
    .filter((name) => name !== fastProvider)
    .map((name) => launch(name));

  const fastResults = await fastTask;
  if (fastResults.length > 0) {
    yield { tier: 'fast', results: rankResultsBySourceDiversity(fastResults), providers: [fastProvider] };
  }

  const additional = await Promise.all(remainingTasks);
  let full = fastResults.slice();
  for (const r of additional) {
    full = mergeSearchResultSets(full, r);
  }
  const fullRanked = rankResultsBySourceDiversity(full);

  // Cache the full result. Engine names forwarded to SearXNG are part of the
  // key so requests differing only in those names do not share an entry.
  const forwardedEngines = providerList.includes('searxng') ? plan.searxEngines : [];
  const cacheEngines = [...providerList, ...forwardedEngines.map((engine) => `searxng:${engine}`)];
  const cacheKey = searchCacheKey(query, lang, safe, cacheEngines, 'full', category, page);
  _searchCache?.set(cacheKey, {
    results: fullRanked,
    query, lang, page: Number(page), total: fullRanked.length,
    providers: providerList,
    category,
  }, cfg.search.cache_ttl_search_ms);

  yield { tier: 'full', results: fullRanked, providers: providerList };
}
|
|
224
|
+
|
|
225
|
+
/**
 * List the names of all providers that are enabled for the given config.
 *
 * Derived from PROVIDER_REGISTRY so the enablement rules live in one place
 * and cannot drift from what resolveProviderPlan() uses. Names come back in
 * registry order.
 *
 * @param {object} cfg - Application config handed to each `enabled` predicate.
 * @returns {string[]} Enabled provider names.
 */
export function getEnabledProviders(cfg) {
  return Object.keys(PROVIDER_REGISTRY).filter((name) => PROVIDER_REGISTRY[name].enabled(cfg));
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// Brave Search API provider — requires API key (configured via web UI)
|
|
2
|
+
|
|
3
|
+
/**
 * Map a supported locale tag (e.g. 'en-US') to its two-letter language code.
 *
 * @param {string} locale - Locale tag.
 * @returns {string} The language code, or '' for any locale not in the table.
 */
function localeToIso639(locale) {
  const map = {
    'it-IT': 'it', 'en-US': 'en', 'es-ES': 'es',
    'fr-FR': 'fr', 'de-DE': 'de', 'pt-PT': 'pt',
    'ru-RU': 'ru', 'zh-CN': 'zh', 'ja-JP': 'ja',
  };
  // Own-property check: a bare map[locale] would return inherited members
  // (e.g. the Object constructor) for inputs such as 'constructor'.
  return Object.hasOwn(map, locale) ? map[locale] : '';
}
|
|
11
|
+
|
|
12
|
+
/**
 * Query the Brave Search web endpoint.
 *
 * @param {object} args
 * @param {string} args.query - Search terms.
 * @param {string} [args.lang='en-US'] - Locale; sent as `search_lang` when it
 *   maps to a known language code.
 * @param {string} [args.safe='1'] - '2' strict, '1' moderate, anything else off.
 * @param {number|string} [args.page=1] - 1-based result page.
 * @param {object} args.config - App config; reads `brave.api_key`,
 *   `brave.api_base` and `search.result_count`.
 * @param {number} [args.timeoutMs=12000] - Abort timeout for the whole request.
 * @returns {Promise<Array>} Normalized results; [] when no API key is set, the
 *   page is out of range, the response is not OK, or the request fails.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, config, timeoutMs = 12000 }) {
  const apiKey = config?.brave?.api_key;
  const apiBase = (config?.brave?.api_base || 'https://api.search.brave.com/res/v1').replace(/\/$/, '');
  if (!apiKey) return [];

  const isoLang = localeToIso639(lang);
  // Brave caps `count` at 20 and `offset` at 9; `offset` counts pages of
  // `count` results, not individual results (see Brave API reference).
  const resultCount = Math.min(config?.search?.result_count || 10, 20);
  const pageIndex = Math.max(0, (Math.trunc(Number(page)) || 1) - 1);
  if (pageIndex > 9) return [];

  const params = new URLSearchParams({
    q: query,
    count: String(resultCount),
    offset: String(pageIndex),
  });
  if (isoLang) params.set('search_lang', isoLang);
  if (safe === '2') params.set('safesearch', 'strict');
  else if (safe === '1') params.set('safesearch', 'moderate');
  else params.set('safesearch', 'off');

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${apiBase}/web/search?${params.toString()}`, {
      headers: {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip',
        'X-Subscription-Token': apiKey,
        'User-Agent': 'TermSearch/1.0',
      },
      signal: ac.signal,
    });
    if (!r.ok) return [];
    const data = await r.json();
    return (data?.web?.results || []).map((item) => ({
      title: item.title || '',
      url: item.url || '',
      snippet: item.description || '',
      engine: 'brave-api',
      score: 0,
      publishedDate: item.age || null,
      thumbnail_src: item.thumbnail?.src || null,
    }));
  } catch {
    // Best effort: network errors, aborts and bad JSON all yield no results.
    return [];
  } finally {
    // Cleared only here so the timeout also covers reading the response body.
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// DuckDuckGo HTML scraper — zero API key required
|
|
2
|
+
// Uses html.duckduckgo.com/html/ (maintained for non-JS clients / accessibility)
|
|
3
|
+
|
|
4
|
+
const DDG_ENDPOINT = 'https://html.duckduckgo.com/html/';
|
|
5
|
+
const DDG_FALLBACK = 'https://lite.duckduckgo.com/lite/';
|
|
6
|
+
|
|
7
|
+
// Browser User-Agent strings rotated across DuckDuckGo requests.
const USER_AGENTS = [
  'Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
];

/**
 * Pick one entry of USER_AGENTS at random.
 *
 * @returns {string} A User-Agent header value.
 */
function randomUA() {
  const index = Math.floor(Math.random() * USER_AGENTS.length);
  return USER_AGENTS[index];
}
|
|
16
|
+
|
|
17
|
+
/**
 * Translate the app's safe-search level into DuckDuckGo's `kp` parameter.
 *
 * DuckDuckGo uses kp=1 for strict, kp=-1 for moderate and kp=-2 for off.
 * Strict previously sent '-1', which is the moderate setting.
 *
 * @param {string} safe - '2' strict, '0' off, anything else moderate.
 * @returns {string} The `kp` value.
 */
function safeParam(safe) {
  if (safe === '2') return '1'; // strict
  if (safe === '0') return '-2'; // off
  return '-1'; // moderate (default)
}
|
|
23
|
+
|
|
24
|
+
/**
 * Translate a locale tag into DuckDuckGo's `kl` region parameter.
 *
 * @param {string} lang - Locale tag such as 'en-US'.
 * @returns {string} The region code, or 'wt-wt' for any locale not in the table.
 */
function langParam(lang) {
  const map = {
    'it-IT': 'it-it', 'en-US': 'us-en', 'es-ES': 'es-es',
    'fr-FR': 'fr-fr', 'de-DE': 'de-de', 'pt-PT': 'pt-pt',
    'ru-RU': 'ru-ru', 'zh-CN': 'cn-zh', 'ja-JP': 'jp-ja',
  };
  // Own-property check: a bare map[lang] would return inherited members
  // (e.g. the Object constructor) for inputs such as 'constructor'.
  return Object.hasOwn(map, lang) ? map[lang] : 'wt-wt';
}
|
|
33
|
+
|
|
34
|
+
/**
 * Parse one DuckDuckGo HTML result starting at `startIdx`.
 *
 * Finds the first `<a class="result__a" href="...">` at or after `startIdx`,
 * unwraps DuckDuckGo's redirect link, and looks for a `result__snippet`
 * element within the next 3000 characters.
 *
 * @param {string} html - Full result page HTML.
 * @param {number} startIdx - Index at which the result's markup begins.
 * @returns {{title: string, url: string, snippet: string}|null} null when no
 *   title link is found, the URL is not http(s), or the title is empty.
 */
function parseResult(html, startIdx) {
  const NAMED_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
  // Single-pass decoding so already-decoded text is never decoded twice.
  const decodeEntities = (text) =>
    text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (whole, ref) => {
      if (ref[0] !== '#') {
        const key = ref.toLowerCase();
        return Object.hasOwn(NAMED_ENTITIES, key) ? NAMED_ENTITIES[key] : whole;
      }
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });
  // Strip tags first, then decode, so an escaped "<" is not mistaken for markup.
  const toText = (fragment) => decodeEntities(fragment.replace(/<[^>]+>/g, '')).trim();

  // Extract result URL from <a class="result__a" href="...">
  const aMatch = html.slice(startIdx).match(/<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
  if (!aMatch) return null;

  // Attribute values are entity-escaped in HTML (e.g. "&" between query params).
  let url = decodeEntities(aMatch[1]);
  // DDG wraps URLs in redirect — extract real URL
  if (url.startsWith('//duckduckgo.com/l/?')) {
    try {
      // searchParams.get() already percent-decodes; decoding again would
      // corrupt targets containing escapes such as %25 or %2B.
      const target = new URL(`https:${url}`).searchParams.get('uddg');
      if (target) url = target;
    } catch { /* keep raw */ }
  }
  if (!url.startsWith('http')) return null;

  const title = toText(aMatch[2]);
  if (!title) return null;

  // Snippet: next <a class="result__snippet"> after the title link
  const snippetChunk = html.slice(startIdx, startIdx + 3000);
  const snippetMatch = snippetChunk.match(/class="result__snippet"[^>]*>([\s\S]*?)<\/a>/i);
  const snippet = snippetMatch ? toText(snippetMatch[1]) : '';

  return { title, url, snippet };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Parse DuckDuckGo "lite" HTML (table-based layout) into result objects.
 *
 * Links are taken from `<a class="result-link">` and snippets from
 * `<td class="result-snippet">`; the i-th snippet is paired with the i-th
 * link. At most 15 results are returned.
 *
 * @param {string} html - Lite results page HTML.
 * @returns {Array<{title: string, url: string, snippet: string, engine: string, score: number}>}
 */
function parseLiteHtml(html) {
  const NAMED_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
  // Single-pass decoding so already-decoded text is never decoded twice.
  const decodeEntities = (text) =>
    text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (whole, ref) => {
      if (ref[0] !== '#') {
        const key = ref.toLowerCase();
        return Object.hasOwn(NAMED_ENTITIES, key) ? NAMED_ENTITIES[key] : whole;
      }
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });
  // Strip tags first, then decode, so an escaped "<" is not mistaken for markup.
  const toText = (fragment) => decodeEntities(fragment.replace(/<[^>]+>/g, '')).trim();

  const results = [];
  const linkRe = /<a[^>]*class="result-link"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  const snippetRe = /<td[^>]*class="result-snippet"[^>]*>([\s\S]*?)<\/td>/gi;
  const links = [...html.matchAll(linkRe)];
  const snippets = [...html.matchAll(snippetRe)];
  for (let i = 0; i < links.length && results.length < 15; i++) {
    // Attribute values are entity-escaped in HTML (e.g. "&" between query params).
    let url = decodeEntities(links[i][1]);
    if (url.includes('duckduckgo.com/l/?')) {
      try {
        // The base resolves protocol-relative hrefs. searchParams.get() already
        // percent-decodes; decoding again would corrupt escapes such as %26.
        const target = new URL(url, 'https://duckduckgo.com').searchParams.get('uddg');
        if (target) url = target;
      } catch { /* ok */ }
    }
    if (!url.startsWith('http')) continue;
    const title = toText(links[i][2]);
    const snippet = snippets[i] ? toText(snippets[i][1]) : '';
    if (title) results.push({ title, url, snippet, engine: 'duckduckgo', score: 0 });
  }
  return results;
}
|
|
84
|
+
|
|
85
|
+
/**
 * POST a form-encoded query to a DuckDuckGo endpoint and return the HTML.
 *
 * @param {string} endpoint - URL to post to.
 * @param {string} formData - URL-encoded request body.
 * @param {number} timeoutMs - Abort timeout for the whole request.
 * @returns {Promise<string|null>} The page HTML, or null when the response is
 *   not OK, looks like a captcha/block page, or the request fails or times out.
 */
async function fetchDDG(endpoint, formData, timeoutMs) {
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'User-Agent': randomUA(),
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Origin': 'https://duckduckgo.com',
        'Referer': 'https://duckduckgo.com/',
      },
      body: formData,
      signal: ac.signal,
    });
    if (!r.ok) return null;
    const html = await r.text();
    // Detect captcha/block page
    if (html.includes('challenge-form') || html.includes('Sorry, you have been blocked')) return null;
    return html;
  } catch {
    // Best effort: network errors and aborts are reported as "no page".
    return null;
  } finally {
    // Cleared only here so the timeout also covers reading the response body.
    clearTimeout(timer);
  }
}
|
|
113
|
+
|
|
114
|
+
/**
 * Search DuckDuckGo by scraping its HTML endpoint, falling back to the lite
 * endpoint when the main one yields no usable page.
 *
 * @param {object} args
 * @param {string} args.query - Search terms.
 * @param {string} [args.lang='en-US'] - Locale, mapped via langParam().
 * @param {string} [args.safe='1'] - Safe-search level, mapped via safeParam().
 * @param {number|string} [args.page=1] - 1-based page; invalid or < 1 is treated as 1.
 * @param {number} [args.timeoutMs=12000] - Timeout applied to each request.
 * @returns {Promise<Array>} Up to 15 results tagged `engine: 'duckduckgo'`.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, timeoutMs = 12000 }) {
  const kp = safeParam(safe);
  const kl = langParam(lang);
  // Clamp so a bad page value cannot produce a negative or NaN offset.
  const pageNumber = Math.max(1, Math.trunc(Number(page)) || 1);
  const offset = String((pageNumber - 1) * 10);

  const params = new URLSearchParams({
    q: query,
    kp,
    kl,
    kf: '-1', // site icons off (faster)
    s: offset,
  });

  let html = await fetchDDG(DDG_ENDPOINT, params.toString(), timeoutMs);

  // Fallback to lite endpoint
  if (!html) {
    // Carry the safe-search and region settings over so the fallback does not
    // silently drop them.
    // NOTE(review): assumes the lite endpoint honours kp/kl like the main one — confirm.
    const liteParams = new URLSearchParams({ q: query, kp, kl, s: offset });
    html = await fetchDDG(DDG_FALLBACK, liteParams.toString(), timeoutMs);
    if (!html) return [];
    return parseLiteHtml(html);
  }

  // Parse main HTML endpoint
  const results = [];
  const resultRe = /class="results_links|class="result results_links/gi;
  let match;
  while ((match = resultRe.exec(html)) !== null && results.length < 15) {
    const r = parseResult(html, match.index);
    if (r) results.push({ ...r, engine: 'duckduckgo', score: 0 });
  }

  return results;
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// Mojeek Search API provider — requires API key (configured via web UI)
|
|
2
|
+
|
|
3
|
+
/**
 * Map a supported locale tag (e.g. 'en-US') to its two-letter language code.
 *
 * @param {string} locale - Locale tag.
 * @returns {string} The language code, or '' for any locale not in the table.
 */
function localeToIso639(locale) {
  const map = {
    'it-IT': 'it', 'en-US': 'en', 'es-ES': 'es',
    'fr-FR': 'fr', 'de-DE': 'de', 'pt-PT': 'pt',
    'ru-RU': 'ru', 'zh-CN': 'zh', 'ja-JP': 'ja',
  };
  // Own-property check: a bare map[locale] would return inherited members
  // (e.g. the Object constructor) for inputs such as 'constructor'.
  return Object.hasOwn(map, locale) ? map[locale] : '';
}
|
|
11
|
+
|
|
12
|
+
/**
 * Query the Mojeek Search API.
 *
 * @param {object} args
 * @param {string} args.query - Search terms.
 * @param {string} [args.lang='en-US'] - Locale; when it maps to a known
 *   language code, `lb` (upper-cased code) and `lbb=100` are sent.
 * @param {number|string} [args.page=1] - 1-based page; invalid or < 1 is treated as 1.
 * @param {object} args.config - App config; reads `mojeek.api_key`,
 *   `mojeek.api_base` and `search.result_count`.
 * @param {number} [args.timeoutMs=12000] - Abort timeout for the whole request.
 * @returns {Promise<Array>} Normalized results; [] when no API key is set, the
 *   response is not OK, or the request fails.
 */
export async function search({ query, lang = 'en-US', page = 1, config, timeoutMs = 12000 }) {
  const apiKey = config?.mojeek?.api_key;
  const apiBase = (config?.mojeek?.api_base || 'https://api.mojeek.com').replace(/\/$/, '');
  if (!apiKey) return [];

  const isoLang = localeToIso639(lang);
  const resultCount = config?.search?.result_count || 10;
  // Clamp so a bad page value cannot produce a zero, negative or NaN start index.
  const pageNumber = Math.max(1, Math.trunc(Number(page)) || 1);
  const params = new URLSearchParams({
    api_key: apiKey,
    q: query,
    t: String(resultCount),
    s: String(((pageNumber - 1) * resultCount) + 1), // start index is 1-based
    fmt: 'json',
  });
  if (isoLang) {
    params.set('lb', isoLang.toUpperCase());
    params.set('lbb', '100');
  }

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${apiBase}/search?${params.toString()}`, {
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'TermSearch/1.0',
      },
      signal: ac.signal,
    });
    if (!r.ok) return [];
    const data = await r.json();
    return (data?.response?.results || []).map((item) => ({
      title: item.title || '',
      url: item.url || '',
      snippet: item.desc || '',
      engine: 'mojeek-api',
      score: Number(item.score || 0),
      publishedDate: item.date || null,
    }));
  } catch {
    // Best effort: network errors, aborts and bad JSON all yield no results.
    return [];
  } finally {
    // Cleared only here so the timeout also covers reading the response body.
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// SearXNG proxy provider — for power users who run their own SearXNG instance
|
|
2
|
+
|
|
3
|
+
/**
 * Translate the app's category name into a SearXNG category.
 *
 * @param {*} category - Category name; falsy values count as 'web'.
 * @returns {string} 'images' or 'news' when requested (case-insensitive),
 *   otherwise 'general'.
 */
function mapCategory(category) {
  const normalized = String(category || 'web').toLowerCase();
  return normalized === 'images' || normalized === 'news' ? normalized : 'general';
}
|
|
9
|
+
|
|
10
|
+
/**
 * Query a SearXNG instance through its JSON search endpoint.
 *
 * @param {object} args
 * @param {string} args.query - Search terms.
 * @param {string} [args.lang='en-US'] - Sent as `language`.
 * @param {string} [args.safe='1'] - '2' and '0' pass through; anything else becomes '1'.
 * @param {number|string} [args.page=1] - Sent as `pageno`.
 * @param {string} [args.category='web'] - Mapped via mapCategory().
 * @param {string[]} [args.engines=[]] - When non-empty, sent comma-joined as `engines`.
 * @param {object} args.config - App config; reads `searxng.url`.
 * @param {number} [args.timeoutMs=15000] - Abort timeout for the whole request.
 * @returns {Promise<Array>} Normalized results; [] when no URL is configured,
 *   the response is not OK, or the request fails.
 */
export async function search({ query, lang = 'en-US', safe = '1', page = 1, category = 'web', engines = [], config, timeoutMs = 15000 }) {
  const searxngUrl = config?.searxng?.url;
  if (!searxngUrl) return [];

  const base = searxngUrl.replace(/\/$/, '');
  const params = new URLSearchParams({
    q: query,
    format: 'json',
    language: lang,
    safesearch: safe === '2' ? '2' : safe === '0' ? '0' : '1',
    pageno: String(page),
    categories: mapCategory(category),
  });
  if (Array.isArray(engines) && engines.length > 0) {
    params.set('engines', engines.join(','));
  }

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${base}/search?${params.toString()}`, {
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'TermSearch/1.0',
      },
      signal: ac.signal,
    });
    if (!r.ok) return [];
    const data = await r.json();
    return (data?.results || []).map((item) => ({
      title: item.title || '',
      url: item.url || '',
      snippet: item.content || '',
      // Keep the upstream engine visible, e.g. "searxng:google".
      engine: item.engine ? `searxng:${item.engine}` : 'searxng',
      score: Number(item.score || 0),
      publishedDate: item.publishedDate || null,
      thumbnail_src: item.thumbnail || null,
    }));
  } catch {
    // Best effort: network errors, aborts and bad JSON all yield no results.
    return [];
  } finally {
    // Cleared only here so the timeout also covers reading the response body.
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// Wikipedia provider — zero API key required
|
|
2
|
+
// Uses the MediaWiki action API for full-text search
|
|
3
|
+
|
|
4
|
+
const API_BASE = 'https://{lang}.wikipedia.org/w/api.php';
|
|
5
|
+
|
|
6
|
+
/**
 * Map a locale tag to the Wikipedia language subdomain.
 *
 * @param {string} locale - Locale tag such as 'it-IT'.
 * @returns {string} The subdomain code, or 'en' for any locale not in the table.
 */
function langCode(locale) {
  const map = {
    'it-IT': 'it', 'en-US': 'en', 'es-ES': 'es',
    'fr-FR': 'fr', 'de-DE': 'de', 'pt-PT': 'pt',
    'ru-RU': 'ru', 'zh-CN': 'zh', 'ja-JP': 'ja',
  };
  // Own-property check: a bare map[locale] would return inherited members
  // (e.g. the Object constructor) for inputs such as 'constructor'.
  return Object.hasOwn(map, locale) ? map[locale] : 'en';
}
|
|
15
|
+
|
|
16
|
+
/**
 * Reduce an HTML fragment to plain text.
 *
 * Tags are replaced by spaces, character references are decoded, runs of
 * whitespace collapse to a single space, and the result is trimmed.
 *
 * @param {string} html - HTML fragment; falsy input is treated as ''.
 * @returns {string} Plain text.
 */
function stripHtmlBasic(html) {
  const NAMED_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
  return (html || '')
    .replace(/<[^>]+>/g, ' ')
    // Single pass over all references so text is never decoded twice
    // (an escaped ampersand followed by "lt;" stays a literal entity string).
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (whole, ref) => {
      if (ref[0] !== '#') {
        const key = ref.toLowerCase();
        return Object.hasOwn(NAMED_ENTITIES, key) ? NAMED_ENTITIES[key] : whole;
      }
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    })
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
26
|
+
|
|
27
|
+
/**
 * Full-text search on Wikipedia via the MediaWiki action API
 * (`action=query&list=search`).
 *
 * @param {object} args
 * @param {string} args.query - Search terms.
 * @param {string} [args.lang='en-US'] - Locale; selects the Wikipedia subdomain.
 * @param {number|string} [args.page=1] - 1-based page; invalid or < 1 is treated as 1.
 * @param {number} [args.timeoutMs=12000] - Abort timeout for the whole request.
 * @returns {Promise<Array>} Normalized results tagged `engine: 'wikipedia'`;
 *   [] when the response is not OK or the request fails.
 */
export async function search({ query, lang = 'en-US', page = 1, timeoutMs = 12000 }) {
  const lc = langCode(lang);
  const endpoint = API_BASE.replace('{lang}', lc);
  // The offset must advance by the same amount as srlimit; stepping by 5 while
  // fetching 6 repeated the last hit of each page at the top of the next one.
  const pageSize = 6;
  const pageNumber = Math.max(1, Math.trunc(Number(page)) || 1);
  const offset = (pageNumber - 1) * pageSize;

  const params = new URLSearchParams({
    action: 'query',
    list: 'search',
    srsearch: query,
    srlimit: String(pageSize),
    sroffset: String(offset),
    srprop: 'snippet|titlesnippet|sectiontitle',
    format: 'json',
    origin: '*',
  });

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${endpoint}?${params.toString()}`, {
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'TermSearch/1.0 (personal search engine; https://github.com/DioNanos/termsearch)',
      },
      signal: ac.signal,
    });
    if (!r.ok) return [];

    const data = await r.json();
    const items = data?.query?.search || [];
    return items.map((item) => ({
      title: stripHtmlBasic(item.title || ''),
      // Article URLs use underscores in place of spaces in the title.
      url: `https://${lc}.wikipedia.org/wiki/${encodeURIComponent((item.title || '').replace(/ /g, '_'))}`,
      snippet: stripHtmlBasic(item.snippet || ''),
      engine: 'wikipedia',
      score: 1.0,
      publishedDate: null,
    }));
  } catch {
    // Best effort: network errors, aborts and bad JSON all yield no results.
    return [];
  } finally {
    // Cleared only here so the timeout also covers reading the response body.
    clearTimeout(timer);
  }
}
|