termsearch 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/config.example.json +11 -0
- package/frontend/dist/app.js +130 -33
- package/frontend/dist/style.css +41 -2
- package/package.json +1 -1
- package/src/ai/orchestrator.js +19 -0
- package/src/api/routes.js +5 -3
- package/src/config/defaults.js +14 -0
- package/src/config/manager.js +11 -1
- package/src/search/engine.js +37 -5
- package/src/search/providers/ahmia.js +61 -0
- package/src/search/providers/marginalia.js +49 -0
- package/src/search/providers/yandex.js +68 -0
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# TermSearch - Personal Search Engine
|
|
2
2
|
|
|
3
|
-
[](#project-status)
|
|
4
4
|
[](LICENSE)
|
|
5
5
|
[](https://nodejs.org)
|
|
6
6
|
[](https://termux.dev)
|
|
@@ -24,7 +24,7 @@ Core capabilities:
|
|
|
24
24
|
|
|
25
25
|
## Project Status
|
|
26
26
|
|
|
27
|
-
- Current line: `0.3.2`
|
|
27
|
+
- Current line: `0.3.3`
|
|
28
28
|
- Core is MIT — zero required API keys
|
|
29
29
|
- AI features are optional, configured via Settings page in browser
|
|
30
30
|
- Tested on: Ubuntu 24.04, Termux (Android 15/16)
|
|
@@ -112,7 +112,7 @@ All providers use the OpenAI-compatible `/chat/completions` format. Leave API ke
|
|
|
112
112
|
src/
|
|
113
113
|
config/ config manager — load/save/defaults/env overrides
|
|
114
114
|
search/
|
|
115
|
-
providers/ DuckDuckGo, Wikipedia, Brave, Mojeek, SearXNG, GitHub API
|
|
115
|
+
providers/ DuckDuckGo, Wikipedia, Brave, Mojeek, SearXNG, GitHub API, Yandex, Ahmia, Marginalia
|
|
116
116
|
engine.js fan-out, merge, rank, cache
|
|
117
117
|
ranking.js source diversity ranking
|
|
118
118
|
cache.js tiered cache (L1 Map + L2 disk JSON)
|
|
@@ -177,6 +177,8 @@ TERMSEARCH_AI_API_KEY=
|
|
|
177
177
|
TERMSEARCH_AI_MODEL=glm-4.7
|
|
178
178
|
TERMSEARCH_BRAVE_API_KEY=
|
|
179
179
|
TERMSEARCH_MOJEEK_API_KEY=
|
|
180
|
+
TERMSEARCH_MARGINALIA_API_KEY=public
|
|
181
|
+
TERMSEARCH_MARGINALIA_API_BASE=https://api2.marginalia-search.com
|
|
180
182
|
TERMSEARCH_SEARXNG_URL=
|
|
181
183
|
TERMSEARCH_GITHUB_TOKEN=
|
|
182
184
|
TERMSEARCH_INSTAGRAM_SESSION=
|
package/config.example.json
CHANGED
|
@@ -24,6 +24,17 @@
|
|
|
24
24
|
"enabled": false,
|
|
25
25
|
"api_key": ""
|
|
26
26
|
},
|
|
27
|
+
"yandex": {
|
|
28
|
+
"enabled": true
|
|
29
|
+
},
|
|
30
|
+
"ahmia": {
|
|
31
|
+
"enabled": true
|
|
32
|
+
},
|
|
33
|
+
"marginalia": {
|
|
34
|
+
"enabled": true,
|
|
35
|
+
"api_key": "public",
|
|
36
|
+
"api_base": "https://api2.marginalia-search.com"
|
|
37
|
+
},
|
|
27
38
|
"searxng": {
|
|
28
39
|
"enabled": false,
|
|
29
40
|
"url": ""
|
package/frontend/dist/app.js
CHANGED
|
@@ -9,6 +9,15 @@ const state = {
|
|
|
9
9
|
aiStatus: 'idle',
|
|
10
10
|
aiError: null,
|
|
11
11
|
aiMeta: null,
|
|
12
|
+
aiProgress: 0,
|
|
13
|
+
aiSteps: [],
|
|
14
|
+
aiSources: [],
|
|
15
|
+
aiExpanded: false,
|
|
16
|
+
aiStartTime: null,
|
|
17
|
+
aiLatencyMs: null,
|
|
18
|
+
aiLastQuery: null,
|
|
19
|
+
aiLastResults: null,
|
|
20
|
+
aiLastLang: null,
|
|
12
21
|
profilerData: null,
|
|
13
22
|
profilerLoading: false,
|
|
14
23
|
torrentData: [],
|
|
@@ -140,6 +149,16 @@ function setSelectedEngines(engines) {
|
|
|
140
149
|
persistSelectedEngines();
|
|
141
150
|
}
|
|
142
151
|
|
|
152
|
+
function sanitizeHttpUrl(raw) {
|
|
153
|
+
try {
|
|
154
|
+
const url = new URL(String(raw || '').trim());
|
|
155
|
+
if (url.protocol !== 'http:' && url.protocol !== 'https:') return '';
|
|
156
|
+
return url.toString();
|
|
157
|
+
} catch {
|
|
158
|
+
return '';
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
143
162
|
// ─── SVG Icons ────────────────────────────────────────────────────────────
|
|
144
163
|
function svg(paths, size = 16, extra = '') {
|
|
145
164
|
return `<svg xmlns="http://www.w3.org/2000/svg" width="${size}" height="${size}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" ${extra}>${paths}</svg>`;
|
|
@@ -200,17 +219,18 @@ const LANGS = [
|
|
|
200
219
|
];
|
|
201
220
|
|
|
202
221
|
const AI_PRESETS = [
|
|
203
|
-
{ id: '
|
|
204
|
-
{ id: '
|
|
205
|
-
{ id: '
|
|
206
|
-
{ id: '
|
|
207
|
-
{ id: '
|
|
208
|
-
{ id: '
|
|
209
|
-
{ id: '
|
|
222
|
+
{ id: 'ollama', label: 'LocalHost — Ollama', api_base: 'http://127.0.0.1:11434/v1', keyRequired: false, defaultModel: 'qwen3.5:4b' },
|
|
223
|
+
{ id: 'lmstudio', label: 'LocalHost — LM Studio', api_base: 'http://127.0.0.1:1234/v1', keyRequired: false, defaultModel: '' },
|
|
224
|
+
{ id: 'llamacpp', label: 'LocalHost — llama.cpp', api_base: 'http://127.0.0.1:8080/v1', keyRequired: false, defaultModel: '' },
|
|
225
|
+
{ id: 'chutes', label: 'Chutes.ai TEE', api_base: 'https://llm.chutes.ai/v1', keyRequired: true, defaultModel: 'deepseek-ai/DeepSeek-V3.2-TEE' },
|
|
226
|
+
{ id: 'anthropic',label: 'Anthropic', api_base: 'https://api.anthropic.com/v1', keyRequired: true, defaultModel: 'claude-3-5-haiku-latest' },
|
|
227
|
+
{ id: 'openai', label: 'OpenAI', api_base: 'https://api.openai.com/v1', keyRequired: true, defaultModel: 'gpt-4o-mini' },
|
|
228
|
+
{ id: 'openrouter', label: 'OpenRoute/OpenRouter', api_base: 'https://openrouter.ai/api/v1', keyRequired: true, defaultModel: 'openai/gpt-4o-mini' },
|
|
210
229
|
];
|
|
211
230
|
|
|
212
231
|
const ENGINE_GROUPS = [
|
|
213
232
|
{ label: 'Web Core', items: ['duckduckgo', 'wikipedia', 'brave', 'startpage', 'qwant', 'mojeek', 'bing', 'google', 'yahoo'] },
|
|
233
|
+
{ label: 'Uncensored', items: ['yandex', 'marginalia', 'ahmia'] },
|
|
214
234
|
{ label: 'Code & Dev', items: ['github', 'github-api', 'hackernews', 'reddit'] },
|
|
215
235
|
{ label: 'Media', items: ['youtube', 'sepiasearch'] },
|
|
216
236
|
{ label: 'Research', items: ['wikidata', 'crossref', 'openalex', 'openlibrary'] },
|
|
@@ -419,37 +439,96 @@ function renderAiPanel() {
|
|
|
419
439
|
if (!isActive) { panel.style.display = 'none'; return; }
|
|
420
440
|
panel.style.display = 'block';
|
|
421
441
|
|
|
422
|
-
const
|
|
423
|
-
const
|
|
442
|
+
const isLoading = state.aiStatus === 'loading' || state.aiStatus === 'streaming';
|
|
443
|
+
const isDone = state.aiStatus === 'done';
|
|
444
|
+
const isError = state.aiStatus === 'error';
|
|
445
|
+
const dotsClass = isDone ? 'done' : isError ? 'error' : '';
|
|
446
|
+
const statusText = state.aiStatus === 'loading' ? 'Thinking…'
|
|
447
|
+
: state.aiStatus === 'streaming' ? 'Generating…'
|
|
448
|
+
: isDone ? 'AI Summary' : 'Error';
|
|
424
449
|
|
|
450
|
+
// Dots
|
|
425
451
|
const dotsEl = el('div', { className: 'ai-dots' });
|
|
426
452
|
['violet', 'indigo', 'dim'].forEach(c => {
|
|
427
453
|
dotsEl.append(el('div', { className: `ai-dot ${dotsClass || c}` }));
|
|
428
454
|
});
|
|
429
455
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
el('span', { className: 'panel-label' }, statusText),
|
|
434
|
-
state.aiMeta?.model ? el('span', { style: 'font-size:10px;color:var(--text3);margin-left:6px' }, state.aiMeta.model) : null,
|
|
435
|
-
),
|
|
436
|
-
);
|
|
456
|
+
// Latency
|
|
457
|
+
const latMs = state.aiLatencyMs;
|
|
458
|
+
const latLabel = latMs != null ? (latMs < 1000 ? `${latMs}ms` : `${(latMs / 1000).toFixed(1)}s`) : null;
|
|
437
459
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
460
|
+
// Header
|
|
461
|
+
const headerLeft = el('div', { className: 'panel-header-left' },
|
|
462
|
+
dotsEl,
|
|
463
|
+
el('span', { className: 'panel-label' }, statusText),
|
|
464
|
+
state.aiMeta?.model ? el('span', { className: 'ai-model-label' }, state.aiMeta.model) : null,
|
|
465
|
+
latLabel ? el('span', { className: 'ai-latency-label' }, `· ${latLabel}`) : null,
|
|
466
|
+
);
|
|
467
|
+
const chevronPath = state.aiExpanded ? '<polyline points="18 15 12 9 6 15"/>' : '<polyline points="6 9 12 15 18 9"/>';
|
|
468
|
+
const expandBtn = el('button', { className: 'ai-expand-btn', type: 'button', title: state.aiExpanded ? 'Collapse' : 'Expand' });
|
|
469
|
+
expandBtn.innerHTML = svg(chevronPath, 14);
|
|
470
|
+
expandBtn.onclick = () => { state.aiExpanded = !state.aiExpanded; renderAiPanel(); };
|
|
471
|
+
const header = el('div', { className: 'panel-header' }, headerLeft, expandBtn);
|
|
472
|
+
|
|
473
|
+
// Progress bar
|
|
474
|
+
const showProgress = isLoading && state.aiProgress > 0;
|
|
475
|
+
const progressEl = showProgress ? el('div', { className: 'ai-progress-wrap' },
|
|
476
|
+
el('div', { className: 'ai-progress-bar', style: `width:${state.aiProgress}%` }),
|
|
477
|
+
) : null;
|
|
478
|
+
|
|
479
|
+
// Steps
|
|
480
|
+
const showSteps = isLoading && state.aiSteps.length > 0;
|
|
481
|
+
const stepsEl = showSteps ? el('div', { className: 'ai-steps' },
|
|
482
|
+
...state.aiSteps.slice(-4).map(s => el('div', { className: 'ai-step' }, s)),
|
|
483
|
+
) : null;
|
|
484
|
+
|
|
485
|
+
// Content
|
|
486
|
+
const contentEl = el('div', { className: `ai-content${!state.aiExpanded && !isLoading ? ' ai-content-collapsed' : ''}` });
|
|
487
|
+
if (isError) {
|
|
488
|
+
contentEl.style.color = '#f87171';
|
|
489
|
+
contentEl.textContent = state.aiError;
|
|
442
490
|
} else {
|
|
443
|
-
|
|
491
|
+
contentEl.textContent = state.aiSummary;
|
|
444
492
|
}
|
|
445
493
|
|
|
446
|
-
|
|
447
|
-
|
|
494
|
+
// Sources (shown when expanded + done)
|
|
495
|
+
const showSources = isDone && state.aiExpanded && state.aiSources.length > 0;
|
|
496
|
+
const sourcesEl = showSources ? el('div', { className: 'ai-sources' },
|
|
497
|
+
...state.aiSources.slice(0, 8).map((src, i) => {
|
|
498
|
+
const safeSrc = sanitizeHttpUrl(src);
|
|
499
|
+
if (!safeSrc) return null;
|
|
500
|
+
let label = src;
|
|
501
|
+
try {
|
|
502
|
+
const { hostname, pathname } = new URL(safeSrc);
|
|
503
|
+
const host = hostname.replace(/^www\./, '');
|
|
504
|
+
const segs = pathname.replace(/\/$/, '').split('/').filter(Boolean).slice(0, 2);
|
|
505
|
+
label = segs.length ? `${host} › ${segs.join('/')}` : host;
|
|
506
|
+
} catch {}
|
|
507
|
+
const a = el('a', { className: 'ai-source-pill', href: safeSrc, target: '_blank', rel: 'noopener noreferrer' }, `[${i + 1}] ${label}`);
|
|
508
|
+
return a;
|
|
509
|
+
}),
|
|
510
|
+
) : null;
|
|
511
|
+
|
|
512
|
+
// Footer: retry + expand/collapse
|
|
513
|
+
const retryBtn = el('button', { className: 'ai-retry-btn', type: 'button' }, 'Retry');
|
|
514
|
+
retryBtn.onclick = () => {
|
|
515
|
+
if (state.aiLastQuery) startAiSummary(state.aiLastQuery, state.aiLastResults || [], state.aiLastLang || 'en-US');
|
|
516
|
+
};
|
|
517
|
+
const toggleBtn = el('button', { className: 'ai-toggle-btn', type: 'button' },
|
|
518
|
+
state.aiExpanded ? 'Show less' : 'Show more',
|
|
519
|
+
);
|
|
520
|
+
toggleBtn.onclick = () => { state.aiExpanded = !state.aiExpanded; renderAiPanel(); };
|
|
521
|
+
const footer = el('div', { className: 'ai-footer' }, retryBtn, toggleBtn);
|
|
448
522
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
if (
|
|
523
|
+
panel.innerHTML = '';
|
|
524
|
+
panel.append(header);
|
|
525
|
+
if (progressEl) panel.append(progressEl);
|
|
526
|
+
if (stepsEl) panel.append(stepsEl);
|
|
527
|
+
panel.append(contentEl);
|
|
528
|
+
if (sourcesEl) panel.append(sourcesEl);
|
|
529
|
+
panel.append(footer);
|
|
530
|
+
|
|
531
|
+
if (state.aiStatus === 'streaming' && state.aiSummary.length < 60) {
|
|
453
532
|
panel.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
|
454
533
|
}
|
|
455
534
|
}
|
|
@@ -793,6 +872,15 @@ async function doSearch(q, category = state.category) {
|
|
|
793
872
|
async function startAiSummary(query, results, lang) {
|
|
794
873
|
state.aiStatus = 'loading';
|
|
795
874
|
state.aiSummary = '';
|
|
875
|
+
state.aiError = null;
|
|
876
|
+
state.aiProgress = 0;
|
|
877
|
+
state.aiSteps = [];
|
|
878
|
+
state.aiSources = [];
|
|
879
|
+
state.aiStartTime = Date.now();
|
|
880
|
+
state.aiLatencyMs = null;
|
|
881
|
+
state.aiLastQuery = query;
|
|
882
|
+
state.aiLastResults = results;
|
|
883
|
+
state.aiLastLang = lang;
|
|
796
884
|
renderAiPanel();
|
|
797
885
|
|
|
798
886
|
try {
|
|
@@ -819,13 +907,22 @@ async function startAiSummary(query, results, lang) {
|
|
|
819
907
|
if (!line.startsWith('data: ')) continue;
|
|
820
908
|
try {
|
|
821
909
|
const d = JSON.parse(line.slice(6));
|
|
822
|
-
if (d.chunk)
|
|
823
|
-
else if (d.
|
|
824
|
-
else if (d.
|
|
910
|
+
if (d.chunk !== undefined) { state.aiSummary += d.chunk; renderAiPanel(); }
|
|
911
|
+
else if (d.progress !== undefined) { state.aiProgress = d.progress; renderAiPanel(); }
|
|
912
|
+
else if (d.step) { state.aiSteps = [...state.aiSteps.slice(-3), d.step]; renderAiPanel(); }
|
|
913
|
+
else if (d.error) { state.aiStatus = 'error'; state.aiError = d.message || d.error; renderAiPanel(); }
|
|
914
|
+
else if (d.model != null || d.sites != null) {
|
|
915
|
+
state.aiStatus = 'done';
|
|
916
|
+
state.aiProgress = 100;
|
|
917
|
+
state.aiSources = Array.isArray(d.sites) ? d.sites.map(sanitizeHttpUrl).filter(Boolean) : [];
|
|
918
|
+
state.aiMeta = { fetchedCount: d.fetchedCount, model: d.model };
|
|
919
|
+
state.aiLatencyMs = Date.now() - state.aiStartTime;
|
|
920
|
+
renderAiPanel();
|
|
921
|
+
}
|
|
825
922
|
} catch { /* ignore */ }
|
|
826
923
|
}
|
|
827
924
|
}
|
|
828
|
-
if (state.aiStatus === 'streaming') { state.aiStatus = 'done'; renderAiPanel(); }
|
|
925
|
+
if (state.aiStatus === 'streaming') { state.aiStatus = 'done'; state.aiLatencyMs = Date.now() - state.aiStartTime; renderAiPanel(); }
|
|
829
926
|
} catch (e) {
|
|
830
927
|
state.aiStatus = 'error';
|
|
831
928
|
state.aiError = e.message;
|
|
@@ -1278,7 +1375,7 @@ async function renderSettings() {
|
|
|
1278
1375
|
el('label', { className: 'form-label', for: 'ai-base' }, 'API Endpoint'),
|
|
1279
1376
|
makeInput('ai-base', ai.api_base, 'http://localhost:11434/v1'),
|
|
1280
1377
|
el('div', { className: 'form-hint' },
|
|
1281
|
-
'Included presets:
|
|
1378
|
+
'Included presets: LocalHost (Ollama · LM Studio · llama.cpp) · Chutes.ai TEE · Anthropic · OpenAI · OpenRoute/OpenRouter',
|
|
1282
1379
|
el('br', {}),
|
|
1283
1380
|
'You can also keep custom OpenAI-compatible endpoints.',
|
|
1284
1381
|
),
|
|
@@ -1395,7 +1492,7 @@ async function renderSettings() {
|
|
|
1395
1492
|
// Server info
|
|
1396
1493
|
el('div', { className: 'settings-section' },
|
|
1397
1494
|
el('h2', {}, 'Server Info'),
|
|
1398
|
-
el('div', { className: 'info-row' }, el('span', { className: 'info-key' }, 'Version'), el('span', { className: 'info-val' }, health?.version || '0.3.2')),
|
|
1495
|
+
el('div', { className: 'info-row' }, el('span', { className: 'info-key' }, 'Version'), el('span', { className: 'info-val' }, health?.version || '0.3.3')),
|
|
1399
1496
|
el('div', { className: 'info-row' }, el('span', { className: 'info-key' }, 'Active providers'), el('span', { className: 'info-val' }, (health?.providers || []).join(', ') || 'none')),
|
|
1400
1497
|
el('div', { className: 'info-row' }, el('span', { className: 'info-key' }, 'AI'), el('span', { className: 'info-val' }, health?.ai_enabled ? `enabled (${health.ai_model})` : 'not configured')),
|
|
1401
1498
|
el('div', { className: 'info-row' }, el('span', { className: 'info-key' }, 'GitHub'), el('a', { href: 'https://github.com/DioNanos/termsearch', target: '_blank', className: 'info-val', style: 'color:var(--link)' }, 'DioNanos/termsearch')),
|
package/frontend/dist/style.css
CHANGED
|
@@ -565,6 +565,22 @@ a:hover { color: var(--link-h); }
|
|
|
565
565
|
.ai-dot.dim { background: #5b21b6; animation: pulse 1.4s ease-in-out 300ms infinite; }
|
|
566
566
|
.ai-dot.done { background: #34d399; animation: none; }
|
|
567
567
|
.ai-dot.error { background: #f87171; animation: none; }
|
|
568
|
+
.panel-ai .panel-header { cursor: default; }
|
|
569
|
+
.ai-model-label { font-size: 10px; color: var(--text3); margin-left: 6px; }
|
|
570
|
+
.ai-latency-label{ font-size: 10px; color: #4b5563; margin-left: 4px; }
|
|
571
|
+
.ai-expand-btn { background: none; border: none; color: var(--text3); cursor: pointer; padding: 0; margin-left: auto; display: flex; align-items: center; }
|
|
572
|
+
.ai-expand-btn:hover { color: var(--text2); }
|
|
573
|
+
|
|
574
|
+
/* Progress bar */
|
|
575
|
+
.ai-progress-wrap { height: 3px; background: rgba(255,255,255,0.06); border-radius: 99px; margin: 8px 0; overflow: hidden; }
|
|
576
|
+
.ai-progress-bar { height: 100%; background: linear-gradient(90deg, #7c3aed, #6366f1); border-radius: 99px; transition: width 0.4s ease; }
|
|
577
|
+
|
|
578
|
+
/* Steps */
|
|
579
|
+
.ai-steps { margin: 6px 0 4px; display: flex; flex-direction: column; gap: 3px; }
|
|
580
|
+
.ai-step { font-size: 10px; color: #4b5563; display: flex; align-items: center; gap: 6px; }
|
|
581
|
+
.ai-step::before { content: ''; display: inline-block; width: 4px; height: 4px; border-radius: 50%; background: #6366f1; flex-shrink: 0; }
|
|
582
|
+
|
|
583
|
+
/* Content */
|
|
568
584
|
.ai-content {
|
|
569
585
|
font-size: 14px;
|
|
570
586
|
color: #d1d5db;
|
|
@@ -575,9 +591,32 @@ a:hover { color: var(--link-h); }
|
|
|
575
591
|
border: 1px solid rgba(129,140,248,0.22);
|
|
576
592
|
border-radius: var(--radius-sm);
|
|
577
593
|
padding: 10px;
|
|
578
|
-
|
|
579
|
-
overflow: auto;
|
|
594
|
+
overflow: hidden;
|
|
580
595
|
}
|
|
596
|
+
.ai-content.ai-content-collapsed {
|
|
597
|
+
display: -webkit-box;
|
|
598
|
+
-webkit-line-clamp: 4;
|
|
599
|
+
-webkit-box-orient: vertical;
|
|
600
|
+
overflow: hidden;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
/* Sources */
|
|
604
|
+
.ai-sources { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 10px; }
|
|
605
|
+
.ai-source-pill {
|
|
606
|
+
font-size: 10px; color: #6b7280;
|
|
607
|
+
border: 1px solid #1f2937; border-radius: 99px;
|
|
608
|
+
padding: 2px 8px; text-decoration: none;
|
|
609
|
+
white-space: nowrap; overflow: hidden; max-width: 200px; text-overflow: ellipsis;
|
|
610
|
+
transition: color 0.15s, border-color 0.15s;
|
|
611
|
+
}
|
|
612
|
+
.ai-source-pill:hover { color: #a78bfa; border-color: #4c1d95; }
|
|
613
|
+
|
|
614
|
+
/* Footer */
|
|
615
|
+
.ai-footer { display: flex; align-items: center; justify-content: space-between; margin-top: 10px; }
|
|
616
|
+
.ai-retry-btn { font-size: 12px; color: #a78bfa; background: none; border: none; cursor: pointer; padding: 0; }
|
|
617
|
+
.ai-retry-btn:hover { color: #c4b5fd; }
|
|
618
|
+
.ai-toggle-btn { font-size: 11px; color: #4b5563; background: none; border: none; cursor: pointer; padding: 0; }
|
|
619
|
+
.ai-toggle-btn:hover { color: #9ca3af; }
|
|
581
620
|
.ai-meta { font-size: 11px; color: var(--text3); margin-top: 8px; }
|
|
582
621
|
|
|
583
622
|
/* Profiler panel */
|
package/package.json
CHANGED
package/src/ai/orchestrator.js
CHANGED
|
@@ -45,8 +45,14 @@ export async function generateSummary({
|
|
|
45
45
|
results = [],
|
|
46
46
|
session = [],
|
|
47
47
|
onToken = null,
|
|
48
|
+
onProgress = null,
|
|
49
|
+
onStep = null,
|
|
48
50
|
docCache = null,
|
|
49
51
|
}, aiConfig) {
|
|
52
|
+
const emit = (progress, step) => {
|
|
53
|
+
if (onProgress) onProgress(progress);
|
|
54
|
+
if (step && onStep) onStep(step);
|
|
55
|
+
};
|
|
50
56
|
if (!aiConfig?.enabled || !aiConfig?.api_base || !aiConfig?.model) {
|
|
51
57
|
return { error: 'ai_not_configured', message: 'AI not configured. Add endpoint in Settings.' };
|
|
52
58
|
}
|
|
@@ -61,6 +67,7 @@ export async function generateSummary({
|
|
|
61
67
|
|
|
62
68
|
try {
|
|
63
69
|
// Phase 1: AI decides which URLs to fetch
|
|
70
|
+
emit(5, 'Analyzing query…');
|
|
64
71
|
const phase1Prompt = buildFetchDecisionPrompt({
|
|
65
72
|
query,
|
|
66
73
|
results,
|
|
@@ -83,6 +90,15 @@ export async function generateSummary({
|
|
|
83
90
|
const allResultUrls = results.slice(0, 10).map((r) => r.url).filter(Boolean);
|
|
84
91
|
const { urls: urlsToFetch } = parseFetchDecision(phase1Result?.content, allResultUrls);
|
|
85
92
|
|
|
93
|
+
// Emit step per URL before batch fetch
|
|
94
|
+
emit(15, `Fetching ${urlsToFetch.length || allResultUrls.slice(0, 2).length} source(s)…`);
|
|
95
|
+
urlsToFetch.slice(0, 6).forEach((url) => {
|
|
96
|
+
try {
|
|
97
|
+
const host = new URL(url).hostname.replace(/^www\./, '');
|
|
98
|
+
if (onStep) onStep(`Reading: ${host}`);
|
|
99
|
+
} catch { if (onStep) onStep('Reading source…'); }
|
|
100
|
+
});
|
|
101
|
+
|
|
86
102
|
// Fetch the selected URLs
|
|
87
103
|
let documents = [];
|
|
88
104
|
if (urlsToFetch.length > 0) {
|
|
@@ -99,6 +115,8 @@ export async function generateSummary({
|
|
|
99
115
|
documents = fallback.filter((d) => d.status === 'ok' && d.content);
|
|
100
116
|
}
|
|
101
117
|
|
|
118
|
+
emit(60, `Synthesizing from ${documents.length} page(s)…`);
|
|
119
|
+
|
|
102
120
|
// Phase 2: synthesize summary
|
|
103
121
|
const phase2Prompt = buildAgenticSummaryPrompt({ query, lang, results, documents, session });
|
|
104
122
|
|
|
@@ -107,6 +125,7 @@ export async function generateSummary({
|
|
|
107
125
|
|
|
108
126
|
if (typeof onToken === 'function') {
|
|
109
127
|
// Streaming mode
|
|
128
|
+
emit(65, 'Generating summary…');
|
|
110
129
|
const streamResult = await stream(phase2Prompt, onToken, {
|
|
111
130
|
...ai,
|
|
112
131
|
systemPrompt: 'You are a search assistant. Write your answer directly. Do not include reasoning or thinking.',
|
package/src/api/routes.js
CHANGED
|
@@ -11,7 +11,7 @@ import { detectProfileTarget, scanProfile, PROFILER_PLATFORMS } from '../profile
|
|
|
11
11
|
import { fetchBlueskyPosts, fetchBlueskyActors, fetchGdeltArticles } from '../social/search.js';
|
|
12
12
|
import { scrapeTPB, scrape1337x, extractMagnetFromUrl } from '../torrent/scrapers.js';
|
|
13
13
|
|
|
14
|
-
const APP_VERSION = '0.3.2';
|
|
14
|
+
const APP_VERSION = '0.3.3';
|
|
15
15
|
const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news']);
|
|
16
16
|
const ALLOWED_LANGS = new Set(['auto', 'it-IT', 'en-US', 'es-ES', 'fr-FR', 'de-DE', 'pt-PT', 'ru-RU', 'zh-CN', 'ja-JP']);
|
|
17
17
|
|
|
@@ -409,7 +409,9 @@ export function createRouter(config, rateLimiters) {
|
|
|
409
409
|
const result = await generateSummary(
|
|
410
410
|
{
|
|
411
411
|
query, lang, results, session,
|
|
412
|
-
onToken:
|
|
412
|
+
onToken: (chunk) => sendEvent('token', { chunk }),
|
|
413
|
+
onProgress: (p) => sendEvent('progress', { progress: p }),
|
|
414
|
+
onStep: (text) => sendEvent('step', { step: text }),
|
|
413
415
|
docCache: getDocCache(),
|
|
414
416
|
},
|
|
415
417
|
cfg.ai
|
|
@@ -450,7 +452,7 @@ export function createRouter(config, rateLimiters) {
|
|
|
450
452
|
return sendJson(res, 400, { error: 'invalid_body' });
|
|
451
453
|
}
|
|
452
454
|
// Whitelist accepted config keys to prevent unexpected writes
|
|
453
|
-
const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'searxng', 'search', 'rate_limit'];
|
|
455
|
+
const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'yandex', 'ahmia', 'marginalia', 'searxng', 'search', 'rate_limit'];
|
|
454
456
|
const filtered = {};
|
|
455
457
|
for (const key of allowed) {
|
|
456
458
|
if (key in body) filtered[key] = body[key];
|
package/src/config/defaults.js
CHANGED
|
@@ -49,6 +49,20 @@ export const DEFAULTS = {
|
|
|
49
49
|
api_base: 'https://api.mojeek.com',
|
|
50
50
|
},
|
|
51
51
|
|
|
52
|
+
yandex: {
|
|
53
|
+
enabled: true,
|
|
54
|
+
},
|
|
55
|
+
|
|
56
|
+
ahmia: {
|
|
57
|
+
enabled: true,
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
marginalia: {
|
|
61
|
+
enabled: true,
|
|
62
|
+
api_key: 'public',
|
|
63
|
+
api_base: 'https://api2.marginalia-search.com',
|
|
64
|
+
},
|
|
65
|
+
|
|
52
66
|
searxng: {
|
|
53
67
|
enabled: false,
|
|
54
68
|
url: '', // e.g. http://localhost:9090
|
package/src/config/manager.js
CHANGED
|
@@ -91,6 +91,12 @@ class ConfigManager {
|
|
|
91
91
|
if (process.env.TERMSEARCH_MOJEEK_API_KEY) {
|
|
92
92
|
overrides.mojeek = { api_key: process.env.TERMSEARCH_MOJEEK_API_KEY, enabled: true };
|
|
93
93
|
}
|
|
94
|
+
if (process.env.TERMSEARCH_MARGINALIA_API_KEY) {
|
|
95
|
+
overrides.marginalia = { api_key: process.env.TERMSEARCH_MARGINALIA_API_KEY, enabled: true };
|
|
96
|
+
}
|
|
97
|
+
if (process.env.TERMSEARCH_MARGINALIA_API_BASE) {
|
|
98
|
+
overrides.marginalia = { ...(overrides.marginalia || {}), api_base: process.env.TERMSEARCH_MARGINALIA_API_BASE, enabled: true };
|
|
99
|
+
}
|
|
94
100
|
if (process.env.TERMSEARCH_SEARXNG_URL) {
|
|
95
101
|
overrides.searxng = { url: process.env.TERMSEARCH_SEARXNG_URL, enabled: true };
|
|
96
102
|
}
|
|
@@ -113,6 +119,9 @@ class ConfigManager {
|
|
|
113
119
|
if (safePartial?.mojeek?.api_key && !safePartial?.mojeek?.hasOwnProperty('enabled')) {
|
|
114
120
|
this._config.mojeek.enabled = Boolean(this._config.mojeek.api_key);
|
|
115
121
|
}
|
|
122
|
+
if (safePartial?.marginalia?.api_key && !safePartial?.marginalia?.hasOwnProperty('enabled')) {
|
|
123
|
+
this._config.marginalia.enabled = Boolean(this._config.marginalia.api_key);
|
|
124
|
+
}
|
|
116
125
|
if (safePartial?.searxng?.url && !safePartial?.searxng?.hasOwnProperty('enabled')) {
|
|
117
126
|
this._config.searxng.enabled = Boolean(this._config.searxng.url);
|
|
118
127
|
}
|
|
@@ -120,7 +129,7 @@ class ConfigManager {
|
|
|
120
129
|
}
|
|
121
130
|
|
|
122
131
|
_sanitizeSensitiveKeys(partial) {
|
|
123
|
-
const sections = ['ai', 'brave', 'mojeek'];
|
|
132
|
+
const sections = ['ai', 'brave', 'mojeek', 'marginalia'];
|
|
124
133
|
for (const section of sections) {
|
|
125
134
|
const block = partial?.[section];
|
|
126
135
|
if (!block || typeof block !== 'object' || !Object.prototype.hasOwnProperty.call(block, 'api_key')) continue;
|
|
@@ -173,6 +182,7 @@ class ConfigManager {
|
|
|
173
182
|
ai: { ...c.ai, api_key: maskKey(c.ai.api_key) },
|
|
174
183
|
brave: { ...c.brave, api_key: maskKey(c.brave.api_key) },
|
|
175
184
|
mojeek: { ...c.mojeek, api_key: maskKey(c.mojeek.api_key) },
|
|
185
|
+
marginalia: { ...c.marginalia, api_key: maskKey(c.marginalia.api_key) },
|
|
176
186
|
};
|
|
177
187
|
}
|
|
178
188
|
}
|
package/src/search/engine.js
CHANGED
|
@@ -9,6 +9,9 @@ import * as brave from './providers/brave.js';
|
|
|
9
9
|
import * as mojeek from './providers/mojeek.js';
|
|
10
10
|
import * as searxng from './providers/searxng.js';
|
|
11
11
|
import * as github from './providers/github.js';
|
|
12
|
+
import * as yandex from './providers/yandex.js';
|
|
13
|
+
import * as ahmia from './providers/ahmia.js';
|
|
14
|
+
import * as marginalia from './providers/marginalia.js';
|
|
12
15
|
|
|
13
16
|
let _searchCache = null;
|
|
14
17
|
let _docCache = null;
|
|
@@ -49,6 +52,10 @@ export const ALLOWED_ENGINES = new Set([
|
|
|
49
52
|
'1337x',
|
|
50
53
|
'piratebay',
|
|
51
54
|
'nyaa',
|
|
55
|
+
// uncensored / alternative index engines
|
|
56
|
+
'yandex',
|
|
57
|
+
'ahmia',
|
|
58
|
+
'marginalia',
|
|
52
59
|
// local aliases for direct providers
|
|
53
60
|
'ddg',
|
|
54
61
|
'wiki',
|
|
@@ -96,6 +103,24 @@ const PROVIDER_REGISTRY = {
|
|
|
96
103
|
run: github.search,
|
|
97
104
|
defaultProvider: false,
|
|
98
105
|
},
|
|
106
|
+
yandex: {
|
|
107
|
+
aliases: new Set(['yandex']),
|
|
108
|
+
enabled: (cfg) => cfg?.yandex?.enabled !== false,
|
|
109
|
+
run: yandex.search,
|
|
110
|
+
defaultProvider: false,
|
|
111
|
+
},
|
|
112
|
+
ahmia: {
|
|
113
|
+
aliases: new Set(['ahmia']),
|
|
114
|
+
enabled: (cfg) => cfg?.ahmia?.enabled !== false,
|
|
115
|
+
run: ahmia.search,
|
|
116
|
+
defaultProvider: false,
|
|
117
|
+
},
|
|
118
|
+
marginalia: {
|
|
119
|
+
aliases: new Set(['marginalia']),
|
|
120
|
+
enabled: (cfg) => cfg?.marginalia?.enabled !== false,
|
|
121
|
+
run: marginalia.search,
|
|
122
|
+
defaultProvider: false,
|
|
123
|
+
},
|
|
99
124
|
};
|
|
100
125
|
|
|
101
126
|
export function initCaches(dataDir, cfg) {
|
|
@@ -162,10 +187,7 @@ function resolveProviderPlan(cfg, requestedEngines = [], category = 'web') {
|
|
|
162
187
|
|
|
163
188
|
const providers = [...explicitProviders].filter((name) => enabledProviders.includes(name));
|
|
164
189
|
if (providers.length === 0) {
|
|
165
|
-
return {
|
|
166
|
-
providers: defaultProviders,
|
|
167
|
-
searxEngines: category === 'web' && defaultProviders.includes('searxng') ? CURATED_WEB_ENGINES.slice() : [],
|
|
168
|
-
};
|
|
190
|
+
return { providers: [], searxEngines: [] };
|
|
169
191
|
}
|
|
170
192
|
|
|
171
193
|
return { providers, searxEngines };
|
|
@@ -301,6 +323,7 @@ async function runProviderDetailed(name, args) {
|
|
|
301
323
|
const responded = new Set();
|
|
302
324
|
const failed = new Set();
|
|
303
325
|
const failedDetails = [];
|
|
326
|
+
const skipHealth = new Set();
|
|
304
327
|
|
|
305
328
|
if (name === 'searxng') {
|
|
306
329
|
const unresponsive = Array.isArray(meta.unresponsive) ? meta.unresponsive.map((engine) => normalizeEngineName(engine)).filter(Boolean) : [];
|
|
@@ -327,9 +350,15 @@ async function runProviderDetailed(name, args) {
|
|
|
327
350
|
failedDetails.push({ engine: name, reason: String(meta.error) });
|
|
328
351
|
} else {
|
|
329
352
|
responded.add(name);
|
|
353
|
+
if (results.length === 0 || meta.skipHealth === true || meta.empty === true) {
|
|
354
|
+
skipHealth.add(name);
|
|
355
|
+
}
|
|
330
356
|
}
|
|
331
357
|
|
|
332
|
-
for (const engine of responded) recordEngineOutcome(engine, true);
|
|
358
|
+
for (const engine of responded) {
|
|
359
|
+
if (skipHealth.has(engine)) continue;
|
|
360
|
+
recordEngineOutcome(engine, true);
|
|
361
|
+
}
|
|
333
362
|
for (const detail of failedDetails) recordEngineOutcome(detail.engine, false, detail.reason);
|
|
334
363
|
|
|
335
364
|
return {
|
|
@@ -558,5 +587,8 @@ export function getEnabledProviders(cfg) {
|
|
|
558
587
|
if (cfg.mojeek?.enabled && cfg.mojeek?.api_key) providers.push('mojeek');
|
|
559
588
|
if (cfg.searxng?.enabled && cfg.searxng?.url) providers.push('searxng');
|
|
560
589
|
providers.push('github-api');
|
|
590
|
+
if (cfg?.yandex?.enabled !== false) providers.push('yandex');
|
|
591
|
+
if (cfg?.ahmia?.enabled !== false) providers.push('ahmia');
|
|
592
|
+
if (cfg?.marginalia?.enabled !== false) providers.push('marginalia');
|
|
561
593
|
return providers;
|
|
562
594
|
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Ahmia.fi — clearnet index of Tor hidden services (.onion)
|
|
2
|
+
// No API key required — results include .onion URLs (accessible via Tor Browser)
|
|
3
|
+
|
|
4
|
+
const AHMIA_ENDPOINT = 'https://ahmia.fi/search/';
|
|
5
|
+
const UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0';
|
|
6
|
+
|
|
7
|
+
function parseAhmia(html) {
|
|
8
|
+
const results = [];
|
|
9
|
+
// Each result: <h4><a href="...">Title</a></h4> followed by <p>snippet</p>
|
|
10
|
+
const blockRe = /<h4[^>]*>([\s\S]*?)<\/h4>([\s\S]*?)(?=<h4|<\/ol|$)/gi;
|
|
11
|
+
let m;
|
|
12
|
+
while ((m = blockRe.exec(html)) !== null && results.length < 15) {
|
|
13
|
+
const titleBlock = m[1];
|
|
14
|
+
const afterBlock = m[2];
|
|
15
|
+
|
|
16
|
+
const aMatch = titleBlock.match(/<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
|
|
17
|
+
if (!aMatch) continue;
|
|
18
|
+
|
|
19
|
+
const url = aMatch[1];
|
|
20
|
+
if (!url.startsWith('http') || url.includes('ahmia.fi')) continue;
|
|
21
|
+
|
|
22
|
+
const title = aMatch[2].replace(/<[^>]+>/g, '').trim();
|
|
23
|
+
if (!title) continue;
|
|
24
|
+
|
|
25
|
+
const pMatch = afterBlock.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
|
|
26
|
+
const snippet = pMatch
|
|
27
|
+
? pMatch[1].replace(/<[^>]+>/g, '').replace(/&amp;/g, '&').replace(/&nbsp;/g, ' ').trim().slice(0, 300)
|
|
28
|
+
: '';
|
|
29
|
+
|
|
30
|
+
results.push({ title, url, snippet, engine: 'ahmia', score: 0 });
|
|
31
|
+
}
|
|
32
|
+
return results;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
 * Query Ahmia and return parsed results.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as the `q` parameter.
 * @param {number} [options.page=1] - Page number; for values above 1 the
 *   request carries `page = page - 1`.
 * @param {number} [options.timeoutMs=12000] - Deadline for the whole exchange
 *   (connection, headers and body download).
 * @returns {Promise<{results: Array<object>, _meta: object}>} Never rejects.
 *   Failures are reported as `_meta.error` (`ahmia_http_<status>`,
 *   `ahmia_unexpected_html`, `ahmia_unreachable`); a page with no parseable
 *   results yields `_meta: { empty: true, skipHealth: true }`.
 */
export async function search({ query, page = 1, timeoutMs = 12000 }) {
  const params = new URLSearchParams({ q: query });
  if (page > 1) params.set('page', String(page - 1));

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${AHMIA_ENDPOINT}?${params}`, {
      headers: {
        'User-Agent': UA,
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      signal: ac.signal,
    });
    if (!r.ok) return { results: [], _meta: { error: `ahmia_http_${r.status}` } };

    // The timer is still armed here, so a stalled body download is aborted too.
    const html = await r.text();
    if (html.length < 500) return { results: [], _meta: { error: 'ahmia_unexpected_html' } };

    const results = parseAhmia(html);
    if (results.length === 0) return { results: [], _meta: { empty: true, skipHealth: true } };
    return { results, _meta: {} };
  } catch {
    // Network failure, abort/timeout, or a failed body read.
    return { results: [], _meta: { error: 'ahmia_unreachable' } };
  } finally {
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Marginalia Search provider (api2, key-based).
|
|
2
|
+
// Public key works with shared limits; user key can be configured.
|
|
3
|
+
|
|
4
|
+
const DEFAULT_API = 'https://api2.marginalia-search.com';
|
|
5
|
+
|
|
6
|
+
/**
 * Query the Marginalia Search JSON API and normalise its results.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as the `query` parameter.
 * @param {number} [options.page=1] - Page number; values that are not numeric
 *   or are below 1 are sent as 1.
 * @param {number} [options.timeoutMs=10000] - Deadline for the whole exchange
 *   (connection, headers and body download).
 * @param {object} [options.config] - Configuration object; only
 *   `config.marginalia.{enabled, api_base, api_key}` is read. The key falls
 *   back to `TERMSEARCH_MARGINALIA_API_KEY`, then to `'public'`.
 * @returns {Promise<{results: Array<object>, _meta: object}>} Never rejects.
 *   Failures are reported as `_meta.error` (`marginalia_disabled`,
 *   `marginalia_http_<status>`, `marginalia_unreachable`); a response with no
 *   usable results yields `_meta: { empty: true, skipHealth: true }`.
 */
export async function search({ query, page = 1, timeoutMs = 10000, config }) {
  const cfg = config || {};
  if (cfg?.marginalia?.enabled === false) {
    return { results: [], _meta: { error: 'marginalia_disabled' } };
  }

  const apiBase = String(cfg?.marginalia?.api_base || DEFAULT_API).replace(/\/$/, '');
  const apiKey = String(cfg?.marginalia?.api_key || process.env.TERMSEARCH_MARGINALIA_API_KEY || 'public').trim() || 'public';

  const params = new URLSearchParams({
    query,
    count: '10',
    page: String(Math.max(1, Number(page) || 1)),
  });

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const response = await fetch(`${apiBase}/search?${params}`, {
      headers: {
        Accept: 'application/json',
        'User-Agent': 'TermSearch/1.0 (personal search)',
        'API-Key': apiKey,
      },
      signal: ac.signal,
    });
    if (!response.ok) return { results: [], _meta: { error: `marginalia_http_${response.status}` } };

    // The timer is still armed here, so a stalled body download is aborted too.
    const data = await response.json();
    // Accept either a `results` or a `result` array; anything else means "no results".
    const list = Array.isArray(data?.results)
      ? data.results
      : (Array.isArray(data?.result) ? data.result : []);

    const results = list
      .slice(0, 15)
      // A malformed entry must not discard the valid ones around it.
      .filter((item) => item !== null && typeof item === 'object')
      .map((item) => ({
        title: String(item.title || item.url || '').trim(),
        url: String(item.url || '').trim(),
        snippet: String(item.description || item.snippet || '').trim(),
        engine: 'marginalia',
        score: 0,
      }))
      .filter((entry) => entry.url.startsWith('http'));

    if (results.length === 0) return { results: [], _meta: { empty: true, skipHealth: true } };
    return { results, _meta: {} };
  } catch {
    // Network failure, abort/timeout, or an unreadable/invalid JSON body.
    return { results: [], _meta: { error: 'marginalia_unreachable' } };
  } finally {
    clearTimeout(timer);
  }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
// Yandex HTML scraper — no API key required
|
|
2
|
+
// Different political/content filtering than US engines; Russian/global index
|
|
3
|
+
|
|
4
|
+
const YANDEX_ENDPOINT = 'https://yandex.com/search/';
|
|
5
|
+
const UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0';
|
|
6
|
+
|
|
7
|
+
/**
 * Extract organic results from a Yandex results page.
 *
 * Title links are anchors whose `class` contains `OrganicTitle-Link`; the
 * snippet is looked up in the 3000 characters of HTML that start at the title
 * match.
 *
 * @param {string} html - Raw HTML of the results page.
 * @returns {Array<{title: string, url: string, snippet: string, engine: string, score: number}>}
 *   At most 15 results, in document order.
 */
function parseYandex(html) {
  const results = [];

  // Primary: OrganicTitle-Link class (standard desktop layout)
  const titleRe = /<a[^>]+class="[^"]*OrganicTitle-Link[^"]*"[^>]+href="([^"#]+)"[^>]*>([\s\S]*?)<\/a>/gi;

  // Strip tags, then decode the entities this scraper cares about.
  // `&amp;` is decoded last so that `&amp;nbsp;` is not decoded twice.
  const toText = (fragment) => fragment
    .replace(/<[^>]+>/g, '')
    .replace(/&nbsp;|\u00a0/g, ' ')
    .replace(/&amp;/g, '&')
    .trim();

  let m;
  while ((m = titleRe.exec(html)) !== null && results.length < 15) {
    // Attribute values are HTML-escaped, so `&amp;` stands for a literal `&`.
    const url = m[1].replace(/&amp;/g, '&');
    // Keep absolute links only and drop links that point back at Yandex itself.
    if (!url.startsWith('http') || url.includes('yandex.') || url.includes('ya.ru')) continue;

    const title = toText(m[2]);
    if (!title) continue;

    // Look for snippet in the 3KB after the title match
    const chunk = html.slice(m.index, m.index + 3000);
    const snipM = chunk.match(/class="[^"]*(?:OrganicText|TextContainer|Organic-Text|organic__text)[^"]*"[^>]*>([\s\S]*?)<\/(?:div|span|p)>/i);
    const snippet = snipM ? toText(snipM[1]).slice(0, 300) : '';

    results.push({ title, url, snippet, engine: 'yandex', score: 0 });
  }

  return results;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Query Yandex's HTML search page and return parsed results.
 *
 * @param {object} options
 * @param {string} options.query - Search terms, sent as the `text` parameter.
 * @param {string} [options.lang='en-US'] - Accepted but not used by the
 *   request: `Accept-Language` is fixed to `en-US,en;q=0.5`.
 * @param {number} [options.page=1] - Page number; sent as the zero-based `p`
 *   parameter. Non-numeric or non-finite values are treated as page 1.
 * @param {number} [options.timeoutMs=12000] - Deadline for the whole exchange
 *   (connection, headers and body download).
 * @returns {Promise<{results: Array<object>, _meta: object}>} Never rejects.
 *   Failures are reported as `_meta.error` (`yandex_http_<status>`,
 *   `yandex_captcha`, `yandex_unexpected_html`, `yandex_unreachable`); a page
 *   with no parseable results yields `_meta: { empty: true, skipHealth: true }`.
 */
export async function search({ query, lang = 'en-US', page = 1, timeoutMs = 12000 }) {
  // Guard against NaN/Infinity so the request never carries `p=NaN`.
  const pageNumber = Number(page);
  const pageIndex = Number.isFinite(pageNumber) ? Math.max(0, Math.trunc(pageNumber) - 1) : 0;

  const params = new URLSearchParams({
    text: query,
    p: String(pageIndex),
    numdoc: '10',
    lr: '10417', // world region
  });

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(`${YANDEX_ENDPOINT}?${params}`, {
      headers: {
        'User-Agent': UA,
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      signal: ac.signal,
    });
    if (!r.ok) return { results: [], _meta: { error: `yandex_http_${r.status}` } };

    // The timer is still armed here, so a stalled body download is aborted too.
    const html = await r.text();
    // Explicit detection: Yandex can serve anti-bot pages.
    if (html.includes('showcaptcha') || html.includes('robot-captcha')) {
      return { results: [], _meta: { error: 'yandex_captcha' } };
    }
    if (html.length < 2000) {
      return { results: [], _meta: { error: 'yandex_unexpected_html' } };
    }

    const results = parseYandex(html);
    if (results.length === 0) return { results: [], _meta: { empty: true, skipHealth: true } };
    return { results, _meta: {} };
  } catch {
    // Network failure, abort/timeout, or a failed body read.
    return { results: [], _meta: { error: 'yandex_unreachable' } };
  } finally {
    clearTimeout(timer);
  }
}
|