termsearch 0.3.14 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/frontend/dist/app.js +94 -24
- package/package.json +1 -1
- package/src/ai/query.js +42 -9
- package/src/api/routes.js +28 -11
- package/src/search/engine.js +6 -1
- package/src/search/ranking.js +34 -11
- package/src/torrent/scrapers.js +129 -2
package/README.md
CHANGED
|
@@ -27,7 +27,7 @@ Opens `http://localhost:3000`. That's it.
|
|
|
27
27
|
|
|
28
28
|
**Social Profiler** — Paste a GitHub/Bluesky/Reddit/Twitter URL or @handle, get a profile card with stats, top repos, similar accounts.
|
|
29
29
|
|
|
30
|
-
**Torrent Search** — The Pirate Bay
|
|
30
|
+
**Torrent Search** — The Pirate Bay, 1337x, YTS, Nyaa, EZTV, and Torrent Galaxy with magnet links, seeders, file sizes.
|
|
31
31
|
|
|
32
32
|
**Social & News** — Bluesky posts + GDELT articles inline.
|
|
33
33
|
|
|
@@ -84,7 +84,7 @@ Load Models button auto-discovers available models from the endpoint.
|
|
|
84
84
|
|
|
85
85
|
**Self-hosted**: SearXNG (proxy to 40+ engines)
|
|
86
86
|
|
|
87
|
-
**Selectable per-search**: Engine picker icon in the header lets you toggle individual engines, use presets (All /
|
|
87
|
+
**Selectable per-search**: Engine picker icon in the header lets you toggle individual engines, use presets (All / Web / Uncensored / GitHub / Torrent / Social / Research), or pick from groups (Web Core, Uncensored, Code & Dev, Media, Research, Federated, Torrent).
|
|
88
88
|
|
|
89
89
|
## Frontend
|
|
90
90
|
|
|
@@ -121,7 +121,7 @@ src/
|
|
|
121
121
|
fetch/ document fetcher + SSRF guard
|
|
122
122
|
profiler/ social profile scanner (10 platforms)
|
|
123
123
|
social/ Bluesky + GDELT + scrapers
|
|
124
|
-
torrent/ TPB + 1337x + magnet extraction
|
|
124
|
+
torrent/ TPB + 1337x + YTS + Nyaa + EZTV + TGx + magnet extraction
|
|
125
125
|
autostart/ Termux:Boot / systemd / launchd
|
|
126
126
|
api/ routes + middleware
|
|
127
127
|
|
package/frontend/dist/app.js
CHANGED
|
@@ -102,7 +102,7 @@ function route() {
|
|
|
102
102
|
const params = new URLSearchParams(queryIdx >= 0 ? hash.slice(queryIdx + 1) : '');
|
|
103
103
|
const q = params.get('q') || '';
|
|
104
104
|
const cat = (params.get('cat') || 'web').toLowerCase();
|
|
105
|
-
state.category = ['web', 'images', 'news'].includes(cat) ? cat : 'web';
|
|
105
|
+
state.category = ['web', 'images', 'news', 'torrent'].includes(cat) ? cat : 'web';
|
|
106
106
|
if (q && (q !== state.query || state.results.length === 0)) {
|
|
107
107
|
state.query = q;
|
|
108
108
|
doSearch(q);
|
|
@@ -278,13 +278,17 @@ const ENGINE_GROUPS = [
|
|
|
278
278
|
{ label: 'Media', items: ['youtube', 'sepiasearch'] },
|
|
279
279
|
{ label: 'Research', items: ['wikidata', 'crossref', 'openalex', 'openlibrary'] },
|
|
280
280
|
{ label: 'Federated', items: ['mastodon users', 'mastodon hashtags', 'tootfinder', 'lemmy communities', 'lemmy posts'] },
|
|
281
|
-
{ label: 'Torrent', items: ['piratebay', '1337x', 'nyaa'] },
|
|
281
|
+
{ label: 'Torrent', items: ['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'] },
|
|
282
282
|
];
|
|
283
283
|
|
|
284
284
|
const ENGINE_PRESETS = [
|
|
285
|
-
{ id: 'all',
|
|
286
|
-
{ id: '
|
|
287
|
-
{ id: '
|
|
285
|
+
{ id: 'all', label: 'All', engines: [] },
|
|
286
|
+
{ id: 'web', label: 'Web', engines: ['duckduckgo', 'startpage', 'qwant', 'ecosia', 'wikipedia'] },
|
|
287
|
+
{ id: 'uncensored', label: 'Uncensored', engines: ['yandex', 'marginalia', 'ahmia', 'duckduckgo'] },
|
|
288
|
+
{ id: 'github', label: 'GitHub', engines: ['github-api', 'github', 'duckduckgo', 'wikipedia'] },
|
|
289
|
+
{ id: 'torrent', label: 'Torrent', engines: ['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'] },
|
|
290
|
+
{ id: 'social', label: 'Social', engines: ['reddit', 'hackernews', 'youtube', 'mastodon users', 'lemmy posts'] },
|
|
291
|
+
{ id: 'research', label: 'Research', engines: ['wikipedia', 'wikidata', 'crossref', 'openalex', 'openlibrary'] },
|
|
288
292
|
];
|
|
289
293
|
|
|
290
294
|
// ─── Engine availability (requires config) ────────────────────────────────
|
|
@@ -294,7 +298,7 @@ const SEARXNG_ROUTED = new Set([
|
|
|
294
298
|
'wikidata', 'crossref', 'openalex', 'openlibrary',
|
|
295
299
|
'mastodon users', 'mastodon hashtags', 'tootfinder',
|
|
296
300
|
'lemmy communities', 'lemmy posts',
|
|
297
|
-
|
|
301
|
+
// piratebay, 1337x, nyaa, yts, eztv, tgx — native scrapers, always available
|
|
298
302
|
]);
|
|
299
303
|
|
|
300
304
|
function isEngineAvailable(engine) {
|
|
@@ -304,6 +308,7 @@ function isEngineAvailable(engine) {
|
|
|
304
308
|
if (engine === 'yandex') return cfg.yandex?.enabled !== false;
|
|
305
309
|
if (engine === 'ahmia') return cfg.ahmia?.enabled !== false;
|
|
306
310
|
if (engine === 'marginalia') return cfg.marginalia?.enabled !== false;
|
|
311
|
+
if (['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'].includes(engine)) return true;
|
|
307
312
|
if (engine === 'startpage') return (state.config?.startpage?.enabled) !== false;
|
|
308
313
|
if (engine === 'qwant') return (state.config?.qwant?.enabled) !== false;
|
|
309
314
|
if (engine === 'ecosia') return (state.config?.ecosia?.enabled) !== false;
|
|
@@ -362,16 +367,19 @@ function EnginePicker(opts = {}) {
|
|
|
362
367
|
const presetRow = el('div', { className: 'engine-preset-row' });
|
|
363
368
|
ENGINE_PRESETS.forEach((preset) => {
|
|
364
369
|
const isActive = preset.id === 'all' ? isAll
|
|
365
|
-
: preset.engines.length > 0
|
|
370
|
+
: preset.engines.length > 0
|
|
371
|
+
&& preset.engines.every(e => state.selectedEngines.includes(e))
|
|
372
|
+
&& state.selectedEngines.length === preset.engines.length;
|
|
366
373
|
presetRow.append(el('button', {
|
|
367
|
-
className: `btn ${isActive
|
|
374
|
+
className: `btn ${isActive ? 'btn-primary' : ''}`,
|
|
368
375
|
type: 'button',
|
|
369
376
|
onClick: () => {
|
|
370
|
-
// 'all' preset → clear filter (backend uses all configured providers)
|
|
371
377
|
setSelectedEngines(preset.id === 'all' ? [] : preset.engines);
|
|
372
|
-
//
|
|
373
|
-
[...details.querySelectorAll('.engine-chip input
|
|
374
|
-
|
|
378
|
+
// update chip checkboxes via data-engine attribute
|
|
379
|
+
[...details.querySelectorAll('.engine-chip input')].forEach((input) => {
|
|
380
|
+
const engine = input.closest('.engine-chip')?.dataset?.engine || '';
|
|
381
|
+
if (input.disabled) return;
|
|
382
|
+
input.checked = preset.id === 'all' || preset.engines.includes(engine);
|
|
375
383
|
});
|
|
376
384
|
},
|
|
377
385
|
}, preset.label));
|
|
@@ -392,7 +400,7 @@ function EnginePicker(opts = {}) {
|
|
|
392
400
|
const input = el('input', inputAttrs);
|
|
393
401
|
const chipClass = `engine-chip${available ? '' : ' engine-chip-unavailable'}`;
|
|
394
402
|
const title = available ? engine : `${engine} — not configured (Settings)`;
|
|
395
|
-
const label = el('label', { className: chipClass, for: id, title },
|
|
403
|
+
const label = el('label', { className: chipClass, for: id, title, 'data-engine': engine },
|
|
396
404
|
input,
|
|
397
405
|
el('span', {}, engine),
|
|
398
406
|
);
|
|
@@ -409,7 +417,7 @@ function EnginePicker(opts = {}) {
|
|
|
409
417
|
type: 'button',
|
|
410
418
|
onClick: () => {
|
|
411
419
|
const checked = [...details.querySelectorAll('.engine-chip input:not(:disabled):checked')]
|
|
412
|
-
.map((node) => node.closest('.engine-chip')?.
|
|
420
|
+
.map((node) => node.closest('.engine-chip')?.dataset?.engine || '')
|
|
413
421
|
.filter(Boolean);
|
|
414
422
|
const availableAll = ENGINE_GROUPS.flatMap(g => g.items).filter(isEngineAvailable);
|
|
415
423
|
// If all available engines are checked, send [] (no filter)
|
|
@@ -851,6 +859,8 @@ function SocialPanel(results) {
|
|
|
851
859
|
}
|
|
852
860
|
|
|
853
861
|
// ─── Search logic ─────────────────────────────────────────────────────────
|
|
862
|
+
const TORRENT_QUERY_RE = /\b(torrent|magnet|\.iso|\.mkv|\.avi|\.mp4|720p|1080p|2160p|4k|uhd|season|s\d{1,2}e\d{1,2}|xvid|x264|x265|hevc|blu.?ray|webrip|dvdrip|bdrip|hdtv|yify|yts|piratebay|1337x|nyaa|eztv|tgx|download\s+film|download\s+serie|scarica\s+film)\b/i;
|
|
863
|
+
|
|
854
864
|
function isProfileQuery(q) {
|
|
855
865
|
return /^https?:\/\/(github|twitter|x|instagram|bluesky|reddit|linkedin|youtube|tiktok|telegram|facebook)/.test(q)
|
|
856
866
|
|| /^@[a-zA-Z0-9_\.]{2,}$/.test(q)
|
|
@@ -947,7 +957,10 @@ async function runSearchProgressive(q, lang, category, engines = []) {
|
|
|
947
957
|
async function doSearch(q, category = state.category) {
|
|
948
958
|
if (!q.trim()) return;
|
|
949
959
|
addSearchToHistory(q);
|
|
950
|
-
|
|
960
|
+
const VALID_CATS = ['web', 'images', 'news', 'torrent'];
|
|
961
|
+
// Fast torrent intent detection — only auto-switch if user is on the default web tab
|
|
962
|
+
if (TORRENT_QUERY_RE.test(q) && category === 'web') category = 'torrent';
|
|
963
|
+
state.category = VALID_CATS.includes(category) ? category : 'web';
|
|
951
964
|
state.loading = true;
|
|
952
965
|
state.results = [];
|
|
953
966
|
state.aiSummary = '';
|
|
@@ -955,7 +968,7 @@ async function doSearch(q, category = state.category) {
|
|
|
955
968
|
state.aiError = null;
|
|
956
969
|
state.aiMeta = null;
|
|
957
970
|
state.profilerData = null;
|
|
958
|
-
state.profilerLoading =
|
|
971
|
+
state.profilerLoading = false;
|
|
959
972
|
state.torrentData = [];
|
|
960
973
|
state.socialData = [];
|
|
961
974
|
renderApp();
|
|
@@ -963,7 +976,33 @@ async function doSearch(q, category = state.category) {
|
|
|
963
976
|
const lang = getResolvedLang();
|
|
964
977
|
const engines = state.selectedEngines.slice();
|
|
965
978
|
|
|
979
|
+
// ── Torrent category: direct scraper, no stream ──────────────────────────
|
|
980
|
+
if (state.category === 'torrent') {
|
|
981
|
+
try {
|
|
982
|
+
const res = await api('/api/torrent-search', {
|
|
983
|
+
method: 'POST',
|
|
984
|
+
headers: { 'Content-Type': 'application/json' },
|
|
985
|
+
body: JSON.stringify({ q }),
|
|
986
|
+
});
|
|
987
|
+
state.loading = false;
|
|
988
|
+
state.torrentData = res?.results || [];
|
|
989
|
+
state.providers = res?.source ? [res.source] : [];
|
|
990
|
+
} catch {
|
|
991
|
+
state.loading = false;
|
|
992
|
+
state.torrentData = [];
|
|
993
|
+
}
|
|
994
|
+
renderApp();
|
|
995
|
+
return;
|
|
996
|
+
}
|
|
997
|
+
|
|
966
998
|
try {
|
|
999
|
+
// AI query intent — runs in parallel with search, uses result to extend engines
|
|
1000
|
+
const aiQueryPromise = api('/api/ai-query', {
|
|
1001
|
+
method: 'POST',
|
|
1002
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1003
|
+
body: JSON.stringify({ query: q, lang }),
|
|
1004
|
+
}).catch(() => null);
|
|
1005
|
+
|
|
967
1006
|
const searchPromise = runSearchProgressive(q, lang, state.category, engines).catch(async () => {
|
|
968
1007
|
const p = new URLSearchParams({ q, lang, cat: state.category });
|
|
969
1008
|
if (engines.length > 0) p.set('engines', engines.join(','));
|
|
@@ -974,16 +1013,46 @@ async function doSearch(q, category = state.category) {
|
|
|
974
1013
|
api(`/api/social-search?q=${encodeURIComponent(q)}`).catch(() => null),
|
|
975
1014
|
];
|
|
976
1015
|
|
|
977
|
-
if (
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
);
|
|
1016
|
+
if (isProfileQuery(q)) {
|
|
1017
|
+
state.profilerLoading = true;
|
|
1018
|
+
promises.push(api(`/api/profiler?q=${encodeURIComponent(q)}`).catch(() => null));
|
|
981
1019
|
} else {
|
|
982
1020
|
promises.push(Promise.resolve(null));
|
|
983
1021
|
}
|
|
984
1022
|
|
|
985
1023
|
const [searchRes, socialRes, profilerRes] = await Promise.all(promises);
|
|
986
1024
|
|
|
1025
|
+
// Use AI intent routing: if also_search_on has engines the user didn't select, run a second pass
|
|
1026
|
+
const aiQuery = await aiQueryPromise;
|
|
1027
|
+
if (aiQuery?.also_search_on?.length && state.category === 'web' && !engines.length) {
|
|
1028
|
+
// AI suggested category switch (e.g. torrent) — honour it if user didn't pick manually
|
|
1029
|
+
if (aiQuery.category === 'torrent' && state.category !== 'torrent') {
|
|
1030
|
+
state.category = 'torrent';
|
|
1031
|
+
navigate(buildSearchHash(q, 'torrent'));
|
|
1032
|
+
const torRes = await api('/api/torrent-search', {
|
|
1033
|
+
method: 'POST', headers: { 'Content-Type': 'application/json' },
|
|
1034
|
+
body: JSON.stringify({ q }),
|
|
1035
|
+
}).catch(() => null);
|
|
1036
|
+
state.loading = false;
|
|
1037
|
+
state.torrentData = torRes?.results || [];
|
|
1038
|
+
state.providers = torRes?.source ? [torRes.source] : [];
|
|
1039
|
+
renderApp();
|
|
1040
|
+
return;
|
|
1041
|
+
}
|
|
1042
|
+
// Otherwise run a second search with the AI-suggested engines (non-blocking supplement)
|
|
1043
|
+
const p2 = new URLSearchParams({ q, lang, cat: 'web', engines: aiQuery.also_search_on.join(',') });
|
|
1044
|
+
api(`/api/search?${p2}`).then((extra) => {
|
|
1045
|
+
if (!extra?.results?.length) return;
|
|
1046
|
+
const existing = new Set(state.results.map(r => r.url));
|
|
1047
|
+
const fresh = extra.results.filter(r => !existing.has(r.url));
|
|
1048
|
+
if (fresh.length) {
|
|
1049
|
+
state.results = [...state.results, ...fresh];
|
|
1050
|
+
state.providers = [...new Set([...state.providers, ...(extra.providers || [])])];
|
|
1051
|
+
renderApp();
|
|
1052
|
+
}
|
|
1053
|
+
}).catch(() => null);
|
|
1054
|
+
}
|
|
1055
|
+
|
|
987
1056
|
state.loading = false;
|
|
988
1057
|
state.results = searchRes?.results || state.results || [];
|
|
989
1058
|
state.providers = searchRes?.providers || state.providers || [];
|
|
@@ -995,7 +1064,7 @@ async function doSearch(q, category = state.category) {
|
|
|
995
1064
|
state.profilerData = profilerRes;
|
|
996
1065
|
state.profilerLoading = false;
|
|
997
1066
|
|
|
998
|
-
// Torrent results
|
|
1067
|
+
// Torrent results extracted from web search
|
|
999
1068
|
state.torrentData = state.results.filter(r => r.magnetLink || r.engine?.includes('torrent') || r.engine?.includes('piratebay') || r.engine?.includes('1337x'));
|
|
1000
1069
|
|
|
1001
1070
|
renderApp();
|
|
@@ -1107,9 +1176,10 @@ function renderApp() {
|
|
|
1107
1176
|
|
|
1108
1177
|
const categoryBar = el('div', { className: 'category-tabs hide-mobile' });
|
|
1109
1178
|
const categories = [
|
|
1110
|
-
{ id: 'web',
|
|
1111
|
-
{ id: 'images',
|
|
1112
|
-
{ id: 'news',
|
|
1179
|
+
{ id: 'web', label: 'Web' },
|
|
1180
|
+
{ id: 'images', label: 'Images' },
|
|
1181
|
+
{ id: 'news', label: 'News' },
|
|
1182
|
+
{ id: 'torrent', label: 'Torrent' },
|
|
1113
1183
|
];
|
|
1114
1184
|
const buildCatTabs = (container) => {
|
|
1115
1185
|
categories.forEach((cat) => {
|
package/package.json
CHANGED
package/src/ai/query.js
CHANGED
|
@@ -2,6 +2,25 @@
|
|
|
2
2
|
|
|
3
3
|
import { call } from './providers/openai-compat.js';
|
|
4
4
|
|
|
5
|
+
// Fast regex-based torrent intent detection (no AI call needed)
|
|
6
|
+
export const TORRENT_QUERY_RE = /\b(torrent|magnet|\.iso|\.mkv|\.avi|\.mp4|720p|1080p|2160p|4k|uhd|season|s\d{1,2}e\d{1,2}|xvid|x264|x265|hevc|blu.?ray|webrip|dvdrip|bdrip|hdtv|yify|yts|piratebay|1337x|nyaa|eztv|tgx|download\s+film|download\s+serie|scarica\s+film)\b/i;
|
|
7
|
+
|
|
8
|
+
// Available engine names for also_search_on routing
|
|
9
|
+
const ENGINE_ROUTING_RULES = `
|
|
10
|
+
Engine routing rules — pick from this list only:
|
|
11
|
+
- Code, libraries, APIs, "how to implement", programming → ["github-api", "duckduckgo", "hackernews"]
|
|
12
|
+
- Opinions, reviews, "best X", community advice → ["reddit", "duckduckgo"]
|
|
13
|
+
- Academic papers, research, studies, citations → ["wikidata", "duckduckgo"]
|
|
14
|
+
- Open source, FOSS, privacy tools → ["github-api", "marginalia", "duckduckgo"]
|
|
15
|
+
- Person/brand social presence → ["reddit", "duckduckgo"]
|
|
16
|
+
- Anime, manga, Japanese content → ["nyaa", "duckduckgo"]
|
|
17
|
+
- TV shows, episodes → ["eztv", "duckduckgo"]
|
|
18
|
+
- Movies, film downloads → ["yts", "piratebay", "duckduckgo"]
|
|
19
|
+
- General torrent/file downloads → ["piratebay", "1337x", "tgx"]
|
|
20
|
+
- News, current events → ["duckduckgo", "hackernews"]
|
|
21
|
+
- Definitions, encyclopedic → ["wikipedia", "duckduckgo"]
|
|
22
|
+
- Default/other → ["duckduckgo", "startpage"]`;
|
|
23
|
+
|
|
5
24
|
function buildQueryInterpretPrompt({ query, lang }) {
|
|
6
25
|
const langName = {
|
|
7
26
|
'it-IT': 'Italian', 'en-US': 'English', 'es-ES': 'Spanish',
|
|
@@ -16,18 +35,21 @@ User language: ${langName}
|
|
|
16
35
|
Respond with this exact JSON structure:
|
|
17
36
|
{
|
|
18
37
|
"refined_query": "improved version of the query (or same if already good)",
|
|
19
|
-
"intent": "one of:
|
|
20
|
-
"
|
|
38
|
+
"intent": "one of: torrent, code, social, academic, news, definition, how_to, other",
|
|
39
|
+
"also_search_on": ["engine1", "engine2"],
|
|
40
|
+
"category": "one of: web, torrent, images, news"
|
|
21
41
|
}
|
|
42
|
+
${ENGINE_ROUTING_RULES}
|
|
22
43
|
|
|
23
44
|
Rules:
|
|
24
|
-
- refined_query: fix typos, expand acronyms
|
|
25
|
-
- intent: classify
|
|
26
|
-
-
|
|
45
|
+
- refined_query: fix typos, expand acronyms — keep concise
|
|
46
|
+
- intent: classify the query type
|
|
47
|
+
- also_search_on: 2-3 engine names from the routing rules above, best match for this query
|
|
48
|
+
- category: "torrent" if the query is clearly about downloading files/media, else "web"
|
|
27
49
|
- JSON only, no explanation`;
|
|
28
50
|
}
|
|
29
51
|
|
|
30
|
-
// Returns { refined_query, intent,
|
|
52
|
+
// Returns { refined_query, intent, also_search_on, category } or null on failure
|
|
31
53
|
export async function refineQuery({ query, lang = 'en-US' }, aiConfig) {
|
|
32
54
|
if (!aiConfig?.enabled || !aiConfig?.api_base || !aiConfig?.model) return null;
|
|
33
55
|
|
|
@@ -43,10 +65,21 @@ export async function refineQuery({ query, lang = 'en-US' }, aiConfig) {
|
|
|
43
65
|
|
|
44
66
|
if (!result?.content) return null;
|
|
45
67
|
const parsed = JSON.parse(result.content);
|
|
68
|
+
|
|
69
|
+
const ALLOWED_ENGINES = new Set([
|
|
70
|
+
'duckduckgo', 'wikipedia', 'startpage', 'qwant', 'ecosia', 'brave', 'mojeek',
|
|
71
|
+
'github', 'github-api', 'hackernews', 'reddit', 'yandex', 'marginalia', 'ahmia',
|
|
72
|
+
'piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx',
|
|
73
|
+
'wikidata', 'youtube', 'mastodon users', 'lemmy posts',
|
|
74
|
+
]);
|
|
75
|
+
|
|
46
76
|
return {
|
|
47
|
-
refined_query:
|
|
48
|
-
intent:
|
|
49
|
-
|
|
77
|
+
refined_query: String(parsed.refined_query || query).slice(0, 240),
|
|
78
|
+
intent: String(parsed.intent || 'other'),
|
|
79
|
+
also_search_on: Array.isArray(parsed.also_search_on)
|
|
80
|
+
? parsed.also_search_on.map(String).filter(e => ALLOWED_ENGINES.has(e)).slice(0, 3)
|
|
81
|
+
: [],
|
|
82
|
+
category: ['web', 'torrent', 'images', 'news'].includes(parsed.category) ? parsed.category : 'web',
|
|
50
83
|
};
|
|
51
84
|
} catch {
|
|
52
85
|
return null;
|
package/src/api/routes.js
CHANGED
|
@@ -12,7 +12,7 @@ import { sendJson, sendRateLimited, applySecurityHeaders } from './middleware.js
|
|
|
12
12
|
import { getStatus as autostartStatus, setEnabled as autostartSetEnabled } from '../autostart/manager.js';
|
|
13
13
|
import { detectProfileTarget, scanProfile, PROFILER_PLATFORMS } from '../profiler/scanner.js';
|
|
14
14
|
import { fetchBlueskyPosts, fetchBlueskyActors, fetchGdeltArticles } from '../social/search.js';
|
|
15
|
-
import { scrapeTPB, scrape1337x, extractMagnetFromUrl } from '../torrent/scrapers.js';
|
|
15
|
+
import { scrapeTPB, scrape1337x, scrapeYTS, scrapeNyaa, scrapeEZTV, scrapeTGx, extractMagnetFromUrl } from '../torrent/scrapers.js';
|
|
16
16
|
|
|
17
17
|
const __filename = fileURLToPath(import.meta.url);
|
|
18
18
|
const __dirname = path.dirname(__filename);
|
|
@@ -25,7 +25,7 @@ const APP_VERSION = (() => {
|
|
|
25
25
|
return '0.0.0';
|
|
26
26
|
}
|
|
27
27
|
})();
|
|
28
|
-
const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news']);
|
|
28
|
+
const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news', 'torrent']);
|
|
29
29
|
const ALLOWED_LANGS = new Set(['auto', 'it-IT', 'en-US', 'es-ES', 'fr-FR', 'de-DE', 'pt-PT', 'ru-RU', 'zh-CN', 'ja-JP']);
|
|
30
30
|
|
|
31
31
|
function parseCategory(raw) {
|
|
@@ -393,15 +393,15 @@ export function createRouter(config, rateLimiters) {
|
|
|
393
393
|
}
|
|
394
394
|
|
|
395
395
|
const cfg = config.getConfig();
|
|
396
|
-
if (!cfg.ai?.enabled) return sendJson(res, 200, { refined_query: req.body?.query, intent: 'other', also_search: [] });
|
|
397
|
-
|
|
398
396
|
const query = String(req.body?.query || '').trim();
|
|
399
397
|
const lang = resolveLang(req.body?.lang, req.headers['accept-language']);
|
|
400
398
|
if (!query) return sendJson(res, 400, { error: 'missing_query' });
|
|
401
399
|
|
|
400
|
+
if (!cfg.ai?.enabled) return sendJson(res, 200, { refined_query: query, intent: 'other', also_search_on: [], category: 'web' });
|
|
401
|
+
|
|
402
402
|
const result = await refineQuery({ query, lang }, cfg.ai);
|
|
403
403
|
applySecurityHeaders(res);
|
|
404
|
-
res.json(result || { refined_query: query, intent: 'other',
|
|
404
|
+
res.json(result || { refined_query: query, intent: 'other', also_search_on: [], category: 'web' });
|
|
405
405
|
});
|
|
406
406
|
|
|
407
407
|
// ─── AI summary (SSE streaming) ────────────────────────────────────────────
|
|
@@ -482,7 +482,7 @@ export function createRouter(config, rateLimiters) {
|
|
|
482
482
|
return sendJson(res, 400, { error: 'invalid_body' });
|
|
483
483
|
}
|
|
484
484
|
// Whitelist accepted config keys to prevent unexpected writes
|
|
485
|
-
const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'yandex', 'ahmia', 'marginalia', 'searxng', 'search', 'rate_limit'];
|
|
485
|
+
const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'startpage', 'qwant', 'ecosia', 'yandex', 'ahmia', 'marginalia', 'searxng', 'search', 'rate_limit'];
|
|
486
486
|
const filtered = {};
|
|
487
487
|
for (const key of allowed) {
|
|
488
488
|
if (key in body) filtered[key] = body[key];
|
|
@@ -637,13 +637,30 @@ export function createRouter(config, rateLimiters) {
|
|
|
637
637
|
const query = String(req.body?.q || req.body?.query || '').trim().slice(0, 200);
|
|
638
638
|
if (!query) return sendJson(res, 400, { error: 'missing_query', message: 'q required' });
|
|
639
639
|
try {
|
|
640
|
-
const [tpb, lxx] = await Promise.allSettled([
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
640
|
+
const [tpb, lxx, yts, nyaa, eztv, tgx] = await Promise.allSettled([
|
|
641
|
+
scrapeTPB(query, 8),
|
|
642
|
+
scrape1337x(query, 6),
|
|
643
|
+
scrapeYTS(query, 6),
|
|
644
|
+
scrapeNyaa(query, 6),
|
|
645
|
+
scrapeEZTV(query, 6),
|
|
646
|
+
scrapeTGx(query, 6),
|
|
647
|
+
]);
|
|
648
|
+
const all = [
|
|
649
|
+
...(tpb.status === 'fulfilled' ? tpb.value : []),
|
|
650
|
+
...(lxx.status === 'fulfilled' ? lxx.value : []),
|
|
651
|
+
...(yts.status === 'fulfilled' ? yts.value : []),
|
|
652
|
+
...(nyaa.status === 'fulfilled' ? nyaa.value : []),
|
|
653
|
+
...(eztv.status === 'fulfilled' ? eztv.value : []),
|
|
654
|
+
...(tgx.status === 'fulfilled' ? tgx.value : []),
|
|
644
655
|
];
|
|
656
|
+
// Deduplicate by magnet hash, sort by seeds desc
|
|
657
|
+
const seen = new Set();
|
|
658
|
+
const results = all
|
|
659
|
+
.filter((r) => { const h = r.magnetLink?.match(/btih:([a-f0-9]+)/i)?.[1]?.toLowerCase(); if (!h || seen.has(h)) return false; seen.add(h); return true; })
|
|
660
|
+
.sort((a, b) => (b.seed || 0) - (a.seed || 0));
|
|
661
|
+
const sources = [...new Set(results.map((r) => r.engine))];
|
|
645
662
|
applySecurityHeaders(res);
|
|
646
|
-
res.json({ results, source:
|
|
663
|
+
res.json({ results, source: sources.join('+') || 'none' });
|
|
647
664
|
} catch (error) {
|
|
648
665
|
sendJson(res, 502, { error: 'scrape_failed', message: error.message });
|
|
649
666
|
}
|
package/src/search/engine.js
CHANGED
|
@@ -55,8 +55,10 @@ export const ALLOWED_ENGINES = new Set([
|
|
|
55
55
|
'1337x',
|
|
56
56
|
'piratebay',
|
|
57
57
|
'nyaa',
|
|
58
|
+
'yts',
|
|
59
|
+
'eztv',
|
|
60
|
+
'tgx',
|
|
58
61
|
// native scrapers
|
|
59
|
-
'startpage',
|
|
60
62
|
'qwant',
|
|
61
63
|
'ecosia',
|
|
62
64
|
// uncensored / alternative index engines
|
|
@@ -611,6 +613,9 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
|
|
|
611
613
|
|
|
612
614
|
export function getEnabledProviders(cfg) {
|
|
613
615
|
const providers = ['duckduckgo', 'wikipedia'];
|
|
616
|
+
if (cfg?.startpage?.enabled !== false) providers.push('startpage');
|
|
617
|
+
if (cfg?.qwant?.enabled !== false) providers.push('qwant');
|
|
618
|
+
if (cfg?.ecosia?.enabled !== false) providers.push('ecosia');
|
|
614
619
|
if (cfg.brave?.enabled && cfg.brave?.api_key) providers.push('brave');
|
|
615
620
|
if (cfg.mojeek?.enabled && cfg.mojeek?.api_key) providers.push('mojeek');
|
|
616
621
|
if (cfg.searxng?.enabled && cfg.searxng?.url) providers.push('searxng');
|
package/src/search/ranking.js
CHANGED
|
@@ -2,18 +2,41 @@
|
|
|
2
2
|
|
|
3
3
|
// Source quality weights — higher = results from this source ranked first
|
|
4
4
|
const SOURCE_ENGINE_WEIGHTS = {
|
|
5
|
-
|
|
5
|
+
// Reference sources
|
|
6
|
+
'wikipedia': 1.8,
|
|
6
7
|
'wikipedia-api': 1.8,
|
|
7
|
-
'
|
|
8
|
-
|
|
9
|
-
'
|
|
10
|
-
'
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
'
|
|
14
|
-
'
|
|
15
|
-
'
|
|
16
|
-
'
|
|
8
|
+
'wikidata': 1.6,
|
|
9
|
+
// Paid API providers
|
|
10
|
+
'brave-api': 1.5,
|
|
11
|
+
'mojeek-api': 1.4,
|
|
12
|
+
'mojeek': 1.3,
|
|
13
|
+
// Native zero-config scrapers
|
|
14
|
+
'startpage': 1.35,
|
|
15
|
+
'qwant': 1.3,
|
|
16
|
+
'ecosia': 1.2,
|
|
17
|
+
'duckduckgo': 1.2,
|
|
18
|
+
// Alternative/uncensored
|
|
19
|
+
'yandex': 1.15,
|
|
20
|
+
'marginalia': 1.1,
|
|
21
|
+
'ahmia': 1.0,
|
|
22
|
+
// SearXNG-routed
|
|
23
|
+
'searxng': 1.1,
|
|
24
|
+
'bing': 1.1,
|
|
25
|
+
'google': 1.1,
|
|
26
|
+
'yahoo': 1.0,
|
|
27
|
+
// Dev/code
|
|
28
|
+
'github': 1.25,
|
|
29
|
+
'github-api': 1.3,
|
|
30
|
+
'hackernews': 1.15,
|
|
31
|
+
// Social
|
|
32
|
+
'reddit': 1.1,
|
|
33
|
+
// Torrent (rank by seeds, not source quality)
|
|
34
|
+
'piratebay': 1.0,
|
|
35
|
+
'1337x': 1.0,
|
|
36
|
+
'yts': 1.0,
|
|
37
|
+
'nyaa': 1.0,
|
|
38
|
+
'eztv': 1.0,
|
|
39
|
+
'tgx': 1.0,
|
|
17
40
|
};
|
|
18
41
|
|
|
19
42
|
function getSourceWeight(engine) {
|
package/src/torrent/scrapers.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
// Torrent scrapers —
|
|
2
|
-
// Sources:
|
|
1
|
+
// Torrent scrapers — multi-source, no API keys required
|
|
2
|
+
// Sources: TPB, 1337x, YTS (JSON API), Nyaa, EZTV (API), Torrent Galaxy
|
|
3
3
|
|
|
4
4
|
import { assertPublicUrl } from '../fetch/ssrf-guard.js';
|
|
5
5
|
|
|
@@ -114,6 +114,133 @@ export async function scrape1337x(query, limit = 5) {
|
|
|
114
114
|
return [];
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
// ─── YTS (YIFY) ───────────────────────────────────────────────────────────────
|
|
118
|
+
// Public JSON API — movies only, high-quality releases, direct magnets
|
|
119
|
+
|
|
120
|
+
/**
 * Search YTS (YIFY) through its public JSON API and return movie torrents.
 *
 * At most two torrents are taken per movie (in the order the API lists them),
 * and a magnet link is assembled from each torrent's info-hash plus two public
 * trackers.
 *
 * @param {string} query - Search terms, sent as `query_term`.
 * @param {number} [limit=8] - Maximum number of results to return. Non-positive
 *   or non-integer values fall back to 8.
 * @returns {Promise<Array<object>>} Items shaped as
 *   `{ title, url, magnetLink, seed, leech, engine: 'yts', filesize }`.
 *   Resolves to `[]` on any fetch or parse failure (best-effort source).
 */
export async function scrapeYTS(query, limit = 8) {
  const maxResults = Number.isInteger(limit) && limit > 0 ? limit : 8;
  const params = new URLSearchParams({
    query_term: query,
    // NOTE(review): YTS documents `limit` as 1–50 — confirm against the API docs.
    limit: String(Math.min(maxResults, 50)),
    sort_by: 'seeds',
    order_by: 'desc',
  });
  // The hash comes from a remote response and is interpolated into a URI, so
  // accept only a hex (40) or base32 (32) BitTorrent v1 info-hash.
  const INFO_HASH_RE = /^(?:[a-f0-9]{40}|[a-z2-7]{32})$/i;
  const TRACKERS = '&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337%2Fannounce&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce';

  try {
    const body = await fetchTorrentPage(`https://yts.mx/api/v2/list_movies.json?${params}`, 10_000);
    const movies = JSON.parse(body)?.data?.movies;
    if (!Array.isArray(movies)) return [];

    const results = [];
    for (const movie of movies) {
      // Avoid a literal "undefined" in titles / dn= when title_long is absent.
      const name = movie?.title_long || movie?.title || 'Unknown';
      const pageUrl = movie?.url || (movie?.slug ? `https://yts.mx/movies/${movie.slug}` : '');
      const torrents = Array.isArray(movie?.torrents) ? movie.torrents.slice(0, 2) : [];

      for (const torrent of torrents) {
        const hash = String(torrent?.hash ?? '');
        if (!INFO_HASH_RE.test(hash)) continue;
        results.push({
          title: `${name} [${torrent.quality}] [${torrent.type || 'web'}]`,
          url: pageUrl,
          magnetLink: `magnet:?xt=urn:btih:${hash}&dn=${encodeURIComponent(name)}${TRACKERS}`,
          seed: torrent.seeds || 0,
          leech: torrent.peers || 0,
          engine: 'yts',
          filesize: torrent.size || null,
        });
        if (results.length >= maxResults) return results;
      }
    }
    return results;
  } catch {
    // Deliberate best-effort: one failing source must not break the aggregate search.
    return [];
  }
}
|
|
148
|
+
|
|
149
|
+
// ─── Nyaa ─────────────────────────────────────────────────────────────────────
|
|
150
|
+
// Anime/manga/JP content — simple table HTML, magnets inline
|
|
151
|
+
|
|
152
|
+
const NYAA_MIRRORS = [
|
|
153
|
+
'https://nyaa.si',
|
|
154
|
+
'https://nyaa.land',
|
|
155
|
+
];
|
|
156
|
+
|
|
157
|
+
/**
 * Scrape Nyaa search results (HTML table) from the first mirror that answers.
 *
 * Mirrors in `NYAA_MIRRORS` are tried in order. A mirror is skipped when the
 * page looks like a Cloudflare challenge, when fetching throws, or when no row
 * with a magnet link is found.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=8] - Maximum number of rows to return.
 * @returns {Promise<Array<object>>} Items shaped as
 *   `{ title, url: '', magnetLink, seed, leech, engine: 'nyaa' }`, or `[]`.
 */
export async function scrapeNyaa(query, limit = 8) {
  // Attribute values are HTML-escaped in the page source; a magnet href copied
  // verbatim would keep `&amp;` between its parameters. `&amp;` is decoded last
  // so that `&amp;quot;` is not decoded twice.
  const decodeEntities = (text) => text
    .replace(/&quot;/g, '"')
    .replace(/&#0*39;|&#x0*27;|&apos;/gi, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');

  const slug = encodeURIComponent(query.trim());
  for (const base of NYAA_MIRRORS) {
    try {
      const html = await fetchTorrentPage(`${base}/?q=${slug}&s=seeders&o=desc`, 12_000);
      if (html.includes('Cloudflare') || html.includes('cf-browser-verification')) continue;

      const rows = html.split(/<tr[\s>]/gi).slice(1);
      const results = [];
      for (const row of rows) {
        if (results.length >= limit) break;
        const magnetM = row.match(/href="(magnet:\?xt=urn:btih:[^"]{20,}?)"/i);
        if (!magnetM) continue;

        // Prefer the anchor that links to the torrent page itself.
        // NOTE(review): assumes the first `title=` in a row belongs to the
        // category icon link and that the "#comments" anchor must be excluded
        // (its href does not end right after the id) — verify against live markup.
        const titleM = row.match(/<a[^>]*href="\/view\/\d+"[^>]*title="([^"]{1,300})"/i)
          || row.match(/title="([^"]{3,200})"/i)
          || row.match(/class="[^"]*success[^"]*"[^>]*>\s*<[^>]+>([^<]{3,200})<\/a>/i);

        const seedM = row.match(/<td[^>]*class="[^"]*success[^"]*"[^>]*>\s*(\d+)\s*<\/td>/i);
        const leechM = row.match(/<td[^>]*class="[^"]*danger[^"]*"[^>]*>\s*(\d+)\s*<\/td>/i);
        // Fallback when the count cells carry no success/danger class.
        // NOTE(review): assumes the two cells right after the `data-timestamp`
        // cell are seeders then leechers — confirm against the mirror's markup.
        const statsM = row.match(/data-timestamp="[^"]*"[^>]*>[^<]*<\/td>\s*<td[^>]*>\s*(\d+)\s*<\/td>\s*<td[^>]*>\s*(\d+)\s*<\/td>/i);

        let seed = 0;
        if (seedM) seed = parseInt(seedM[1], 10);
        else if (statsM) seed = parseInt(statsM[1], 10);

        let leech = 0;
        if (leechM) leech = parseInt(leechM[1], 10);
        else if (statsM) leech = parseInt(statsM[2], 10);

        results.push({
          title: (titleM ? decodeEntities(titleM[1]).trim() : '') || 'Unknown',
          url: '',
          magnetLink: decodeEntities(magnetM[1]),
          seed,
          leech,
          engine: 'nyaa',
        });
      }
      if (results.length > 0) return results;
    } catch { /* next mirror */ }
  }
  return [];
}
|
|
185
|
+
|
|
186
|
+
// ─── EZTV ─────────────────────────────────────────────────────────────────────
|
|
187
|
+
// TV shows — JSON API, direct magnets
|
|
188
|
+
|
|
189
|
+
/**
 * Fetch TV torrents from the EZTV JSON API and keep those matching the query.
 *
 * NOTE(review): the public EZTV API appears to document only `limit`, `page`
 * and `imdb_id` for `get-torrents`; if `q` is ignored upstream, the response is
 * simply the latest releases. To avoid returning unrelated torrents, a larger
 * page is requested and filtered locally: every word of the query must appear
 * as a word in the torrent title. Confirm the API contract before relying on `q`.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=8] - Maximum number of results to return.
 * @returns {Promise<Array<object>>} Items shaped as
 *   `{ title, url, magnetLink, seed, leech, engine: 'eztv', filesize }`.
 *   Resolves to `[]` on any fetch or parse failure (best-effort source).
 */
export async function scrapeEZTV(query, limit = 8) {
  const maxResults = Number.isInteger(limit) && limit > 0 ? limit : 8;
  // Lower-case and collapse every run of non-alphanumerics ("My.Show.S01E02")
  // to a single space so release-style names compare word by word.
  const normalise = (text) => String(text ?? '')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, ' ')
    .trim();
  const wanted = normalise(query).split(' ').filter(Boolean);
  const matchesQuery = (title) => {
    const haystack = ` ${normalise(title)} `;
    return wanted.every((word) => haystack.includes(` ${word} `));
  };
  // Human-readable size; null when the API value is missing or not numeric.
  const formatSize = (raw) => {
    const bytes = Number(raw);
    if (!Number.isFinite(bytes) || bytes <= 0) return null;
    return bytes >= 1_073_741_824
      ? `${(bytes / 1_073_741_824).toFixed(2)} GB`
      : `${(bytes / 1_048_576).toFixed(2)} MB`;
  };

  // NOTE(review): 100 is assumed to be the API's maximum page size — confirm.
  const params = new URLSearchParams({ limit: '100', page: '1', q: query });
  try {
    const body = await fetchTorrentPage(`https://eztv.re/api/get-torrents?${params}`, 10_000);
    const torrents = JSON.parse(body)?.torrents;
    if (!Array.isArray(torrents)) return [];

    return torrents
      .map((t) => ({
        title: t.title || t.filename || 'Unknown',
        url: t.episode_url || '',
        magnetLink: t.magnet_url || '',
        seed: t.seeds || 0,
        leech: t.peers || 0,
        engine: 'eztv',
        filesize: formatSize(t.size_bytes),
      }))
      // Filter before slicing so unusable rows do not eat into the limit.
      .filter((r) => r.magnetLink && matchesQuery(r.title))
      .slice(0, maxResults);
  } catch {
    // Deliberate best-effort: one failing source must not break the aggregate search.
    return [];
  }
}
|
|
206
|
+
|
|
207
|
+
// ─── Torrent Galaxy (TGx) ─────────────────────────────────────────────────────
|
|
208
|
+
// General index — HTML scraping, good for software/movies/games
|
|
209
|
+
|
|
210
|
+
const TGX_MIRRORS = [
|
|
211
|
+
'https://torrentgalaxy.to',
|
|
212
|
+
'https://tgx.rs',
|
|
213
|
+
];
|
|
214
|
+
|
|
215
|
+
/**
 * Scrape Torrent Galaxy search results (HTML) from the first mirror that answers.
 *
 * Mirrors in `TGX_MIRRORS` are tried in order. A mirror is skipped when the
 * page mentions Cloudflare, is shorter than 2000 characters, throws while
 * being fetched, or yields no row with a magnet link.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=6] - Maximum number of rows to return.
 * @returns {Promise<Array<object>>} Items shaped as
 *   `{ title, url: '', magnetLink, seed, leech: 0, engine: 'tgx' }`, or `[]`.
 */
export async function scrapeTGx(query, limit = 6) {
  // Attribute values are HTML-escaped in the page source; `&amp;` is decoded
  // last so that `&amp;quot;` is not decoded twice.
  const decodeEntities = (text) => text
    .replace(/&quot;/g, '"')
    .replace(/&#0*39;|&#x0*27;|&apos;/gi, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
  // URL slugs are percent-encoded with "+" for spaces. A malformed escape must
  // not throw: one bad title would otherwise discard every result of the mirror.
  const decodeSlug = (slug) => {
    const spaced = slug.replace(/\+/g, ' ');
    try {
      return decodeURIComponent(spaced);
    } catch {
      return spaced;
    }
  };

  const slug = encodeURIComponent(query.trim());
  for (const base of TGX_MIRRORS) {
    try {
      const html = await fetchTorrentPage(`${base}/torrents.php?search=${slug}&sort=seeders&order=desc`, 14_000);
      if (html.includes('Cloudflare') || html.length < 2000) continue;

      const rows = html.split(/<div[^>]+class="[^"]*tgxtablerow[^"]*"/gi).slice(1);
      const results = [];
      for (const row of rows) {
        if (results.length >= limit) break;
        const magnetM = row.match(/href="(magnet:\?xt=urn:btih:[^"]{20,}?)"/i);
        if (!magnetM) continue;

        const slugM = row.match(/href="\/torrent\/\d+\/([^"]+)"[^>]*class="[^"]*txlight[^"]*"/i);
        let title = '';
        if (slugM) {
          title = decodeSlug(decodeEntities(slugM[1])).trim();
        } else {
          // A title="" attribute is plain (HTML-escaped) text, not a URL
          // component: URL-decoding it would throw on "100% ..." and turn
          // "C++" into "C  ".
          const attrM = row.match(/title="([^"]{3,200})"/i);
          if (attrM) title = decodeEntities(attrM[1]).trim();
        }

        const seedM = row.match(/<span[^>]*class="[^"]*tgxtable-s[^"]*"[^>]*>\s*(\d+)\s*<\/span>/i);
        results.push({
          title: title || 'Unknown',
          url: '',
          magnetLink: decodeEntities(magnetM[1]),
          seed: seedM ? parseInt(seedM[1], 10) : 0,
          leech: 0,
          engine: 'tgx',
        });
      }
      if (results.length > 0) return results;
    } catch { /* next mirror */ }
  }
  return [];
}
|
|
243
|
+
|
|
117
244
|
// ─── Magnet extraction from URL ───────────────────────────────────────────────
|
|
118
245
|
|
|
119
246
|
export async function extractMagnetFromUrl(rawUrl) {
|