termsearch 0.3.13 → 0.3.15

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -21,13 +21,13 @@ Opens `http://localhost:3000`. That's it.
21
21
 
22
22
  ## What You Get
23
23
 
24
- **Search** — DuckDuckGo, Wikipedia, Startpage, Qwant, Ecosia, GitHub, Yandex, Marginalia, Ahmia out of the box. Add Brave, Mojeek, or your own SearXNG for more coverage. Engine picker lets you mix and match per-search.
24
+ **Search** — DuckDuckGo, Wikipedia, Startpage, Qwant, Ecosia, GitHub, Yandex, Marginalia, Ahmia out of the box. Add Brave, Mojeek, or your own SearXNG for more coverage. Engine picker lets you mix and match per-search, and the new web scrapers can be toggled in Settings.
25
25
 
26
26
  **AI Summaries** — Connect any OpenAI-compatible endpoint (Ollama, LM Studio, llama.cpp, Chutes.ai, OpenRouter.ai, Anthropic, OpenAI). 2-phase agentic flow: AI picks sources, reads pages, synthesizes an answer. Session memory carries context across queries.
27
27
 
28
28
  **Social Profiler** — Paste a GitHub/Bluesky/Reddit/Twitter URL or @handle, get a profile card with stats, top repos, similar accounts.
29
29
 
30
- **Torrent Search** — The Pirate Bay + 1337x with magnet links, seeders, file sizes.
30
+ **Torrent Search** — The Pirate Bay, 1337x, YTS, Nyaa, EZTV, and Torrent Galaxy with magnet links, seeders, file sizes.
31
31
 
32
32
  **Social & News** — Bluesky posts + GDELT articles inline.
33
33
 
@@ -78,11 +78,13 @@ Load Models button auto-discovers available models from the endpoint.
78
78
 
79
79
  **Zero-config** (no API key): DuckDuckGo, Wikipedia, Startpage, Qwant, Ecosia, GitHub, Yandex, Ahmia, Marginalia
80
80
 
81
+ **Toggles in Settings**: Startpage, Qwant, Ecosia, Yandex, Ahmia, Marginalia
82
+
81
83
  **API key** (toggle in Settings): Brave Search, Mojeek
82
84
 
83
85
  **Self-hosted**: SearXNG (proxy to 40+ engines)
84
86
 
85
- **Selectable per-search**: Engine picker icon in the header lets you toggle individual engines, use presets (All / Balanced / GitHub Focus), or pick from groups (Web Core, Uncensored, Code & Dev, Media, Research, Federated, Torrent).
87
+ **Selectable per-search**: Engine picker icon in the header lets you toggle individual engines, use presets (All / Web / Uncensored / GitHub / Torrent / Social / Research), or pick from groups (Web Core, Uncensored, Code & Dev, Media, Research, Federated, Torrent).
86
88
 
87
89
  ## Frontend
88
90
 
@@ -119,7 +121,7 @@ src/
119
121
  fetch/ document fetcher + SSRF guard
120
122
  profiler/ social profile scanner (10 platforms)
121
123
  social/ Bluesky + GDELT + scrapers
122
- torrent/ TPB + 1337x + magnet extraction
124
+ torrent/ TPB + 1337x + YTS + Nyaa + EZTV + TGx + magnet extraction
123
125
  autostart/ Termux:Boot / systemd / launchd
124
126
  api/ routes + middleware
125
127
 
@@ -102,7 +102,7 @@ function route() {
102
102
  const params = new URLSearchParams(queryIdx >= 0 ? hash.slice(queryIdx + 1) : '');
103
103
  const q = params.get('q') || '';
104
104
  const cat = (params.get('cat') || 'web').toLowerCase();
105
- state.category = ['web', 'images', 'news'].includes(cat) ? cat : 'web';
105
+ state.category = ['web', 'images', 'news', 'torrent'].includes(cat) ? cat : 'web';
106
106
  if (q && (q !== state.query || state.results.length === 0)) {
107
107
  state.query = q;
108
108
  doSearch(q);
@@ -278,13 +278,17 @@ const ENGINE_GROUPS = [
278
278
  { label: 'Media', items: ['youtube', 'sepiasearch'] },
279
279
  { label: 'Research', items: ['wikidata', 'crossref', 'openalex', 'openlibrary'] },
280
280
  { label: 'Federated', items: ['mastodon users', 'mastodon hashtags', 'tootfinder', 'lemmy communities', 'lemmy posts'] },
281
- { label: 'Torrent', items: ['piratebay', '1337x', 'nyaa'] },
281
+ { label: 'Torrent', items: ['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'] },
282
282
  ];
283
283
 
284
284
  const ENGINE_PRESETS = [
285
- { id: 'all', label: 'All', engines: [] },
286
- { id: 'balanced', label: 'Balanced', engines: ['duckduckgo', 'wikipedia', 'bing', 'brave', 'github', 'reddit', 'youtube'] },
287
- { id: 'github', label: 'GitHub Focus', engines: ['github-api', 'github', 'duckduckgo', 'wikipedia'] },
285
+ { id: 'all', label: 'All', engines: [] },
286
+ { id: 'web', label: 'Web', engines: ['duckduckgo', 'startpage', 'qwant', 'ecosia', 'wikipedia'] },
287
+ { id: 'uncensored', label: 'Uncensored', engines: ['yandex', 'marginalia', 'ahmia', 'duckduckgo'] },
288
+ { id: 'github', label: 'GitHub', engines: ['github-api', 'github', 'duckduckgo', 'wikipedia'] },
289
+ { id: 'torrent', label: 'Torrent', engines: ['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'] },
290
+ { id: 'social', label: 'Social', engines: ['reddit', 'hackernews', 'youtube', 'mastodon users', 'lemmy posts'] },
291
+ { id: 'research', label: 'Research', engines: ['wikipedia', 'wikidata', 'crossref', 'openalex', 'openlibrary'] },
288
292
  ];
289
293
 
290
294
  // ─── Engine availability (requires config) ────────────────────────────────
@@ -294,7 +298,7 @@ const SEARXNG_ROUTED = new Set([
294
298
  'wikidata', 'crossref', 'openalex', 'openlibrary',
295
299
  'mastodon users', 'mastodon hashtags', 'tootfinder',
296
300
  'lemmy communities', 'lemmy posts',
297
- 'piratebay', '1337x', 'nyaa',
301
+ // piratebay, 1337x, nyaa, yts, eztv, tgx — native scrapers, always available
298
302
  ]);
299
303
 
300
304
  function isEngineAvailable(engine) {
@@ -304,7 +308,10 @@ function isEngineAvailable(engine) {
304
308
  if (engine === 'yandex') return cfg.yandex?.enabled !== false;
305
309
  if (engine === 'ahmia') return cfg.ahmia?.enabled !== false;
306
310
  if (engine === 'marginalia') return cfg.marginalia?.enabled !== false;
307
- if (engine === 'startpage' || engine === 'qwant' || engine === 'ecosia') return true;
311
+ if (['piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx'].includes(engine)) return true;
312
+ if (engine === 'startpage') return (state.config?.startpage?.enabled) !== false;
313
+ if (engine === 'qwant') return (state.config?.qwant?.enabled) !== false;
314
+ if (engine === 'ecosia') return (state.config?.ecosia?.enabled) !== false;
308
315
  if (SEARXNG_ROUTED.has(engine)) return Boolean(cfg.searxng?.enabled && cfg.searxng?.url);
309
316
  return true; // duckduckgo, wikipedia, github, github-api — always available
310
317
  }
@@ -360,16 +367,19 @@ function EnginePicker(opts = {}) {
360
367
  const presetRow = el('div', { className: 'engine-preset-row' });
361
368
  ENGINE_PRESETS.forEach((preset) => {
362
369
  const isActive = preset.id === 'all' ? isAll
363
- : preset.engines.length > 0 && preset.engines.every(e => state.selectedEngines.includes(e)) && state.selectedEngines.length === preset.engines.length;
370
+ : preset.engines.length > 0
371
+ && preset.engines.every(e => state.selectedEngines.includes(e))
372
+ && state.selectedEngines.length === preset.engines.length;
364
373
  presetRow.append(el('button', {
365
- className: `btn ${isActive || preset.id === 'balanced' ? 'btn-primary' : ''}`,
374
+ className: `btn ${isActive ? 'btn-primary' : ''}`,
366
375
  type: 'button',
367
376
  onClick: () => {
368
- // 'all' preset → clear filter (backend uses all configured providers)
369
377
  setSelectedEngines(preset.id === 'all' ? [] : preset.engines);
370
- // visually check/uncheck all available chips
371
- [...details.querySelectorAll('.engine-chip input:not(:disabled)')].forEach((input) => {
372
- input.checked = preset.id === 'all' || preset.engines.includes(input.closest('.engine-chip')?.querySelector('span')?.textContent?.trim().toLowerCase() || '');
378
+ // update chip checkboxes via data-engine attribute
379
+ [...details.querySelectorAll('.engine-chip input')].forEach((input) => {
380
+ const engine = input.closest('.engine-chip')?.dataset?.engine || '';
381
+ if (input.disabled) return;
382
+ input.checked = preset.id === 'all' || preset.engines.includes(engine);
373
383
  });
374
384
  },
375
385
  }, preset.label));
@@ -390,7 +400,7 @@ function EnginePicker(opts = {}) {
390
400
  const input = el('input', inputAttrs);
391
401
  const chipClass = `engine-chip${available ? '' : ' engine-chip-unavailable'}`;
392
402
  const title = available ? engine : `${engine} — not configured (Settings)`;
393
- const label = el('label', { className: chipClass, for: id, title },
403
+ const label = el('label', { className: chipClass, for: id, title, 'data-engine': engine },
394
404
  input,
395
405
  el('span', {}, engine),
396
406
  );
@@ -407,7 +417,7 @@ function EnginePicker(opts = {}) {
407
417
  type: 'button',
408
418
  onClick: () => {
409
419
  const checked = [...details.querySelectorAll('.engine-chip input:not(:disabled):checked')]
410
- .map((node) => node.closest('.engine-chip')?.querySelector('span')?.textContent?.trim().toLowerCase())
420
+ .map((node) => node.closest('.engine-chip')?.dataset?.engine || '')
411
421
  .filter(Boolean);
412
422
  const availableAll = ENGINE_GROUPS.flatMap(g => g.items).filter(isEngineAvailable);
413
423
  // If all available engines are checked, send [] (no filter)
@@ -849,6 +859,8 @@ function SocialPanel(results) {
849
859
  }
850
860
 
851
861
  // ─── Search logic ─────────────────────────────────────────────────────────
862
+ const TORRENT_QUERY_RE = /\b(torrent|magnet|\.iso|\.mkv|\.avi|\.mp4|720p|1080p|2160p|4k|uhd|season|s\d{1,2}e\d{1,2}|xvid|x264|x265|hevc|blu.?ray|webrip|dvdrip|bdrip|hdtv|yify|yts|piratebay|1337x|nyaa|eztv|tgx|download\s+film|download\s+serie|scarica\s+film)\b/i;
863
+
852
864
  function isProfileQuery(q) {
853
865
  return /^https?:\/\/(github|twitter|x|instagram|bluesky|reddit|linkedin|youtube|tiktok|telegram|facebook)/.test(q)
854
866
  || /^@[a-zA-Z0-9_\.]{2,}$/.test(q)
@@ -945,7 +957,10 @@ async function runSearchProgressive(q, lang, category, engines = []) {
945
957
  async function doSearch(q, category = state.category) {
946
958
  if (!q.trim()) return;
947
959
  addSearchToHistory(q);
948
- state.category = ['web', 'images', 'news'].includes(category) ? category : 'web';
960
+ const VALID_CATS = ['web', 'images', 'news', 'torrent'];
961
+ // Fast torrent intent detection — only auto-switch if user is on the default web tab
962
+ if (TORRENT_QUERY_RE.test(q) && category === 'web') category = 'torrent';
963
+ state.category = VALID_CATS.includes(category) ? category : 'web';
949
964
  state.loading = true;
950
965
  state.results = [];
951
966
  state.aiSummary = '';
@@ -953,7 +968,7 @@ async function doSearch(q, category = state.category) {
953
968
  state.aiError = null;
954
969
  state.aiMeta = null;
955
970
  state.profilerData = null;
956
- state.profilerLoading = isProfileQuery(q);
971
+ state.profilerLoading = false;
957
972
  state.torrentData = [];
958
973
  state.socialData = [];
959
974
  renderApp();
@@ -961,7 +976,33 @@ async function doSearch(q, category = state.category) {
961
976
  const lang = getResolvedLang();
962
977
  const engines = state.selectedEngines.slice();
963
978
 
979
+ // ── Torrent category: direct scraper, no stream ──────────────────────────
980
+ if (state.category === 'torrent') {
981
+ try {
982
+ const res = await api('/api/torrent-search', {
983
+ method: 'POST',
984
+ headers: { 'Content-Type': 'application/json' },
985
+ body: JSON.stringify({ q }),
986
+ });
987
+ state.loading = false;
988
+ state.torrentData = res?.results || [];
989
+ state.providers = res?.source ? [res.source] : [];
990
+ } catch {
991
+ state.loading = false;
992
+ state.torrentData = [];
993
+ }
994
+ renderApp();
995
+ return;
996
+ }
997
+
964
998
  try {
999
+ // AI query intent — runs in parallel with search, uses result to extend engines
1000
+ const aiQueryPromise = api('/api/ai-query', {
1001
+ method: 'POST',
1002
+ headers: { 'Content-Type': 'application/json' },
1003
+ body: JSON.stringify({ query: q, lang }),
1004
+ }).catch(() => null);
1005
+
965
1006
  const searchPromise = runSearchProgressive(q, lang, state.category, engines).catch(async () => {
966
1007
  const p = new URLSearchParams({ q, lang, cat: state.category });
967
1008
  if (engines.length > 0) p.set('engines', engines.join(','));
@@ -972,16 +1013,46 @@ async function doSearch(q, category = state.category) {
972
1013
  api(`/api/social-search?q=${encodeURIComponent(q)}`).catch(() => null),
973
1014
  ];
974
1015
 
975
- if (state.profilerLoading) {
976
- promises.push(
977
- api(`/api/profiler?q=${encodeURIComponent(q)}`).catch(() => null)
978
- );
1016
+ if (isProfileQuery(q)) {
1017
+ state.profilerLoading = true;
1018
+ promises.push(api(`/api/profiler?q=${encodeURIComponent(q)}`).catch(() => null));
979
1019
  } else {
980
1020
  promises.push(Promise.resolve(null));
981
1021
  }
982
1022
 
983
1023
  const [searchRes, socialRes, profilerRes] = await Promise.all(promises);
984
1024
 
1025
+ // Use AI intent routing: if also_search_on has engines the user didn't select, run a second pass
1026
+ const aiQuery = await aiQueryPromise;
1027
+ if (aiQuery?.also_search_on?.length && state.category === 'web' && !engines.length) {
1028
+ // AI suggested category switch (e.g. torrent) — honour it if user didn't pick manually
1029
+ if (aiQuery.category === 'torrent' && state.category !== 'torrent') {
1030
+ state.category = 'torrent';
1031
+ navigate(buildSearchHash(q, 'torrent'));
1032
+ const torRes = await api('/api/torrent-search', {
1033
+ method: 'POST', headers: { 'Content-Type': 'application/json' },
1034
+ body: JSON.stringify({ q }),
1035
+ }).catch(() => null);
1036
+ state.loading = false;
1037
+ state.torrentData = torRes?.results || [];
1038
+ state.providers = torRes?.source ? [torRes.source] : [];
1039
+ renderApp();
1040
+ return;
1041
+ }
1042
+ // Otherwise run a second search with the AI-suggested engines (non-blocking supplement)
1043
+ const p2 = new URLSearchParams({ q, lang, cat: 'web', engines: aiQuery.also_search_on.join(',') });
1044
+ api(`/api/search?${p2}`).then((extra) => {
1045
+ if (!extra?.results?.length) return;
1046
+ const existing = new Set(state.results.map(r => r.url));
1047
+ const fresh = extra.results.filter(r => !existing.has(r.url));
1048
+ if (fresh.length) {
1049
+ state.results = [...state.results, ...fresh];
1050
+ state.providers = [...new Set([...state.providers, ...(extra.providers || [])])];
1051
+ renderApp();
1052
+ }
1053
+ }).catch(() => null);
1054
+ }
1055
+
985
1056
  state.loading = false;
986
1057
  state.results = searchRes?.results || state.results || [];
987
1058
  state.providers = searchRes?.providers || state.providers || [];
@@ -993,7 +1064,7 @@ async function doSearch(q, category = state.category) {
993
1064
  state.profilerData = profilerRes;
994
1065
  state.profilerLoading = false;
995
1066
 
996
- // Torrent results (from main search or extracted by engine)
1067
+ // Torrent results extracted from web search
997
1068
  state.torrentData = state.results.filter(r => r.magnetLink || r.engine?.includes('torrent') || r.engine?.includes('piratebay') || r.engine?.includes('1337x'));
998
1069
 
999
1070
  renderApp();
@@ -1105,9 +1176,10 @@ function renderApp() {
1105
1176
 
1106
1177
  const categoryBar = el('div', { className: 'category-tabs hide-mobile' });
1107
1178
  const categories = [
1108
- { id: 'web', label: 'Web' },
1109
- { id: 'images', label: 'Images' },
1110
- { id: 'news', label: 'News' },
1179
+ { id: 'web', label: 'Web' },
1180
+ { id: 'images', label: 'Images' },
1181
+ { id: 'news', label: 'News' },
1182
+ { id: 'torrent', label: 'Torrent' },
1111
1183
  ];
1112
1184
  const buildCatTabs = (container) => {
1113
1185
  categories.forEach((cat) => {
@@ -1298,8 +1370,11 @@ async function renderSettings() {
1298
1370
  const brave = cfg.brave || {};
1299
1371
  const mojeek = cfg.mojeek || {};
1300
1372
  const searxng = cfg.searxng || {};
1301
- const yandexCfg = cfg.yandex || {};
1302
- const ahmiaCfg = cfg.ahmia || {};
1373
+ const startpageCfg = cfg.startpage || {};
1374
+ const qwantCfg = cfg.qwant || {};
1375
+ const ecosiaCfg = cfg.ecosia || {};
1376
+ const yandexCfg = cfg.yandex || {};
1377
+ const ahmiaCfg = cfg.ahmia || {};
1303
1378
  const marginaliaCfg = cfg.marginalia || {};
1304
1379
  const detectedPreset = detectPresetFromBase(ai.api_base);
1305
1380
 
@@ -1478,6 +1553,9 @@ async function renderSettings() {
1478
1553
  brave: { enabled: isChecked('brave-enabled') },
1479
1554
  mojeek: { enabled: isChecked('mojeek-enabled') },
1480
1555
  searxng:{ url: val('searxng-url'), enabled: isChecked('searxng-enabled') },
1556
+ startpage: { enabled: isChecked('startpage-enabled') },
1557
+ qwant: { enabled: isChecked('qwant-enabled') },
1558
+ ecosia: { enabled: isChecked('ecosia-enabled') },
1481
1559
  yandex: { enabled: isChecked('yandex-enabled') },
1482
1560
  ahmia: { enabled: isChecked('ahmia-enabled') },
1483
1561
  marginalia: { enabled: isChecked('marginalia-enabled') },
@@ -1721,6 +1799,33 @@ async function renderSettings() {
1721
1799
  el('div', { id: 'provider-test-searxng', style: 'display:none' }),
1722
1800
  ),
1723
1801
 
1802
+ // Web Scrapers (zero-config)
1803
+ el('div', { style: 'padding:10px 0;border-bottom:1px solid var(--border2)' },
1804
+ el('div', { style: 'font-size:11px;color:var(--text2);margin-bottom:8px;letter-spacing:0.04em;text-transform:uppercase' }, 'Web Scrapers (zero-config)'),
1805
+ el('div', { className: 'toggle-row' },
1806
+ el('span', { className: 'toggle-label' }, 'Startpage (Google proxy, no key)'),
1807
+ el('label', { className: 'toggle' },
1808
+ el('input', { type: 'checkbox', id: 'startpage-enabled', ...(startpageCfg.enabled !== false ? { checked: '' } : {}) }),
1809
+ el('span', { className: 'toggle-slider' }),
1810
+ ),
1811
+ ),
1812
+ el('div', { className: 'toggle-row', style: 'margin-top:6px' },
1813
+ el('span', { className: 'toggle-label' }, 'Qwant (EU index, no key)'),
1814
+ el('label', { className: 'toggle' },
1815
+ el('input', { type: 'checkbox', id: 'qwant-enabled', ...(qwantCfg.enabled !== false ? { checked: '' } : {}) }),
1816
+ el('span', { className: 'toggle-slider' }),
1817
+ ),
1818
+ ),
1819
+ el('div', { className: 'toggle-row', style: 'margin-top:6px' },
1820
+ el('span', { className: 'toggle-label' }, 'Ecosia (Bing-based, no key)'),
1821
+ el('label', { className: 'toggle' },
1822
+ el('input', { type: 'checkbox', id: 'ecosia-enabled', ...(ecosiaCfg.enabled !== false ? { checked: '' } : {}) }),
1823
+ el('span', { className: 'toggle-slider' }),
1824
+ ),
1825
+ ),
1826
+ el('div', { className: 'form-hint', style: 'margin-top:6px' }, 'HTML scrapers — active by default. May hit CAPTCHA under heavy use.'),
1827
+ ),
1828
+
1724
1829
  // Uncensored / Alternative
1725
1830
  el('div', { style: 'padding:10px 0' },
1726
1831
  el('div', { style: 'font-size:11px;color:var(--text2);margin-bottom:8px;letter-spacing:0.04em;text-transform:uppercase' }, 'Uncensored / Alternative'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "termsearch",
3
- "version": "0.3.13",
3
+ "version": "0.3.15",
4
4
  "description": "Personal search engine for Termux/Linux/macOS — zero-config, privacy-first, AI-optional",
5
5
  "type": "module",
6
6
  "bin": {
package/src/ai/query.js CHANGED
@@ -2,6 +2,25 @@
2
2
 
3
3
  import { call } from './providers/openai-compat.js';
4
4
 
5
+ // Fast regex-based torrent intent detection (no AI call needed)
6
+ export const TORRENT_QUERY_RE = /\b(torrent|magnet|\.iso|\.mkv|\.avi|\.mp4|720p|1080p|2160p|4k|uhd|season|s\d{1,2}e\d{1,2}|xvid|x264|x265|hevc|blu.?ray|webrip|dvdrip|bdrip|hdtv|yify|yts|piratebay|1337x|nyaa|eztv|tgx|download\s+film|download\s+serie|scarica\s+film)\b/i;
7
+
8
+ // Available engine names for also_search_on routing
9
+ const ENGINE_ROUTING_RULES = `
10
+ Engine routing rules — pick from this list only:
11
+ - Code, libraries, APIs, "how to implement", programming → ["github-api", "duckduckgo", "hackernews"]
12
+ - Opinions, reviews, "best X", community advice → ["reddit", "duckduckgo"]
13
+ - Academic papers, research, studies, citations → ["wikidata", "duckduckgo"]
14
+ - Open source, FOSS, privacy tools → ["github-api", "marginalia", "duckduckgo"]
15
+ - Person/brand social presence → ["reddit", "duckduckgo"]
16
+ - Anime, manga, Japanese content → ["nyaa", "duckduckgo"]
17
+ - TV shows, episodes → ["eztv", "duckduckgo"]
18
+ - Movies, film downloads → ["yts", "piratebay", "duckduckgo"]
19
+ - General torrent/file downloads → ["piratebay", "1337x", "tgx"]
20
+ - News, current events → ["duckduckgo", "hackernews"]
21
+ - Definitions, encyclopedic → ["wikipedia", "duckduckgo"]
22
+ - Default/other → ["duckduckgo", "startpage"]`;
23
+
5
24
  function buildQueryInterpretPrompt({ query, lang }) {
6
25
  const langName = {
7
26
  'it-IT': 'Italian', 'en-US': 'English', 'es-ES': 'Spanish',
@@ -16,18 +35,21 @@ User language: ${langName}
16
35
  Respond with this exact JSON structure:
17
36
  {
18
37
  "refined_query": "improved version of the query (or same if already good)",
19
- "intent": "one of: definition, how_to, news, research, comparison, navigation, other",
20
- "also_search": ["optional alternative query 1", "optional alternative query 2"]
38
+ "intent": "one of: torrent, code, social, academic, news, definition, how_to, other",
39
+ "also_search_on": ["engine1", "engine2"],
40
+ "category": "one of: web, torrent, images, news"
21
41
  }
42
+ ${ENGINE_ROUTING_RULES}
22
43
 
23
44
  Rules:
24
- - refined_query: fix typos, expand acronyms, clarify ambiguous terms — keep it concise
25
- - intent: classify what the user is looking for
26
- - also_search: at most 2 useful variant queries, empty array if not applicable
45
+ - refined_query: fix typos, expand acronyms — keep concise
46
+ - intent: classify the query type
47
+ - also_search_on: 2-3 engine names from the routing rules above, best match for this query
48
+ - category: "torrent" if the query is clearly about downloading files/media, else "web"
27
49
  - JSON only, no explanation`;
28
50
  }
29
51
 
30
- // Returns { refined_query, intent, also_search } or null on failure
52
+ // Returns { refined_query, intent, also_search_on, category } or null on failure
31
53
  export async function refineQuery({ query, lang = 'en-US' }, aiConfig) {
32
54
  if (!aiConfig?.enabled || !aiConfig?.api_base || !aiConfig?.model) return null;
33
55
 
@@ -43,10 +65,21 @@ export async function refineQuery({ query, lang = 'en-US' }, aiConfig) {
43
65
 
44
66
  if (!result?.content) return null;
45
67
  const parsed = JSON.parse(result.content);
68
+
69
+ const ALLOWED_ENGINES = new Set([
70
+ 'duckduckgo', 'wikipedia', 'startpage', 'qwant', 'ecosia', 'brave', 'mojeek',
71
+ 'github', 'github-api', 'hackernews', 'reddit', 'yandex', 'marginalia', 'ahmia',
72
+ 'piratebay', '1337x', 'yts', 'nyaa', 'eztv', 'tgx',
73
+ 'wikidata', 'youtube', 'mastodon users', 'lemmy posts',
74
+ ]);
75
+
46
76
  return {
47
- refined_query: String(parsed.refined_query || query).slice(0, 240),
48
- intent: String(parsed.intent || 'other'),
49
- also_search: Array.isArray(parsed.also_search) ? parsed.also_search.slice(0, 2).map(String) : [],
77
+ refined_query: String(parsed.refined_query || query).slice(0, 240),
78
+ intent: String(parsed.intent || 'other'),
79
+ also_search_on: Array.isArray(parsed.also_search_on)
80
+ ? parsed.also_search_on.map(String).filter(e => ALLOWED_ENGINES.has(e)).slice(0, 3)
81
+ : [],
82
+ category: ['web', 'torrent', 'images', 'news'].includes(parsed.category) ? parsed.category : 'web',
50
83
  };
51
84
  } catch {
52
85
  return null;
package/src/api/routes.js CHANGED
@@ -12,7 +12,7 @@ import { sendJson, sendRateLimited, applySecurityHeaders } from './middleware.js
12
12
  import { getStatus as autostartStatus, setEnabled as autostartSetEnabled } from '../autostart/manager.js';
13
13
  import { detectProfileTarget, scanProfile, PROFILER_PLATFORMS } from '../profiler/scanner.js';
14
14
  import { fetchBlueskyPosts, fetchBlueskyActors, fetchGdeltArticles } from '../social/search.js';
15
- import { scrapeTPB, scrape1337x, extractMagnetFromUrl } from '../torrent/scrapers.js';
15
+ import { scrapeTPB, scrape1337x, scrapeYTS, scrapeNyaa, scrapeEZTV, scrapeTGx, extractMagnetFromUrl } from '../torrent/scrapers.js';
16
16
 
17
17
  const __filename = fileURLToPath(import.meta.url);
18
18
  const __dirname = path.dirname(__filename);
@@ -25,7 +25,7 @@ const APP_VERSION = (() => {
25
25
  return '0.0.0';
26
26
  }
27
27
  })();
28
- const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news']);
28
+ const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news', 'torrent']);
29
29
  const ALLOWED_LANGS = new Set(['auto', 'it-IT', 'en-US', 'es-ES', 'fr-FR', 'de-DE', 'pt-PT', 'ru-RU', 'zh-CN', 'ja-JP']);
30
30
 
31
31
  function parseCategory(raw) {
@@ -393,15 +393,15 @@ export function createRouter(config, rateLimiters) {
393
393
  }
394
394
 
395
395
  const cfg = config.getConfig();
396
- if (!cfg.ai?.enabled) return sendJson(res, 200, { refined_query: req.body?.query, intent: 'other', also_search: [] });
397
-
398
396
  const query = String(req.body?.query || '').trim();
399
397
  const lang = resolveLang(req.body?.lang, req.headers['accept-language']);
400
398
  if (!query) return sendJson(res, 400, { error: 'missing_query' });
401
399
 
400
+ if (!cfg.ai?.enabled) return sendJson(res, 200, { refined_query: query, intent: 'other', also_search_on: [], category: 'web' });
401
+
402
402
  const result = await refineQuery({ query, lang }, cfg.ai);
403
403
  applySecurityHeaders(res);
404
- res.json(result || { refined_query: query, intent: 'other', also_search: [] });
404
+ res.json(result || { refined_query: query, intent: 'other', also_search_on: [], category: 'web' });
405
405
  });
406
406
 
407
407
  // ─── AI summary (SSE streaming) ────────────────────────────────────────────
@@ -482,7 +482,7 @@ export function createRouter(config, rateLimiters) {
482
482
  return sendJson(res, 400, { error: 'invalid_body' });
483
483
  }
484
484
  // Whitelist accepted config keys to prevent unexpected writes
485
- const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'yandex', 'ahmia', 'marginalia', 'searxng', 'search', 'rate_limit'];
485
+ const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'startpage', 'qwant', 'ecosia', 'yandex', 'ahmia', 'marginalia', 'searxng', 'search', 'rate_limit'];
486
486
  const filtered = {};
487
487
  for (const key of allowed) {
488
488
  if (key in body) filtered[key] = body[key];
@@ -637,13 +637,30 @@ export function createRouter(config, rateLimiters) {
637
637
  const query = String(req.body?.q || req.body?.query || '').trim().slice(0, 200);
638
638
  if (!query) return sendJson(res, 400, { error: 'missing_query', message: 'q required' });
639
639
  try {
640
- const [tpb, lxx] = await Promise.allSettled([scrapeTPB(query, 8), scrape1337x(query, 7)]);
641
- const results = [
642
- ...(tpb.status === 'fulfilled' ? tpb.value : []),
643
- ...(lxx.status === 'fulfilled' ? lxx.value : []),
640
+ const [tpb, lxx, yts, nyaa, eztv, tgx] = await Promise.allSettled([
641
+ scrapeTPB(query, 8),
642
+ scrape1337x(query, 6),
643
+ scrapeYTS(query, 6),
644
+ scrapeNyaa(query, 6),
645
+ scrapeEZTV(query, 6),
646
+ scrapeTGx(query, 6),
647
+ ]);
648
+ const all = [
649
+ ...(tpb.status === 'fulfilled' ? tpb.value : []),
650
+ ...(lxx.status === 'fulfilled' ? lxx.value : []),
651
+ ...(yts.status === 'fulfilled' ? yts.value : []),
652
+ ...(nyaa.status === 'fulfilled' ? nyaa.value : []),
653
+ ...(eztv.status === 'fulfilled' ? eztv.value : []),
654
+ ...(tgx.status === 'fulfilled' ? tgx.value : []),
644
655
  ];
656
+ // Deduplicate by magnet hash, sort by seeds desc
657
+ const seen = new Set();
658
+ const results = all
659
+ .filter((r) => { const h = r.magnetLink?.match(/btih:([a-f0-9]+)/i)?.[1]?.toLowerCase(); if (!h || seen.has(h)) return false; seen.add(h); return true; })
660
+ .sort((a, b) => (b.seed || 0) - (a.seed || 0));
661
+ const sources = [...new Set(results.map((r) => r.engine))];
645
662
  applySecurityHeaders(res);
646
- res.json({ results, source: results.length ? 'tpb+1337x' : 'none' });
663
+ res.json({ results, source: sources.join('+') || 'none' });
647
664
  } catch (error) {
648
665
  sendJson(res, 502, { error: 'scrape_failed', message: error.message });
649
666
  }
@@ -49,6 +49,18 @@ export const DEFAULTS = {
49
49
  api_base: 'https://api.mojeek.com',
50
50
  },
51
51
 
52
+ startpage: {
53
+ enabled: true,
54
+ },
55
+
56
+ qwant: {
57
+ enabled: true,
58
+ },
59
+
60
+ ecosia: {
61
+ enabled: true,
62
+ },
63
+
52
64
  yandex: {
53
65
  enabled: true,
54
66
  },
@@ -55,8 +55,10 @@ export const ALLOWED_ENGINES = new Set([
55
55
  '1337x',
56
56
  'piratebay',
57
57
  'nyaa',
58
+ 'yts',
59
+ 'eztv',
60
+ 'tgx',
58
61
  // native scrapers
59
- 'startpage',
60
62
  'qwant',
61
63
  'ecosia',
62
64
  // uncensored / alternative index engines
@@ -106,19 +108,19 @@ const PROVIDER_REGISTRY = {
106
108
  },
107
109
  startpage: {
108
110
  aliases: new Set(['startpage']),
109
- enabled: (_cfg) => true,
111
+ enabled: (cfg) => cfg?.startpage?.enabled !== false,
110
112
  run: startpage.search,
111
113
  defaultProvider: true,
112
114
  },
113
115
  qwant: {
114
116
  aliases: new Set(['qwant']),
115
- enabled: (_cfg) => true,
117
+ enabled: (cfg) => cfg?.qwant?.enabled !== false,
116
118
  run: qwant.search,
117
119
  defaultProvider: true,
118
120
  },
119
121
  ecosia: {
120
122
  aliases: new Set(['ecosia']),
121
- enabled: (_cfg) => true,
123
+ enabled: (cfg) => cfg?.ecosia?.enabled !== false,
122
124
  run: ecosia.search,
123
125
  defaultProvider: true,
124
126
  },
@@ -611,6 +613,9 @@ export async function* searchStream({ query, lang = 'en-US', safe = '1', page =
611
613
 
612
614
  export function getEnabledProviders(cfg) {
613
615
  const providers = ['duckduckgo', 'wikipedia'];
616
+ if (cfg?.startpage?.enabled !== false) providers.push('startpage');
617
+ if (cfg?.qwant?.enabled !== false) providers.push('qwant');
618
+ if (cfg?.ecosia?.enabled !== false) providers.push('ecosia');
614
619
  if (cfg.brave?.enabled && cfg.brave?.api_key) providers.push('brave');
615
620
  if (cfg.mojeek?.enabled && cfg.mojeek?.api_key) providers.push('mojeek');
616
621
  if (cfg.searxng?.enabled && cfg.searxng?.url) providers.push('searxng');
@@ -2,18 +2,41 @@
2
2
 
3
3
  // Source quality weights — higher = results from this source ranked first
4
4
  const SOURCE_ENGINE_WEIGHTS = {
5
- 'wikipedia': 1.8,
5
+ // Reference sources
6
+ 'wikipedia': 1.8,
6
7
  'wikipedia-api': 1.8,
7
- 'brave-api': 1.5,
8
- 'mojeek-api': 1.4,
9
- 'duckduckgo': 1.2,
10
- 'searxng': 1.1,
11
- // engines from SearXNG
12
- 'startpage': 1.3,
13
- 'qwant': 1.2,
14
- 'bing': 1.1,
15
- 'google': 1.1,
16
- 'yahoo': 1.0,
8
+ 'wikidata': 1.6,
9
+ // Paid API providers
10
+ 'brave-api': 1.5,
11
+ 'mojeek-api': 1.4,
12
+ 'mojeek': 1.3,
13
+ // Native zero-config scrapers
14
+ 'startpage': 1.35,
15
+ 'qwant': 1.3,
16
+ 'ecosia': 1.2,
17
+ 'duckduckgo': 1.2,
18
+ // Alternative/uncensored
19
+ 'yandex': 1.15,
20
+ 'marginalia': 1.1,
21
+ 'ahmia': 1.0,
22
+ // SearXNG-routed
23
+ 'searxng': 1.1,
24
+ 'bing': 1.1,
25
+ 'google': 1.1,
26
+ 'yahoo': 1.0,
27
+ // Dev/code
28
+ 'github': 1.25,
29
+ 'github-api': 1.3,
30
+ 'hackernews': 1.15,
31
+ // Social
32
+ 'reddit': 1.1,
33
+ // Torrent (rank by seeds, not source quality)
34
+ 'piratebay': 1.0,
35
+ '1337x': 1.0,
36
+ 'yts': 1.0,
37
+ 'nyaa': 1.0,
38
+ 'eztv': 1.0,
39
+ 'tgx': 1.0,
17
40
  };
18
41
 
19
42
  function getSourceWeight(engine) {
@@ -1,5 +1,5 @@
1
- // Torrent scrapers — ported from MmmSearch
2
- // Sources: The Pirate Bay + 1337x (direct HTML scraping, no API)
1
+ // Torrent scrapers — multi-source, no API keys required
2
+ // Sources: TPB, 1337x, YTS (JSON API), Nyaa, EZTV (API), Torrent Galaxy
3
3
 
4
4
  import { assertPublicUrl } from '../fetch/ssrf-guard.js';
5
5
 
@@ -114,6 +114,133 @@ export async function scrape1337x(query, limit = 5) {
114
114
  return [];
115
115
  }
116
116
 
117
+ // ─── YTS (YIFY) ───────────────────────────────────────────────────────────────
118
+ // Public JSON API — movies only, high-quality releases, direct magnets
119
+
120
/**
 * Search YTS through its JSON `list_movies` endpoint and build magnet links
 * from the torrent hashes it returns.
 *
 * At most two torrents are taken per movie, and at most `limit` results are
 * returned overall. Any failure (network, non-JSON body, unexpected shape)
 * yields an empty array — this scraper is deliberately best-effort.
 *
 * @param {string} query - Search terms, sent as `query_term`.
 * @param {number} [limit=8] - Maximum number of results; also sent to the API as its `limit`.
 * @returns {Promise<Array<{title: string, url: string, magnetLink: string, seed: number, leech: number, engine: string, filesize: (string|null)}>>}
 */
export async function scrapeYTS(query, limit = 8) {
  const params = new URLSearchParams({ query_term: query, limit: String(limit), sort_by: 'seeds', order_by: 'desc' });
  try {
    const body = await fetchTorrentPage(`https://yts.mx/api/v2/list_movies.json?${params}`, 10_000);
    const data = JSON.parse(body);
    const movies = Array.isArray(data?.data?.movies) ? data.data.movies : [];
    const results = [];
    for (const movie of movies) {
      // Checked before pushing so that limit <= 0 really returns nothing.
      if (results.length >= limit) break;
      if (!movie) continue;
      // Fall back through the available names so "undefined" never leaks
      // into the displayed title or the magnet's dn= parameter.
      const name = movie.title_long || movie.title || 'Unknown';
      const pageUrl = movie.url || (movie.slug ? `https://yts.mx/movies/${movie.slug}` : '');
      const torrents = Array.isArray(movie.torrents) ? movie.torrents : [];
      for (const torrent of torrents.slice(0, 2)) {
        if (results.length >= limit) break;
        const hash = torrent?.hash;
        if (!hash) continue;
        // The response carries only the info-hash, so the magnet is assembled
        // here with a display name and two public trackers.
        const magnet = `magnet:?xt=urn:btih:${hash}&dn=${encodeURIComponent(name)}&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337%2Fannounce&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce`;
        // A missing quality is omitted instead of being rendered as "[undefined]".
        const tags = [torrent.quality, torrent.type || 'web']
          .filter(Boolean)
          .map((tag) => `[${tag}]`)
          .join(' ');
        results.push({
          title: `${name} ${tags}`,
          url: pageUrl,
          magnetLink: magnet,
          seed: torrent.seeds || 0,
          leech: torrent.peers || 0,
          engine: 'yts',
          filesize: torrent.size || null,
        });
      }
    }
    return results;
  } catch { return []; }
}
148
+
149
+ // ─── Nyaa ─────────────────────────────────────────────────────────────────────
150
+ // Anime/manga/JP content — simple table HTML, magnets inline
151
+
152
const NYAA_MIRRORS = [
  'https://nyaa.si',
  'https://nyaa.land',
];

// Decode the HTML character references left in values pulled out of markup by regex.
function decodeNyaaEntities(text) {
  const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return text.replace(/&(?:#x([0-9a-f]{1,6})|#(\d{1,7})|(amp|lt|gt|quot|apos));/gi, (match, hex, dec, name) => {
    if (name) return named[name.toLowerCase()];
    const code = Number.parseInt(hex ?? dec, hex ? 16 : 10);
    return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : match;
  });
}

/**
 * Scrape Nyaa search results (requested sorted by seeders, descending) and
 * return the rows that carry an inline magnet link.
 *
 * Mirrors are tried in order; the first one producing at least one result
 * wins. Fetch/parse errors and pages that look like a Cloudflare challenge
 * move on to the next mirror. Returns an empty array when every mirror fails.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=8] - Maximum number of results.
 * @returns {Promise<Array<{title: string, url: string, magnetLink: string, seed: number, leech: number, engine: string}>>}
 */
export async function scrapeNyaa(query, limit = 8) {
  const slug = encodeURIComponent(query.trim());
  for (const base of NYAA_MIRRORS) {
    try {
      const html = await fetchTorrentPage(`${base}/?q=${slug}&s=seeders&o=desc`, 12_000);
      if (html.includes('Cloudflare') || html.includes('cf-browser-verification')) continue;
      const rows = html.split(/<tr[\s>]/gi).slice(1);
      const results = [];
      for (const row of rows) {
        if (results.length >= limit) break;
        const magnetM = row.match(/href="(magnet:\?xt=urn:btih:[^"]{20,}?)"/i);
        if (!magnetM) continue;

        // Prefer the title on the torrent's own /view/<id> link. The first
        // title="…" in a row may belong to another link (category icon,
        // comment counter), so the generic patterns are kept only as fallbacks.
        // NOTE(review): assumes the detail link is href="/view/<id>" — confirm against live markup.
        const titleM = row.match(/<a\s[^>]*href="\/view\/\d+"[^>]*title="([^"]{1,500})"/i)
          || row.match(/title="([^"]{3,200})"/i)
          || row.match(/class="[^"]*success[^"]*"[^>]*>\s*<[^>]+>([^<]{3,200})<\/a>/i);

        const seedM = row.match(/<td[^>]*class="[^"]*success[^"]*"[^>]*>\s*(\d+)\s*<\/td>/i);
        const leechM = row.match(/<td[^>]*class="[^"]*danger[^"]*"[^>]*>\s*(\d+)\s*<\/td>/i);
        // Fallback when the cells carry no success/danger class: take the
        // purely numeric cells in order.
        // NOTE(review): presumes column order seeders, leechers, completed — verify.
        const counts = [...row.matchAll(/<td[^>]*>\s*(\d+)\s*<\/td>/gi)].map((m) => Number.parseInt(m[1], 10));
        const positional = counts.length >= 2 ? counts : [];

        const title = titleM ? decodeNyaaEntities(titleM[1]).trim() : 'Unknown';
        results.push({
          title,
          url: '',
          // The href is an HTML attribute value, so "&amp;" has to become "&"
          // for the magnet's parameters to parse correctly.
          magnetLink: decodeNyaaEntities(magnetM[1]),
          seed: seedM ? Number.parseInt(seedM[1], 10) : (positional[0] ?? 0),
          leech: leechM ? Number.parseInt(leechM[1], 10) : (positional[1] ?? 0),
          engine: 'nyaa',
        });
      }
      if (results.length > 0) return results;
    } catch { /* next mirror */ }
  }
  return [];
}
185
+
186
+ // ─── EZTV ─────────────────────────────────────────────────────────────────────
187
+ // TV shows — JSON API, direct magnets
188
+
189
/**
 * Query the EZTV JSON API and map its torrents to the common result shape.
 *
 * NOTE(review): the public API description lists only `limit`, `page` and
 * `imdb_id` for `get-torrents`; a free-text `q` appears to be ignored, which
 * would make every query return the latest uploads. `q` is still sent for
 * backward compatibility, but results are additionally filtered locally:
 *   - a query of the form `tt1234567` is sent as `imdb_id` and not filtered;
 *   - any other non-empty query keeps only torrents whose title contains
 *     every query word (case-insensitive), over a larger fetched page.
 * Confirm against the live API.
 *
 * Entries without a magnet link are dropped before the limit is applied.
 * Any failure yields an empty array — this scraper is best-effort.
 *
 * @param {string} query - Show name words, or an IMDb id such as `tt6048596`.
 * @param {number} [limit=8] - Maximum number of results.
 * @returns {Promise<Array<{title: string, url: string, magnetLink: string, seed: number, leech: number, engine: string, filesize: (string|null)}>>}
 */
export async function scrapeEZTV(query, limit = 8) {
  const text = String(query ?? '').trim();
  const imdbM = text.match(/^tt(\d{5,10})$/i);
  const words = imdbM ? [] : text.toLowerCase().split(/[^\p{L}\p{N}]+/u).filter(Boolean);
  // Over-fetch when filtering locally, otherwise almost nothing would survive the filter.
  const pageSize = words.length > 0 ? Math.max(limit, 100) : limit;
  const params = new URLSearchParams({ limit: String(pageSize), page: '1', q: query });
  if (imdbM) params.set('imdb_id', imdbM[1]);
  try {
    const body = await fetchTorrentPage(`https://eztv.re/api/get-torrents?${params}`, 10_000);
    const data = JSON.parse(body);
    const torrents = Array.isArray(data?.torrents) ? data.torrents : [];
    return torrents
      .filter((t) => t && t.magnet_url)
      .map((t) => {
        const bytes = Number(t.size_bytes);
        return {
          title: t.title || t.filename || 'Unknown',
          url: t.episode_url || '',
          magnetLink: t.magnet_url,
          seed: t.seeds || 0,
          leech: t.peers || 0,
          engine: 'eztv',
          // Guarded so a non-numeric size never renders as "NaN GB".
          filesize: Number.isFinite(bytes) && bytes > 0 ? `${(bytes / 1_073_741_824).toFixed(2)} GB` : null,
        };
      })
      .filter((r) => {
        const haystack = r.title.toLowerCase();
        return words.every((word) => haystack.includes(word));
      })
      .slice(0, Math.max(0, limit));
  } catch { return []; }
}
206
+
207
+ // ─── Torrent Galaxy (TGx) ─────────────────────────────────────────────────────
208
+ // General index — HTML scraping, good for software/movies/games
209
+
210
const TGX_MIRRORS = [
  'https://torrentgalaxy.to',
  'https://tgx.rs',
];

// Decode the HTML character references left in values pulled out of markup by regex.
function decodeTgxEntities(text) {
  const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return text.replace(/&(?:#x([0-9a-f]{1,6})|#(\d{1,7})|(amp|lt|gt|quot|apos));/gi, (match, hex, dec, name) => {
    if (name) return named[name.toLowerCase()];
    const code = Number.parseInt(hex ?? dec, hex ? 16 : 10);
    return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : match;
  });
}

// Turn a URL path slug into display text; a malformed %-sequence falls back to the raw slug.
function decodeTgxSlug(slug) {
  const spaced = slug.replace(/\+/g, ' ');
  try {
    return decodeURIComponent(spaced);
  } catch {
    return spaced;
  }
}

/**
 * Scrape Torrent Galaxy search results (requested sorted by seeders) and
 * return the rows that carry an inline magnet link.
 *
 * Mirrors are tried in order; the first one producing at least one result
 * wins. Fetch errors, pages mentioning "Cloudflare" and pages shorter than
 * 2000 characters move on to the next mirror. Returns an empty array when
 * every mirror fails. Leech counts are not parsed and are always 0.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=6] - Maximum number of results.
 * @returns {Promise<Array<{title: string, url: string, magnetLink: string, seed: number, leech: number, engine: string}>>}
 */
export async function scrapeTGx(query, limit = 6) {
  const slug = encodeURIComponent(query.trim());
  for (const base of TGX_MIRRORS) {
    try {
      const html = await fetchTorrentPage(`${base}/torrents.php?search=${slug}&sort=seeders&order=desc`, 14_000);
      if (html.includes('Cloudflare') || html.length < 2000) continue;

      const rows = html.split(/<div[^>]+class="[^"]*tgxtablerow[^"]*"/gi).slice(1);
      const results = [];
      for (const row of rows) {
        if (results.length >= limit) break;
        const magnetM = row.match(/href="(magnet:\?xt=urn:btih:[^"]{20,}?)"/i);
        if (!magnetM) continue;

        // Two title sources with different encodings: the link's path slug is
        // URL-encoded, while a title="…" attribute is plain HTML text. Only
        // the slug is percent-decoded — running decodeURIComponent on
        // attribute text throws on a bare "%" and would abort the whole mirror.
        const slugM = row.match(/href="\/torrent\/\d+\/([^"]+)"[^>]*class="[^"]*txlight[^"]*"/i);
        const attrM = slugM ? null : row.match(/title="([^"]{3,200})"/i);
        let title = 'Unknown';
        if (slugM) title = decodeTgxEntities(decodeTgxSlug(slugM[1])).trim();
        else if (attrM) title = decodeTgxEntities(attrM[1]).trim();

        const seedM = row.match(/<span[^>]*class="[^"]*tgxtable-s[^"]*"[^>]*>\s*(\d+)\s*<\/span>/i);
        results.push({
          title,
          url: '',
          // The href is an HTML attribute value, so "&amp;" has to become "&"
          // for the magnet's parameters to parse correctly.
          magnetLink: decodeTgxEntities(magnetM[1]),
          seed: seedM ? Number.parseInt(seedM[1], 10) : 0,
          leech: 0,
          engine: 'tgx',
        });
      }
      if (results.length > 0) return results;
    } catch { /* next mirror */ }
  }
  return [];
}
243
+
117
244
  // ─── Magnet extraction from URL ───────────────────────────────────────────────
118
245
 
119
246
  export async function extractMagnetFromUrl(rawUrl) {