termsearch 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +205 -0
- package/bin/termsearch.js +433 -0
- package/config.example.json +31 -0
- package/frontend/dist/app.js +1051 -0
- package/frontend/dist/icon-192.png +0 -0
- package/frontend/dist/icon-512.png +0 -0
- package/frontend/dist/icon.svg +8 -0
- package/frontend/dist/index.html +28 -0
- package/frontend/dist/manifest.json +40 -0
- package/frontend/dist/opensearch.xml +8 -0
- package/frontend/dist/style.css +756 -0
- package/package.json +48 -0
- package/scripts/postinstall.js +84 -0
- package/src/ai/orchestrator.js +163 -0
- package/src/ai/providers/openai-compat.js +255 -0
- package/src/ai/query.js +54 -0
- package/src/ai/summary.js +120 -0
- package/src/api/middleware.js +91 -0
- package/src/api/routes.js +461 -0
- package/src/autostart/manager.js +207 -0
- package/src/config/defaults.js +62 -0
- package/src/config/manager.js +188 -0
- package/src/fetch/document.js +297 -0
- package/src/fetch/ssrf-guard.js +40 -0
- package/src/profiler/scanner.js +212 -0
- package/src/search/cache.js +119 -0
- package/src/search/engine.js +231 -0
- package/src/search/providers/brave.js +57 -0
- package/src/search/providers/duckduckgo.js +148 -0
- package/src/search/providers/mojeek.js +56 -0
- package/src/search/providers/searxng.js +53 -0
- package/src/search/providers/wikipedia.js +70 -0
- package/src/search/ranking.js +155 -0
- package/src/server.js +68 -0
- package/src/social/scrapers.js +356 -0
- package/src/social/search.js +77 -0
- package/src/torrent/scrapers.js +125 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// Torrent scrapers — ported from MmmSearch
|
|
2
|
+
// Sources: The Pirate Bay + 1337x (direct HTML scraping, no API)
|
|
3
|
+
|
|
4
|
+
import { assertPublicUrl } from '../fetch/ssrf-guard.js';
|
|
5
|
+
|
|
6
|
+
// Base URLs of The Pirate Bay mirrors, listed in the order they are attempted.
const TPB_MIRRORS = ['https://tpb.party', 'https://thepiratebay.org'];

// Base URLs of 1337x mirrors, listed in the order they are attempted.
const MIRRORS_1337X = ['https://www.1337xx.to', 'https://1337x.unblockit.bz', 'https://1337x.nocensor.lol'];

// Desktop-Firefox User-Agent string sent with every scraper request.
const TORRENT_UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0';
|
|
18
|
+
|
|
19
|
+
// ─── Shared fetch ─────────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Download a page with the scraper User-Agent and return at most the first
 * 300 000 bytes of its body, decoded as UTF-8.
 *
 * The body is read incrementally and the download is abandoned as soon as the
 * cap is reached, so an oversized response is never buffered in full.
 *
 * NOTE(review): redirects are followed automatically, so the final host may
 * differ from the one in `url`; callers that vet `url` against an SSRF guard
 * should be aware the redirect target is not re-checked here.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {number} [timeoutMs=10000] - Abort the request (headers and body) after this many ms.
 * @returns {Promise<string>} The (possibly truncated) response body.
 * @throws {Error} `HTTP <status>` on a non-2xx response; fetch/abort errors propagate unchanged.
 */
async function fetchTorrentPage(url, timeoutMs = 10_000) {
  const MAX_BYTES = 300_000;
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  try {
    const r = await fetch(url, {
      headers: { 'User-Agent': TORRENT_UA, Accept: 'text/html,*/*;q=0.5' },
      signal: ac.signal,
      redirect: 'follow',
    });
    if (!r.ok) {
      // Release the connection instead of leaving an unread body behind.
      r.body?.cancel().catch(() => {});
      throw new Error(`HTTP ${r.status}`);
    }

    // No readable stream available: fall back to buffering the whole body.
    if (!r.body) {
      return Buffer.from(await r.arrayBuffer()).subarray(0, MAX_BYTES).toString('utf8');
    }

    const reader = r.body.getReader();
    const chunks = [];
    let received = 0;
    let finished = false;
    try {
      while (received < MAX_BYTES) {
        const { done, value } = await reader.read();
        if (done) {
          finished = true;
          break;
        }
        chunks.push(value);
        received += value.byteLength;
      }
    } finally {
      // Stopped early (cap reached or read failed): drop the rest of the body.
      if (!finished) reader.cancel().catch(() => {});
    }
    return Buffer.concat(chunks).subarray(0, MAX_BYTES).toString('utf8');
  } finally {
    clearTimeout(timer);
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Find the first magnet link used as an `href` attribute value in an HTML string.
 *
 * The info-hash part must be at least 20 characters long. Ampersand character
 * references (`&amp;`, `&#38;`, `&#x26;`) inside the attribute are decoded, so
 * the returned URI has plain `&` separators between its parameters.
 *
 * @param {string} html - Raw HTML markup to scan.
 * @returns {string|null} The magnet URI, or `null` when none is found or `html` is not a string.
 */
export function extractMagnetFromHtml(html) {
  if (typeof html !== 'string') return null;
  // Accept both double- and single-quoted attribute values.
  const m = html.match(
    /href=(?:"(magnet:\?xt=urn:btih:[^"&]{20,}[^"]*)"|'(magnet:\?xt=urn:btih:[^'&]{20,}[^']*)')/i,
  );
  if (!m) return null;
  const raw = m[1] ?? m[2];
  // Attribute values are HTML-escaped in markup; undo the escaping of `&` only.
  return raw.replace(/&(?:amp|#0*38|#x0*26);/gi, '&');
}
|
|
41
|
+
|
|
42
|
+
// ─── The Pirate Bay ───────────────────────────────────────────────────────────
|
|
43
|
+
// Magnets are directly in search results — no per-page fetch needed
|
|
44
|
+
|
|
45
|
+
/**
 * Search The Pirate Bay by scraping the HTML results table of each mirror in
 * `TPB_MIRRORS`, in order, and return the rows of the first mirror that
 * yields at least one result.
 *
 * A mirror that fails to load or yields no usable rows is skipped silently
 * (best effort); if every mirror fails the result is an empty array.
 *
 * @param {string} query - Search terms; surrounding whitespace is ignored.
 * @param {number} [limit=8] - Maximum number of results to return.
 * @returns {Promise<Array<{title: string, url: string, magnetLink: string, seed: number, leech: number, engine: string}>>}
 */
export async function scrapeTPB(query, limit = 8) {
  const term = typeof query === 'string' ? query.trim() : '';
  // Nothing to search for, or nothing wanted: skip the network entirely.
  if (!term || !(limit > 0)) return [];

  const slug = encodeURIComponent(term);
  for (const base of TPB_MIRRORS) {
    try {
      const html = await fetchTorrentPage(`${base}/search/${slug}/0/99/0`, 12_000);
      const results = [];
      // Everything before the first <tr> is page chrome, hence slice(1).
      for (const row of html.split(/<tr[\s>]/i).slice(1)) {
        if (results.length >= limit) break;

        const magnetM = row.match(/href="(magnet:\?xt=urn:btih:[^"]{20,}?)"/i);
        if (!magnetM) continue; // header/ad rows carry no magnet link

        const titleM = row.match(/href="[^"]*\/torrent\/\d+[^"]*"[^>]*>([^<]{3,120})<\/a>/i)
          || row.match(/title="Details for ([^"]{3,120})"/i);
        const idM = row.match(/href="[^"]*\/torrent\/(\d+)/i);

        // The first two right-aligned numeric cells are read as seeders, leechers.
        const counts = [...row.matchAll(/<td align="right">(\d+)<\/td>/gi)]
          .map((m) => Number.parseInt(m[1], 10));

        results.push({
          title: titleM ? titleM[1].trim() : 'Unknown',
          url: `${base}/torrent/${idM?.[1] || ''}`,
          // Attribute values are HTML-escaped in markup; restore plain `&` separators.
          magnetLink: magnetM[1].replace(/&(?:amp|#0*38|#x0*26);/gi, '&'),
          seed: counts[0] ?? 0,
          leech: counts[1] ?? 0,
          engine: 'piratebay',
        });
      }
      if (results.length > 0) return results;
    } catch { /* try next mirror */ }
  }
  return [];
}
|
|
72
|
+
|
|
73
|
+
// ─── 1337x ────────────────────────────────────────────────────────────────────
|
|
74
|
+
// Must fetch each torrent page individually to get the magnet link
|
|
75
|
+
|
|
76
|
+
// Generic words that are ignored when checking whether a result title matches the query.
const QUERY_STOP_WORDS = new Set(['torrent', 'download', 'iso', 'film', 'serie', 'series', 'movie', 'full', 'free', 'crack', 'cracked', 'repack', 'pack']);

/**
 * Search 1337x by scraping each mirror in `MIRRORS_1337X`, in order, and
 * return the results of the first mirror that yields at least one magnet.
 *
 * The listing page carries no magnet links, so the detail page of each of the
 * top `limit` relevant rows (by seeders) is fetched to extract one. Rows whose
 * title does not contain every significant query word are discarded. Mirrors
 * that fail, answer with an anti-bot page, or yield nothing are skipped
 * silently (best effort).
 *
 * @param {string} query - Search terms, whitespace separated.
 * @param {number} [limit=5] - Maximum number of results (and of detail pages fetched per mirror).
 * @returns {Promise<Array<{path: string, title: string, seed: number, url: string, magnetLink: string, engine: string}>>}
 */
export async function scrape1337x(query, limit = 5) {
  const words = typeof query === 'string' ? query.trim().split(/\s+/).filter(Boolean) : [];
  // Nothing to search for, or nothing wanted: skip the network entirely.
  if (!words.length || !(limit > 0)) return [];

  // Words are joined with '+' (site convention); each word is percent-encoded
  // so characters such as '/', '?', '#' or '+' cannot alter the request path.
  const slug = words.map((w) => encodeURIComponent(w)).join('+');

  // Significant words: at least 3 chars, not purely numeric, not a stop word.
  const queryWords = words
    .map((w) => w.toLowerCase())
    .filter((w) => w.length >= 3 && !/^\d+$/.test(w) && !QUERY_STOP_WORDS.has(w));

  for (const base of MIRRORS_1337X) {
    try {
      const html = await fetchTorrentPage(`${base}/sort-search/${slug}/seeders/desc/1/`, 14_000);
      // Anti-bot interstitials contain these markers instead of a results table.
      if (html.includes('window.location.replace') || html.includes('FingerprintJS')) continue;

      const items = [];
      for (const row of html.split(/<tr[\s>]/i).slice(1)) {
        // Over-collect (4x) so the relevance filter below still has candidates.
        if (items.length >= limit * 4) break;
        const titleM = row.match(/<a href="(\/torrent\/\d+\/[^"]+\/)"[^>]*>([^<]{3,120})<\/a>/i);
        if (!titleM) continue;
        const seedM = row.match(/class="coll-2 seeds[^"]*"[^>]*>\s*([\d,]+)\s*<\/td>/i);
        items.push({
          path: titleM[1],
          title: titleM[2].trim(),
          seed: seedM ? Number.parseInt(seedM[1].replace(/,/g, ''), 10) : 0,
        });
      }
      if (!items.length) continue;

      const relevant = queryWords.length
        ? items.filter((it) => {
          const title = it.title.toLowerCase();
          return queryWords.every((w) => title.includes(w));
        })
        : items;
      if (!relevant.length) continue;

      // Copy before sorting so `items` is not reordered as a side effect.
      const top = [...relevant].sort((a, b) => b.seed - a.seed).slice(0, limit);

      // Fetch the detail pages in parallel; a failing page just drops that row.
      const fetched = await Promise.all(top.map(async (it) => {
        const pageUrl = `${base}${it.path}`;
        try {
          const pageHtml = await fetchTorrentPage(pageUrl, 10_000);
          if (pageHtml.includes('window.location.replace')) return null;
          const magnet = extractMagnetFromHtml(pageHtml);
          return magnet ? { ...it, url: pageUrl, magnetLink: magnet, engine: '1337x' } : null;
        } catch {
          return null;
        }
      }));
      const results = fetched.filter(Boolean);
      if (results.length > 0) return results;
    } catch { /* try next mirror */ }
  }
  return [];
}
|
|
116
|
+
|
|
117
|
+
// ─── Magnet extraction from URL ───────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
/**
 * Fetch an arbitrary page and return the first magnet link found in it.
 *
 * The URL is passed to `assertPublicUrl` before any request is made.
 * NOTE(review): presumably that guard rejects private/internal targets;
 * confirm whether redirects followed during the fetch are covered by it.
 *
 * @param {string} rawUrl - Page URL to inspect.
 * @returns {Promise<string>} The magnet URI found on the page.
 * @throws {Error} When the guard rejects the URL, the fetch fails, or the page has no magnet link.
 */
export async function extractMagnetFromUrl(rawUrl) {
  await assertPublicUrl(rawUrl);
  const magnetLink = extractMagnetFromHtml(await fetchTorrentPage(rawUrl, 10_000));
  if (magnetLink) return magnetLink;
  throw new Error('No magnet link found on page');
}
|