termsearch 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +205 -0
- package/bin/termsearch.js +433 -0
- package/config.example.json +31 -0
- package/frontend/dist/app.js +1051 -0
- package/frontend/dist/icon-192.png +0 -0
- package/frontend/dist/icon-512.png +0 -0
- package/frontend/dist/icon.svg +8 -0
- package/frontend/dist/index.html +28 -0
- package/frontend/dist/manifest.json +40 -0
- package/frontend/dist/opensearch.xml +8 -0
- package/frontend/dist/style.css +756 -0
- package/package.json +48 -0
- package/scripts/postinstall.js +84 -0
- package/src/ai/orchestrator.js +163 -0
- package/src/ai/providers/openai-compat.js +255 -0
- package/src/ai/query.js +54 -0
- package/src/ai/summary.js +120 -0
- package/src/api/middleware.js +91 -0
- package/src/api/routes.js +461 -0
- package/src/autostart/manager.js +207 -0
- package/src/config/defaults.js +62 -0
- package/src/config/manager.js +188 -0
- package/src/fetch/document.js +297 -0
- package/src/fetch/ssrf-guard.js +40 -0
- package/src/profiler/scanner.js +212 -0
- package/src/search/cache.js +119 -0
- package/src/search/engine.js +231 -0
- package/src/search/providers/brave.js +57 -0
- package/src/search/providers/duckduckgo.js +148 -0
- package/src/search/providers/mojeek.js +56 -0
- package/src/search/providers/searxng.js +53 -0
- package/src/search/providers/wikipedia.js +70 -0
- package/src/search/ranking.js +155 -0
- package/src/server.js +68 -0
- package/src/social/scrapers.js +356 -0
- package/src/social/search.js +77 -0
- package/src/torrent/scrapers.js +125 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// AI summary generation — 2-phase agentic flow
|
|
2
|
+
// Phase 1: decide which URLs to fetch
|
|
3
|
+
// Phase 2: synthesize summary from fetched content
|
|
4
|
+
|
|
5
|
+
// English language names keyed by the locale tags the prompts support.
const LANG_NAMES = {
  'it-IT': 'Italian', 'en-US': 'English', 'es-ES': 'Spanish',
  'fr-FR': 'French', 'de-DE': 'German', 'pt-PT': 'Portuguese',
  'ru-RU': 'Russian', 'zh-CN': 'Chinese', 'ja-JP': 'Japanese',
};

/**
 * Build the closing "output language" instruction for the summary prompt.
 *
 * Every non-English language listed in LANG_NAMES gets a strict
 * "write ONLY in <language>" rule, so the rule never contradicts the
 * "Respond in <language>" line emitted alongside it. The default locale
 * ('en-US') and unknown tags keep the looser "follow the query language" rule.
 *
 * @param {string} lang - Locale tag such as 'it-IT'.
 * @returns {string} A single-line instruction for the model.
 */
function getLanguageOutputRule(lang) {
  const name = LANG_NAMES[lang];
  // typeof check rejects inherited Object.prototype members such as 'constructor'.
  if (typeof name === 'string' && lang !== 'en-US') {
    return `FINAL OUTPUT LANGUAGE RULE: write ONLY in ${name}.`;
  }
  return 'FINAL OUTPUT LANGUAGE RULE: write in the language the user query is in.';
}
|
|
18
|
+
|
|
19
|
+
// Build Phase 1 prompt: AI decides which URLs to fetch
|
|
20
|
+
/**
 * Build the Phase 1 prompt, in which the model picks which result URLs to read.
 *
 * @param {object} params
 * @param {string} params.query - The user's search query.
 * @param {Array<object>} params.results - Search results; only the first 10 are listed.
 *   Each entry is read for `title`, `snippet`, `url`, `publishedDate` and `engine`.
 * @param {number} [params.maxFetch=10] - Upper bound quoted to the model.
 * @param {Array<{q: string, r: string}>} [params.session=[]] - Earlier query/answer pairs.
 * @returns {string} The complete prompt text.
 */
export function buildFetchDecisionPrompt({ query, results, maxFetch = 10, session = [] }) {
  // Tolerate a missing/non-array results list instead of throwing on .slice().
  const candidates = Array.isArray(results) ? results.slice(0, 10) : [];
  const list = candidates.map((r, i) =>
    `[${i + 1}] ${r.title} — ${r.snippet || '(no snippet)'}\n URL: ${r.url}${r.publishedDate ? `\n Published: ${r.publishedDate}` : ''}${r.engine ? `\n Engine: ${r.engine}` : ''}`
  ).join('\n');

  // An explicit null bypasses the parameter default, so check the type here.
  const sessionBlock = Array.isArray(session) && session.length
    ? `\n=== SESSION CONTEXT ===\n${session.map((s, i) => `${i + 1}. "${s.q}" → ${s.r}`).join('\n')}\n`
    : '';

  return `You are a search agent. Decide which URLs to read to answer the query.
Reply ONLY with valid JSON, no text outside the JSON:
{"fetch":["url1","url2"],"reason":"brief reason"}

RULE: You MUST fetch at least 1 URL unless the snippet already contains a complete, definitive answer.
Fetch 1-${maxFetch} URLs. Prefer Wikipedia, official sites, reputable sources. Avoid logins, PDFs, redirects.
${sessionBlock}
Query: ${query}

Results:
${list}`;
}
|
|
41
|
+
|
|
42
|
+
// Parse Phase 1 response: extract list of URLs to fetch
|
|
43
|
+
/**
 * Parse the Phase 1 model reply into the list of URLs to fetch.
 *
 * The first `{ ... }` span in the reply is parsed as JSON; its `fetch` array
 * is filtered to http(s) strings and capped at 20 entries, and `reason` is
 * truncated to 200 characters.
 *
 * @param {string} rawContent - Raw model reply.
 * @param {string[]} [allResultUrls=[]] - Result URLs used when the reply is unusable.
 * @returns {{urls: string[], reason: string}} An empty reply yields the first 5
 *   result URLs; an unparseable one yields the first 3 with reason 'fallback'.
 */
export function parseFetchDecision(rawContent, allResultUrls = []) {
  if (!rawContent) {
    return { urls: allResultUrls.slice(0, 5), reason: '' };
  }

  const isHttpUrl = (candidate) => typeof candidate === 'string' && /^https?:\/\//.test(candidate);

  try {
    const found = rawContent.match(/\{[\s\S]*\}/);
    const jsonText = found?.[0];
    if (!jsonText) throw new Error('no JSON');

    const decision = JSON.parse(jsonText);
    const requested = decision.fetch || [];
    return {
      urls: requested.filter(isHttpUrl).slice(0, 20),
      reason: String(decision.reason || '').slice(0, 200),
    };
  } catch {
    // Any parsing problem falls back to the top 3 results.
    return { urls: allResultUrls.slice(0, 3), reason: 'fallback' };
  }
}
|
|
58
|
+
|
|
59
|
+
// Build Phase 2 prompt: synthesize summary from results + fetched documents
|
|
60
|
+
/**
 * Build the Phase 2 prompt: synthesize an answer from snippets and fetched pages.
 *
 * @param {object} params
 * @param {string} params.query - The user's search query.
 * @param {string} [params.lang='en-US'] - Locale tag; unknown tags are named 'English'.
 * @param {Array<object>} params.results - Search results; the first 8 are included as
 *   numbered snippets ([1], [2], ...).
 * @param {Array<object>} params.documents - Fetched pages, cited as [F1], [F2], ...;
 *   each page's content is truncated to 3000 characters.
 * @param {Array<{q: string, r: string}>} [params.session=[]] - Earlier query/answer
 *   pairs; only the last 4 are included.
 * @returns {string} The complete prompt text.
 */
export function buildAgenticSummaryPrompt({ query, lang = 'en-US', results, documents, session = [] }) {
  const langName = LANG_NAMES[lang] || 'English';

  // A missing list is treated as empty rather than crashing prompt construction.
  const docs = Array.isArray(documents) ? documents : [];
  const hits = Array.isArray(results) ? results : [];

  const fetchedSection = docs.length
    ? docs.map((doc, i) =>
      // A page without extracted text contributes an empty Content section.
      `[F${i + 1}] ${doc.title}\nURL: ${doc.url}\nContent:\n${String(doc.content ?? '').slice(0, 3000)}`
    ).join('\n\n---\n\n')
    : null;

  const snippets = hits.slice(0, 8).map((r, i) =>
    `[${i + 1}] ${r.title}\nURL: ${r.url}\nSnippet: ${r.snippet || 'n/a'}`
  ).join('\n\n');

  const sessionItems = Array.isArray(session) && session.length
    ? `=== SEARCH SESSION ===\n${session.slice(-4).map((s, i) => `${i + 1}. "${s.q}" → ${s.r}`).join('\n')}\n\n`
    : '';

  return `You are a search assistant. Answer the query based EXCLUSIVELY on the provided sources.
NEVER use internal knowledge or training data.
IMPORTANT: Web page contents have already been extracted below. Never say "I cannot access websites".
LANGUAGE: Respond in ${langName}.
${getLanguageOutputRule(lang)}

RESPONSE RULES:
- 1-2 sentences: direct answer to the query
- 3-5 bullet points with specific facts, numbers, names from sources
- Cite inline: [F1][F2] for pages read, [1][2] for snippets
- If sources conflict on key facts, note it
- Do not speculate or invent
- If the answer lists specific sites/tools/services, add at the end:
SITES_AI: https://url1, https://url2, https://url3

${sessionItems}SEARCH QUERY: ${query}

${fetchedSection ? `=== PAGES READ ===\n${fetchedSection}\n\n=== SEARCH RESULTS ===\n${snippets}` : `=== SEARCH RESULTS ===\n${snippets}`}

Answer now based only on these sources.`;
}
|
|
98
|
+
|
|
99
|
+
// Extract AI-curated site URLs from summary text
|
|
100
|
+
/**
 * Extract the AI-curated site list from a summary.
 *
 * Looks for the first `SITES_AI:` marker and reads the rest of that line as a
 * comma-separated list, keeping only http(s) URLs (at most 10).
 *
 * @param {string|null|undefined} summaryText - Model output; a missing value is
 *   treated as an empty summary.
 * @returns {string[]} The listed URLs, or an empty array when there is no marker.
 */
export function extractAiSites(summaryText) {
  // Coerce first so a failed/empty generation does not throw on .match().
  const match = String(summaryText ?? '').match(/SITES_AI:\s*([^\n]+)/);
  if (!match) return [];
  return match[1]
    .split(',')
    .map((u) => u.trim())
    .filter((u) => /^https?:\/\//.test(u))
    .slice(0, 10);
}
|
|
109
|
+
|
|
110
|
+
// Score results for reordering based on AI citations
|
|
111
|
+
/**
 * Annotate results with an `aiBoost` score for re-ranking.
 *
 * A result gets 2 when its URL was fetched (case-insensitive match against
 * `fetchedUrls`), 1 when its URL appears verbatim in the summary, 0 otherwise.
 * Results without a URL always score 0: an empty string is a substring of every
 * text, so it must not be allowed to count as a citation.
 *
 * @param {Array<object>} results - Search results; each is copied, not mutated.
 * @param {string|null|undefined} summaryText - Generated summary text.
 * @param {string[]} [fetchedUrls=[]] - URLs whose pages were read.
 * @returns {Array<object>} Copies of the results with `aiBoost` added.
 */
export function scoreResultsFromSummary(results, summaryText, fetchedUrls = []) {
  const text = String(summaryText ?? '');
  const fetchedSet = new Set((fetchedUrls ?? []).map((u) => String(u).toLowerCase()));
  return results.map((r) => {
    const url = String(r.url || '');
    const hasUrl = url !== '';
    const isFetched = hasUrl && fetchedSet.has(url.toLowerCase());
    const isCited = hasUrl && text.includes(url);
    const boost = isFetched ? 2 : isCited ? 1 : 0;
    return { ...r, aiBoost: boost };
  });
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// Express middleware: security headers, rate limiting, IP utilities
|
|
2
|
+
|
|
3
|
+
/**
 * Set the fixed set of security response headers on `res`.
 *
 * @param {{setHeader: Function}} res - Response object exposing `setHeader(name, value)`.
 * @returns {void}
 */
export function applySecurityHeaders(res) {
  const headers = [
    ['X-Content-Type-Options', 'nosniff'],
    ['Referrer-Policy', 'strict-origin-when-cross-origin'],
    ['Permissions-Policy', 'microphone=(), geolocation=(), camera=()'],
    ['Cross-Origin-Resource-Policy', 'same-origin'],
  ];
  for (const [name, value] of headers) {
    res.setHeader(name, value);
  }
}
|
|
9
|
+
|
|
10
|
+
/**
 * Send a JSON response with the security headers applied.
 *
 * @param {object} res - Response object exposing `status()` and `json()`.
 * @param {number} status - HTTP status code.
 * @param {*} payload - Value serialized as the JSON body.
 * @returns {void}
 */
export function sendJson(res, status, payload) {
  applySecurityHeaders(res);
  const response = res.status(status);
  response.json(payload);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Normalize an IP-like value to a trimmed string.
 *
 * A leading `::ffff:` prefix is removed. Empty input, or input that is empty
 * after removing the prefix, becomes the literal 'unknown'.
 *
 * @param {*} value - Raw address (any falsy value counts as empty).
 * @returns {string} The normalized address or 'unknown'.
 */
export function normalizeIp(value) {
  const text = String(value || '').trim();
  if (text === '') return 'unknown';

  const stripped = text.replace(/^::ffff:/, '');
  return stripped === '' ? 'unknown' : stripped;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Report whether a value normalizes to one of the recognized loopback names:
 * '127.0.0.1', '::1' or 'localhost'.
 *
 * @param {*} value - Raw address.
 * @returns {boolean} True for the three loopback spellings above.
 */
export function isLoopbackIp(value) {
  const loopbackNames = ['127.0.0.1', '::1', 'localhost'];
  return loopbackNames.includes(normalizeIp(value));
}
|
|
25
|
+
|
|
26
|
+
/**
 * Determine the client address for a request.
 *
 * The socket address (falling back to `req.ip`) is used as-is unless it is a
 * loopback address. Only then are `x-real-ip` and, after it, the first entry
 * of `x-forwarded-for` consulted.
 *
 * @param {object} req - Request exposing `socket`, `ip` and `headers`.
 * @returns {string} Normalized client address, or 'unknown'.
 */
export function getClientIp(req) {
  const socketIp = normalizeIp(req.socket?.remoteAddress || req.ip || '');
  if (!isLoopbackIp(socketIp)) return socketIp;

  const realIp = req.headers['x-real-ip'];
  const hasRealIp = typeof realIp === 'string' && realIp.trim();
  if (hasRealIp) return normalizeIp(realIp);

  const forwarded = req.headers['x-forwarded-for'];
  const hasForwarded = typeof forwarded === 'string' && forwarded.trim();
  if (hasForwarded) {
    const [firstHop] = forwarded.split(',');
    return normalizeIp(firstHop.trim());
  }

  return socketIp;
}
|
|
38
|
+
|
|
39
|
+
// Rate limiting: sliding window per IP
|
|
40
|
+
/**
 * Sliding-window rate limit check for one client.
 *
 * Timestamps older than `windowMs` are discarded; the request is allowed (and
 * recorded) unless `limit` or more recent timestamps remain. The pruned bucket
 * is written back to `store` either way.
 *
 * @param {Map<string, number[]>} store - Per-client timestamp buckets.
 * @param {*} ip - Client address; normalized before use as the key.
 * @param {number} windowMs - Window length in milliseconds.
 * @param {number} limit - Maximum requests per window.
 * @returns {boolean} True when the request is allowed.
 */
export function checkWindowRateLimit(store, ip, windowMs, limit) {
  const key = normalizeIp(ip);
  const now = Date.now();

  const previous = store.get(key) || [];
  const live = previous.filter((ts) => now - ts < windowMs);

  const blocked = live.length >= limit;
  if (!blocked) live.push(now);

  store.set(key, live);
  return !blocked;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Reply with HTTP 429 and a `Retry-After` header derived from the window length.
 *
 * @param {object} res - Response object.
 * @param {object} options
 * @param {number} options.windowMs - Window length in ms; rounded up to whole
 *   seconds, minimum 1.
 * @param {string} [options.message='Too many requests'] - Message for the body.
 * @returns {*} Whatever `sendJson` returns.
 */
export function sendRateLimited(res, { windowMs, message = 'Too many requests' }) {
  const retryAfterSeconds = Math.max(1, Math.ceil(windowMs / 1000));
  res.setHeader('Retry-After', String(retryAfterSeconds));

  const body = { error: 'rate_limited', message };
  return sendJson(res, 429, body);
}
|
|
55
|
+
|
|
56
|
+
// Middleware factory that creates rate limit stores and checkers
|
|
57
|
+
/**
 * Create the in-memory rate limit stores and their checker functions.
 *
 * Three independent stores (general, search, AI) are kept. A background timer
 * prunes stale timestamps every 5 minutes; it is unref'd so it does not keep
 * the process alive.
 *
 * @param {object} cfg - Configuration; reads `cfg.rate_limit.window_ms`,
 *   `cfg.rate_limit.general_per_min`, `cfg.rate_limit.search_per_min`,
 *   `cfg.ai.rate_window_ms` and `cfg.ai.rate_limit`.
 * @returns {{checkGeneral: Function, checkSearch: Function, checkAi: Function,
 *   windowMs: number, aiWindowMs: number}} Checkers taking a client IP and
 *   returning true when the request is allowed, plus the two window lengths.
 */
export function createRateLimiters(cfg) {
  const rl = cfg.rate_limit;
  const aiCfg = cfg.ai;

  const generalStore = new Map();
  const searchStore = new Map();
  const aiStore = new Map();
  const stores = [generalStore, searchStore, aiStore];

  // Keep timestamps for at least one hour, but never for less than the longest
  // configured window: pruning earlier would drop entries still inside a window
  // and let clients exceed their limit.
  const configuredWindows = [rl.window_ms, aiCfg.rate_window_ms].filter((ms) => Number.isFinite(ms));
  const retentionMs = Math.max(3_600_000, ...configuredWindows);

  // Prune expired entries every 5 minutes
  const pruneInterval = setInterval(() => {
    const now = Date.now();
    for (const store of stores) {
      for (const [key, bucket] of store) {
        const fresh = bucket.filter((ts) => now - ts < retentionMs);
        if (fresh.length === 0) store.delete(key);
        else store.set(key, fresh);
      }
    }
  }, 5 * 60 * 1000);
  pruneInterval.unref?.();

  return {
    checkGeneral: (ip) => checkWindowRateLimit(generalStore, ip, rl.window_ms, rl.general_per_min),
    checkSearch: (ip) => checkWindowRateLimit(searchStore, ip, rl.window_ms, rl.search_per_min),
    checkAi: (ip) => checkWindowRateLimit(aiStore, ip, aiCfg.rate_window_ms, aiCfg.rate_limit),
    windowMs: rl.window_ms,
    aiWindowMs: aiCfg.rate_window_ms,
  };
}
|
|
86
|
+
|
|
87
|
+
// Express middleware: attach client IP to req
|
|
88
|
+
/**
 * Express middleware that stores the resolved client address on `req.clientIp`
 * and passes control to the next handler.
 *
 * @param {object} req - Incoming request (mutated).
 * @param {object} _res - Unused.
 * @param {Function} next - Continuation callback.
 * @returns {void}
 */
export function ipMiddleware(req, _res, next) {
  const clientIp = getClientIp(req);
  req.clientIp = clientIp;
  next();
}
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
// All API route handlers
|
|
2
|
+
|
|
3
|
+
import express from 'express';
|
|
4
|
+
import { search, searchStream, getEnabledProviders, getDocCache } from '../search/engine.js';
|
|
5
|
+
import { batchFetch, fetchReadableDocument } from '../fetch/document.js';
|
|
6
|
+
import { generateSummary, testConnection } from '../ai/orchestrator.js';
|
|
7
|
+
import { refineQuery } from '../ai/query.js';
|
|
8
|
+
import { sendJson, sendRateLimited, applySecurityHeaders } from './middleware.js';
|
|
9
|
+
import { getStatus as autostartStatus, setEnabled as autostartSetEnabled } from '../autostart/manager.js';
|
|
10
|
+
import { detectProfileTarget, scanProfile, PROFILER_PLATFORMS } from '../profiler/scanner.js';
|
|
11
|
+
import { fetchBlueskyPosts, fetchBlueskyActors, fetchGdeltArticles } from '../social/search.js';
|
|
12
|
+
import { scrapeTPB, scrape1337x, extractMagnetFromUrl } from '../torrent/scrapers.js';
|
|
13
|
+
|
|
14
|
+
const APP_VERSION = '0.3.0';
|
|
15
|
+
// Search categories accepted from the `cat` query parameter.
const ALLOWED_CATEGORIES = new Set(['web', 'images', 'news']);

/**
 * Normalize a raw category value.
 *
 * @param {*} raw - Value of the `cat` parameter; falsy values mean 'web'.
 * @returns {string} The trimmed, lower-cased category when allowed, else 'web'.
 */
function parseCategory(raw) {
  const normalized = String(raw || 'web').trim().toLowerCase();
  if (!ALLOWED_CATEGORIES.has(normalized)) return 'web';
  return normalized;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Parse the `engines` parameter into a de-duplicated list of engine names.
 *
 * Accepts a comma-separated string or an array (joined with commas first).
 * Names are trimmed and lower-cased, empty entries dropped, and only the first
 * 12 entries are considered before duplicates are removed.
 *
 * @param {*} raw - Raw parameter value.
 * @returns {string[]} Unique engine names in first-seen order.
 */
function parseEngines(raw) {
  if (!raw) return [];

  const joined = Array.isArray(raw) ? raw.join(',') : String(raw);
  const names = joined
    .split(',')
    .map((entry) => entry.trim().toLowerCase())
    .filter(Boolean)
    .slice(0, 12);

  const unique = new Set();
  for (const engineName of names) {
    unique.add(engineName);
  }
  return Array.from(unique);
}
|
|
33
|
+
|
|
34
|
+
export function createRouter(config, rateLimiters) {
|
|
35
|
+
const router = express.Router();
|
|
36
|
+
|
|
37
|
+
// ─── Health ──────────────────────────────────────────────────────────────
|
|
38
|
+
// GET /api/health — service status, enabled providers and AI availability.
router.get('/api/health', (req, res) => {
  const cfg = config.getConfig();
  const { ai } = cfg;

  const payload = {
    status: 'ok',
    version: APP_VERSION,
    providers: getEnabledProviders(cfg),
    // AI counts as enabled only when switched on AND both endpoint and model are set.
    ai_enabled: Boolean(ai?.enabled && ai?.api_base && ai?.model),
    ai_model: ai?.model || null,
  };
  sendJson(res, 200, payload);
});
|
|
48
|
+
|
|
49
|
+
// ─── OpenAPI ─────────────────────────────────────────────────────────────
|
|
50
|
+
// GET /api/openapi.json — minimal OpenAPI 3.1 description of the endpoints.
router.get('/api/openapi.json', (_req, res) => {
  const info = {
    title: 'TermSearch API',
    version: APP_VERSION,
  };

  const paths = {
    '/api/health': { get: { summary: 'Service health' } },
    '/api/search': { get: { summary: 'Search results (JSON)' } },
    '/api/search-stream': { get: { summary: 'Progressive search (SSE)' } },
    '/api/fetch': { post: { summary: 'Fetch readable documents' } },
    '/api/ai-summary': { post: { summary: 'AI summary (SSE/JSON)' } },
    '/api/ai-query': { post: { summary: 'AI query refinement' } },
    '/api/social-search': { get: { summary: 'Bluesky + GDELT search' } },
    '/api/profiler': { get: { summary: 'Social profile scanner' } },
    '/api/torrent-search': { post: { summary: 'Torrent direct scraping' } },
    '/api/magnet': { post: { summary: 'Extract magnet from page URL' } },
    '/api/scan': { post: { summary: 'Scan site pages by query' } },
    '/api/config': { get: { summary: 'Read config (masked)' }, post: { summary: 'Update config' } },
  };

  applySecurityHeaders(res);
  res.json({ openapi: '3.1.0', info, paths });
});
|
|
74
|
+
|
|
75
|
+
// ─── Search (single response) ─────────────────────────────────────────────
|
|
76
|
+
// GET /api/search — run a search and return the complete result as one JSON body.
// Query parameters: q (required), lang, safe, page, cat, engines.
router.get('/api/search', async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkSearch(ip)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const cfg = config.getConfig();
  const q = String(req.query.q || '').trim();
  if (!q) return sendJson(res, 400, { error: 'missing_query', message: 'q parameter required' });
  if (q.length > cfg.search.max_query_length) return sendJson(res, 400, { error: 'query_too_long' });

  const lang = String(req.query.lang || 'en-US');
  const safe = String(req.query.safe || '1');
  // `page` is untrusted: anything that is not a positive integer (NaN, 0,
  // negatives, repeated parameters) falls back to the first page.
  const requestedPage = Number.parseInt(String(req.query.page || '1'), 10);
  const page = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 1;
  const category = parseCategory(req.query.cat);
  const engines = parseEngines(req.query.engines);

  try {
    const result = await search({ query: q, lang, safe, page, category, engines }, cfg);
    applySecurityHeaders(res);
    res.json(result);
  } catch (error) {
    sendJson(res, 500, { error: 'search_failed', message: error.message });
  }
});
|
|
101
|
+
|
|
102
|
+
// ─── Search stream (SSE) ──────────────────────────────────────────────────
|
|
103
|
+
// GET /api/search-stream — progressive search over Server-Sent Events.
// Emits one `data:` message per chunk from searchStream (batch 'fast' or
// 'full'), then a final `{ done: true }` message, or an error message on failure.
router.get('/api/search-stream', async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkSearch(ip)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const cfg = config.getConfig();
  const q = String(req.query.q || '').trim();
  if (!q) return sendJson(res, 400, { error: 'missing_query' });
  if (q.length > cfg.search.max_query_length) return sendJson(res, 400, { error: 'query_too_long' });

  const lang = String(req.query.lang || 'en-US');
  const safe = String(req.query.safe || '1');
  // `page` is untrusted: anything that is not a positive integer falls back to 1.
  const requestedPage = Number.parseInt(String(req.query.page || '1'), 10);
  const page = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 1;
  const category = parseCategory(req.query.cat);
  const engines = parseEngines(req.query.engines);

  applySecurityHeaders(res);
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  res.flushHeaders?.();

  // Stop producing once the client has gone away, so an abandoned request does
  // not keep consuming the stream.
  let clientGone = false;
  res.on('close', () => { clientGone = true; });

  const send = (data) => {
    if (!clientGone) res.write(`data: ${JSON.stringify(data)}\n\n`);
  };

  try {
    for await (const chunk of searchStream({ query: q, lang, safe, page, category, engines }, cfg)) {
      if (clientGone) break;
      if (chunk.tier === 'fast') {
        send({
          batch: 'fast',
          query: q,
          lang,
          results: chunk.results || [],
          providers: chunk.providers || [],
        });
      } else {
        send({
          batch: 'full',
          query: q,
          lang,
          results: chunk.results || [],
          allResults: chunk.results || [],
          providers: chunk.providers || [],
          degraded: false,
          engineStats: { responded: chunk.providers || [], failed: [], unstable: [], health: {} },
        });
      }
    }
    send({ done: true, providers: getEnabledProviders(cfg) });
  } catch (error) {
    send({ error: 'search_failed', message: error.message });
  } finally {
    res.end();
  }
});
|
|
158
|
+
|
|
159
|
+
// ─── Fetch document(s) ────────────────────────────────────────────────────
|
|
160
|
+
// POST /api/fetch — fetch up to 10 URLs as readable documents.
// Body: { urls: string[] }. Replies { results } on success.
router.post('/api/fetch', express.json({ limit: '32kb' }), async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkGeneral(ip)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const cfg = config.getConfig();
  const urls = req.body?.urls;
  if (!Array.isArray(urls) || urls.length === 0) {
    return sendJson(res, 400, { error: 'missing_urls' });
  }
  if (urls.length > 10) return sendJson(res, 400, { error: 'too_many_urls', max: 10 });

  // A rejected promise in an async handler is otherwise never answered; turn a
  // fetch failure into an explicit error response.
  try {
    const results = await batchFetch(urls, {
      timeoutMs: cfg.search.timeout_ms,
      docCache: getDocCache(),
    });
    applySecurityHeaders(res);
    res.json({ results });
  } catch (error) {
    sendJson(res, 502, { error: 'fetch_failed', message: error.message });
  }
});
|
|
180
|
+
|
|
181
|
+
// ─── AI query refinement ──────────────────────────────────────────────────
|
|
182
|
+
// POST /api/ai-query — AI-assisted query refinement.
// Body: { query, lang? }. When AI is disabled, or refinement yields nothing, the
// original query is echoed back with intent 'other'.
router.post('/api/ai-query', express.json({ limit: '16kb' }), async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkAi(ip)) {
    return sendRateLimited(res, { windowMs: rateLimiters.aiWindowMs });
  }

  const cfg = config.getConfig();
  if (!cfg.ai?.enabled) return sendJson(res, 200, { refined_query: req.body?.query, intent: 'other', also_search: [] });

  const query = String(req.body?.query || '').trim();
  const lang = String(req.body?.lang || 'en-US');
  if (!query) return sendJson(res, 400, { error: 'missing_query' });

  // A rejected promise in an async handler is otherwise never answered; report
  // an upstream failure explicitly.
  try {
    const result = await refineQuery({ query, lang }, cfg.ai);
    applySecurityHeaders(res);
    res.json(result || { refined_query: query, intent: 'other', also_search: [] });
  } catch (error) {
    sendJson(res, 502, { error: 'ai_query_failed', message: error.message });
  }
});
|
|
199
|
+
|
|
200
|
+
// ─── AI summary (SSE streaming) ────────────────────────────────────────────
|
|
201
|
+
// POST /api/ai-summary — generate an AI summary for a query and its results.
// Body: { query, lang?, results?, session?, stream? }.
// Streaming is the default (SSE events `token`, then `done` or `error`); pass
// `stream: false` for a single JSON response.
router.post('/api/ai-summary', express.json({ limit: '256kb' }), async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkAi(ip)) {
    return sendRateLimited(res, { windowMs: rateLimiters.aiWindowMs });
  }

  const cfg = config.getConfig();
  if (!cfg.ai?.enabled || !cfg.ai?.api_base || !cfg.ai?.model) {
    return sendJson(res, 200, {
      error: 'ai_not_configured',
      message: 'AI not configured. Go to Settings to add your endpoint.',
    });
  }

  const query = String(req.body?.query || '').trim();
  const lang = String(req.body?.lang || 'en-US');
  const results = Array.isArray(req.body?.results) ? req.body.results : [];
  const session = Array.isArray(req.body?.session) ? req.body.session.slice(-4) : [];
  const streamMode = req.body?.stream !== false;

  if (!query) return sendJson(res, 400, { error: 'missing_query' });

  if (streamMode) {
    applySecurityHeaders(res);
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');
    res.flushHeaders?.();

    const sendEvent = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);

    // Headers are already flushed, so a thrown error must be reported in-band
    // and the stream must always be closed; otherwise the client waits forever.
    try {
      const result = await generateSummary(
        {
          query, lang, results, session,
          onToken: (chunk) => sendEvent('token', { chunk }),
          docCache: getDocCache(),
        },
        cfg.ai
      );

      if (result.error) {
        sendEvent('error', { error: result.error, message: result.message });
      } else {
        sendEvent('done', {
          sites: result.sites,
          fetchedCount: result.fetchedCount,
          scoredResults: result.scoredResults,
          model: result.model,
        });
      }
    } catch (error) {
      sendEvent('error', { error: 'ai_summary_failed', message: error.message });
    } finally {
      res.end();
    }
    return;
  }

  // Non-streaming mode
  try {
    const result = await generateSummary({ query, lang, results, session, docCache: getDocCache() }, cfg.ai);
    if (result.error) {
      return sendJson(res, result.status || 502, { error: result.error, message: result.message });
    }
    applySecurityHeaders(res);
    res.json(result);
  } catch (error) {
    sendJson(res, 502, { error: 'ai_summary_failed', message: error.message });
  }
});
|
|
262
|
+
|
|
263
|
+
// ─── Config (public — keys masked) ────────────────────────────────────────
|
|
264
|
+
// GET /api/config — return the public view of the configuration.
router.get('/api/config', (req, res) => {
  applySecurityHeaders(res);
  const publicConfig = config.getPublicConfig();
  res.json(publicConfig);
});
|
|
268
|
+
|
|
269
|
+
// ─── Config update ─────────────────────────────────────────────────────────
|
|
270
|
+
// POST /api/config — update configuration from a JSON body.
// Only whitelisted top-level keys are forwarded to config.update().
// NOTE(review): no authentication or rate limiting is visible on this route —
// confirm it is protected elsewhere (e.g. by the bind address).
router.post('/api/config', express.json({ limit: '16kb' }), (req, res) => {
  const body = req.body;
  const isObjectBody = body && typeof body === 'object';
  if (!isObjectBody) {
    return sendJson(res, 400, { error: 'invalid_body' });
  }

  // Whitelist accepted config keys to prevent unexpected writes
  const allowed = ['port', 'host', 'ai', 'brave', 'mojeek', 'searxng', 'search', 'rate_limit'];
  const filtered = Object.fromEntries(
    allowed
      .filter((key) => key in body)
      .map((key) => [key, body[key]])
  );

  try {
    config.update(filtered);
    applySecurityHeaders(res);
    res.json({ ok: true, config: config.getPublicConfig() });
  } catch (error) {
    sendJson(res, 500, { error: 'config_save_failed', message: error.message });
  }
});
|
|
289
|
+
|
|
290
|
+
// ─── Test AI connection ────────────────────────────────────────────────────
|
|
291
|
+
// POST /api/config/test-ai — test connectivity to an AI endpoint.
// Body fields (api_base, api_key, model) override the stored configuration.
router.post('/api/config/test-ai', express.json({ limit: '8kb' }), async (req, res) => {
  const cfg = config.getConfig();
  const body = req.body || {};

  const storedBase = String(cfg.ai?.api_base || '');
  const apiBase = String(body.api_base || storedBase);

  // Security: the stored API key is reused only when the test targets the stored
  // endpoint. Otherwise a caller could name an arbitrary api_base and have the
  // server send the saved secret to it. Trailing slashes are ignored when comparing.
  const stripSlashes = (url) => url.replace(/\/+$/, '');
  const targetsStoredEndpoint = stripSlashes(apiBase) === stripSlashes(storedBase);
  const fallbackKey = targetsStoredEndpoint ? String(cfg.ai?.api_key || '') : '';

  const testCfg = {
    api_base: apiBase,
    api_key: String(body.api_key || fallbackKey),
    model: String(body.model || cfg.ai?.model || ''),
  };

  // Report a thrown error as a failed test instead of leaving the request unanswered.
  try {
    const result = await testConnection(testCfg);
    applySecurityHeaders(res);
    res.json(result);
  } catch (error) {
    sendJson(res, 200, { ok: false, error: error.message });
  }
});
|
|
303
|
+
|
|
304
|
+
// GET /api/config/test-ai — not supported; point callers at the POST variant.
router.get('/api/config/test-ai', (_req, res) => {
  const body = { error: 'method_not_allowed', message: 'Use POST /api/config/test-ai' };
  sendJson(res, 405, body);
});
|
|
307
|
+
|
|
308
|
+
// ─── Test search provider ─────────────────────────────────────────────────
|
|
309
|
+
// GET /api/config/test-provider/:name — run a fixed test query against one
// search provider and report whether it returned any results.
router.get('/api/config/test-provider/:name', async (req, res) => {
  const cfg = config.getConfig();
  const name = String(req.params.name || '');
  const testQuery = 'test';

  // Provider name -> lazy module loader, plus whether its search() is given the config.
  const providers = new Map([
    ['duckduckgo', { load: () => import('../search/providers/duckduckgo.js'), usesConfig: false }],
    ['wikipedia', { load: () => import('../search/providers/wikipedia.js'), usesConfig: false }],
    ['brave', { load: () => import('../search/providers/brave.js'), usesConfig: true }],
    ['mojeek', { load: () => import('../search/providers/mojeek.js'), usesConfig: true }],
    ['searxng', { load: () => import('../search/providers/searxng.js'), usesConfig: true }],
  ]);

  try {
    const provider = providers.get(name);
    if (!provider) {
      return sendJson(res, 400, { error: 'unknown_provider' });
    }

    const { search: providerSearch } = await provider.load();
    const args = provider.usesConfig
      ? { query: testQuery, config: cfg, timeoutMs: 8000 }
      : { query: testQuery, timeoutMs: 8000 };
    const results = await providerSearch(args);

    applySecurityHeaders(res);
    res.json({ ok: results.length > 0, count: results.length, sample: results.slice(0, 2) });
  } catch (error) {
    // Failures are reported as a negative test result, not as an HTTP error.
    sendJson(res, 200, { ok: false, error: error.message });
  }
});
|
|
340
|
+
|
|
341
|
+
// ─── Stats ────────────────────────────────────────────────────────────────
|
|
342
|
+
// GET /api/stats — process uptime; the search counter is a fixed placeholder.
router.get('/api/stats', (req, res) => {
  applySecurityHeaders(res);
  // TODO: implement persistent stats counter
  const uptimeMs = process.uptime() * 1000;
  res.json({ searches: 0, uptime_ms: uptimeMs });
});
|
|
347
|
+
|
|
348
|
+
// ─── Profiler ─────────────────────────────────────────────────────────────────
|
|
349
|
+
// GET /api/profiler?q=… — detect a social profile in `q` and scan it.
router.get('/api/profiler', async (req, res) => {
  if (!rateLimiters.checkGeneral(req.clientIp)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const raw = String(req.query.q || '').trim();
  if (!raw) {
    return sendJson(res, 400, { error: 'missing_query', message: 'q required (URL or @handle)' });
  }

  const target = detectProfileTarget(raw);
  if (!target) {
    return sendJson(res, 400, { error: 'not_a_profile', message: 'Could not detect a social profile in query' });
  }

  try {
    applySecurityHeaders(res);
    const profile = await scanProfile(target);
    res.json(profile);
  } catch (error) {
    sendJson(res, 500, { error: 'profiler_failed', message: error.message });
  }
});
|
|
364
|
+
|
|
365
|
+
// ─── Social search ─────────────────────────────────────────────────────────
|
|
366
|
+
// GET /api/social-search?q=…&limit=…&sources=bluesky,gdelt
// Queries the selected sources in parallel. A source that fails contributes an
// empty list instead of failing the whole request.
router.get('/api/social-search', async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkGeneral(ip)) return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  const q = String(req.query.q || '').trim();
  if (!q) return sendJson(res, 400, { error: 'missing_query' });

  // `limit` is untrusted: accept only positive integers, capped at 50; anything
  // else (missing, NaN, zero, negative) uses the default of 25.
  const requestedLimit = Number.parseInt(String(req.query.limit ?? ''), 10);
  const limit = Number.isInteger(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, 50)
    : 25;

  const sources = String(req.query.sources || 'bluesky,gdelt').split(',').map((s) => s.trim()).filter(Boolean);
  const taskMap = {};
  if (sources.includes('bluesky')) {
    taskMap.bluesky_posts = fetchBlueskyPosts(q, limit);
    taskMap.bluesky_actors = fetchBlueskyActors(q, Math.min(limit, 20));
  }
  if (sources.includes('gdelt')) taskMap.gdelt = fetchGdeltArticles(q, limit);

  const keys = Object.keys(taskMap);
  const settled = await Promise.allSettled(Object.values(taskMap));
  const results = {};
  keys.forEach((key, i) => {
    const outcome = settled[i];
    // Guard against a fulfilled non-array value so the total below cannot break.
    results[key] = outcome.status === 'fulfilled' && Array.isArray(outcome.value) ? outcome.value : [];
  });
  const total = Object.values(results).reduce((sum, arr) => sum + arr.length, 0);
  applySecurityHeaders(res);
  res.json({ query: q, total, results });
});
|
|
387
|
+
|
|
388
|
+
// ─── Torrent search ─────────────────────────────────────────────────────────
|
|
389
|
+
// POST /api/torrent-search — run both torrent scrapers in parallel and merge
// whatever each one returned. Body: { q } or { query } (truncated to 200 chars).
router.post('/api/torrent-search', express.json(), async (req, res) => {
  if (!rateLimiters.checkGeneral(req.clientIp)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const rawQuery = req.body?.q || req.body?.query || '';
  const query = String(rawQuery).trim().slice(0, 200);
  if (!query) {
    return sendJson(res, 400, { error: 'missing_query', message: 'q required' });
  }

  try {
    const outcomes = await Promise.allSettled([scrapeTPB(query, 8), scrape1337x(query, 7)]);

    const results = [];
    for (const outcome of outcomes) {
      if (outcome.status === 'fulfilled') results.push(...outcome.value);
    }

    applySecurityHeaders(res);
    res.json({ results, source: results.length ? 'tpb+1337x' : 'none' });
  } catch (error) {
    sendJson(res, 502, { error: 'scrape_failed', message: error.message });
  }
});
|
|
406
|
+
|
|
407
|
+
// POST /api/magnet — extract a magnet link from a page. Body: { url } (http/https only).
router.post('/api/magnet', express.json(), async (req, res) => {
  if (!rateLimiters.checkGeneral(req.clientIp)) {
    return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  }

  const rawUrl = String(req.body?.url || '').trim();
  const isHttpUrl = /^https?:\/\//.test(rawUrl);
  if (!rawUrl || !isHttpUrl) {
    return sendJson(res, 400, { error: 'invalid_url' });
  }

  try {
    const magnet = await extractMagnetFromUrl(rawUrl);
    applySecurityHeaders(res);
    res.json({ magnet });
  } catch (error) {
    // Errors whose message mentions SSRF are the caller's fault (400); the rest are upstream (502).
    const status = error.message.includes('SSRF') ? 400 : 502;
    sendJson(res, status, { error: 'fetch_failed', message: error.message });
  }
});
|
|
420
|
+
|
|
421
|
+
// ─── Site scan ──────────────────────────────────────────────────────────────
|
|
422
|
+
// POST /api/scan — scan pages of a site for a query.
// Body: { url, query, max_pages? }. Replies { pages: [{ url, title, content }] },
// with each page's content truncated to 3000 characters.
router.post('/api/scan', express.json(), async (req, res) => {
  const ip = req.clientIp;
  if (!rateLimiters.checkGeneral(ip)) return sendRateLimited(res, { windowMs: rateLimiters.windowMs });
  const rawUrl = String(req.body?.url || '').trim();
  const query = String(req.body?.query || '').trim().slice(0, 200);

  // `max_pages` is untrusted: accept whole page counts from 1 to 8; anything
  // else (missing, NaN, zero, negative) uses the default of 4.
  const requestedPages = Number(req.body?.max_pages);
  const maxPages = Number.isFinite(requestedPages) && requestedPages >= 1
    ? Math.min(Math.floor(requestedPages), 8)
    : 4;

  if (!rawUrl || !query) return sendJson(res, 400, { error: 'invalid_input', message: 'url and query required' });
  try {
    const { scanSitePages } = await import('../fetch/document.js');
    const pages = await scanSitePages(rawUrl, query, maxPages);
    applySecurityHeaders(res);
    res.json({
      pages: pages.map((p) => ({
        url: p.url,
        title: p.title,
        // A page without extracted text yields an empty string rather than
        // failing the whole scan.
        content: String(p.content ?? '').slice(0, 3000),
      })),
    });
  } catch (error) {
    sendJson(res, 502, { error: 'scan_failed', message: error.message });
  }
});
|
|
438
|
+
|
|
439
|
+
// ─── Autostart ────────────────────────────────────────────────────────────
|
|
440
|
+
// GET /api/autostart — report the current autostart status.
router.get('/api/autostart', (req, res) => {
  applySecurityHeaders(res);
  try {
    const status = autostartStatus();
    res.json(status);
  } catch (error) {
    sendJson(res, 500, { error: 'autostart_check_failed', message: error.message });
  }
});
|
|
448
|
+
|
|
449
|
+
// POST /api/autostart — enable or disable autostart.
// Body: { enabled }, coerced to a boolean.
router.post('/api/autostart', express.json(), (req, res) => {
  applySecurityHeaders(res);
  const enable = Boolean(req.body?.enabled);
  try {
    const status = autostartSetEnabled(enable);
    const payload = { ok: true, ...status };
    res.json(payload);
  } catch (error) {
    sendJson(res, 500, { error: 'autostart_failed', message: error.message });
  }
});
|
|
459
|
+
|
|
460
|
+
return router;
|
|
461
|
+
}
|