@staticn0va/wigolo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +74 -0
- package/README.md +272 -0
- package/dist/cache/db.d.ts +5 -0
- package/dist/cache/db.d.ts.map +1 -0
- package/dist/cache/db.js +97 -0
- package/dist/cache/db.js.map +1 -0
- package/dist/cache/store.d.ts +26 -0
- package/dist/cache/store.d.ts.map +1 -0
- package/dist/cache/store.js +214 -0
- package/dist/cache/store.js.map +1 -0
- package/dist/cli/daemon.d.ts +2 -0
- package/dist/cli/daemon.d.ts.map +1 -0
- package/dist/cli/daemon.js +5 -0
- package/dist/cli/daemon.js.map +1 -0
- package/dist/cli/health.d.ts +2 -0
- package/dist/cli/health.d.ts.map +1 -0
- package/dist/cli/health.js +5 -0
- package/dist/cli/health.js.map +1 -0
- package/dist/cli/index.d.ts +7 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +9 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/warmup.d.ts +11 -0
- package/dist/cli/warmup.d.ts.map +1 -0
- package/dist/cli/warmup.js +107 -0
- package/dist/cli/warmup.js.map +1 -0
- package/dist/config.d.ts +41 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +66 -0
- package/dist/config.js.map +1 -0
- package/dist/crawl/crawler.d.ts +18 -0
- package/dist/crawl/crawler.d.ts.map +1 -0
- package/dist/crawl/crawler.js +228 -0
- package/dist/crawl/crawler.js.map +1 -0
- package/dist/crawl/dedup.d.ts +15 -0
- package/dist/crawl/dedup.d.ts.map +1 -0
- package/dist/crawl/dedup.js +93 -0
- package/dist/crawl/dedup.js.map +1 -0
- package/dist/crawl/mapper.d.ts +17 -0
- package/dist/crawl/mapper.d.ts.map +1 -0
- package/dist/crawl/mapper.js +178 -0
- package/dist/crawl/mapper.js.map +1 -0
- package/dist/crawl/rate-limiter.d.ts +10 -0
- package/dist/crawl/rate-limiter.d.ts.map +1 -0
- package/dist/crawl/rate-limiter.js +72 -0
- package/dist/crawl/rate-limiter.js.map +1 -0
- package/dist/crawl/robots.d.ts +9 -0
- package/dist/crawl/robots.d.ts.map +1 -0
- package/dist/crawl/robots.js +63 -0
- package/dist/crawl/robots.js.map +1 -0
- package/dist/crawl/sitemap.d.ts +4 -0
- package/dist/crawl/sitemap.d.ts.map +1 -0
- package/dist/crawl/sitemap.js +38 -0
- package/dist/crawl/sitemap.js.map +1 -0
- package/dist/crawl/url-utils.d.ts +3 -0
- package/dist/crawl/url-utils.d.ts.map +1 -0
- package/dist/crawl/url-utils.js +41 -0
- package/dist/crawl/url-utils.js.map +1 -0
- package/dist/extraction/defuddle.d.ts +3 -0
- package/dist/extraction/defuddle.d.ts.map +1 -0
- package/dist/extraction/defuddle.js +26 -0
- package/dist/extraction/defuddle.js.map +1 -0
- package/dist/extraction/extract.d.ts +5 -0
- package/dist/extraction/extract.d.ts.map +1 -0
- package/dist/extraction/extract.js +83 -0
- package/dist/extraction/extract.js.map +1 -0
- package/dist/extraction/jsonld.d.ts +4 -0
- package/dist/extraction/jsonld.d.ts.map +1 -0
- package/dist/extraction/jsonld.js +64 -0
- package/dist/extraction/jsonld.js.map +1 -0
- package/dist/extraction/markdown.d.ts +10 -0
- package/dist/extraction/markdown.d.ts.map +1 -0
- package/dist/extraction/markdown.js +107 -0
- package/dist/extraction/markdown.js.map +1 -0
- package/dist/extraction/pipeline.d.ts +11 -0
- package/dist/extraction/pipeline.d.ts.map +1 -0
- package/dist/extraction/pipeline.js +95 -0
- package/dist/extraction/pipeline.js.map +1 -0
- package/dist/extraction/readability.d.ts +3 -0
- package/dist/extraction/readability.d.ts.map +1 -0
- package/dist/extraction/readability.js +32 -0
- package/dist/extraction/readability.js.map +1 -0
- package/dist/extraction/schema.d.ts +7 -0
- package/dist/extraction/schema.d.ts.map +1 -0
- package/dist/extraction/schema.js +86 -0
- package/dist/extraction/schema.js.map +1 -0
- package/dist/extraction/site-extractors/docs-generic.d.ts +3 -0
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -0
- package/dist/extraction/site-extractors/docs-generic.js +104 -0
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -0
- package/dist/extraction/site-extractors/github.d.ts +3 -0
- package/dist/extraction/site-extractors/github.d.ts.map +1 -0
- package/dist/extraction/site-extractors/github.js +107 -0
- package/dist/extraction/site-extractors/github.js.map +1 -0
- package/dist/extraction/site-extractors/mdn.d.ts +3 -0
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -0
- package/dist/extraction/site-extractors/mdn.js +58 -0
- package/dist/extraction/site-extractors/mdn.js.map +1 -0
- package/dist/extraction/site-extractors/stackoverflow.d.ts +3 -0
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -0
- package/dist/extraction/site-extractors/stackoverflow.js +88 -0
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -0
- package/dist/extraction/trafilatura.d.ts +6 -0
- package/dist/extraction/trafilatura.d.ts.map +1 -0
- package/dist/extraction/trafilatura.js +105 -0
- package/dist/extraction/trafilatura.js.map +1 -0
- package/dist/fetch/auth.d.ts +8 -0
- package/dist/fetch/auth.d.ts.map +1 -0
- package/dist/fetch/auth.js +32 -0
- package/dist/fetch/auth.js.map +1 -0
- package/dist/fetch/browser-pool.d.ts +28 -0
- package/dist/fetch/browser-pool.d.ts.map +1 -0
- package/dist/fetch/browser-pool.js +138 -0
- package/dist/fetch/browser-pool.js.map +1 -0
- package/dist/fetch/content-check.d.ts +2 -0
- package/dist/fetch/content-check.d.ts.map +1 -0
- package/dist/fetch/content-check.js +62 -0
- package/dist/fetch/content-check.js.map +1 -0
- package/dist/fetch/http-client.d.ts +15 -0
- package/dist/fetch/http-client.d.ts.map +1 -0
- package/dist/fetch/http-client.js +146 -0
- package/dist/fetch/http-client.js.map +1 -0
- package/dist/fetch/router.d.ts +45 -0
- package/dist/fetch/router.d.ts.map +1 -0
- package/dist/fetch/router.js +89 -0
- package/dist/fetch/router.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +10 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +39 -0
- package/dist/logger.js.map +1 -0
- package/dist/search/dedup.d.ts +10 -0
- package/dist/search/dedup.d.ts.map +1 -0
- package/dist/search/dedup.js +35 -0
- package/dist/search/dedup.js.map +1 -0
- package/dist/search/engines/bing.d.ts +7 -0
- package/dist/search/engines/bing.d.ts.map +1 -0
- package/dist/search/engines/bing.js +48 -0
- package/dist/search/engines/bing.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts +7 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -0
- package/dist/search/engines/duckduckgo.js +50 -0
- package/dist/search/engines/duckduckgo.js.map +1 -0
- package/dist/search/engines/startpage.d.ts +7 -0
- package/dist/search/engines/startpage.d.ts.map +1 -0
- package/dist/search/engines/startpage.js +50 -0
- package/dist/search/engines/startpage.js.map +1 -0
- package/dist/search/filters.d.ts +16 -0
- package/dist/search/filters.d.ts.map +1 -0
- package/dist/search/filters.js +63 -0
- package/dist/search/filters.js.map +1 -0
- package/dist/search/flashrank.d.ts +12 -0
- package/dist/search/flashrank.d.ts.map +1 -0
- package/dist/search/flashrank.js +63 -0
- package/dist/search/flashrank.js.map +1 -0
- package/dist/search/query.d.ts +2 -0
- package/dist/search/query.d.ts.map +1 -0
- package/dist/search/query.js +41 -0
- package/dist/search/query.js.map +1 -0
- package/dist/search/rerank.d.ts +3 -0
- package/dist/search/rerank.d.ts.map +1 -0
- package/dist/search/rerank.js +40 -0
- package/dist/search/rerank.js.map +1 -0
- package/dist/search/searxng.d.ts +8 -0
- package/dist/search/searxng.d.ts.map +1 -0
- package/dist/search/searxng.js +87 -0
- package/dist/search/searxng.js.map +1 -0
- package/dist/search/validator.d.ts +6 -0
- package/dist/search/validator.d.ts.map +1 -0
- package/dist/search/validator.js +35 -0
- package/dist/search/validator.js.map +1 -0
- package/dist/searxng/bootstrap.d.ts +18 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -0
- package/dist/searxng/bootstrap.js +136 -0
- package/dist/searxng/bootstrap.js.map +1 -0
- package/dist/searxng/docker.d.ts +9 -0
- package/dist/searxng/docker.d.ts.map +1 -0
- package/dist/searxng/docker.js +67 -0
- package/dist/searxng/docker.js.map +1 -0
- package/dist/searxng/process.d.ts +23 -0
- package/dist/searxng/process.d.ts.map +1 -0
- package/dist/searxng/process.js +188 -0
- package/dist/searxng/process.js.map +1 -0
- package/dist/server.d.ts +2 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +311 -0
- package/dist/server.js.map +1 -0
- package/dist/tools/cache.d.ts +3 -0
- package/dist/tools/cache.d.ts.map +1 -0
- package/dist/tools/cache.js +50 -0
- package/dist/tools/cache.js.map +1 -0
- package/dist/tools/crawl.d.ts +6 -0
- package/dist/tools/crawl.d.ts.map +1 -0
- package/dist/tools/crawl.js +97 -0
- package/dist/tools/crawl.js.map +1 -0
- package/dist/tools/extract.d.ts +4 -0
- package/dist/tools/extract.d.ts.map +1 -0
- package/dist/tools/extract.js +69 -0
- package/dist/tools/extract.js.map +1 -0
- package/dist/tools/fetch.d.ts +4 -0
- package/dist/tools/fetch.d.ts.map +1 -0
- package/dist/tools/fetch.js +76 -0
- package/dist/tools/fetch.js.map +1 -0
- package/dist/tools/search.d.ts +4 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +160 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/types.d.ts +222 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +61 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { getDatabase } from './db.js';
|
|
3
|
+
import { getConfig } from '../config.js';
|
|
4
|
+
/**
 * Query parameters that only carry click/campaign attribution. They are
 * removed during normalization so that the same page reached through
 * different campaigns maps to a single cache key.
 */
const TRACKING_PARAMS = new Set([
    'utm_source',
    'utm_medium',
    'utm_campaign',
    'utm_content',
    'utm_term',
    'utm_id',
    'fbclid',
    'gclid',
    'msclkid',
    'mc_cid',
    'mc_eid',
]);

/**
 * Produce a canonical string form of a URL for use as a cache key.
 *
 * Steps, in order:
 *  1. lower-case the host and drop a leading `www.`;
 *  2. delete tracking parameters (everything in TRACKING_PARAMS plus any
 *     key starting with `utm_`);
 *  3. sort the remaining query parameters;
 *  4. strip one trailing slash from the path, so `/docs/` and `/docs`
 *     normalize identically whether or not a query string or fragment follows;
 *  5. for a bare origin (`https://host/` with no query and no fragment),
 *     drop the final slash as well.
 *
 * @param url Absolute URL string.
 * @returns The normalized URL string.
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 */
export function normalizeUrl(url) {
    const parsed = new URL(url);
    // The URL parser already lower-cases the scheme, so only the host needs work.
    parsed.hostname = parsed.hostname.toLowerCase().replace(/^www\./, '');

    // Snapshot the keys first: deleting while iterating the live view skips entries.
    for (const key of [...parsed.searchParams.keys()]) {
        if (TRACKING_PARAMS.has(key) || key.startsWith('utm_')) {
            parsed.searchParams.delete(key);
        }
    }
    parsed.searchParams.sort();

    // Evaluated after the query has been rewritten, so a URL whose only
    // parameters were tracking parameters still counts as a bare origin.
    const isBareOrigin = parsed.pathname === '/' && !parsed.search && !parsed.hash;

    // Strip the trailing slash on the path itself. Testing the serialized URL
    // instead would miss `/docs/?a=1` and would wrongly truncate a fragment
    // that happens to end in `/`.
    if (parsed.pathname !== '/' && parsed.pathname.endsWith('/')) {
        parsed.pathname = parsed.pathname.slice(0, -1);
    }

    const result = parsed.toString();
    return isBareOrigin ? result.replace(/\/$/, '') : result;
}
|
|
38
|
+
/**
 * Format a Date as `YYYY-MM-DD HH:MM:SS` in UTC.
 *
 * Built from `toISOString()`: the `T` separator becomes a space and the
 * fractional seconds plus the trailing `Z` are removed.
 *
 * @param date Date to format.
 * @returns The UTC timestamp with one-second resolution.
 */
function toIsoSeconds(date) {
    const [day, clock] = date.toISOString().split('T');
    return `${day} ${clock.replace(/\.\d+Z$/, '')}`;
}
|
|
41
|
+
/**
 * Store a fetched page and its extraction in the `url_cache` table.
 *
 * The row is keyed by the normalized form of `result.finalUrl` (falling back
 * to `result.url`), carries a SHA-256 hex digest of the extracted markdown,
 * and expires `config.cacheTtlContent` seconds after the moment of the call.
 * `metadata`, `links` and `images` are stored as JSON text.
 *
 * Uses `INSERT OR REPLACE`, so a conflicting existing row is overwritten;
 * which columns are unique is decided by the schema, not by this function.
 *
 * @param result Fetch result; reads `url`, `finalUrl`, `html`, `method`.
 * @param extraction Extraction output; reads `title`, `markdown`, `metadata`,
 *   `links`, `images`, `extractor`.
 */
export function cacheContent(result, extraction) {
    const db = getDatabase();
    const { cacheTtlContent } = getConfig();

    const cacheKey = normalizeUrl(result.finalUrl || result.url);
    const digest = createHash('sha256').update(extraction.markdown).digest('hex');

    const fetchedAt = new Date();
    // The TTL is multiplied by 1000, i.e. it is treated as seconds.
    const expiry = new Date(fetchedAt.getTime() + cacheTtlContent * 1000);

    const insert = db.prepare(`
    INSERT OR REPLACE INTO url_cache (
    url, normalized_url, title, markdown, raw_html,
    metadata, links, images, fetch_method, extractor_used,
    content_hash, fetched_at, expires_at
    )
    VALUES (
    @url, @normalizedUrl, @title, @markdown, @rawHtml,
    @metadata, @links, @images, @fetchMethod, @extractorUsed,
    @contentHash, @fetchedAt, @expiresAt
    )
    `);

    const row = {
        url: result.url,
        normalizedUrl: cacheKey,
        title: extraction.title,
        markdown: extraction.markdown,
        rawHtml: result.html,
        metadata: JSON.stringify(extraction.metadata),
        links: JSON.stringify(extraction.links),
        images: JSON.stringify(extraction.images),
        fetchMethod: result.method,
        extractorUsed: extraction.extractor,
        contentHash: digest,
        fetchedAt: toIsoSeconds(fetchedAt),
        expiresAt: toIsoSeconds(expiry),
    };
    insert.run(row);
}
|
|
76
|
+
/**
 * Map a `url_cache` row (snake_case columns) to the camelCase shape used by
 * callers.
 *
 * Values are copied as-is: `metadata`, `links` and `images` are not parsed
 * here, so they keep whatever form the row holds.
 *
 * @param row Row object read from `url_cache`.
 * @returns A new object with camelCase keys.
 */
function rowToCachedContent(row) {
    const {
        id,
        url,
        normalized_url: normalizedUrl,
        title,
        markdown,
        raw_html: rawHtml,
        metadata,
        links,
        images,
        fetch_method: fetchMethod,
        extractor_used: extractorUsed,
        content_hash: contentHash,
        fetched_at: fetchedAt,
        expires_at: expiresAt,
    } = row;
    return {
        id,
        url,
        normalizedUrl,
        title,
        markdown,
        rawHtml,
        metadata,
        links,
        images,
        fetchMethod,
        extractorUsed,
        contentHash,
        fetchedAt,
        expiresAt,
    };
}
|
|
94
|
+
/**
 * Look up one cached page by its exact URL or by its normalized URL.
 *
 * The query does not filter on `expires_at`, so the returned entry may
 * already be past its expiry; freshness has to be checked separately.
 *
 * @param url Absolute URL string.
 * @returns The cached entry in camelCase form, or `null` when no row matches.
 */
export function getCachedContent(url) {
    const db = getDatabase();
    const cacheKey = normalizeUrl(url);
    const lookup = db.prepare(`
    SELECT * FROM url_cache WHERE url = ? OR normalized_url = ? LIMIT 1
    `);
    const hit = lookup.get(url, cacheKey);
    if (!hit) {
        return null;
    }
    return rowToCachedContent(hit);
}
|
|
102
|
+
export function isExpired(cached) {
|
|
103
|
+
if (!cached.expiresAt)
|
|
104
|
+
return false;
|
|
105
|
+
return new Date(cached.expiresAt).getTime() < Date.now();
|
|
106
|
+
}
|
|
107
|
+
/**
 * Full-text search over cached pages.
 *
 * Joins `url_cache` to `url_cache_fts` on row id, matches the FTS table
 * against `query`, and orders by `rank`. No LIMIT is applied.
 *
 * @param query Expression passed directly to the `MATCH` operator.
 * @returns Matching entries in camelCase form.
 */
export function searchCache(query) {
    const db = getDatabase();
    const search = db.prepare(`
    SELECT url_cache.*
    FROM url_cache
    JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid
    WHERE url_cache_fts MATCH ?
    ORDER BY rank
    `);
    const matches = search.all(query);
    return matches.map(rowToCachedContent);
}
|
|
118
|
+
/**
 * Store the results of a search in the `search_cache` table.
 *
 * The row is keyed by the SHA-256 hex digest of the lower-cased, trimmed
 * query and expires `config.cacheTtlSearch` seconds after the call.
 * `results` and `enginesUsed` are stored as JSON text. Uses
 * `INSERT OR REPLACE`, so a conflicting existing row is overwritten.
 *
 * @param query Search query as entered.
 * @param results Search results to store.
 * @param enginesUsed Engines that produced the results.
 */
export function cacheSearchResults(query, results, enginesUsed) {
    const db = getDatabase();
    const { cacheTtlSearch } = getConfig();

    const canonicalQuery = query.toLowerCase().trim();
    const digest = createHash('sha256').update(canonicalQuery).digest('hex');

    const searchedAt = new Date();
    // The TTL is multiplied by 1000, i.e. it is treated as seconds.
    const expiry = new Date(searchedAt.getTime() + cacheTtlSearch * 1000);

    const insert = db.prepare(`
    INSERT OR REPLACE INTO search_cache (query, query_hash, results, engines_used, searched_at, expires_at)
    VALUES (@query, @queryHash, @results, @enginesUsed, @searchedAt, @expiresAt)
    `);
    const row = {
        query,
        queryHash: digest,
        results: JSON.stringify(results),
        enginesUsed: JSON.stringify(enginesUsed),
        searchedAt: toIsoSeconds(searchedAt),
        expiresAt: toIsoSeconds(expiry),
    };
    insert.run(row);
}
|
|
137
|
+
/**
 * Fetch unexpired cached results for a search query.
 *
 * The lookup key is the SHA-256 hex digest of the lower-cased, trimmed query.
 * Rows whose `expires_at` is not later than SQLite's `datetime('now')` are
 * excluded; rows with a NULL `expires_at` are always eligible.
 *
 * @param query Search query.
 * @returns `{ query, results, engines_used, searched_at }` with the two JSON
 *   columns parsed, or `null` when nothing usable is cached.
 */
export function getCachedSearchResults(query) {
    const db = getDatabase();
    const canonicalQuery = query.toLowerCase().trim();
    const digest = createHash('sha256').update(canonicalQuery).digest('hex');
    const lookup = db.prepare(`
    SELECT * FROM search_cache WHERE query_hash = ? AND (expires_at IS NULL OR expires_at > datetime('now'))
    `);
    const hit = lookup.get(digest);
    if (hit) {
        return {
            query: hit.query,
            results: JSON.parse(hit.results),
            engines_used: JSON.parse(hit.engines_used),
            searched_at: hit.searched_at,
        };
    }
    return null;
}
|
|
152
|
+
/**
 * Query cached pages with optional filters, returning at most 100 rows.
 *
 * Each option that is set adds one AND-ed condition:
 *  - `query`: full-text `MATCH` against `url_cache_fts` (adds the join);
 *  - `urlPattern`: `GLOB` against `normalized_url`;
 *  - `since`: `fetched_at` later than `datetime(since)`.
 *
 * Results are ordered by `rank` when `query` is given, otherwise newest
 * `fetched_at` first. All values are bound as parameters.
 *
 * @param options Object with optional `query`, `urlPattern`, `since`.
 * @returns Matching entries in camelCase form.
 */
export function searchCacheFiltered(options) {
    const db = getDatabase();

    // Each entry pairs a WHERE fragment with the value bound to its `?`.
    const filters = [];
    if (options.query) {
        filters.push(['url_cache_fts MATCH ?', options.query]);
    }
    if (options.urlPattern) {
        filters.push(['url_cache.normalized_url GLOB ?', options.urlPattern]);
    }
    if (options.since) {
        filters.push(['url_cache.fetched_at > datetime(?)', options.since]);
    }

    const fromClause = options.query
        ? 'url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid'
        : 'url_cache';
    const whereClause = filters.length > 0
        ? `WHERE ${filters.map(([condition]) => condition).join(' AND ')}`
        : '';
    const orderClause = options.query ? 'ORDER BY rank' : 'ORDER BY url_cache.fetched_at DESC';

    const sql = `SELECT url_cache.* FROM ${fromClause} ${whereClause} ${orderClause} LIMIT 100`;
    const bound = filters.map(([, value]) => value);
    const matches = db.prepare(sql).all(...bound);
    return matches.map(rowToCachedContent);
}
|
|
176
|
+
/**
 * Delete cached pages matching the given filters.
 *
 * Each option that is set adds one AND-ed condition:
 *  - `query`: row id is among the full-text matches for `query`;
 *  - `urlPattern`: `GLOB` against `normalized_url`;
 *  - `since`: `fetched_at` later than `datetime(since)`.
 *
 * When no option is set the statement has no WHERE clause and every row in
 * `url_cache` is deleted.
 *
 * @param options Object with optional `query`, `urlPattern`, `since`.
 * @returns Number of rows deleted.
 */
export function clearCacheEntries(options) {
    const db = getDatabase();

    // Each entry pairs a WHERE fragment with the value bound to its `?`.
    const filters = [];
    if (options.query) {
        filters.push([
            'id IN (SELECT url_cache.id FROM url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid WHERE url_cache_fts MATCH ?)',
            options.query,
        ]);
    }
    if (options.urlPattern) {
        filters.push(['normalized_url GLOB ?', options.urlPattern]);
    }
    if (options.since) {
        filters.push(['fetched_at > datetime(?)', options.since]);
    }

    const whereClause = filters.length > 0
        ? `WHERE ${filters.map(([condition]) => condition).join(' AND ')}`
        : '';
    const sql = `DELETE FROM url_cache ${whereClause}`;
    const outcome = db.prepare(sql).run(...filters.map(([, value]) => value));
    return outcome.changes;
}
|
|
197
|
+
/**
 * Summarize the contents of `url_cache`.
 *
 * Size is the sum of SQL `LENGTH(markdown)` and `LENGTH(raw_html)` (NULL
 * `raw_html` counted as empty), converted to MiB and rounded to six decimal
 * places.
 *
 * @returns `{ total_urls, total_size_mb, oldest, newest }`; `oldest` and
 *   `newest` are the min/max `fetched_at`, or `''` when the table is empty.
 */
export function getCacheStats() {
    const db = getDatabase();
    const summary = db.prepare(`
    SELECT
    COUNT(*) as total_urls,
    COALESCE(SUM(LENGTH(markdown) + LENGTH(COALESCE(raw_html, ''))), 0) as total_bytes,
    MIN(fetched_at) as oldest,
    MAX(fetched_at) as newest
    FROM url_cache
    `).get();

    const { total_urls, total_bytes, oldest, newest } = summary;
    const sizeMb = total_bytes / (1024 * 1024);
    return {
        total_urls,
        total_size_mb: Math.round(sizeMb * 1e6) / 1e6,
        oldest: oldest ?? '',
        newest: newest ?? '',
    };
}
|
|
214
|
+
//# sourceMappingURL=store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"store.js","sourceRoot":"","sources":["../../src/cache/store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAGzC,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,aAAa;IACb,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,SAAS;IACT,QAAQ;IACR,QAAQ;CACT,CAAC,CAAC;AAEH,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAE5B,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAChD,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAEtE,KAAK,MAAM,GAAG,IAAI,CAAC,GAAG,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QAClD,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YACvD,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;IAE3B,IAAI,MAAM,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;IAE/B,gDAAgD;IAChD,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IACD,kDAAkD;IAClD,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAC9D,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,IAAU;IAC9B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,MAAsB,EAAE,UAA4B;IAC/E,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,MAAM,aAAa,GAAG,YAAY,CAAC,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;IAClE,MAAM,WAAW,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEnF,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IACvB,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,MAAM,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC;IAE1E,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;;;GAWvB,CAAC,CAAC;IAEH,IAAI,CAAC,GAAG,
CAAC;QACP,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,aAAa;QACb,KAAK,EAAE,UAAU,CAAC,KAAK;QACvB,QAAQ,EAAE,UAAU,CAAC,QAAQ;QAC7B,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,QAAQ,CAAC;QAC7C,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,KAAK,CAAC;QACvC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC;QACzC,WAAW,EAAE,MAAM,CAAC,MAAM;QAC1B,aAAa,EAAE,UAAU,CAAC,SAAS;QACnC,WAAW,EAAE,WAAW;QACxB,SAAS,EAAE,YAAY,CAAC,GAAG,CAAC;QAC5B,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC;KACnC,CAAC,CAAC;AACL,CAAC;AAmBD,SAAS,kBAAkB,CAAC,GAAU;IACpC,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,GAAG,EAAE,GAAG,CAAC,GAAG;QACZ,aAAa,EAAE,GAAG,CAAC,cAAc;QACjC,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,OAAO,EAAE,GAAG,CAAC,QAAQ;QACrB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,WAAW,EAAE,GAAG,CAAC,YAA4C;QAC7D,aAAa,EAAE,GAAG,CAAC,cAAgD;QACnE,WAAW,EAAE,GAAG,CAAC,YAAY;QAC7B,SAAS,EAAE,GAAG,CAAC,UAAU;QACzB,SAAS,EAAE,GAAG,CAAC,UAAU;KAC1B,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW;IAC1C,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAExC,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC;;GAEtB,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,aAAa,CAAsB,CAAC;IAEhD,OAAO,GAAG,CAAC,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,MAAqB;IAC7C,IAAI,CAAC,MAAM,CAAC,SAAS;QAAE,OAAO,KAAK,CAAC;IACpC,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,KAAa;IACvC,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IAEzB,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;GAMvB,CAAC,CAAC,GAAG,CAAC,KAAK,CAAY,CAAC;IAEzB,OAAO,IAAI,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;AACtC,CAAC;AASD,MAAM,UAAU,kBAAkB,CAChC,KAAa,EACb,OAA2B,EAC3B,WAAqB;IAErB,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,MAAM,SAAS,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACxF,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IACvB,MAAM,SAAS,GAAG,IAAI,IAAI,CA
AC,GAAG,CAAC,OAAO,EAAE,GAAG,MAAM,CAAC,cAAc,GAAG,IAAI,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC;;;GAGvB,CAAC,CAAC;IAEH,IAAI,CAAC,GAAG,CAAC;QACP,KAAK;QACL,SAAS;QACT,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;QAChC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;QACxC,UAAU,EAAE,YAAY,CAAC,GAAG,CAAC;QAC7B,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,KAAa;IAClD,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAExF,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC;;GAEtB,CAAC,CAAC,GAAG,CAAC,SAAS,CAA8F,CAAC;IAE/G,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,OAAO;QACL,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;QAChC,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,CAAC;QAC1C,WAAW,EAAE,GAAG,CAAC,WAAW;KAC7B,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,OAInC;IACC,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,MAAM,MAAM,GAAc,EAAE,CAAC;IAC7B,IAAI,UAAU,GAAG,WAAW,CAAC;IAE7B,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,UAAU,GAAG,oEAAoE,CAAC;QAClF,UAAU,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACvB,UAAU,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACnD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,UAAU,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACrF,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,oCAAoC,CAAC;IAE3F,MAAM,GAAG,GAAG,2BAA2B,UAAU,IAAI,WAAW,IAAI,WAAW,YAAY,CAAC;IAC5F,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAY,CAAC;IACvD,OAAO,IAAI,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;AACtC,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,OAIjC;IACC,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,UAAU,GAAa,EAAE,
CAAC;IAChC,MAAM,MAAM,GAAc,EAAE,CAAC;IAE7B,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,UAAU,CAAC,IAAI,CACb,iIAAiI,CAClI,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACvB,UAAU,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,UAAU,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACrF,MAAM,GAAG,GAAG,yBAAyB,WAAW,EAAE,CAAC;IACnD,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;IAC9C,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,aAAa;IAC3B,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;GAOtB,CAAC,CAAC,GAAG,EAA+F,CAAC;IAEtG,OAAO;QACL,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;QACxE,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,EAAE;QACxB,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,EAAE;KACzB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"daemon.d.ts","sourceRoot":"","sources":["../../src/cli/daemon.ts"],"names":[],"mappings":"AAAA,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAG/C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"daemon.js","sourceRoot":"","sources":["../../src/cli/daemon.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,SAAS,CAAC,KAAe;IACvC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC7D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;AACzE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health.d.ts","sourceRoot":"","sources":["../../src/cli/health.ts"],"names":[],"mappings":"AAAA,wBAAgB,cAAc,IAAI,IAAI,CAGrC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health.js","sourceRoot":"","sources":["../../src/cli/health.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,cAAc;IAC5B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC9D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC;AAChF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,OAAO,GAAG,KAAK,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,CAAC;AAE5D,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB;AAID,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CAQ1D"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** Subcommands recognized as the first CLI argument. */
const KNOWN_COMMANDS = new Set(['warmup', 'serve', 'health']);

/**
 * Split CLI arguments into a command and its remaining arguments.
 *
 * When the first argument is a known subcommand it becomes `command` and the
 * rest become `args`. Otherwise the command is `'mcp'` and `args` is empty;
 * the original arguments are not forwarded in that case.
 *
 * @param argv CLI arguments, without the node binary and script path.
 * @returns `{ command, args }`.
 */
export function parseCommand(argv) {
    const [head, ...rest] = argv;
    const isKnown = Boolean(head) && KNOWN_COMMANDS.has(head);
    if (!isKnown) {
        return { command: 'mcp', args: [] };
    }
    return { command: head, args: rest };
}
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":"AAOA,MAAM,cAAc,GAAwB,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEnF,MAAM,UAAU,YAAY,CAAC,IAAc;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAEtB,IAAI,KAAK,IAAI,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,OAAO,EAAE,OAAO,EAAE,KAAgB,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5D,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Outcome of a warmup run, one status per component.
 */
export interface WarmupResult {
    /** Result of the Playwright Chromium install step. */
    playwright: 'ok' | 'failed';
    /** Error message when `playwright` is `'failed'`. */
    playwrightError?: string;
    /**
     * SearXNG status: `'ready'` (already set up), `'bootstrapped'` (set up
     * during this run), `'failed'`, or `'no_python'` (Python 3 not found).
     */
    searxng: 'ready' | 'bootstrapped' | 'failed' | 'no_python';
    /** Error message when `searxng` is `'failed'`. */
    searxngError?: string;
    /** Trafilatura install status; `'skipped'` when it was not requested. */
    trafilatura?: 'ok' | 'failed' | 'skipped';
    /** FlashRank install status; absent when it was not requested. */
    reranker?: 'ok' | 'failed';
    /** Error message when `reranker` is `'failed'`. */
    rerankerError?: string;
}

/**
 * Run the warmup steps and report the status of each component.
 *
 * @param flags CLI flags selecting optional steps.
 * @returns The collected per-component statuses.
 */
export declare function runWarmup(flags?: string[]): Promise<WarmupResult>;
|
|
11
|
+
//# sourceMappingURL=warmup.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"warmup.d.ts","sourceRoot":"","sources":["../../src/cli/warmup.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,IAAI,GAAG,QAAQ,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,EAAE,OAAO,GAAG,cAAc,GAAG,QAAQ,GAAG,WAAW,CAAC;IAC3D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,IAAI,GAAG,QAAQ,GAAG,SAAS,CAAC;IAC1C,QAAQ,CAAC,EAAE,IAAI,GAAG,QAAQ,CAAC;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAqED,wBAAsB,SAAS,CAAC,KAAK,GAAE,MAAM,EAAO,GAAG,OAAO,CAAC,YAAY,CAAC,CAiD3E"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { execSync } from 'node:child_process';
|
|
2
|
+
import { getConfig } from '../config.js';
|
|
3
|
+
import { checkPythonAvailable, bootstrapNativeSearxng, getBootstrapState } from '../searxng/bootstrap.js';
|
|
4
|
+
import { resetAvailabilityCache } from '../search/flashrank.js';
|
|
5
|
+
/**
 * Write one prefixed line to stderr.
 *
 * @param msg Message text; a `[wigolo warmup] ` prefix and a newline are added.
 */
function log(msg) {
    const line = `[wigolo warmup] ${msg}\n`;
    process.stderr.write(line);
}
|
|
8
|
+
/**
 * Install Playwright's Chromium build by running
 * `npx playwright install chromium` synchronously (output piped, 120 s timeout).
 *
 * Failures are logged and reported in the return value, not thrown.
 *
 * @returns `{ playwright: 'ok' }` on success, otherwise
 *   `{ playwright: 'failed', playwrightError }`.
 */
function installPlaywright() {
    log('Installing Playwright Chromium...');
    const execOptions = { stdio: 'pipe', timeout: 120000 };
    try {
        execSync('npx playwright install chromium', execOptions);
        log('Playwright Chromium installed');
        return { playwright: 'ok' };
    }
    catch (err) {
        let reason;
        if (err instanceof Error) {
            reason = err.message;
        }
        else {
            reason = String(err);
        }
        log(`Playwright install failed: ${reason}`);
        return { playwright: 'failed', playwrightError: reason };
    }
}
|
|
21
|
+
/**
 * Install the `trafilatura` Python package by running
 * `python3 -m pip install --quiet trafilatura` synchronously (output piped,
 * 120 s timeout).
 *
 * Failures are logged, not thrown.
 *
 * @returns `'ok'` on success, `'failed'` otherwise.
 */
function installTrafilatura() {
    log('Installing Trafilatura...');
    const execOptions = { stdio: 'pipe', timeout: 120000 };
    try {
        execSync('python3 -m pip install --quiet trafilatura', execOptions);
        log('Trafilatura installed');
        return 'ok';
    }
    catch (err) {
        let reason;
        if (err instanceof Error) {
            reason = err.message;
        }
        else {
            reason = String(err);
        }
        log(`Trafilatura install failed: ${reason}`);
        return 'failed';
    }
}
|
|
37
|
+
/**
 * Install the `flashrank` Python package by running
 * `python3 -m pip install --quiet flashrank` synchronously (output piped,
 * 120 s timeout), then call `resetAvailabilityCache()`.
 *
 * Failures are logged and reported in the return value, not thrown.
 *
 * @returns `{ reranker: 'ok' }` on success, otherwise
 *   `{ reranker: 'failed', rerankerError }`.
 */
function installFlashRank() {
    log('Installing FlashRank...');
    const execOptions = { stdio: 'pipe', timeout: 120000 };
    try {
        execSync('python3 -m pip install --quiet flashrank', execOptions);
        resetAvailabilityCache();
        log('FlashRank installed successfully');
        return { reranker: 'ok' };
    }
    catch (err) {
        let reason;
        if (err instanceof Error) {
            reason = err.message;
        }
        else {
            reason = String(err);
        }
        log(`FlashRank install failed: ${reason}`);
        return { reranker: 'failed', rerankerError: reason };
    }
}
|
|
51
|
+
/**
 * Decide what the warmup needs to do for SearXNG, without doing the bootstrap.
 *
 * @param dataDir Data directory passed to `getBootstrapState`.
 * @returns `{ searxng: 'ready' }` when the stored bootstrap state is ready,
 *   `{ searxng: 'no_python' }` when Python is unavailable, otherwise
 *   `{ needsBootstrap: true }` to tell the caller to run the bootstrap.
 */
function setupSearxng(dataDir) {
    const bootstrapState = getBootstrapState(dataDir);
    const alreadyReady = bootstrapState?.status === 'ready';
    if (alreadyReady) {
        log('SearXNG already set up');
        return { searxng: 'ready' };
    }

    const hasPython = checkPythonAvailable();
    if (hasPython) {
        return { needsBootstrap: true };
    }
    log('Python 3 not found — SearXNG requires Python. Install Python 3 or use Docker mode (SEARXNG_MODE=docker)');
    return { searxng: 'no_python' };
}
|
|
63
|
+
/**
 * Prepare optional runtime dependencies and report the status of each.
 *
 * Always runs the Playwright install and the SearXNG check (bootstrapping
 * SearXNG when the check asks for it). Trafilatura is installed only with
 * `--trafilatura` or `--all`; FlashRank only with `--reranker` or `--all`.
 * Step failures are recorded in the result rather than thrown, and a summary
 * is logged at the end.
 *
 * @param flags CLI flags; defaults to none.
 * @returns The merged per-component statuses.
 */
export async function runWarmup(flags = []) {
    log('Starting warmup...');
    const { dataDir } = getConfig();

    const playwrightOutcome = installPlaywright();

    const searxngCheck = setupSearxng(dataDir);
    let searxngOutcome;
    if (!('needsBootstrap' in searxngCheck)) {
        // Already ready, or Python is missing: the check result is the outcome.
        searxngOutcome = searxngCheck;
    }
    else {
        log('Bootstrapping SearXNG (this may take a minute)...');
        try {
            await bootstrapNativeSearxng(dataDir);
            log('SearXNG bootstrapped successfully');
            searxngOutcome = { searxng: 'bootstrapped' };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            log(`SearXNG bootstrap failed: ${reason}`);
            searxngOutcome = { searxng: 'failed', searxngError: reason };
        }
    }

    const requested = new Set(flags);
    const wantsAll = requested.has('--all');

    const trafilaturaStatus = requested.has('--trafilatura') || wantsAll
        ? installTrafilatura()
        : 'skipped';
    const rerankerOutcome = requested.has('--reranker') || wantsAll
        ? installFlashRank()
        : {};

    const result = Object.assign(
        {},
        playwrightOutcome,
        searxngOutcome,
        { trafilatura: trafilaturaStatus },
        rerankerOutcome,
    );

    const playwrightDetail = result.playwrightError ? ` (${result.playwrightError})` : '';
    const searxngDetail = result.searxngError ? ` (${result.searxngError})` : '';
    log('');
    log('Summary:');
    log(` Playwright: ${result.playwright}${playwrightDetail}`);
    log(` SearXNG: ${result.searxng}${searxngDetail}`);
    if (trafilaturaStatus !== 'skipped') {
        log(` Trafilatura: ${trafilaturaStatus}`);
    }
    if (result.reranker) {
        const rerankerDetail = result.rerankerError ? ` (${result.rerankerError})` : '';
        log(` FlashRank: ${result.reranker}${rerankerDetail}`);
    }
    return result;
}
|
|
107
|
+
//# sourceMappingURL=warmup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"warmup.js","sourceRoot":"","sources":["../../src/cli/warmup.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC1G,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAYhE,SAAS,GAAG,CAAC,GAAW;IACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC;AACnD,CAAC;AAED,SAAS,iBAAiB;IACxB,GAAG,CAAC,mCAAmC,CAAC,CAAC;IACzC,IAAI,CAAC;QACH,QAAQ,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QAChF,GAAG,CAAC,+BAA+B,CAAC,CAAC;QACrC,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAC9B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,GAAG,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC;QAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,eAAe,EAAE,OAAO,EAAE,CAAC;IAC5D,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB;IACzB,GAAG,CAAC,2BAA2B,CAAC,CAAC;IACjC,IAAI,CAAC;QACH,QAAQ,CAAC,4CAA4C,EAAE;YACrD,KAAK,EAAE,MAAM;YACb,OAAO,EAAE,MAAM;SAChB,CAAC,CAAC;QACH,GAAG,CAAC,uBAAuB,CAAC,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,GAAG,CAAC,+BAA+B,OAAO,EAAE,CAAC,CAAC;QAC9C,OAAO,QAAQ,CAAC;IAClB,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB;IACvB,GAAG,CAAC,yBAAyB,CAAC,CAAC;IAC/B,IAAI,CAAC;QACH,QAAQ,CAAC,0CAA0C,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QACzF,sBAAsB,EAAE,CAAC;QACzB,GAAG,CAAC,kCAAkC,CAAC,CAAC;QACxC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,GAAG,CAAC,6BAA6B,OAAO,EAAE,CAAC,CAAC;QAC5C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC;IACxD,CAAC;AACH,CAAC;AAMD,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,KAAK,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAEzC,IAAI,KAAK,EAAE,MAAM,KAAK,OAAO,EAAE,CAAC;QAC9B,GAAG,CAAC,wBAAwB,CAAC,CAAC;QAC9B,OAAO,EAAE
,OAAO,EAAE,OAAO,EAAE,CAAC;IAC9B,CAAC;IAED,IAAI,CAAC,oBAAoB,EAAE,EAAE,CAAC;QAC5B,GAAG,CAAC,yGAAyG,CAAC,CAAC;QAC/G,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;IAClC,CAAC;IAED,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAkB,EAAE;IAClD,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAC1B,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAE3B,MAAM,QAAQ,GAAG,iBAAiB,EAAE,CAAC;IAErC,MAAM,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAClD,IAAI,aAA6D,CAAC;IAElE,IAAI,gBAAgB,IAAI,YAAY,EAAE,CAAC;QACrC,GAAG,CAAC,mDAAmD,CAAC,CAAC;QACzD,IAAI,CAAC;YACH,MAAM,sBAAsB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC7C,GAAG,CAAC,mCAAmC,CAAC,CAAC;YACzC,aAAa,GAAG,EAAE,OAAO,EAAE,cAAc,EAAE,CAAC;QAC9C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjE,GAAG,CAAC,6BAA6B,OAAO,EAAE,CAAC,CAAC;YAC5C,aAAa,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;QAC/D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,aAAa,GAAG,YAAY,CAAC;IAC/B,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,UAAU,GAAgC,SAAS,CAAC;IACxD,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QACzD,UAAU,GAAG,kBAAkB,EAAE,CAAC;IACpC,CAAC;IAED,IAAI,cAAc,GAAqD,EAAE,CAAC;IAC1E,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QACtD,cAAc,GAAG,gBAAgB,EAAE,CAAC;IACtC,CAAC;IAED,MAAM,MAAM,GAAiB,EAAE,GAAG,QAAQ,EAAE,GAAG,aAAa,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,cAAc,EAAE,CAAC;IAE3G,GAAG,CAAC,EAAE,CAAC,CAAC;IACR,GAAG,CAAC,UAAU,CAAC,CAAC;IAChB,GAAG,CAAC,oBAAoB,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,eAAe,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC5G,GAAG,CAAC,oBAAoB,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACnG,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;QAC7B,GAAG,CAAC,oBAAoB,UAAU,EAAE,CAAC,CAAC;IACxC,CAAC;IACD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,GAAG,CAAC,oBAAoB,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC,
CAAC,EAAE,EAAE,CAAC,CAAC;IACxG,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Runtime configuration resolved from environment variables.
 *
 * Each field notes the environment variable it is read from and the default
 * applied by `getConfig()` when the variable is unset.
 */
export interface Config {
    /** `SEARXNG_URL`; `null` when unset. */
    searxngUrl: string | null;
    /** `SEARXNG_MODE`; defaults to `'native'`. */
    searxngMode: 'native' | 'docker';
    /** `SEARXNG_PORT`; defaults to 8888. */
    searxngPort: number;

    /** `FETCH_TIMEOUT_MS`; defaults to 10000. */
    fetchTimeoutMs: number;
    /** `FETCH_MAX_RETRIES`; defaults to 2. */
    fetchMaxRetries: number;
    /** `MAX_REDIRECTS`; defaults to 5. */
    maxRedirects: number;
    /** `PLAYWRIGHT_LOAD_TIMEOUT_MS`; defaults to 15000. */
    playwrightLoadTimeoutMs: number;
    /** `PLAYWRIGHT_NAV_TIMEOUT_MS`; defaults to 10000. */
    playwrightNavTimeoutMs: number;
    /** `SEARXNG_QUERY_TIMEOUT_MS`; defaults to 8000. */
    searxngQueryTimeoutMs: number;
    /** `SEARCH_FETCH_TIMEOUT_MS`; defaults to 15000. */
    searchFetchTimeoutMs: number;
    /** `SEARCH_TOTAL_TIMEOUT_MS`; defaults to 30000. */
    searchTotalTimeoutMs: number;
    /** `VALIDATE_TIMEOUT_MS`; defaults to 5000. */
    validateTimeoutMs: number;

    /** `MAX_BROWSERS`; defaults to 3. */
    maxBrowsers: number;
    /** `BROWSER_IDLE_TIMEOUT`; defaults to 60000. */
    browserIdleTimeoutMs: number;
    /** `BROWSER_FALLBACK_THRESHOLD`; defaults to 3. */
    browserFallbackThreshold: number;
    /** `WIGOLO_AUTH_STATE_PATH`; `null` when unset. */
    authStatePath: string | null;
    /** `WIGOLO_CHROME_PROFILE_PATH`; `null` when unset. */
    chromeProfilePath: string | null;

    /** `WIGOLO_DATA_DIR`; defaults to `.wigolo` inside the user's home directory. */
    dataDir: string;
    /** `CACHE_TTL_SEARCH`; defaults to 86400 (presumably seconds — TODO confirm). */
    cacheTtlSearch: number;
    /** `CACHE_TTL_CONTENT`; defaults to 604800 (presumably seconds — TODO confirm). */
    cacheTtlContent: number;

    /** `CRAWL_CONCURRENCY`; defaults to 2. */
    crawlConcurrency: number;
    /** `CRAWL_DELAY_MS`; defaults to 500. */
    crawlDelayMs: number;
    /** `CRAWL_PRIVATE_CONCURRENCY`; defaults to 10. */
    crawlPrivateConcurrency: number;
    /** `CRAWL_PRIVATE_DELAY_MS`; defaults to 0. */
    crawlPrivateDelayMs: number;

    /** `USE_PROXY`; defaults to `false`. */
    useProxy: boolean;
    /** `PROXY_URL`; `null` when unset. */
    proxyUrl: string | null;
    /** `USER_AGENT`; `null` when unset. */
    userAgent: string | null;
    /** `VALIDATE_LINKS`; defaults to `true`. */
    validateLinks: boolean;
    /** `RESPECT_ROBOTS_TXT`; defaults to `true`. */
    respectRobotsTxt: boolean;
    /** `BRAVE_API_KEY`; `null` when unset. */
    braveApiKey: string | null;

    /** `LOG_LEVEL`; defaults to `'info'`. */
    logLevel: 'debug' | 'info' | 'warn' | 'error';
    /** `LOG_FORMAT`; defaults to `'json'`. */
    logFormat: 'json' | 'text';

    /** `WIGOLO_TRAFILATURA`; defaults to `'auto'`. */
    trafilatura: 'auto' | 'always' | 'never';
    /** `WIGOLO_RERANKER`; defaults to `'none'`. */
    reranker: 'flashrank' | 'none' | 'custom';
    /** `WIGOLO_RERANKER_MODEL`; defaults to `'ms-marco-MiniLM-L-12-v2'`. */
    rerankerModel: string;
    /** `WIGOLO_RELEVANCE_THRESHOLD`; defaults to 0. */
    relevanceThreshold: number;
}
/** Returns the configuration; it is built once and then served from a cache. */
export declare function getConfig(): Config;
/** Clears the cached configuration so the next `getConfig()` call rebuilds it. */
export declare function resetConfig(): void;
|
|
41
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,MAAM;IACrB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,WAAW,EAAE,QAAQ,GAAG,QAAQ,CAAC;IACjC,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB,EAAE,MAAM,CAAC;IAChC,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,wBAAwB,EAAE,MAAM,CAAC;IACjC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,uBAAuB,EAAE,MAAM,CAAC;IAChC,mBAAmB,EAAE,MAAM,CAAC;IAC5B,QAAQ,EAAE,OAAO,CAAC;IAClB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC9C,SAAS,EAAE,MAAM,GAAG,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;IACzC,QAAQ,EAAE,WAAW,GAAG,MAAM,GAAG,QAAQ,CAAC;IAC1C,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAqBD,wBAAgB,SAAS,IAAI,MAAM,CA2ClC;AAED,wBAAgB,WAAW,IAAI,IAAI,CAElC"}
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { homedir } from 'node:os';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
function envStr(key, fallback = null) {
|
|
4
|
+
return process.env[key] ?? fallback;
|
|
5
|
+
}
|
|
6
|
+
function envInt(key, fallback) {
|
|
7
|
+
const val = process.env[key];
|
|
8
|
+
if (val === undefined)
|
|
9
|
+
return fallback;
|
|
10
|
+
const parsed = parseInt(val, 10);
|
|
11
|
+
return isNaN(parsed) ? fallback : parsed;
|
|
12
|
+
}
|
|
13
|
+
function envBool(key, fallback) {
|
|
14
|
+
const val = process.env[key];
|
|
15
|
+
if (val === undefined)
|
|
16
|
+
return fallback;
|
|
17
|
+
return val.toLowerCase() !== 'false' && val !== '0';
|
|
18
|
+
}
|
|
19
|
+
// Memoized configuration; null until getConfig() first builds it.
let cachedConfig = null;
/**
 * Returns the process configuration, building it from environment variables
 * on the first call and returning the same cached object afterwards.
 *
 * @returns The cached configuration object.
 */
export function getConfig() {
    if (cachedConfig === null) {
        cachedConfig = {
            // SearXNG
            searxngUrl: envStr('SEARXNG_URL'),
            searxngMode: envStr('SEARXNG_MODE', 'native'),
            searxngPort: envInt('SEARXNG_PORT', 8888),
            // Fetching and timeouts
            fetchTimeoutMs: envInt('FETCH_TIMEOUT_MS', 10000),
            fetchMaxRetries: envInt('FETCH_MAX_RETRIES', 2),
            maxRedirects: envInt('MAX_REDIRECTS', 5),
            playwrightLoadTimeoutMs: envInt('PLAYWRIGHT_LOAD_TIMEOUT_MS', 15000),
            playwrightNavTimeoutMs: envInt('PLAYWRIGHT_NAV_TIMEOUT_MS', 10000),
            searxngQueryTimeoutMs: envInt('SEARXNG_QUERY_TIMEOUT_MS', 8000),
            searchFetchTimeoutMs: envInt('SEARCH_FETCH_TIMEOUT_MS', 15000),
            searchTotalTimeoutMs: envInt('SEARCH_TOTAL_TIMEOUT_MS', 30000),
            validateTimeoutMs: envInt('VALIDATE_TIMEOUT_MS', 5000),
            // Browser pool
            maxBrowsers: envInt('MAX_BROWSERS', 3),
            browserIdleTimeoutMs: envInt('BROWSER_IDLE_TIMEOUT', 60000),
            browserFallbackThreshold: envInt('BROWSER_FALLBACK_THRESHOLD', 3),
            authStatePath: envStr('WIGOLO_AUTH_STATE_PATH'),
            chromeProfilePath: envStr('WIGOLO_CHROME_PROFILE_PATH'),
            // Storage and cache; the home-directory default is only computed when needed.
            dataDir: envStr('WIGOLO_DATA_DIR') ?? join(homedir(), '.wigolo'),
            cacheTtlSearch: envInt('CACHE_TTL_SEARCH', 86400),
            cacheTtlContent: envInt('CACHE_TTL_CONTENT', 604800),
            // Crawling
            crawlConcurrency: envInt('CRAWL_CONCURRENCY', 2),
            crawlDelayMs: envInt('CRAWL_DELAY_MS', 500),
            crawlPrivateConcurrency: envInt('CRAWL_PRIVATE_CONCURRENCY', 10),
            crawlPrivateDelayMs: envInt('CRAWL_PRIVATE_DELAY_MS', 0),
            // Network behaviour
            useProxy: envBool('USE_PROXY', false),
            proxyUrl: envStr('PROXY_URL'),
            userAgent: envStr('USER_AGENT'),
            validateLinks: envBool('VALIDATE_LINKS', true),
            respectRobotsTxt: envBool('RESPECT_ROBOTS_TXT', true),
            braveApiKey: envStr('BRAVE_API_KEY'),
            // Logging
            logLevel: envStr('LOG_LEVEL', 'info'),
            logFormat: envStr('LOG_FORMAT', 'json'),
            // Extraction and reranking
            trafilatura: envStr('WIGOLO_TRAFILATURA', 'auto'),
            reranker: (envStr('WIGOLO_RERANKER') ?? 'none'),
            rerankerModel: envStr('WIGOLO_RERANKER_MODEL') ?? 'ms-marco-MiniLM-L-12-v2',
            // `|| 0` turns an unparseable value (NaN) into 0.
            relevanceThreshold: parseFloat(envStr('WIGOLO_RELEVANCE_THRESHOLD') ?? '0') || 0,
        };
    }
    return cachedConfig;
}
/**
 * Drops the cached configuration so the next getConfig() call re-reads the
 * environment.
 */
export function resetConfig() {
    cachedConfig = null;
}
|
|
66
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAyCjC,SAAS,MAAM,CAAC,GAAW,EAAE,WAA0B,IAAI;IACzD,OAAO,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC;AACtC,CAAC;AAED,SAAS,MAAM,CAAC,GAAW,EAAE,QAAgB;IAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,QAAQ,CAAC;IACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IACjC,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;AAC3C,CAAC;AAED,SAAS,OAAO,CAAC,GAAW,EAAE,QAAiB;IAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,QAAQ,CAAC;IACvC,OAAO,GAAG,CAAC,WAAW,EAAE,KAAK,OAAO,IAAI,GAAG,KAAK,GAAG,CAAC;AACtD,CAAC;AAED,IAAI,YAAY,GAAkB,IAAI,CAAC;AAEvC,MAAM,UAAU,SAAS;IACvB,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IAEtC,YAAY,GAAG;QACb,UAAU,EAAE,MAAM,CAAC,aAAa,CAAC;QACjC,WAAW,EAAG,MAAM,CAAC,cAAc,EAAE,QAAQ,CAAyB;QACtE,WAAW,EAAE,MAAM,CAAC,cAAc,EAAE,IAAI,CAAC;QACzC,cAAc,EAAE,MAAM,CAAC,kBAAkB,EAAE,KAAK,CAAC;QACjD,eAAe,EAAE,MAAM,CAAC,mBAAmB,EAAE,CAAC,CAAC;QAC/C,YAAY,EAAE,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;QACxC,uBAAuB,EAAE,MAAM,CAAC,4BAA4B,EAAE,KAAK,CAAC;QACpE,sBAAsB,EAAE,MAAM,CAAC,2BAA2B,EAAE,KAAK,CAAC;QAClE,qBAAqB,EAAE,MAAM,CAAC,0BAA0B,EAAE,IAAI,CAAC;QAC/D,oBAAoB,EAAE,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC;QAC9D,oBAAoB,EAAE,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC;QAC9D,iBAAiB,EAAE,MAAM,CAAC,qBAAqB,EAAE,IAAI,CAAC;QACtD,WAAW,EAAE,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;QACtC,oBAAoB,EAAE,MAAM,CAAC,sBAAsB,EAAE,KAAK,CAAC;QAC3D,wBAAwB,EAAE,MAAM,CAAC,4BAA4B,EAAE,CAAC,CAAC;QACjE,aAAa,EAAE,MAAM,CAAC,wBAAwB,CAAC;QAC/C,iBAAiB,EAAE,MAAM,CAAC,4BAA4B,CAAC;QACvD,OAAO,EAAE,MAAM,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE,SAAS,CAAC;QAChE,cAAc,EAAE,MAAM,CAAC,kBAAkB,EAAE,KAAK,CAAC;QACjD,eAAe,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC;QACpD,gBAAgB,EAAE,MAAM,CAAC,mBAAmB,EAAE,CAAC,CAAC;QAChD,YAAY,EAAE,MAAM,CAAC,gBAAgB,EAAE,GAAG,CAAC;QAC3C,uBAAuB,EAAE,MAAM,CAAC,2BAA2B,EAAE,E
AAE,CAAC;QAChE,mBAAmB,EAAE,MAAM,CAAC,wBAAwB,EAAE,CAAC,CAAC;QACxD,QAAQ,EAAE,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC;QACrC,QAAQ,EAAE,MAAM,CAAC,WAAW,CAAC;QAC7B,SAAS,EAAE,MAAM,CAAC,YAAY,CAAC;QAC/B,aAAa,EAAE,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC;QAC9C,gBAAgB,EAAE,OAAO,CAAC,oBAAoB,EAAE,IAAI,CAAC;QACrD,WAAW,EAAE,MAAM,CAAC,eAAe,CAAC;QACpC,QAAQ,EAAG,MAAM,CAAC,WAAW,EAAE,MAAM,CAAwB;QAC7D,SAAS,EAAG,MAAM,CAAC,YAAY,EAAE,MAAM,CAAyB;QAChE,WAAW,EAAG,MAAM,CAAC,oBAAoB,EAAE,MAAM,CAAiC;QAClF,QAAQ,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,MAAM,CAAuB;QACrE,aAAa,EAAE,MAAM,CAAC,uBAAuB,CAAC,IAAI,yBAAyB;QAC3E,kBAAkB,EAAE,UAAU,CAAC,MAAM,CAAC,4BAA4B,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC;KACjF,CAAC;IAEF,OAAO,YAAY,CAAC;AACtB,CAAC;AAED,MAAM,UAAU,WAAW;IACzB,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { FetchOutput, CrawlInput, CrawlOutput, RawFetchResult } from '../types.js';
|
|
2
|
+
import { RobotsParser } from './robots.js';
|
|
3
|
+
/** Function that takes a URL and resolves to a `FetchOutput`. */
export type FetchFn = (url: string) => Promise<FetchOutput>;
/** Function that takes a URL and resolves to a `RawFetchResult`. */
export type RawFetchFn = (url: string) => Promise<RawFetchResult>;
/**
 * Crawler built from two injected fetch functions.
 *
 * Only the declaration is visible here; behavioural notes below are limited
 * to what the signatures show.
 */
export declare class Crawler {
    /**
     * @param fetchFn Fetch function resolving to `FetchOutput`.
     * @param rawFetchFn Fetch function resolving to `RawFetchResult`.
     */
    constructor(fetchFn: FetchFn, rawFetchFn: RawFetchFn);
    /** Runs a crawl described by `input` and resolves to its `CrawlOutput`. */
    crawl(input: CrawlInput): Promise<CrawlOutput>;
    /**
     * Sitemap-based crawl entry point.
     *
     * NOTE(review): presumably `maxPages` caps the number of pages and a null
     * `robotsParser` means no robots.txt rules are applied — confirm against
     * the implementation.
     */
    crawlSitemap(input: CrawlInput, seedOrigin: string, maxPages: number, robotsParser: RobotsParser | null): Promise<CrawlOutput>;
    private fetchFn;
    private rawFetchFn;
    private rateLimiter;
    private robotsTxtContent;
    private fetchRobots;
    private crawlTraversal;
    private filterLinks;
    private discoverSitemapUrls;
}
|
|
18
|
+
//# sourceMappingURL=crawler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/crawl/crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAA6B,cAAc,EAAE,MAAM,aAAa,CAAC;AAGnH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAO3C,MAAM,MAAM,OAAO,GAAG,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;AAC5D,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;AAElE,qBAAa,OAAO;IAClB,OAAO,CAAC,OAAO,CAAU;IACzB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,WAAW,CAAqB;gBAE5B,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,UAAU;IAK9C,KAAK,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAsBpD,OAAO,CAAC,gBAAgB,CAAuB;YAEjC,WAAW;YAmBX,cAAc;IA8E5B,OAAO,CAAC,WAAW;IA6Bb,YAAY,CAChB,KAAK,EAAE,UAAU,EACjB,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,YAAY,GAAG,IAAI,GAChC,OAAO,CAAC,WAAW,CAAC;YAqDT,mBAAmB;CA4ClC"}
|