vektor-slipstream 1.4.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -306
- package/package.json +14 -146
- package/CHANGELOG.md +0 -139
- package/LICENSE +0 -33
- package/TENETS.md +0 -189
- package/audn-log.js +0 -143
- package/axon.js +0 -389
- package/boot-patch.js +0 -33
- package/boot-screen.html +0 -210
- package/briefing.js +0 -150
- package/cerebellum.js +0 -439
- package/cloak-behaviour.js +0 -596
- package/cloak-captcha.js +0 -541
- package/cloak-core.js +0 -499
- package/cloak-identity.js +0 -484
- package/cloak-index.js +0 -261
- package/cloak-llms.js +0 -163
- package/cloak-pattern-store.js +0 -471
- package/cloak-recorder-auto.js +0 -297
- package/cloak-recorder-snippet.js +0 -119
- package/cloak-turbo-quant.js +0 -357
- package/cloak-warmup.js +0 -240
- package/cortex.js +0 -221
- package/detect-hardware.js +0 -181
- package/entity-resolver.js +0 -298
- package/errors.js +0 -66
- package/examples/example-claude-mcp.js +0 -220
- package/examples/example-langchain-researcher.js +0 -82
- package/examples/example-openai-assistant.js +0 -84
- package/examples/examples-README.md +0 -161
- package/export-import.js +0 -221
- package/forget.js +0 -148
- package/inspect.js +0 -199
- package/mistral/README-mistral.md +0 -123
- package/mistral/mistral-bridge.js +0 -218
- package/mistral/mistral-setup.js +0 -220
- package/mistral/vektor-tool-manifest.json +0 -41
- package/models/model_quantized.onnx +0 -0
- package/models/vocab.json +0 -1
- package/namespace.js +0 -186
- package/pin.js +0 -91
- package/slipstream-core-extended.js +0 -134
- package/slipstream-core.js +0 -1
- package/slipstream-db.js +0 -140
- package/slipstream-embedder.js +0 -338
- package/sovereign.js +0 -142
- package/token.js +0 -322
- package/types/index.d.ts +0 -269
- package/vektor-banner-loader.js +0 -109
- package/vektor-cli.js +0 -259
- package/vektor-licence-prompt.js +0 -128
- package/vektor-licence.js +0 -192
- package/vektor-setup.js +0 -270
- package/vektor-slipstream.dxt +0 -0
- package/vektor-tui.js +0 -373
- package/visualize.js +0 -235
package/cloak-index.js
DELETED
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* vektor-slipstream/cloak
|
|
5
|
-
* ─────────────────────────────────────────────
|
|
6
|
-
* Re-exports all Cloak tools + new v1.2 modules:
|
|
7
|
-
*
|
|
8
|
-
* EXISTING (v1.1):
|
|
9
|
-
* cloak_fetch — stealth headless browser fetch with cache
|
|
10
|
-
* cloak_render — full layout sensor (CSS, fonts, gap analysis)
|
|
11
|
-
* cloak_diff — semantic page diff over time
|
|
12
|
-
* cloak_diff_text — structural text diff
|
|
13
|
-
* cloak_passport — AES-256 session credential vault
|
|
14
|
-
* tokens_saved — ROI audit per session
|
|
15
|
-
*
|
|
16
|
-
* NEW (v1.2):
|
|
17
|
-
* llms — llms.txt pre-flight checker
|
|
18
|
-
* captcha — CAPTCHA detection + solving pipeline
|
|
19
|
-
* turboQuant — 3-bit vector compression for embeddings
|
|
20
|
-
* CloakIdentity — persistent browser fingerprint identity
|
|
21
|
-
*
|
|
22
|
-
* MCP_TOOLS — all tool definitions for MCP server
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
// ── v1.1 core (existing) ─────────────────────────────────────────────────────
|
|
26
|
-
const {
|
|
27
|
-
cloak_fetch,
|
|
28
|
-
cloak_render,
|
|
29
|
-
cloak_diff,
|
|
30
|
-
cloak_diff_text,
|
|
31
|
-
cloak_passport,
|
|
32
|
-
tokens_saved,
|
|
33
|
-
CLOAK_MCP_TOOLS: _CORE_MCP_TOOLS,
|
|
34
|
-
} = require('./cloak-core'); // existing cloak.js renamed to cloak-core.js
|
|
35
|
-
|
|
36
|
-
// ── v1.2 new modules ─────────────────────────────────────────────────────────
|
|
37
|
-
const llms = require('./cloak-llms');
|
|
38
|
-
const captcha = require('./cloak-captcha');
|
|
39
|
-
const turboQuant = require('./cloak-turbo-quant');
|
|
40
|
-
const { CloakIdentity } = require('./cloak-identity');
|
|
41
|
-
const behaviour = require('./cloak-behaviour');
|
|
42
|
-
let _patternStore = null; const patternStore = new Proxy({}, { get(_, k) { if (!_patternStore) _patternStore = require('./cloak-pattern-store'); return _patternStore[k]; } });
|
|
43
|
-
let _autoRecorder = null; const autoRecorder = new Proxy({}, { get(_, k) { if (!_autoRecorder) _autoRecorder = require('./cloak-recorder-auto'); return _autoRecorder[k]; } });
|
|
44
|
-
|
|
45
|
-
// ── Enhanced cloak_fetch with llms.txt pre-flight ────────────────────────────
|
|
46
|
-
/**
 * cloak_fetch_smart: drop-in replacement for cloak_fetch.
 *
 * Runs an llms.txt pre-flight first. When the site publishes a usable
 * llms.txt, the extracted text is returned directly and cloak_fetch (the
 * stealth browser path) is never called. In every other case — no llms.txt,
 * empty extraction, or any error raised while checking/parsing it — the call
 * falls through to cloak_fetch, so a pre-flight problem can never make the
 * fetch itself fail.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [opts] - Passed through to cloak_fetch. Set
 *   `skipLlmsCheck: true` to bypass the pre-flight entirely.
 * @returns {Promise<object>} On an llms.txt hit:
 *   `{ text, tokensSaved, fromCache, source: 'llms.txt', llmsFriendly: true }`.
 *   Otherwise the cloak_fetch result, tagged with `source: 'stealth'` and
 *   `llmsFriendly: false`.
 * @throws Whatever cloak_fetch rejects with.
 */
async function cloak_fetch_smart(url, opts = {}) {
  // `opts = {}` only covers undefined; tolerate an explicit null as well.
  const options = opts || {};

  if (!options.skipLlmsCheck) {
    try {
      const llmsResult = await llms.checkLlmsTxt(url);
      if (llmsResult && llmsResult.found) {
        const parsed = llms.parseLlmsTxt(llmsResult.content);
        const content = llms.extractRelevantContent(parsed, url);

        // An empty extraction is not a useful answer; let the browser handle it.
        if (typeof content === 'string' && content.length > 0) {
          const rawLen = llmsResult.content.length;
          return {
            text: content,
            // Heuristic kept from the original implementation. Presumably it
            // assumes a rendered page costs ~4x the llms.txt characters at
            // ~4 chars per token — TODO confirm with the author.
            tokensSaved: Math.floor((rawLen * 4 - content.length) / 4),
            fromCache: llmsResult.fromCache || false,
            source: 'llms.txt',
            llmsFriendly: true,
          };
        }
      }
    } catch {
      // Deliberate best-effort: the pre-flight is only an optimisation, so any
      // failure here falls through to the stealth browser below.
    }
  }

  // Fall through to stealth browser. The result object is tagged in place,
  // exactly as before, so callers keep receiving the same object cloak_fetch
  // produced.
  const result = await cloak_fetch(url, options);
  result.source = 'stealth';
  result.llmsFriendly = false;
  return result;
}
|
|
74
|
-
|
|
75
|
-
// ── New MCP tool definitions ──────────────────────────────────────────────────
|
|
76
|
-
// Tool definitions added on top of the core list. Every entry has the shape
// { name, description, input_schema }, where input_schema is a JSON-Schema
// object describing the tool's arguments.
const NEW_MCP_TOOLS = (() => {
  // Shorthands for property schemas. Each call returns a fresh object, so no
  // two tools ever share a schema instance.
  const str = (description) => ({ type: 'string', description });
  const bool = (description) => ({ type: 'boolean', description });
  const oneOf = (options, description) => ({ type: 'string', enum: options, description });
  // `required` is omitted entirely (not set to []) when a tool has no
  // mandatory arguments.
  const args = (properties, required) => (
    required
      ? { type: 'object', properties, required }
      : { type: 'object', properties }
  );
  const tool = (name, description, input_schema) => ({ name, description, input_schema });

  return [
    // ── Behaviour injection ────────────────────────────────────────────────
    tool(
      'cloak_inject_behaviour',
      'Inject human-realistic mouse/scroll behaviour into the current browser session. Call before or during cloak_fetch / cloak_identity_use on sites with behavioural fingerprinting (reCAPTCHA v3, Cloudflare, DataDome). Significantly improves trust score.',
      args({
        url: str('URL to open and inject behaviour on'),
        category: oneOf(['reading', 'form', 'shopping', 'login', 'idle'], 'Behaviour category to simulate (default: reading)'),
        patternName: str('Specific pattern name to replay (overrides category)'),
        speedFactor: { type: 'number', description: 'Replay speed: 1.0 = real speed, 0.5 = 2x faster (default 1.0)' },
        synthetic: bool('Use procedural synthesis instead of recorded pattern (default false)'),
        durationMs: { type: 'integer', description: 'Duration for synthetic mode in ms (default 5000)' },
      }, ['url']),
    ),
    tool(
      'cloak_behaviour_stats',
      'List available behaviour patterns, categories, and how they work.',
      args({}),
    ),
    tool(
      'cloak_load_pattern',
      'Load a custom behaviour pattern recorded with the cloak-recorder browser snippet.',
      args({
        name: str('Pattern name to register'),
        pattern: str('JSON string from cloak-recorder-snippet.js'),
        save: bool('Persist pattern to ~/.vektor/behaviour-patterns/ (default true)'),
      }, ['name', 'pattern']),
    ),

    // ── Smart fetch ────────────────────────────────────────────────────────
    tool(
      'cloak_fetch_smart',
      'Fetch a URL intelligently — checks llms.txt first (agent-native, no browser), falls back to stealth Playwright browser. Returns compressed clean text. Shows whether site is agent-friendly.',
      args({
        url: str('URL to fetch.'),
        force: bool('Bypass cache.'),
        skipLlmsCheck: bool('Skip llms.txt pre-flight and go straight to stealth browser.'),
      }, ['url']),
    ),

    // ── CAPTCHA ────────────────────────────────────────────────────────────
    tool(
      'cloak_detect_captcha',
      'Detect if the current page has a CAPTCHA challenge. Returns type (hcaptcha, recaptcha_v2, recaptcha_v3, turnstile) and sitekey if found.',
      args({
        url: str('URL to check for CAPTCHA.'),
      }, ['url']),
    ),
    tool(
      'cloak_solve_captcha',
      'Automatically solve a detected CAPTCHA using vision AI. Supports hCaptcha (Claude/GPT-4o vision), reCAPTCHA v3 session warmup, audio CAPTCHAs (Whisper).',
      args({
        url: str('URL with CAPTCHA.'),
        captchaType: oneOf(['hcaptcha', 'recaptcha_v2', 'recaptcha_v3', 'turnstile', 'audio'], 'Type of CAPTCHA to solve.'),
        provider: oneOf(['claude', 'openai', '2captcha'], 'Vision model provider.'),
      }, ['url', 'captchaType']),
    ),

    // ── Identities ─────────────────────────────────────────────────────────
    tool(
      'cloak_identity_create',
      'Create a persistent browser fingerprint identity. Generates consistent UA, WebGL, canvas, fonts, timezone. Use the same identity across sessions to build site trust.',
      args({
        name: str('Identity name (e.g. "shopping-agent-1").'),
        seed: str('Optional seed for deterministic fingerprint.'),
      }, ['name']),
    ),
    tool(
      'cloak_identity_use',
      'Use a saved identity for the next cloak_fetch/cloak_render call. Applies full fingerprint to browser session.',
      args({
        name: str('Identity name to use.'),
        url: str('URL to fetch with this identity.'),
      }, ['name', 'url']),
    ),
    tool(
      'cloak_identity_list',
      'List all saved browser identities with their age, visit count, and trust summary.',
      args({}),
    ),

    // ── Pattern store ──────────────────────────────────────────────────────
    tool(
      'cloak_pattern_stats',
      'Show the self-improving pattern store stats — tier breakdown, win/loss rates, total sessions.',
      args({}),
    ),
    tool(
      'cloak_pattern_list',
      'List all patterns in the store with scores and tier.',
      args({
        tier: oneOf(['elite', 'active', 'probation'], 'Filter by tier (optional)'),
      }),
    ),
    tool(
      'cloak_pattern_prune',
      'Force a prune pass on the pattern store. Removes stale, low-scoring, and duplicate patterns.',
      args({}),
    ),
    tool(
      'cloak_pattern_seed',
      'Seed the pattern store with built-in patterns. Only runs if store is empty.',
      args({}),
    ),

    // ── TurboQuant ─────────────────────────────────────────────────────────
    tool(
      'turbo_quant_compress',
      'Compress the VEKTOR memory database using TurboQuant 3-bit vector quantisation. Reduces embedding storage by ~87% with <2% recall accuracy loss.',
      args({
        dbPath: str('Path to vektor-slipstream-memory.db. Leave blank for default.'),
      }),
    ),
    tool(
      'turbo_quant_stats',
      'Show TurboQuant compression stats — original vs compressed size, ratio, savings percentage.',
      args({}),
    ),
  ];
})();
|
|
218
|
-
|
|
219
|
-
const CLOAK_MCP_TOOLS = [..._CORE_MCP_TOOLS, ...NEW_MCP_TOOLS];
|
|
220
|
-
|
|
221
|
-
// ── MCP tool handler additions ────────────────────────────────────────────────
|
|
222
|
-
// Add these cases to your runTool() switch in example-claude-mcp.js:
|
|
223
|
-
//
|
|
224
|
-
// case 'cloak_fetch_smart': return cloak_fetch_smart(input.url, input);
|
|
225
|
-
// case 'cloak_identity_create': {
|
|
226
|
-
// const id = CloakIdentity.create(input.name, input.seed);
|
|
227
|
-
// id.save();
|
|
228
|
-
// return id.summary;
|
|
229
|
-
// }
|
|
230
|
-
// case 'cloak_identity_list': return CloakIdentity.list().map(n => CloakIdentity.load(n)?.summary);
|
|
231
|
-
// case 'turbo_quant_stats': return turboQuant.compressionStats();
|
|
232
|
-
// case 'turbo_quant_compress': {
|
|
233
|
-
// const db = require('better-sqlite3')(input.dbPath || defaultDbPath);
|
|
234
|
-
// return turboQuant.migrateDatabase(db);
|
|
235
|
-
// }
|
|
236
|
-
|
|
237
|
-
module.exports = {
|
|
238
|
-
// v1.1 unchanged exports
|
|
239
|
-
cloak_fetch,
|
|
240
|
-
cloak_render,
|
|
241
|
-
cloak_diff,
|
|
242
|
-
cloak_diff_text,
|
|
243
|
-
cloak_passport,
|
|
244
|
-
tokens_saved,
|
|
245
|
-
|
|
246
|
-
// v1.2 new exports
|
|
247
|
-
cloak_fetch_smart,
|
|
248
|
-
llms,
|
|
249
|
-
captcha,
|
|
250
|
-
turboQuant,
|
|
251
|
-
CloakIdentity,
|
|
252
|
-
|
|
253
|
-
// v1.3 new exports
|
|
254
|
-
behaviour,
|
|
255
|
-
patternStore,
|
|
256
|
-
autoRecorder,
|
|
257
|
-
|
|
258
|
-
// MCP
|
|
259
|
-
CLOAK_MCP_TOOLS,
|
|
260
|
-
NEW_MCP_TOOLS,
|
|
261
|
-
};
|
package/cloak-llms.js
DELETED
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* cloak-llms.js
|
|
5
|
-
* Pre-flight llms.txt checker for cloak_fetch.
|
|
6
|
-
* Checks if a site publishes llms.txt (agent-native access).
|
|
7
|
-
* If found, returns clean structured content — no Playwright needed.
|
|
8
|
-
* Falls back to stealth browser if not found or malformed.
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
const https = require('https');
|
|
12
|
-
const http = require('http');
|
|
13
|
-
const { URL } = require('url');
|
|
14
|
-
|
|
15
|
-
const LLMS_TTL_MS = 24 * 60 * 60 * 1000; // 24hr cache
|
|
16
|
-
const _llmsCache = new Map(); // origin → { found, content, checkedAt }
|
|
17
|
-
|
|
18
|
-
/**
 * Check for llms.txt at the origin of a URL.
 *
 * Results (hits and misses, including failed fetches) are remembered per
 * origin in _llmsCache for LLMS_TTL_MS, so repeated calls for the same site
 * do not re-fetch.
 *
 * @param {string} targetUrl - Any URL on the site to probe.
 * @returns {Promise<{found: boolean, content: (string|null), url: string, fromCache?: boolean}>}
 *   `url` is the llms.txt location that was probed, or `targetUrl` itself when
 *   it is unparseable or not http(s). `fromCache: true` is present only on
 *   cache hits. Never rejects: every failure is reported as `found: false`.
 */
async function checkLlmsTxt(targetUrl) {
  let parsedUrl;
  try {
    parsedUrl = new URL(targetUrl);
  } catch {
    return { found: false, content: null, url: targetUrl };
  }

  // Non-http(s) URLs (file:, data:, ...) have the opaque origin "null";
  // probing "null/llms.txt" is meaningless, so answer without fetching.
  if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
    return { found: false, content: null, url: targetUrl };
  }

  const origin = parsedUrl.origin; // e.g. https://example.com
  const llmsUrl = origin + '/llms.txt';

  // Cache hit
  const cached = _llmsCache.get(origin);
  if (cached && Date.now() - cached.checkedAt < LLMS_TTL_MS) {
    return { found: cached.found, content: cached.content, url: llmsUrl, fromCache: true };
  }

  let content = null;
  try {
    const body = await _fetchText(llmsUrl, 3000); // 3s timeout — don't block
    // Accept only a non-trivial body that is not an HTML document: many sites
    // answer unknown paths with a 200 HTML page (SPA shell / soft 404), which
    // must not be mistaken for llms.txt.
    const looksLikeHtml = typeof body === 'string'
      && /^\s*<(?:!doctype\s+html|html[\s>])/i.test(body);
    if (typeof body === 'string' && body.length > 20 && !looksLikeHtml) {
      content = body;
    }
  } catch {
    // Network error, timeout or non-200 status: treated as "no llms.txt".
  }

  const found = content !== null; // always a real boolean
  _llmsCache.set(origin, { found, content, checkedAt: Date.now() });
  return { found, content, url: llmsUrl };
}
|
|
48
|
-
|
|
49
|
-
/**
 * Parse llms.txt content into a structured object.
 * Spec: https://llmstxt.org
 *
 * Line handling (blank lines are ignored):
 *   "# x"   -> title (a later H1 overwrites an earlier one)
 *   "> x"   -> appended to description, space-joined
 *   "## x"  -> starts a new section
 *   "- [Title](url)" with optional trailing notes -> link in the current section
 *   anything else inside a section -> plain-text item (url: null)
 * Content that appears before the first "## " heading and is not a title or
 * blockquote is not captured.
 *
 * @param {string} content - Raw llms.txt text. Non-string input is treated as
 *   empty text instead of throwing.
 * @returns {{title: string, description: string,
 *   sections: Array<{heading: string, links: Array<{title: string, url: (string|null), desc: string}>}>,
 *   raw: string}}
 */
function parseLlmsTxt(content) {
  const text = typeof content === 'string' ? content : '';
  const result = { title: '', description: '', sections: [], raw: text };

  // "- [Title](url)" followed by optional notes. The spec separates notes with
  // ":", but " - " / dash variants and bare trailing text occur in the wild;
  // accepting them keeps the link instead of losing the whole line.
  const linkPattern = /^- \[([^\]]+)\]\(([^)]+)\)\s*(?:[:\u2013\u2014-]\s*)?(.*)$/;

  let section = null;

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trimEnd(); // also strips the "\r" of CRLF files
    if (!line.trim()) continue;

    // H1 = page title
    if (line.startsWith('# ')) {
      result.title = line.slice(2).trim();
      continue;
    }

    // Blockquote = description
    if (line.startsWith('> ')) {
      result.description += (result.description ? ' ' : '') + line.slice(2).trim();
      continue;
    }

    // H2 = section heading
    if (line.startsWith('## ')) {
      section = { heading: line.slice(3).trim(), links: [] };
      result.sections.push(section);
      continue;
    }

    // Everything below belongs to a section; skip it if none is open yet.
    if (!section) continue;

    if (line.startsWith('- [')) {
      const m = linkPattern.exec(line);
      if (m) {
        section.links.push({ title: m[1], url: m[2], desc: m[3] || '' });
        continue;
      }
      // Not a well-formed link (e.g. "- [ ] todo"): keep it as plain text
      // below rather than silently dropping it.
    }

    // Plain text in a section
    section.links.push({ title: line.trim(), url: null, desc: '' });
  }

  return result;
}
|
|
98
|
-
|
|
99
|
-
/**
 * Given parsed llms.txt content and a target URL, pick the section most
 * relevant to the requested page and render it as a focused text summary.
 *
 * Scoring per section: +2 for each link whose path occurs in the target
 * URL's path, +1 for each link whose title occurs anywhere in the target
 * URL. The highest-scoring section wins; if nothing scores above zero the
 * first section is used.
 *
 * @param {{title: string, description: string, sections: Array, raw: string}} parsed
 *   Result of parseLlmsTxt.
 * @param {string} targetUrl - URL of the page the caller wanted.
 * @returns {string} Markdown made of the title, description and the chosen
 *   section, or `parsed.raw` unchanged when there are no sections.
 */
function extractRelevantContent(parsed, targetUrl) {
  if (!parsed.sections.length) return parsed.raw;

  const originPattern = /^https?:\/\/[^/]+/;
  const urlLower = (targetUrl == null ? '' : String(targetUrl)).toLowerCase();
  // Compare paths with paths: matching a link path against the full URL would
  // let "/docs" match the host of "https://docs.example.com/".
  const targetPath = urlLower.replace(originPattern, '');

  let best = null;
  let bestScore = -1;

  for (const section of parsed.sections) {
    let score = 0;
    for (const link of section.links) {
      if (link.url) {
        const linkPath = link.url.toLowerCase().replace(originPattern, '');
        // A link to the site root reduces to "" or "/", which is contained in
        // every URL and would give "home" sections a bonus for any target.
        if (linkPath.length > 1 && targetPath.includes(linkPath)) score += 2;
      }
      if (link.title && urlLower.includes(link.title.toLowerCase())) score += 1;
    }
    // Strict ">" keeps the earliest section on ties.
    if (score > bestScore) { bestScore = score; best = section; }
  }

  const target = bestScore > 0 ? best : parsed.sections[0];

  let out = '';
  if (parsed.title) out += `# ${parsed.title}\n`;
  if (parsed.description) out += `${parsed.description}\n\n`;
  if (target) {
    out += `## ${target.heading}\n`;
    for (const link of target.links) {
      out += link.url
        ? `- [${link.title}](${link.url})${link.desc ? ': ' + link.desc : ''}\n`
        : `- ${link.title}\n`;
    }
  }
  return out.trim();
}
|
|
135
|
-
|
|
136
|
-
// ── Internal helpers ──────────────────────────────────────────────────────────
|
|
137
|
-
|
|
138
|
-
/**
 * GET a URL and resolve with its body as UTF-8 text.
 *
 * Rejects when the status is not 200 (redirects are not followed), when the
 * body grows past 50,000 characters, when the connection fails or closes
 * early, or when `timeoutMs` elapses. On timeout and on an oversize body the
 * request is destroyed, so no socket keeps downloading in the background.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @param {number} [timeoutMs=5000] - Overall deadline in milliseconds.
 * @returns {Promise<string>} The response body.
 */
function _fetchText(url, timeoutMs = 5000) {
  const MAX_BODY_CHARS = 50000;

  return new Promise((resolve, reject) => {
    let settled = false;
    let timer = null;
    let req = null;

    // Settle exactly once and always disarm the timer, whichever path wins.
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      fn(value);
    };

    // Reject AND tear the request down (used when we give up on a live request).
    const abort = (err) => {
      if (settled) return;
      settle(reject, err);
      if (req) req.destroy();
    };

    timer = setTimeout(() => abort(new Error('timeout')), timeoutMs);

    try {
      const lib = url.startsWith('https') ? https : http;
      req = lib.get(url, {
        headers: {
          'User-Agent': 'vektor-slipstream/cloak llms-checker',
          'Accept': 'text/plain, text/markdown',
        },
      }, (res) => {
        if (res.statusCode !== 200) {
          res.resume(); // drain so the socket can be released
          settle(reject, new Error(`HTTP ${res.statusCode}`));
          return;
        }

        let body = '';
        res.setEncoding('utf8');
        res.on('data', (chunk) => {
          body += chunk;
          // Previously this only destroyed the response, leaving the promise
          // pending until the timer fired; reject immediately instead.
          if (body.length > MAX_BODY_CHARS) abort(new Error('response too large'));
        });
        res.on('end', () => settle(resolve, body));
        res.on('error', (err) => settle(reject, err));
        // 'close' without a prior 'end' means the body was cut short.
        res.on('close', () => settle(reject, new Error('connection closed before response completed')));
      });
      req.on('error', (err) => settle(reject, err));
    } catch (err) {
      // lib.get throws synchronously for malformed URLs or a protocol
      // mismatch; make sure the timer does not outlive the rejection.
      settle(reject, err);
    }
  });
}
|
|
162
|
-
|
|
163
|
-
module.exports = { checkLlmsTxt, parseLlmsTxt, extractRelevantContent };
|