recker 1.0.33 → 1.0.34-next.c0d9cdb
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/presets.d.ts +5 -1
- package/dist/cli/presets.js +34 -10
- package/dist/cli/tui/ai-chat.d.ts +6 -1
- package/dist/cli/tui/ai-chat.js +267 -63
- package/dist/cli/tui/shell.js +16 -15
- package/dist/scrape/spider.d.ts +37 -0
- package/dist/scrape/spider.js +187 -1
- package/package.json +1 -1
package/dist/cli/presets.d.ts
CHANGED
package/dist/cli/presets.js
CHANGED
|
@@ -10,38 +10,62 @@ const ENV_MAPPING = {
|
|
|
10
10
|
slack: ['SLACK_TOKEN'],
|
|
11
11
|
vercel: ['VERCEL_TOKEN'],
|
|
12
12
|
supabase: ['SUPABASE_URL', 'SUPABASE_KEY'],
|
|
13
|
+
groq: ['GROQ_API_KEY'],
|
|
14
|
+
google: ['GOOGLE_API_KEY'],
|
|
15
|
+
xai: ['XAI_API_KEY'],
|
|
16
|
+
mistral: ['MISTRAL_API_KEY'],
|
|
17
|
+
cohere: ['COHERE_API_KEY'],
|
|
18
|
+
deepseek: ['DEEPSEEK_API_KEY'],
|
|
19
|
+
fireworks: ['FIREWORKS_API_KEY'],
|
|
20
|
+
together: ['TOGETHER_API_KEY'],
|
|
21
|
+
perplexity: ['PERPLEXITY_API_KEY'],
|
|
13
22
|
};
|
|
14
|
-
export function resolvePreset(name) {
|
|
23
|
+
export function resolvePreset(name, options = {}) {
|
|
24
|
+
const { throwOnError = false, silent = false } = options;
|
|
15
25
|
const presetFn = presets[name];
|
|
16
26
|
if (!presetFn) {
|
|
17
|
-
|
|
18
|
-
|
|
27
|
+
const msg = `Unknown preset '@${name}'`;
|
|
28
|
+
if (throwOnError) {
|
|
29
|
+
throw new Error(msg);
|
|
30
|
+
}
|
|
31
|
+
if (!silent) {
|
|
32
|
+
console.error(colors.red(`Error: ${msg}`));
|
|
33
|
+
}
|
|
34
|
+
return null;
|
|
19
35
|
}
|
|
20
36
|
const requiredEnvs = ENV_MAPPING[name];
|
|
21
|
-
const
|
|
37
|
+
const presetOptions = {};
|
|
22
38
|
if (requiredEnvs) {
|
|
23
39
|
let missing = false;
|
|
24
40
|
for (const envVar of requiredEnvs) {
|
|
25
41
|
const value = process.env[envVar];
|
|
26
42
|
if (!value) {
|
|
27
|
-
|
|
43
|
+
if (!silent) {
|
|
44
|
+
console.error(colors.yellow(`Warning: Missing env variable ${envVar} for preset @${name}`));
|
|
45
|
+
}
|
|
28
46
|
missing = true;
|
|
29
47
|
}
|
|
30
48
|
else {
|
|
31
49
|
const key = mapEnvToOption(name, envVar);
|
|
32
|
-
|
|
50
|
+
presetOptions[key] = value;
|
|
33
51
|
}
|
|
34
52
|
}
|
|
35
|
-
if (missing) {
|
|
53
|
+
if (missing && !silent) {
|
|
36
54
|
console.log(colors.gray(`Tip: export ${requiredEnvs.join('=... ')}=...`));
|
|
37
55
|
}
|
|
38
56
|
}
|
|
39
57
|
try {
|
|
40
|
-
return presetFn(
|
|
58
|
+
return presetFn(presetOptions);
|
|
41
59
|
}
|
|
42
60
|
catch (error) {
|
|
43
|
-
|
|
44
|
-
|
|
61
|
+
const msg = `Error initializing preset @${name}: ${error.message}`;
|
|
62
|
+
if (throwOnError) {
|
|
63
|
+
throw new Error(msg);
|
|
64
|
+
}
|
|
65
|
+
if (!silent) {
|
|
66
|
+
console.error(colors.red(msg));
|
|
67
|
+
}
|
|
68
|
+
return null;
|
|
45
69
|
}
|
|
46
70
|
}
|
|
47
71
|
function mapEnvToOption(preset, env) {
|
|
@@ -1,2 +1,7 @@
|
|
|
1
1
|
import readline from 'node:readline';
|
|
2
|
-
|
|
2
|
+
import type { Client } from '../../core/client.js';
|
|
3
|
+
export interface AIMode {
|
|
4
|
+
aiClients: Map<string, Client>;
|
|
5
|
+
variables?: Record<string, any>;
|
|
6
|
+
}
|
|
7
|
+
export declare function startAIChat(rl: readline.Interface, provider: string | undefined, context: AIMode): Promise<void>;
|
package/dist/cli/tui/ai-chat.js
CHANGED
|
@@ -1,100 +1,304 @@
|
|
|
1
1
|
import readline from 'node:readline';
|
|
2
2
|
import colors from '../../utils/colors.js';
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
3
|
+
import { createClient } from '../../core/client.js';
|
|
4
|
+
import { resolvePreset } from '../presets.js';
|
|
5
|
+
const AI_PRESETS = [
|
|
6
|
+
'openai', 'anthropic', 'groq', 'google', 'xai',
|
|
7
|
+
'mistral', 'cohere', 'deepseek', 'fireworks', 'together', 'perplexity'
|
|
8
|
+
];
|
|
9
|
+
const ENV_VAR_MAP = {
|
|
10
|
+
openai: 'OPENAI_API_KEY',
|
|
11
|
+
anthropic: 'ANTHROPIC_API_KEY',
|
|
12
|
+
google: 'GOOGLE_API_KEY',
|
|
13
|
+
groq: 'GROQ_API_KEY',
|
|
14
|
+
xai: 'XAI_API_KEY',
|
|
15
|
+
mistral: 'MISTRAL_API_KEY',
|
|
16
|
+
cohere: 'COHERE_API_KEY',
|
|
17
|
+
deepseek: 'DEEPSEEK_API_KEY',
|
|
18
|
+
fireworks: 'FIREWORKS_API_KEY',
|
|
19
|
+
together: 'TOGETHER_API_KEY',
|
|
20
|
+
perplexity: 'PERPLEXITY_API_KEY',
|
|
21
|
+
};
|
|
22
|
+
export async function startAIChat(rl, provider = 'openai', context) {
|
|
23
|
+
let currentProvider = provider.toLowerCase();
|
|
24
|
+
if (!AI_PRESETS.includes(currentProvider)) {
|
|
25
|
+
console.log(colors.red(`Unknown AI provider: ${currentProvider}`));
|
|
26
|
+
console.log(colors.gray(`Available: ${AI_PRESETS.join(', ')}`));
|
|
15
27
|
return;
|
|
16
28
|
}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const history = [
|
|
25
|
-
{ role: 'system', content: 'You are Recker AI, a helpful and concise assistant in a terminal environment.' }
|
|
26
|
-
];
|
|
27
|
-
rl.setPrompt(colors.magenta('You › '));
|
|
29
|
+
console.clear();
|
|
30
|
+
printHeader(currentProvider);
|
|
31
|
+
let client = await getOrCreateClient(currentProvider, context);
|
|
32
|
+
if (!client)
|
|
33
|
+
return;
|
|
34
|
+
const getPrompt = () => colors.magenta(`${currentProvider} › `);
|
|
35
|
+
rl.setPrompt(getPrompt());
|
|
28
36
|
rl.prompt();
|
|
37
|
+
let isGenerating = false;
|
|
38
|
+
let abortController = null;
|
|
29
39
|
return new Promise((resolve) => {
|
|
40
|
+
const cleanup = () => {
|
|
41
|
+
rl.off('line', onLine);
|
|
42
|
+
rl.off('SIGINT', onSigInt);
|
|
43
|
+
if (process.stdin.isTTY) {
|
|
44
|
+
process.stdin.off('keypress', onKeypress);
|
|
45
|
+
}
|
|
46
|
+
console.log('');
|
|
47
|
+
console.log(colors.gray('Exiting AI mode...'));
|
|
48
|
+
};
|
|
30
49
|
const onLine = async (line) => {
|
|
31
50
|
const input = line.trim();
|
|
32
51
|
if (!input) {
|
|
33
52
|
rl.prompt();
|
|
34
53
|
return;
|
|
35
54
|
}
|
|
36
|
-
if (input.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
55
|
+
if (input.startsWith('/')) {
|
|
56
|
+
const handled = await handleCommand(input, currentProvider, client, context, rl, () => {
|
|
57
|
+
cleanup();
|
|
58
|
+
resolve();
|
|
59
|
+
}, async (newProvider) => {
|
|
60
|
+
currentProvider = newProvider;
|
|
61
|
+
client = await getOrCreateClient(currentProvider, context);
|
|
62
|
+
if (client) {
|
|
63
|
+
rl.setPrompt(getPrompt());
|
|
64
|
+
console.log(colors.green(`Switched to ${currentProvider}`));
|
|
65
|
+
const memory = client.ai.getMemory();
|
|
66
|
+
if (memory.length > 0) {
|
|
67
|
+
console.log(colors.gray(`Memory: ${Math.floor(memory.length / 2)} pairs`));
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
rl.prompt();
|
|
71
|
+
});
|
|
72
|
+
if (handled)
|
|
73
|
+
return;
|
|
42
74
|
}
|
|
43
|
-
if (
|
|
44
|
-
|
|
45
|
-
|
|
75
|
+
if (!client || !client.hasAI) {
|
|
76
|
+
console.log(colors.red('AI client not available'));
|
|
77
|
+
rl.prompt();
|
|
46
78
|
return;
|
|
47
79
|
}
|
|
48
|
-
history.push({ role: 'user', content: input });
|
|
49
80
|
rl.pause();
|
|
50
|
-
|
|
51
|
-
|
|
81
|
+
isGenerating = true;
|
|
82
|
+
abortController = new AbortController();
|
|
83
|
+
const model = client._aiConfig?.model || currentProvider;
|
|
84
|
+
process.stdout.write(colors.gray(`${model} › `));
|
|
52
85
|
try {
|
|
53
|
-
const stream = await client.
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if (
|
|
61
|
-
|
|
62
|
-
|
|
86
|
+
const stream = await client.ai.chatStream(input);
|
|
87
|
+
let hasContent = false;
|
|
88
|
+
for await (const event of stream) {
|
|
89
|
+
if (abortController?.signal.aborted) {
|
|
90
|
+
console.log(colors.yellow('\n[Interrupted]'));
|
|
91
|
+
break;
|
|
92
|
+
}
|
|
93
|
+
if (event.type === 'text') {
|
|
94
|
+
if (!hasContent) {
|
|
95
|
+
process.stdout.write('\n' + colors.orange(event.content));
|
|
96
|
+
hasContent = true;
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
process.stdout.write(colors.orange(event.content));
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
else if (event.type === 'error') {
|
|
103
|
+
console.log(colors.red(`\nError: ${event.error}`));
|
|
63
104
|
}
|
|
64
105
|
}
|
|
65
|
-
|
|
66
|
-
|
|
106
|
+
console.log(colors.reset(''));
|
|
107
|
+
const memory = client.ai.getMemory();
|
|
108
|
+
const pairs = Math.floor(memory.length / 2);
|
|
109
|
+
console.log(colors.gray(`Memory: ${pairs}/12 pairs`));
|
|
67
110
|
}
|
|
68
111
|
catch (error) {
|
|
69
|
-
|
|
70
|
-
Error: ${error.message}`));
|
|
71
|
-
if (error.cause)
|
|
72
|
-
console.log(colors.gray(error.cause));
|
|
112
|
+
handleError(error, currentProvider);
|
|
73
113
|
}
|
|
74
114
|
finally {
|
|
115
|
+
isGenerating = false;
|
|
116
|
+
abortController = null;
|
|
75
117
|
rl.resume();
|
|
76
118
|
rl.prompt();
|
|
77
119
|
}
|
|
78
120
|
};
|
|
79
|
-
const cleanup = () => {
|
|
80
|
-
rl.off('line', onLine);
|
|
81
|
-
rl.off('SIGINT', onSigInt);
|
|
82
|
-
process.stdin.off('keypress', onKeypress);
|
|
83
|
-
};
|
|
84
|
-
if (process.stdin.isTTY)
|
|
85
|
-
readline.emitKeypressEvents(process.stdin);
|
|
86
121
|
const onKeypress = (_str, key) => {
|
|
87
122
|
if (key && key.name === 'escape') {
|
|
88
|
-
|
|
89
|
-
|
|
123
|
+
if (isGenerating && abortController) {
|
|
124
|
+
abortController.abort();
|
|
125
|
+
}
|
|
126
|
+
else {
|
|
127
|
+
cleanup();
|
|
128
|
+
resolve();
|
|
129
|
+
}
|
|
90
130
|
}
|
|
91
131
|
};
|
|
92
|
-
process.stdin.
|
|
132
|
+
if (process.stdin.isTTY) {
|
|
133
|
+
readline.emitKeypressEvents(process.stdin);
|
|
134
|
+
process.stdin.on('keypress', onKeypress);
|
|
135
|
+
}
|
|
93
136
|
rl.on('line', onLine);
|
|
94
137
|
const onSigInt = () => {
|
|
95
|
-
|
|
96
|
-
|
|
138
|
+
if (isGenerating && abortController) {
|
|
139
|
+
abortController.abort();
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
cleanup();
|
|
143
|
+
resolve();
|
|
144
|
+
}
|
|
97
145
|
};
|
|
98
146
|
rl.once('SIGINT', onSigInt);
|
|
99
147
|
});
|
|
100
148
|
}
|
|
149
|
+
async function getOrCreateClient(provider, context) {
|
|
150
|
+
let client = context.aiClients.get(provider);
|
|
151
|
+
if (client) {
|
|
152
|
+
return client;
|
|
153
|
+
}
|
|
154
|
+
try {
|
|
155
|
+
const presetConfig = resolvePreset(provider);
|
|
156
|
+
if (!presetConfig) {
|
|
157
|
+
console.log(colors.red(`Unknown preset: ${provider}`));
|
|
158
|
+
return null;
|
|
159
|
+
}
|
|
160
|
+
if (!presetConfig._aiConfig) {
|
|
161
|
+
console.log(colors.red(`Preset ${provider} does not support AI features.`));
|
|
162
|
+
return null;
|
|
163
|
+
}
|
|
164
|
+
client = createClient(presetConfig);
|
|
165
|
+
context.aiClients.set(provider, client);
|
|
166
|
+
return client;
|
|
167
|
+
}
|
|
168
|
+
catch (error) {
|
|
169
|
+
const envVar = ENV_VAR_MAP[provider] || `${provider.toUpperCase()}_API_KEY`;
|
|
170
|
+
console.log(colors.red(`Failed to initialize ${provider}`));
|
|
171
|
+
console.log(colors.gray(`Make sure ${envVar} is set.`));
|
|
172
|
+
return null;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
async function handleCommand(input, currentProvider, client, context, rl, onExit, onSwitch) {
|
|
176
|
+
const parts = input.slice(1).split(/\s+/);
|
|
177
|
+
const cmd = parts[0].toLowerCase();
|
|
178
|
+
const args = parts.slice(1);
|
|
179
|
+
switch (cmd) {
|
|
180
|
+
case 'exit':
|
|
181
|
+
case 'quit':
|
|
182
|
+
case 'q':
|
|
183
|
+
onExit();
|
|
184
|
+
return true;
|
|
185
|
+
case 'clear':
|
|
186
|
+
if (client?.hasAI) {
|
|
187
|
+
client.ai.clearMemory();
|
|
188
|
+
console.log(colors.green(`Memory cleared for ${currentProvider}`));
|
|
189
|
+
}
|
|
190
|
+
rl.prompt();
|
|
191
|
+
return true;
|
|
192
|
+
case 'switch':
|
|
193
|
+
case 's':
|
|
194
|
+
const newProvider = args[0]?.toLowerCase();
|
|
195
|
+
if (!newProvider) {
|
|
196
|
+
console.log(colors.yellow('Usage: /switch <provider>'));
|
|
197
|
+
console.log(colors.gray(`Available: ${AI_PRESETS.join(', ')}`));
|
|
198
|
+
rl.prompt();
|
|
199
|
+
return true;
|
|
200
|
+
}
|
|
201
|
+
if (!AI_PRESETS.includes(newProvider)) {
|
|
202
|
+
console.log(colors.red(`Unknown provider: ${newProvider}`));
|
|
203
|
+
console.log(colors.gray(`Available: ${AI_PRESETS.join(', ')}`));
|
|
204
|
+
rl.prompt();
|
|
205
|
+
return true;
|
|
206
|
+
}
|
|
207
|
+
await onSwitch(newProvider);
|
|
208
|
+
return true;
|
|
209
|
+
case 'model':
|
|
210
|
+
case 'm':
|
|
211
|
+
const model = client?._aiConfig?.model || 'default';
|
|
212
|
+
console.log(colors.cyan(`Current model: ${model}`));
|
|
213
|
+
console.log(colors.gray('Note: Model is set by the preset configuration.'));
|
|
214
|
+
rl.prompt();
|
|
215
|
+
return true;
|
|
216
|
+
case 'memory':
|
|
217
|
+
case 'mem':
|
|
218
|
+
if (client?.hasAI) {
|
|
219
|
+
const memory = client.ai.getMemory();
|
|
220
|
+
const pairs = Math.floor(memory.length / 2);
|
|
221
|
+
console.log(colors.cyan(`Memory: ${pairs}/12 pairs (${memory.length} messages)`));
|
|
222
|
+
if (pairs > 0) {
|
|
223
|
+
console.log(colors.gray('Last exchange:'));
|
|
224
|
+
const lastTwo = memory.slice(-2);
|
|
225
|
+
for (const msg of lastTwo) {
|
|
226
|
+
const role = msg.role === 'user' ? colors.magenta('You') : colors.orange('AI');
|
|
227
|
+
const preview = msg.content.slice(0, 100) + (msg.content.length > 100 ? '...' : '');
|
|
228
|
+
console.log(` ${role}: ${colors.gray(preview)}`);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
rl.prompt();
|
|
233
|
+
return true;
|
|
234
|
+
case 'providers':
|
|
235
|
+
case 'list':
|
|
236
|
+
console.log(colors.cyan('Available AI providers:'));
|
|
237
|
+
for (const p of AI_PRESETS) {
|
|
238
|
+
const isActive = p === currentProvider ? colors.green(' (active)') : '';
|
|
239
|
+
const hasClient = context.aiClients.has(p) ? colors.gray(' [loaded]') : '';
|
|
240
|
+
console.log(` ${p}${isActive}${hasClient}`);
|
|
241
|
+
}
|
|
242
|
+
rl.prompt();
|
|
243
|
+
return true;
|
|
244
|
+
case 'help':
|
|
245
|
+
case 'h':
|
|
246
|
+
case '?':
|
|
247
|
+
printHelp();
|
|
248
|
+
rl.prompt();
|
|
249
|
+
return true;
|
|
250
|
+
default:
|
|
251
|
+
console.log(colors.yellow(`Unknown command: /${cmd}`));
|
|
252
|
+
console.log(colors.gray('Type /help for available commands'));
|
|
253
|
+
rl.prompt();
|
|
254
|
+
return true;
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
function handleError(error, provider) {
|
|
258
|
+
if (error.message?.includes('API key') || error.message?.includes('401') || error.message?.includes('Unauthorized')) {
|
|
259
|
+
const envVar = ENV_VAR_MAP[provider] || `${provider.toUpperCase()}_API_KEY`;
|
|
260
|
+
console.log(colors.red(`\nAuthentication error for ${provider}`));
|
|
261
|
+
console.log(colors.gray(`Set ${envVar} environment variable.`));
|
|
262
|
+
}
|
|
263
|
+
else if (error.message?.includes('429') || error.message?.includes('rate limit')) {
|
|
264
|
+
console.log(colors.yellow(`\nRate limited by ${provider}. Wait a moment and try again.`));
|
|
265
|
+
}
|
|
266
|
+
else {
|
|
267
|
+
console.log(colors.red(`\nError: ${error.message || error}`));
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
function printHeader(provider) {
|
|
271
|
+
console.log(colors.bold(colors.cyan('Rek AI Mode')));
|
|
272
|
+
console.log('');
|
|
273
|
+
console.log(` ${colors.gray('Provider:')} ${colors.white(provider)}`);
|
|
274
|
+
console.log('');
|
|
275
|
+
console.log(` ${colors.green('/switch')} ${colors.gray('<provider>')} ${colors.gray('Change AI provider')}`);
|
|
276
|
+
console.log(` ${colors.green('/clear')} ${colors.gray('Clear conversation memory')}`);
|
|
277
|
+
console.log(` ${colors.green('/memory')} ${colors.gray('Show memory status')}`);
|
|
278
|
+
console.log(` ${colors.green('/help')} ${colors.gray('Show all commands')}`);
|
|
279
|
+
console.log('');
|
|
280
|
+
console.log(` ${colors.yellow('ESC')} ${colors.gray('or')} ${colors.yellow('/exit')} ${colors.gray('Exit AI mode')}`);
|
|
281
|
+
console.log('');
|
|
282
|
+
console.log(colors.gray('─'.repeat(50)));
|
|
283
|
+
console.log('');
|
|
284
|
+
}
|
|
285
|
+
function printHelp() {
|
|
286
|
+
console.log(`
|
|
287
|
+
${colors.bold(colors.cyan('AI Mode Commands'))}
|
|
288
|
+
|
|
289
|
+
${colors.green('/switch <provider>')} Switch to another AI provider
|
|
290
|
+
${colors.green('/clear')} Clear conversation memory
|
|
291
|
+
${colors.green('/memory')} Show memory status
|
|
292
|
+
${colors.green('/model')} Show current model
|
|
293
|
+
${colors.green('/providers')} List available providers
|
|
294
|
+
${colors.green('/help')} Show this help
|
|
295
|
+
${colors.green('/exit')} Exit AI mode
|
|
296
|
+
|
|
297
|
+
${colors.bold('Shortcuts')}
|
|
298
|
+
${colors.gray('ESC')} Exit AI mode (or abort generation)
|
|
299
|
+
${colors.gray('Ctrl+C')} Exit AI mode
|
|
300
|
+
|
|
301
|
+
${colors.bold('Available Providers')}
|
|
302
|
+
${AI_PRESETS.join(', ')}
|
|
303
|
+
`);
|
|
304
|
+
}
|
package/dist/cli/tui/shell.js
CHANGED
|
@@ -623,12 +623,12 @@ export class RekShell {
|
|
|
623
623
|
}
|
|
624
624
|
async runAIChat(args) {
|
|
625
625
|
const provider = args[0] || 'openai';
|
|
626
|
-
const model = args[1];
|
|
627
|
-
const envKeyName = provider === 'openai' ? 'OPENAI_API_KEY' : 'ANTHROPIC_API_KEY';
|
|
628
|
-
const apiKey = this.variables[envKeyName] || process.env[envKeyName];
|
|
629
626
|
const { startAIChat } = await import('./ai-chat.js');
|
|
630
627
|
await this.runInteractiveMode(async (rl) => {
|
|
631
|
-
await startAIChat(rl, provider,
|
|
628
|
+
await startAIChat(rl, provider, {
|
|
629
|
+
aiClients: this.aiClients,
|
|
630
|
+
variables: this.variables
|
|
631
|
+
});
|
|
632
632
|
});
|
|
633
633
|
}
|
|
634
634
|
async runAIPresetChat(presetName, message) {
|
|
@@ -4474,21 +4474,22 @@ ${colors.bold('Network:')}
|
|
|
4474
4474
|
${colors.white('mode=realistic')} ${colors.gray('realistic | throughput | stress')}
|
|
4475
4475
|
${colors.white('http2=false')} ${colors.gray('Force HTTP/2')}
|
|
4476
4476
|
|
|
4477
|
-
${colors.green('chat <provider>')} Start AI Chat.
|
|
4478
|
-
${colors.gray('Providers:')} ${colors.white('openai')}, ${colors.white('anthropic')}
|
|
4479
|
-
${colors.gray('Arg:')} ${colors.white('model=...')} (optional)
|
|
4480
|
-
|
|
4481
4477
|
${colors.green('ws <url>')} Start interactive WebSocket session.
|
|
4482
4478
|
${colors.green('udp <url>')} Send UDP packet.
|
|
4483
4479
|
|
|
4484
4480
|
${colors.bold('AI Chat:')}
|
|
4485
|
-
${colors.green('
|
|
4486
|
-
|
|
4487
|
-
|
|
4488
|
-
|
|
4489
|
-
|
|
4490
|
-
${colors.green('
|
|
4491
|
-
${colors.gray('
|
|
4481
|
+
${colors.green('ai [provider]')} Enter AI mode (interactive conversation).
|
|
4482
|
+
${colors.gray('Default: openai. Use /switch to change provider.')}
|
|
4483
|
+
${colors.gray('Exit: ESC, Ctrl+C, or /exit')}
|
|
4484
|
+
${colors.gray('Commands: /switch, /clear, /memory, /help')}
|
|
4485
|
+
|
|
4486
|
+
${colors.green('@<provider> <msg>')} Quick AI message (inline, no mode switch).
|
|
4487
|
+
${colors.gray('Examples: @openai Hello!, @anthropic Explain this')}
|
|
4488
|
+
${colors.gray('Providers: openai, anthropic, groq, google, xai,')}
|
|
4489
|
+
${colors.gray(' mistral, cohere, deepseek, fireworks,')}
|
|
4490
|
+
${colors.gray(' together, perplexity')}
|
|
4491
|
+
|
|
4492
|
+
${colors.gray('Memory:')} ${colors.white('12 pairs (24 messages)')} per provider.
|
|
4492
4493
|
${colors.gray('Env:')} Set ${colors.white('OPENAI_API_KEY')}, ${colors.white('ANTHROPIC_API_KEY')}, etc.
|
|
4493
4494
|
${colors.green('ai:clear [preset]')} Clear AI memory (all or specific preset).
|
|
4494
4495
|
|
package/dist/scrape/spider.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtractedLink } from './types.js';
|
|
2
|
+
import { type SitemapUrl } from '../seo/validators/sitemap.js';
|
|
2
3
|
export interface SpiderOptions {
|
|
3
4
|
maxDepth?: number;
|
|
4
5
|
maxPages?: number;
|
|
@@ -10,6 +11,8 @@ export interface SpiderOptions {
|
|
|
10
11
|
include?: RegExp[];
|
|
11
12
|
userAgent?: string;
|
|
12
13
|
respectRobotsTxt?: boolean;
|
|
14
|
+
useSitemap?: boolean;
|
|
15
|
+
sitemapUrl?: string;
|
|
13
16
|
onPage?: (result: SpiderPageResult) => void;
|
|
14
17
|
onProgress?: (progress: SpiderProgress) => void;
|
|
15
18
|
}
|
|
@@ -29,6 +32,29 @@ export interface SpiderProgress {
|
|
|
29
32
|
currentUrl: string;
|
|
30
33
|
depth: number;
|
|
31
34
|
}
|
|
35
|
+
export interface SitemapAnalysis {
|
|
36
|
+
found: boolean;
|
|
37
|
+
url?: string;
|
|
38
|
+
totalUrls: number;
|
|
39
|
+
crawledFromSitemap: number;
|
|
40
|
+
orphanUrls: string[];
|
|
41
|
+
missingFromSitemap: string[];
|
|
42
|
+
blockedBySitemapRobots: string[];
|
|
43
|
+
validationIssues: Array<{
|
|
44
|
+
type: string;
|
|
45
|
+
message: string;
|
|
46
|
+
}>;
|
|
47
|
+
sitemapUrls: SitemapUrl[];
|
|
48
|
+
}
|
|
49
|
+
export interface RobotsAnalysis {
|
|
50
|
+
found: boolean;
|
|
51
|
+
sitemaps: string[];
|
|
52
|
+
blocksAll: boolean;
|
|
53
|
+
issues: Array<{
|
|
54
|
+
type: string;
|
|
55
|
+
message: string;
|
|
56
|
+
}>;
|
|
57
|
+
}
|
|
32
58
|
export interface SpiderResult {
|
|
33
59
|
startUrl: string;
|
|
34
60
|
pages: SpiderPageResult[];
|
|
@@ -38,6 +64,8 @@ export interface SpiderResult {
|
|
|
38
64
|
url: string;
|
|
39
65
|
error: string;
|
|
40
66
|
}>;
|
|
67
|
+
sitemap?: SitemapAnalysis;
|
|
68
|
+
robots?: RobotsAnalysis;
|
|
41
69
|
}
|
|
42
70
|
export declare class Spider {
|
|
43
71
|
private options;
|
|
@@ -51,8 +79,17 @@ export declare class Spider {
|
|
|
51
79
|
private running;
|
|
52
80
|
private aborted;
|
|
53
81
|
private pendingCount;
|
|
82
|
+
private sitemapUrls;
|
|
83
|
+
private sitemapUrlSet;
|
|
84
|
+
private robotsData;
|
|
85
|
+
private sitemapValidation;
|
|
86
|
+
private robotsValidation;
|
|
54
87
|
constructor(options?: SpiderOptions);
|
|
55
88
|
crawl(startUrl: string): Promise<SpiderResult>;
|
|
89
|
+
private fetchRobotsTxt;
|
|
90
|
+
private fetchSitemaps;
|
|
91
|
+
private buildSitemapAnalysis;
|
|
92
|
+
private buildRobotsAnalysis;
|
|
56
93
|
private crawlPage;
|
|
57
94
|
abort(): void;
|
|
58
95
|
isRunning(): boolean;
|
package/dist/scrape/spider.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { createClient } from '../core/client.js';
|
|
2
2
|
import { ScrapeDocument } from './document.js';
|
|
3
3
|
import { RequestPool } from '../utils/request-pool.js';
|
|
4
|
+
import { discoverSitemaps, fetchAndValidateSitemap, } from '../seo/validators/sitemap.js';
|
|
5
|
+
import { fetchAndValidateRobotsTxt, isPathAllowed, } from '../seo/validators/robots.js';
|
|
4
6
|
const TRACKING_PARAMS = new Set([
|
|
5
7
|
'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
|
|
6
8
|
'gclid', 'gclsrc', 'dclid',
|
|
@@ -84,6 +86,11 @@ export class Spider {
|
|
|
84
86
|
running = false;
|
|
85
87
|
aborted = false;
|
|
86
88
|
pendingCount = 0;
|
|
89
|
+
sitemapUrls = [];
|
|
90
|
+
sitemapUrlSet = new Set();
|
|
91
|
+
robotsData = null;
|
|
92
|
+
sitemapValidation = null;
|
|
93
|
+
robotsValidation = null;
|
|
87
94
|
constructor(options = {}) {
|
|
88
95
|
this.options = {
|
|
89
96
|
maxDepth: options.maxDepth ?? 5,
|
|
@@ -94,6 +101,8 @@ export class Spider {
|
|
|
94
101
|
delay: options.delay ?? 100,
|
|
95
102
|
userAgent: options.userAgent ?? 'Recker Spider/1.0',
|
|
96
103
|
respectRobotsTxt: options.respectRobotsTxt ?? true,
|
|
104
|
+
useSitemap: options.useSitemap ?? false,
|
|
105
|
+
sitemapUrl: options.sitemapUrl,
|
|
97
106
|
exclude: options.exclude,
|
|
98
107
|
include: options.include,
|
|
99
108
|
onPage: options.onPage,
|
|
@@ -117,6 +126,7 @@ export class Spider {
|
|
|
117
126
|
async crawl(startUrl) {
|
|
118
127
|
const startTime = performance.now();
|
|
119
128
|
const normalizedStart = normalizeUrl(startUrl);
|
|
129
|
+
const baseUrl = new URL(normalizedStart).origin;
|
|
120
130
|
this.baseHost = new URL(normalizedStart).hostname;
|
|
121
131
|
this.visited.clear();
|
|
122
132
|
this.queue = [];
|
|
@@ -125,8 +135,19 @@ export class Spider {
|
|
|
125
135
|
this.running = true;
|
|
126
136
|
this.aborted = false;
|
|
127
137
|
this.pendingCount = 0;
|
|
138
|
+
this.sitemapUrls = [];
|
|
139
|
+
this.sitemapUrlSet.clear();
|
|
140
|
+
this.robotsData = null;
|
|
141
|
+
this.sitemapValidation = null;
|
|
142
|
+
this.robotsValidation = null;
|
|
143
|
+
if (this.options.respectRobotsTxt || this.options.useSitemap) {
|
|
144
|
+
await this.fetchRobotsTxt(baseUrl);
|
|
145
|
+
}
|
|
146
|
+
if (this.options.useSitemap) {
|
|
147
|
+
await this.fetchSitemaps(baseUrl);
|
|
148
|
+
}
|
|
128
149
|
const pending = new Map();
|
|
129
|
-
const scheduleUrl = (item) => {
|
|
150
|
+
const scheduleUrl = (item, fromSitemap = false) => {
|
|
130
151
|
const normalized = normalizeUrl(item.url);
|
|
131
152
|
if (this.visited.has(normalized))
|
|
132
153
|
return;
|
|
@@ -136,6 +157,17 @@ export class Spider {
|
|
|
136
157
|
return;
|
|
137
158
|
if (this.results.length + pending.size >= this.options.maxPages)
|
|
138
159
|
return;
|
|
160
|
+
if (this.options.respectRobotsTxt && this.robotsData) {
|
|
161
|
+
try {
|
|
162
|
+
const urlPath = new URL(normalized).pathname;
|
|
163
|
+
if (!isPathAllowed(this.robotsData, urlPath, this.options.userAgent)) {
|
|
164
|
+
return;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
catch {
|
|
168
|
+
return;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
139
171
|
this.visited.add(normalized);
|
|
140
172
|
this.pendingCount++;
|
|
141
173
|
const promise = this.pool.run(() => this.crawlPage({ ...item, url: normalized }))
|
|
@@ -146,6 +178,18 @@ export class Spider {
|
|
|
146
178
|
pending.set(normalized, promise);
|
|
147
179
|
};
|
|
148
180
|
scheduleUrl({ url: normalizedStart, depth: 0 });
|
|
181
|
+
if (this.options.useSitemap && this.sitemapUrls.length > 0) {
|
|
182
|
+
for (const sitemapUrl of this.sitemapUrls) {
|
|
183
|
+
try {
|
|
184
|
+
const urlHost = new URL(sitemapUrl.loc).hostname;
|
|
185
|
+
if (urlHost === this.baseHost) {
|
|
186
|
+
scheduleUrl({ url: sitemapUrl.loc, depth: 1 }, true);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
catch {
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
149
193
|
while ((pending.size > 0 || this.queue.length > 0) && !this.aborted) {
|
|
150
194
|
while (this.queue.length > 0 && !this.aborted) {
|
|
151
195
|
const item = this.queue.shift();
|
|
@@ -161,12 +205,154 @@ export class Spider {
|
|
|
161
205
|
await Promise.all(pending.values());
|
|
162
206
|
}
|
|
163
207
|
this.running = false;
|
|
208
|
+
const sitemapAnalysis = this.buildSitemapAnalysis();
|
|
209
|
+
const robotsAnalysis = this.buildRobotsAnalysis();
|
|
164
210
|
return {
|
|
165
211
|
startUrl: normalizedStart,
|
|
166
212
|
pages: this.results,
|
|
167
213
|
visited: this.visited,
|
|
168
214
|
duration: Math.round(performance.now() - startTime),
|
|
169
215
|
errors: this.errors,
|
|
216
|
+
sitemap: this.options.useSitemap ? sitemapAnalysis : undefined,
|
|
217
|
+
robots: robotsAnalysis,
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
async fetchRobotsTxt(baseUrl) {
|
|
221
|
+
try {
|
|
222
|
+
const fetcher = async (url) => {
|
|
223
|
+
const response = await this.client.get(url);
|
|
224
|
+
return {
|
|
225
|
+
status: response.status,
|
|
226
|
+
text: await response.text(),
|
|
227
|
+
};
|
|
228
|
+
};
|
|
229
|
+
const result = await fetchAndValidateRobotsTxt(baseUrl, fetcher);
|
|
230
|
+
if (result.exists) {
|
|
231
|
+
this.robotsData = result.parseResult;
|
|
232
|
+
this.robotsValidation = {
|
|
233
|
+
found: true,
|
|
234
|
+
issues: result.issues.map(i => ({ type: i.type, message: i.message })),
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
else {
|
|
238
|
+
this.robotsValidation = {
|
|
239
|
+
found: false,
|
|
240
|
+
issues: [{ type: 'info', message: 'robots.txt not found' }],
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
catch (error) {
|
|
245
|
+
this.robotsValidation = {
|
|
246
|
+
found: false,
|
|
247
|
+
issues: [{ type: 'error', message: `Failed to fetch robots.txt: ${error instanceof Error ? error.message : 'Unknown error'}` }],
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
async fetchSitemaps(baseUrl) {
|
|
252
|
+
const fetcher = async (url) => {
|
|
253
|
+
const response = await this.client.get(url);
|
|
254
|
+
return {
|
|
255
|
+
status: response.status,
|
|
256
|
+
text: await response.text(),
|
|
257
|
+
headers: Object.fromEntries([...response.headers.entries()]),
|
|
258
|
+
};
|
|
259
|
+
};
|
|
260
|
+
try {
|
|
261
|
+
let sitemapUrls = [];
|
|
262
|
+
if (this.options.sitemapUrl) {
|
|
263
|
+
sitemapUrls = [this.options.sitemapUrl];
|
|
264
|
+
}
|
|
265
|
+
else if (this.robotsData?.sitemaps.length) {
|
|
266
|
+
sitemapUrls = this.robotsData.sitemaps;
|
|
267
|
+
}
|
|
268
|
+
else {
|
|
269
|
+
sitemapUrls = await discoverSitemaps(baseUrl, undefined, fetcher);
|
|
270
|
+
}
|
|
271
|
+
for (const sitemapUrl of sitemapUrls) {
|
|
272
|
+
try {
|
|
273
|
+
const result = await fetchAndValidateSitemap(sitemapUrl, fetcher);
|
|
274
|
+
if (result.exists && result.parseResult.valid) {
|
|
275
|
+
this.sitemapValidation = result;
|
|
276
|
+
if (result.parseResult.type === 'sitemapindex') {
|
|
277
|
+
for (const childSitemap of result.parseResult.sitemaps) {
|
|
278
|
+
try {
|
|
279
|
+
const childResult = await fetchAndValidateSitemap(childSitemap.loc, fetcher);
|
|
280
|
+
if (childResult.exists && childResult.parseResult.urls.length > 0) {
|
|
281
|
+
this.sitemapUrls.push(...childResult.parseResult.urls);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
catch {
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
else {
|
|
289
|
+
this.sitemapUrls.push(...result.parseResult.urls);
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
catch {
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
for (const url of this.sitemapUrls) {
|
|
297
|
+
this.sitemapUrlSet.add(normalizeUrl(url.loc));
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
catch (error) {
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
buildSitemapAnalysis() {
|
|
304
|
+
const crawledUrls = new Set(this.results.map(r => normalizeUrl(r.url)));
|
|
305
|
+
const crawledFromSitemap = this.sitemapUrls.filter(u => crawledUrls.has(normalizeUrl(u.loc))).length;
|
|
306
|
+
const linkedUrls = new Set();
|
|
307
|
+
for (const page of this.results) {
|
|
308
|
+
for (const link of page.links) {
|
|
309
|
+
if (link.href) {
|
|
310
|
+
linkedUrls.add(normalizeUrl(link.href));
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
const orphanUrls = this.sitemapUrls
|
|
315
|
+
.filter(u => {
|
|
316
|
+
const normalized = normalizeUrl(u.loc);
|
|
317
|
+
return !linkedUrls.has(normalized) && crawledUrls.has(normalized);
|
|
318
|
+
})
|
|
319
|
+
.map(u => u.loc);
|
|
320
|
+
const missingFromSitemap = Array.from(crawledUrls)
|
|
321
|
+
.filter(url => !this.sitemapUrlSet.has(url));
|
|
322
|
+
const blockedBySitemapRobots = [];
|
|
323
|
+
if (this.robotsData) {
|
|
324
|
+
for (const sitemapUrl of this.sitemapUrls) {
|
|
325
|
+
try {
|
|
326
|
+
const urlPath = new URL(sitemapUrl.loc).pathname;
|
|
327
|
+
if (!isPathAllowed(this.robotsData, urlPath, this.options.userAgent)) {
|
|
328
|
+
blockedBySitemapRobots.push(sitemapUrl.loc);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
catch {
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
return {
|
|
336
|
+
found: this.sitemapUrls.length > 0,
|
|
337
|
+
url: this.sitemapValidation?.parseResult ? undefined : undefined,
|
|
338
|
+
totalUrls: this.sitemapUrls.length,
|
|
339
|
+
crawledFromSitemap,
|
|
340
|
+
orphanUrls,
|
|
341
|
+
missingFromSitemap,
|
|
342
|
+
blockedBySitemapRobots,
|
|
343
|
+
validationIssues: this.sitemapValidation?.issues.map(i => ({
|
|
344
|
+
type: i.type,
|
|
345
|
+
message: i.message,
|
|
346
|
+
})) || [],
|
|
347
|
+
sitemapUrls: this.sitemapUrls,
|
|
348
|
+
};
|
|
349
|
+
}
|
|
350
|
+
buildRobotsAnalysis() {
|
|
351
|
+
return {
|
|
352
|
+
found: this.robotsValidation?.found ?? false,
|
|
353
|
+
sitemaps: this.robotsData?.sitemaps ?? [],
|
|
354
|
+
blocksAll: this.robotsData?.blocksAllRobots ?? false,
|
|
355
|
+
issues: this.robotsValidation?.issues ?? [],
|
|
170
356
|
};
|
|
171
357
|
}
|
|
172
358
|
async crawlPage(item) {
|