docs-agent 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/LICENSE +21 -0
- package/README.md +239 -0
- package/docs/DEPLOYMENT.md +142 -0
- package/docs/mcp-client-prompt.md +26 -0
- package/docs/reference.md +258 -0
- package/env.example +47 -0
- package/package.json +67 -0
- package/src/CodeSearch.js +125 -0
- package/src/DocsAgent.js +728 -0
- package/src/FileUtility.js +130 -0
- package/src/GitHubApi.js +337 -0
- package/src/LLM.js +463 -0
- package/src/UrlValidator.js +190 -0
- package/src/api.js +107 -0
- package/src/cli.js +0 -0
- package/src/config/principles.diataxis.js +28 -0
- package/src/config/principles.first.js +11 -0
- package/src/config/prompt.docs.vs.code.js +52 -0
- package/src/config/prompt.edit.disruptive.js +9 -0
- package/src/config/prompt.edit.js +40 -0
- package/src/config/prompt.extract.code.js +45 -0
- package/src/config/prompt.formatting.js +38 -0
- package/src/config/prompt.gen.referencedocs.js +181 -0
- package/src/config/prompt.linking.js +14 -0
- package/src/config/prompt.prioritize.js +24 -0
- package/src/config/prompt.relatedfiles.js +14 -0
- package/src/config/prompt.review.js +23 -0
- package/src/config/prompt.scoring.js +9 -0
- package/src/config/prompt.writing.js +13 -0
- package/src/config/rules.linking.js +10 -0
- package/src/index.js +49 -0
- package/src/lib.js +4 -0
- package/src/mcp.js +268 -0
package/src/LLM.js
ADDED
@@ -0,0 +1,463 @@
/**
 * A common interface to interact with AI models
 * @example
 * const llm = new LLM({ aiService: "anthropic", model: "claude-3-5-sonnet-20240620", maxTokens: 2048, temperature: 0 });
 * const response = await llm.chat([{ role: "user", content: "Hello, world!" }]);
 * console.log(response);
 */
import { createHash } from "node:crypto";
import { Tiktoken } from "js-tiktoken/lite";
import o200k_base from "js-tiktoken/ranks/o200k_base";

/** AI SDKs */
import { generateText, generateObject } from "ai";
import { google } from "@ai-sdk/google";
import { anthropic } from "@ai-sdk/anthropic";
import { openai } from "@ai-sdk/openai";

import { registerOTel } from '@vercel/otel';

registerOTel({
  serviceName: "docs-agent"
});
/** End AI SDKs */

class LLM {
  static DEFAULT_MODELS = {
    anthropic: "claude-3-5-sonnet-20240620",
    openai: "gpt-4o-mini",
    gemini: "gemini-2.0-flash",
    ollama: "llama3.1:8b"
  }
  constructor(options) {
    this.aiService = options?.aiService || process.env.PREFERRED_AI_SERVICE || "anthropic"; // e.g. openai, anthropic, ollama, etc.
    this.model = options?.model || process.env.PREFERRED_AI_MODEL || "claude-3-5-sonnet-20240620";
    this.temperature = options?.temperature ?? process.env.AI_TEMPERATURE ?? 0; // ?? so an explicit temperature of 0 is kept
    this.maxTokens = options?.maxTokens || process.env.MAX_TOKENS || 2048;
    this.timeout = options?.timeout || process.env.LLM_TIMEOUT || 60000;
    this.cacheStore = options?.cacheStore || {};
    this.maxInputTokens = options?.maxInputTokens || process.env.MAX_INPUT_TOKENS || 100000;
    this.topP = options?.topP ?? process.env.AI_TOP_P ?? 0.95; // ?? so an explicit topP of 0 is kept
  }
  getApiUrl(aiService) {
    let apiUrl = null;
    if (aiService === 'openai') {
      apiUrl = "https://api.openai.com/v1/chat/completions";
    } else if (aiService === 'anthropic') {
      apiUrl = "https://api.anthropic.com/v1/messages";
    } else if (aiService === 'ollama') {
      apiUrl = process.env.OLLAMA_API_URL || "http://localhost:11434/api/generate";
    } else if (aiService === 'gemini') {
      apiUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
    } else {
      throw new Error('Invalid AI service specified. Use "openai", "anthropic", "ollama", or "gemini".');
    }
    return apiUrl;
  }

  getApiKey(aiService) {
    let apiKey = null;
    if (aiService === 'openai') {
      apiKey = process.env.OPENAI_API_KEY;
    } else if (aiService === 'anthropic') {
      apiKey = process.env.ANTHROPIC_API_KEY;
    } else if (aiService === 'ollama') {
      apiKey = process.env.OLLAMA_API_KEY;
    } else if (aiService === 'gemini') {
      apiKey = process.env.GEMINI_API_KEY;
    } else {
      throw new Error('Invalid AI service specified. Use "openai", "anthropic", "ollama", or "gemini".');
    }
    return apiKey;
  }
  /**
   * Chat with the LLM
   * @param {Array} conversationHistory - The conversation history
   * @param {Object} llmOptions - The LLM options
   * @returns {Promise<string|Object>} - The response from the LLM (an object with content and toolCalls when tools are passed)
   */
  async chatWithoutTracing(conversationHistory, llmOptions = {}) {
    let requestBody, headers;
    let aiService = llmOptions?.aiService || this.aiService;
    let model = llmOptions?.model || this.model;
    const apiUrl = this.getApiUrl(aiService);
    const apiKey = this.getApiKey(aiService);
    const maxInputTokens = llmOptions?.maxInputTokens || this.maxInputTokens;
    const estimatedTokens = this.estimateTokens(conversationHistory?.map(message => message?.content)?.join("\n"));
    console.log("Estimated LLM input tokens:", estimatedTokens);
    if(estimatedTokens > maxInputTokens){
      throw new Error("Input tokens exceed the maximum limit of " + maxInputTokens);
    }
    requestBody = {
      model: model
    };
    if(requestBody.model?.startsWith("o")){
      // Reasoning model parameters
      requestBody.max_completion_tokens = Number(llmOptions?.maxCompletionTokens || this.maxCompletionTokens || llmOptions?.maxTokens || this.maxTokens);
      requestBody.reasoning_effort = llmOptions?.reasoningEffort || this.reasoningEffort || "medium";
    } else {
      requestBody.max_tokens = Number(llmOptions?.maxTokens || this.maxTokens);
      requestBody.temperature = Number(llmOptions?.temperature ?? this.temperature);
      requestBody.top_p = Number(llmOptions?.topP ?? this.topP ?? 0.95);
    }
    if (aiService === 'anthropic') {
      if(!apiKey) throw new Error("Anthropic API key is not set");
      const { system, messages } = this.formatMessageForAnthropic(conversationHistory);
      if(system) requestBody.system = system;
      requestBody.messages = messages;
      headers = {
        'Content-Type': 'application/json',
        'x-api-key': `${apiKey}`,
        'anthropic-version': '2023-06-01',
      };
    } else if (aiService === 'openai' || aiService === 'ollama' || aiService === 'gemini') {
      requestBody.messages = conversationHistory;
      headers = {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      };
    } else {
      throw new Error('Invalid provider specified. Use "anthropic", "openai", "gemini", or "ollama".');
    }
    if(llmOptions?.tools){
      requestBody.tools = llmOptions.tools;
    }
    if(llmOptions?.response_format){
      requestBody.response_format = llmOptions.response_format;
    }
    try {
      const { data, statusCode } = await this.requestWrapper(apiUrl, requestBody, headers);
      /**
       * OpenAI chat completion response
       * {
       *   "id": "chatcmpl-123456",
       *   "object": "chat.completion",
       *   "created": 1728933352,
       *   "model": "gpt-4o-2024-08-06",
       *   "choices": [
       *     {
       *       "index": 0,
       *       "message": {
       *         "role": "assistant",
       *         "content": "Hi there! How can I assist you today?",
       *         "refusal": null
       *       },
       *       "logprobs": null,
       *       "finish_reason": "stop"
       *     }
       *   ],
       *   "usage": {
       *     "prompt_tokens": 19,
       *     "completion_tokens": 10,
       *     "total_tokens": 29,
       *     "prompt_tokens_details": {
       *       "cached_tokens": 0
       *     },
       *     "completion_tokens_details": {
       *       "reasoning_tokens": 0,
       *       "accepted_prediction_tokens": 0,
       *       "rejected_prediction_tokens": 0
       *     }
       *   },
       *   "system_fingerprint": "fp_6b68a8204b"
       * }
       */
      console.log("LLM chat status code:", statusCode, data?.error?.message);
      if([429, 529].includes(statusCode)){
        return await this.retryChatWithAlternateService(conversationHistory, llmOptions);
      }
      if(data?.error || (Array.isArray(data) && data[0]?.error)){
        throw new Error(data?.error?.message || data[0]?.error?.message);
      }
      if(statusCode !== 200){
        //TODO: Handle other status codes
      }
      let content = null;
      switch(aiService){
        case 'anthropic':
          content = this.parseAnthropicChatCompletion(data, llmOptions?.tools);
          break;
        case 'openai':
          content = this.parseOpenAIChatCompletion(data, llmOptions?.tools);
          break;
        case 'gemini':
          content = this.parseGeminiChatCompletion(data, llmOptions?.tools);
          break;
        case 'ollama':
          content = this.parseOllamaChatCompletion(data, llmOptions?.tools);
          break;
      }
      return content;
    } catch (error) {
      console.error(`Error calling ${aiService} API:`, error);
      return await this.parseError(null, error); // parseError always throws, re-surfacing a normalized error
    }
  }
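For illustration, a minimal sketch of calling this raw-fetch path directly with tools; the weather tool is a hypothetical example in the OpenAI function-calling format, and the `{ content, toolCalls }` shape comes from parseOpenAIChatCompletion below:

```js
const llm = new LLM({ aiService: "openai", model: "gpt-4o-mini" });
// "get_weather" is a hypothetical tool, not part of this package
const result = await llm.chatWithoutTracing(
  [{ role: "user", content: "What is the weather in Paris?" }],
  {
    tools: [{
      type: "function",
      function: {
        name: "get_weather",
        parameters: { type: "object", properties: { city: { type: "string" } } }
      }
    }]
  }
);
// When tools are passed, the OpenAI parser returns { content, toolCalls } instead of a plain string
console.log(result?.toolCalls);
```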
  /**
   * Chat with the LLM, with telemetry support by default
   * @param {Array<Object>} conversationHistory - The conversation history
   * @param {Object} llmOptions - The LLM options
   * @param {Object} telemetryOptions - The telemetry options; if not provided, the default telemetry options are used
   * @param {boolean} telemetryOptions.isEnabled - Whether to enable telemetry
   * @param {string} telemetryOptions.functionId - The function ID to be used for telemetry
   * @param {Object} telemetryOptions.metadata - Additional metadata for telemetry
   * @returns {Promise<string|Object>} - The response text, or a parsed object when llmOptions.responseFormat is "json"
   */
  async chat(conversationHistory, llmOptions = {}, telemetryOptions){
    let aiService = llmOptions?.aiService || this.aiService;
    let model = llmOptions?.model || this.model;
    let modelProvider = this.getAIModelProvider(aiService, model);
    let advancedModelConfig = {};
    if(!modelProvider?.modelId?.startsWith("o")){
      advancedModelConfig = {
        maxTokens: Number(llmOptions?.maxTokens || this.maxTokens),
        temperature: Number(llmOptions?.temperature ?? this.temperature),
        topP: Number(llmOptions?.topP ?? this.topP ?? 0.95)
      };
    }
    const aiOptions = {
      model: modelProvider,
      messages: conversationHistory,
      experimental_telemetry: telemetryOptions || {
        isEnabled: true,
        functionId: "docs-agent"
      },
      ...advancedModelConfig
    };
    // With a schema, use structured output; without one, fall back to generateText + JSON.parse below
    if(llmOptions?.responseFormat === "json" && llmOptions?.schema){
      aiOptions.output = "object";
      aiOptions.schema = llmOptions.schema;
      const { object } = await generateObject(aiOptions);
      return object;
    }
    const { text, usage } = await generateText(aiOptions);
    if (typeof text !== 'string') {
      console.error('generateText did not return a string:', text);
    }
    console.log('Usage:', usage);
    if(text && llmOptions?.responseFormat === "json"){
      return JSON.parse(text);
    }
    return text;
  }
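A sketch of the structured-output path; it assumes a Zod schema, which is the shape the AI SDK's generateObject accepts, and the schema here is purely illustrative:

```js
import { z } from "zod";

const llm = new LLM({ aiService: "openai", model: "gpt-4o-mini" });
// Illustrative schema; any Zod object works
const review = await llm.chat(
  [{ role: "user", content: "Rate this README for clarity from 1 to 5 and explain why." }],
  {
    responseFormat: "json",
    schema: z.object({ rating: z.number(), reason: z.string() })
  }
);
console.log(review.rating, review.reason); // already a parsed object, not a JSON string
```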
  getAIModelProvider(aiService, model){
    let modelProvider;
    if(aiService === "anthropic"){
      modelProvider = anthropic(model);
    } else if(aiService === "openai"){
      modelProvider = openai(model);
    } else if(aiService === "gemini"){
      modelProvider = google(model);
    } else {
      // Note: ollama has no AI SDK provider here; it is only reachable via chatWithoutTracing
      throw new Error("Invalid AI service specified. Use 'anthropic', 'openai', or 'gemini'.");
    }
    return modelProvider;
  }
  async retryChatWithAlternateService(conversationHistory, llmOptions = {}){
    console.log("LLM out of service:", llmOptions.aiService || this.aiService);
    this.llmOutOfService = this.llmOutOfService || [];
    this.llmOutOfService.push(llmOptions.aiService || this.aiService);
    for(let aiService in LLM.DEFAULT_MODELS){
      if(!this.llmOutOfService.includes(aiService)){
        console.log("Switching LLM service to:", aiService, LLM.DEFAULT_MODELS[aiService]);
        // Copy instead of mutating the caller's llmOptions
        let newLLMOptions = Object.assign({}, llmOptions, {
          aiService: aiService,
          model: LLM.DEFAULT_MODELS[aiService]
        });
        return this.chatWithoutTracing(conversationHistory, newLLMOptions);
      }
    }
    throw new Error("No alternative model found");
  }
  /**
   * A wrapper around fetch to cache and rate limit requests, with timeout support
   * @param {string} apiUrl
   * @param {Object} requestBody
   * @param {Object} headers
   * @param {number} retryCount - Number of retries attempted (internal use)
   * @param {number} maxRetries - Maximum number of retries allowed
   * @returns {Promise<{data: Object, statusCode: number}>}
   */
  async requestWrapper(apiUrl, requestBody, headers, retryCount = 0, maxRetries = 3) {
    const startTime = Date.now();
    const controller = new AbortController();
    const timeoutMs = Number(this.timeout || 30000);
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const cacheKey = this.cache().createUniqueHashKey(apiUrl, requestBody, headers);
      const cachedResponse = this.cache().get(cacheKey);
      if (cachedResponse) {
        return cachedResponse;
      }

      const response = await fetch(apiUrl, {
        method: 'POST',
        headers: headers,
        body: JSON.stringify(requestBody),
        signal: controller.signal
      });

      const data = await response.json();

      // If we get a 429 or 529 error and haven't exceeded max retries, retry with exponential backoff
      if ([429, 529].includes(response.status) && retryCount < maxRetries) {
        // Exponential backoff: 2000 * 2^(retryCount + 2), i.e. 8s, 16s, 32s, capped at 60 seconds
        const baseDelay = Math.min(2000 * Math.pow(2, retryCount + 2), 60000);

        // Add random jitter between 0.5x and 1.5x
        const jitter = 0.5 + Math.random();
        const backoffDelay = Math.round(baseDelay * jitter);

        console.log(`API ${response.status === 429 ? 'rate limited' : 'overloaded'}. Retrying in ${backoffDelay/1000} seconds... (Attempt ${retryCount + 1}/${maxRetries})`);

        await new Promise(resolve => setTimeout(resolve, backoffDelay));
        return this.requestWrapper(apiUrl, requestBody, headers, retryCount + 1, maxRetries);
      }

      // Cache the parsed result (not the Response object) so cache hits match the return shape
      const result = { data, statusCode: response.status };
      this.cache().set(cacheKey, result);
      const totalTime = Date.now() - startTime; // Calculate total time taken
      console.log(`Request to ${apiUrl} completed in ${totalTime} ms`);
      return result;
    } catch (error) {
      const totalTime = Date.now() - startTime; // Calculate total time taken
      console.log(`Request to ${apiUrl} failed in ${totalTime} ms`);
      console.log("Timeout was set to", timeoutMs + "ms");
      if (error.name === 'AbortError') {
        console.error(`Request to ${apiUrl} timed out`);
      }
      console.error(`Error in request to ${apiUrl}:`, error);
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }
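For intuition, a small sketch that reproduces the retry schedule above; before jitter the base delays are 8 s, 16 s, 32 s, each then scaled by 0.5-1.5x:

```js
for (let retryCount = 0; retryCount < 3; retryCount++) {
  const baseDelay = Math.min(2000 * Math.pow(2, retryCount + 2), 60000);
  console.log(`attempt ${retryCount + 1}: base ${baseDelay} ms, with jitter ${baseDelay * 0.5}-${baseDelay * 1.5} ms`);
}
// attempt 1: base 8000 ms, with jitter 4000-12000 ms
// attempt 2: base 16000 ms, with jitter 8000-24000 ms
// attempt 3: base 32000 ms, with jitter 16000-48000 ms
```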
  /**
   * Parse errors from various LLM APIs to create uniform error communication
   * @param {number} statusCode
   * @param {*} error
   * @reference https://platform.openai.com/docs/guides/error-codes/api-error-codes
   * @reference https://docs.anthropic.com/en/api/errors
   */
  parseError(statusCode, error){
    switch(statusCode){
      case 400:
        console.error("Bad request");
        throw new Error(error?.message || "Bad request");
      case 401:
        console.error("Invalid API Key");
        throw new Error(error?.message || "Invalid API Key");
      case 403:
        throw new Error(error?.message || "You are not authorized to access this resource");
      case 404:
        throw new Error(error?.message || "Not found");
      case 429:
        throw new Error(error?.message || "Rate limit exceeded");
      case 500:
        throw new Error(error?.message || "Internal server error");
      case 503:
        throw new Error(error?.message || "Service unavailable");
      case 529:
        throw new Error(error?.message || "API temporarily overloaded");
      default:
        throw new Error(error?.message || "Unknown error");
    }
  }
  /**
   * Converts the messages array to the format required by Anthropic
   * @param {Array} messages
   * @returns {Object}
   * @example
   * const { system, messages } = AI.formatMessageForAnthropic(originalMessages);
   * // originalMessages: [{ role: "system", content: "You are a helpful assistant." }, { role: "user", content: "Hello, world!" }]
   * // system: "You are a helpful assistant."
   * // messages: [{ role: "user", content: "Hello, world!" }]
   */
  formatMessageForAnthropic(messages){
    let system;
    let messagesWithoutSystemMessage = [];
    for(let i = 0; i < messages.length; i++){
      if(messages[i].role === "system" && messages[i].content){
        system = messages[i].content;
      }else{
        messagesWithoutSystemMessage.push(messages[i]);
      }
    }
    return { system, messages: messagesWithoutSystemMessage };
  }
  /**
   * Extracts the assistant message from an OpenAI-style chat completion
   * @param {*} data
   * @returns {string|{content: string, toolCalls: Array}}
   */
  parseOpenAIChatCompletion(data, tools){
    if(tools){
      return { content: data?.choices?.[0]?.message?.content, toolCalls: data?.choices?.[0]?.message?.tool_calls };
    }
    return data?.choices?.[0]?.message?.content;
  }

  /**
   * Extracts the assistant message from an Anthropic messages response
   * @param {*} data
   * @returns {string}
   */
  parseAnthropicChatCompletion(data, tools){
    // tools are currently ignored for Anthropic responses
    return data?.content?.[0]?.text;
  }

  /**
   * Extracts the response text from an Ollama generate response
   * @param {*} data
   * @returns {string}
   */
  parseOllamaChatCompletion(data, tools){
    return data?.response;
  }

  parseGeminiChatCompletion(data, tools){
    // Assuming we're calling the OpenAI-compatible endpoint https://ai.google.dev/gemini-api/docs/openai
    return data?.choices?.[0]?.message?.content;
  }
  cache(){
    return {
      get: (key) => {
        return this.cacheStore[key] || null;
      },
      set: (key, value) => {
        this.cacheStore[key] = value;
      },
      clear: () => {
        this.cacheStore = {};
      },
      createUniqueHashKey(apiUrl, requestBody, headers){
        const hash = createHash('sha256');
        hash.update(apiUrl);
        if(requestBody) hash.update(JSON.stringify(requestBody));
        if(headers) hash.update(JSON.stringify(headers));
        return hash.digest('hex');
      }
    }
  }
  /**
   * Estimate the number of tokens in a text
   * @param {string} text
   * @returns {number}
   */
  estimateTokens(text){
    const enc = new Tiktoken(o200k_base);
    return enc.encode(text).length;
  }
}

export default LLM;
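Because cache keys are a SHA-256 over URL, request body, and headers, a shared cacheStore lets separate instances reuse responses on the raw-fetch path; a minimal sketch:

```js
const sharedStore = {};
const llmA = new LLM({ aiService: "anthropic", cacheStore: sharedStore });
const llmB = new LLM({ aiService: "anthropic", cacheStore: sharedStore });

await llmA.chatWithoutTracing([{ role: "user", content: "Hello, world!" }]); // network request
await llmB.chatWithoutTracing([{ role: "user", content: "Hello, world!" }]); // same key, served from the shared cache
```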
package/src/UrlValidator.js
ADDED

@@ -0,0 +1,190 @@
/**
 * URL validation utility for SSRF protection using an allowlist approach
 * Only allows URLs that are explicitly allowlisted via environment variable
 */

/**
 * Validates a webhook URL against the allowlist
 * @param {string} url - The URL to validate
 * @returns {string} - The validated URL
 * @throws {Error} - If the URL is not in the allowlist
 */
export function validateWebhookUrl(url) {
  if (!url || typeof url !== 'string') {
    throw new Error('Invalid URL: URL must be a non-empty string');
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch (error) {
    throw new Error(`Invalid URL format: ${error.message}`);
  }

  // Only allow HTTP and HTTPS protocols
  if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
    throw new Error(`Blocked protocol: Only HTTP and HTTPS are allowed, got ${parsedUrl.protocol}`);
  }

  // Disallow embedded credentials in the URL
  if (parsedUrl.username || parsedUrl.password) {
    throw new Error('Blocked URL: Credentials in URL are not allowed');
  }

  // Basic path traversal guard (normalize and compare)
  // Note: the URL API already normalizes, but we defensively reject ".." segments
  if (parsedUrl.pathname.split('/').some(seg => seg === '..')) {
    throw new Error('Blocked URL: Path traversal (..) is not allowed');
  }

  // Get allowlist from environment variable
  const allowlist = getAllowlist();

  if (allowlist.length === 0) {
    throw new Error('No webhook URLs are allowed. Set ALLOWED_WEBHOOK_URLS environment variable to enable webhooks.');
  }

  // Exact match only: URL must exactly match one of the allowlisted URLs
  const isAllowed = allowlist.some(allowedUrl => {
    try {
      const allowedParsedUrl = new URL(allowedUrl);

      // Exact match of full URL (scheme, host, path, query)
      return parsedUrl.href === allowedParsedUrl.href;
    } catch (error) {
      // If allowedUrl is not a valid URL, skip it
      return false;
    }
  });

  if (!isAllowed) {
    throw new Error(`URL not in allowlist: ${url}. Allowed URLs: ${allowlist.join(', ')}`);
  }

  // Return the canonical, validated URL (fragment removed)
  parsedUrl.hash = "";
  return parsedUrl.toString();
}
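A usage sketch, assuming ALLOWED_WEBHOOK_URLS was exported before startup; the URLs are placeholders:

```js
// e.g. ALLOWED_WEBHOOK_URLS="https://hooks.example.com/docs-agent,https://ci.example.com/notify"
import { validateWebhookUrl } from "./UrlValidator.js";

validateWebhookUrl("https://hooks.example.com/docs-agent");    // ok: exact match
validateWebhookUrl("https://hooks.example.com/other");         // throws: not in allowlist
validateWebhookUrl("http://169.254.169.254/latest/meta-data"); // throws: classic SSRF target, not allowlisted
```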
/**
 * Validates that a remote file URL is hosted on GitHub or GitLab
 * @param {string} url - The URL to validate
 * @returns {string} - The canonical validated URL (string)
 * @throws {Error} - If the URL is not GitHub/GitLab or invalid
 */
export function validateGithubOrGitlabUrl(url){
  if (!url || typeof url !== 'string') {
    throw new Error('Invalid URL: URL must be a non-empty string');
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch (error) {
    throw new Error(`Invalid URL format: ${error.message}`);
  }

  if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
    throw new Error(`Blocked protocol: Only HTTP and HTTPS are allowed, got ${parsedUrl.protocol}`);
  }

  if (parsedUrl.username || parsedUrl.password) {
    throw new Error('Blocked URL: Credentials in URL are not allowed');
  }

  const hostname = parsedUrl.hostname.toLowerCase();
  // Built-in allowed hosts
  const builtinAllowedHosts = new Set([
    'github.com',
    'raw.githubusercontent.com',
    'gist.github.com',
    'gitlab.com',
  ]);

  // Only allow custom remote hosts if REMOTE_FILE_ALLOW_CUSTOM_HOSTS is "true"
  const allowCustomHosts = process.env.REMOTE_FILE_ALLOW_CUSTOM_HOSTS === "true";
  // Env-driven custom domains support: REMOTE_FILE_ALLOWED_HOSTS (comma-separated)
  // Supports exact hosts and wildcard entries like *.example.com
  const { exactHosts, wildcardHosts } = allowCustomHosts ? getCustomRemoteHostsFromEnv() : { exactHosts: new Set(), wildcardHosts: [] };
  if (!allowCustomHosts && (process.env.REMOTE_FILE_ALLOWED_HOSTS && process.env.REMOTE_FILE_ALLOWED_HOSTS.trim() !== "")) {
    console.warn("Custom remote hosts in REMOTE_FILE_ALLOWED_HOSTS are disabled because REMOTE_FILE_ALLOW_CUSTOM_HOSTS is not set to 'true'. Only built-in hosts are used for remote file reads.");
  }

  // Check the exact host allowlist
  const isExactAllowed = builtinAllowedHosts.has(hostname) || exactHosts.has(hostname);

  // Check wildcard hosts (stored without the leading '*.'; a wildcard also matches the base domain itself)
  const isWildcardAllowed = !isExactAllowed && wildcardHosts.some((base) => {
    return hostname === base || hostname.endsWith('.' + base);
  });

  if (!isExactAllowed && !isWildcardAllowed) {
    const allAllowed = Array.from(builtinAllowedHosts);
    if (allowCustomHosts) {
      allAllowed.push(...Array.from(exactHosts));
      allAllowed.push(...wildcardHosts.map(w => `*.${w}`));
    }
    throw new Error(`URL host not allowed: ${hostname}. Allowed: ${allAllowed.join(', ')}`);
  }

  parsedUrl.hash = '';
  return parsedUrl.toString();
}
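And a sketch of the custom-host path with placeholder domains; note both variables must be set, since REMOTE_FILE_ALLOWED_HOSTS alone is ignored:

```js
// REMOTE_FILE_ALLOW_CUSTOM_HOSTS="true"
// REMOTE_FILE_ALLOWED_HOSTS="git.internal.example.com,*.pages.example.com"
import { validateGithubOrGitlabUrl } from "./UrlValidator.js";

validateGithubOrGitlabUrl("https://raw.githubusercontent.com/owner/repo/main/README.md"); // ok: built-in host
validateGithubOrGitlabUrl("https://docs.pages.example.com/guide.md"); // ok: matches *.pages.example.com
validateGithubOrGitlabUrl("https://evil.example.org/file.md");        // throws: host not allowed
```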
/**
 * Reads custom allowed remote hosts from env var REMOTE_FILE_ALLOWED_HOSTS
 * Returns exact hosts and wildcard base domains (without the leading '*.')
 */
function getCustomRemoteHostsFromEnv(){
  const envVal = process.env.REMOTE_FILE_ALLOWED_HOSTS || '';
  const parts = envVal
    .split(',')
    .map(v => v.trim())
    .filter(Boolean);

  const exactHosts = new Set();
  const wildcardHosts = [];

  for (const entry of parts) {
    // Normalize entry by parsing as URL if it looks like one, otherwise treat as hostname
    let hostCandidate = entry;
    try {
      if (entry.startsWith('http://') || entry.startsWith('https://')) {
        hostCandidate = new URL(entry).hostname;
      }
    } catch (_) {
      // ignore parse errors, treat as raw host
    }
    const lower = hostCandidate.toLowerCase();
    if (lower.startsWith('*.')) {
      const base = lower.slice(2);
      if (base) {
        wildcardHosts.push(base);
      }
    } else if (lower) {
      exactHosts.add(lower);
    }
  }

  return { exactHosts, wildcardHosts };
}

/**
 * Gets the allowlist from environment variable
 * @returns {string[]} - Array of allowed URLs
 */
function getAllowlist() {
  const allowlistEnv = process.env.ALLOWED_WEBHOOK_URLS;

  if (!allowlistEnv) {
    return [];
  }

  // Split by comma and trim whitespace
  return allowlistEnv
    .split(',')
    .map(url => url.trim())
    .filter(url => url.length > 0);
}