docs-agent 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/LLM.js ADDED
@@ -0,0 +1,463 @@
+ /**
+  * A common interface to interact with AI models
+  * @example
+  * const llm = new LLM({ aiService: "anthropic", model: "claude-3-5-sonnet-20240620", maxTokens: 2048, temperature: 0 });
+  * const response = await llm.chat([{ role: "user", content: "Hello, world!" }]);
+  * console.log(response);
+  */
+ import { createHash } from "node:crypto";
+ import { Tiktoken } from "js-tiktoken/lite";
+ import o200k_base from "js-tiktoken/ranks/o200k_base";
+
+ /** AI SDKs */
+ import { generateText, generateObject } from "ai";
+ import { google } from "@ai-sdk/google";
+ import { anthropic } from "@ai-sdk/anthropic";
+ import { openai } from "@ai-sdk/openai";
+
+ import { registerOTel } from "@vercel/otel";
+
+ registerOTel({
+   serviceName: "docs-agent"
+ });
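+ // Note: registerOTel wires up OpenTelemetry so that the experimental_telemetry
+ // option passed to generateText/generateObject in chat() below has a tracer to
+ // report spans to.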
+ /** End AI SDKs */
+
+ class LLM {
+   static DEFAULT_MODELS = {
+     anthropic: "claude-3-5-sonnet-20240620",
+     openai: "gpt-4o-mini",
+     gemini: "gemini-2.0-flash",
+     ollama: "llama3.1:8b"
+   }
+
+   constructor(options) {
+     this.aiService = options?.aiService || process.env.PREFERRED_AI_SERVICE || "anthropic"; // e.g. openai, anthropic, ollama, etc.
+     this.model = options?.model || process.env.PREFERRED_AI_MODEL || "claude-3-5-sonnet-20240620";
+     // Use ?? for numeric options so an explicit 0 is not discarded as falsy
+     this.temperature = options?.temperature ?? process.env.AI_TEMPERATURE ?? 0;
+     this.maxTokens = options?.maxTokens ?? process.env.MAX_TOKENS ?? 2048;
+     this.timeout = options?.timeout ?? process.env.LLM_TIMEOUT ?? 60000;
+     this.cacheStore = options?.cacheStore || {};
+     this.maxInputTokens = options?.maxInputTokens ?? process.env.MAX_INPUT_TOKENS ?? 100000;
+     this.topP = options?.topP ?? process.env.AI_TOP_P ?? 0.95;
+   }
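+   // Configuration sketch (values are illustrative): every option above can
+   // also be supplied via environment variables, e.g.
+   //   PREFERRED_AI_SERVICE=openai PREFERRED_AI_MODEL=gpt-4o-mini AI_TEMPERATURE=0.2 node app.js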
+
+   getApiUrl(aiService) {
+     let apiUrl = null;
+     if (aiService === 'openai') {
+       apiUrl = "https://api.openai.com/v1/chat/completions";
+     } else if (aiService === 'anthropic') {
+       apiUrl = "https://api.anthropic.com/v1/messages";
+     } else if (aiService === 'ollama') {
+       apiUrl = process.env.OLLAMA_API_URL || "http://localhost:11434/api/generate";
+     } else if (aiService === 'gemini') {
+       apiUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
+     } else {
+       throw new Error('Invalid AI service specified. Use "openai", "anthropic", "ollama", or "gemini".');
+     }
+     return apiUrl;
+   }
+
+   getApiKey(aiService) {
+     let apiKey = null;
+     if (aiService === 'openai') {
+       apiKey = process.env.OPENAI_API_KEY;
+     } else if (aiService === 'anthropic') {
+       apiKey = process.env.ANTHROPIC_API_KEY;
+     } else if (aiService === 'ollama') {
+       apiKey = process.env.OLLAMA_API_KEY;
+     } else if (aiService === 'gemini') {
+       apiKey = process.env.GEMINI_API_KEY;
+     } else {
+       throw new Error('Invalid AI service specified. Use "openai", "anthropic", "ollama", or "gemini".');
+     }
+     return apiKey;
+   }
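+   // Per-service API keys read from the environment (names taken from getApiKey
+   // above): OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, and
+   // OLLAMA_API_KEY (the last is typically unset for a local Ollama instance).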
+
+   /**
+    * Chat with the LLM
+    * @param {Array} conversationHistory - The conversation history
+    * @param {Object} llmOptions - The LLM options
+    * @returns {Promise<string>} - The response from the LLM
+    */
+   async chatWithoutTracing(conversationHistory, llmOptions = {}) {
+     let requestBody, headers;
+     let aiService = llmOptions?.aiService || this.aiService;
+     let model = llmOptions?.model || this.model;
+     const apiUrl = this.getApiUrl(aiService);
+     const apiKey = this.getApiKey(aiService);
+     const maxInputTokens = llmOptions?.maxInputTokens || this.maxInputTokens;
+     const estimatedTokens = this.estimateTokens(conversationHistory?.map(message => message?.content)?.join("\n"));
+     console.log("Estimated LLM input tokens:", estimatedTokens);
+     if (estimatedTokens > maxInputTokens) {
+       throw new Error("Input tokens exceed the maximum limit of " + maxInputTokens);
+     }
+     requestBody = {
+       model: model
+     };
+     if (requestBody.model?.startsWith("o")) {
+       // Reasoning model parameters
+       requestBody.max_completion_tokens = Number(llmOptions?.maxCompletionTokens || this.maxCompletionTokens || llmOptions?.maxTokens || this.maxTokens);
+       requestBody.reasoning_effort = llmOptions?.reasoningEffort || this.reasoningEffort || "medium";
+     } else {
+       requestBody.max_tokens = Number(llmOptions?.maxTokens || this.maxTokens);
+       requestBody.temperature = Number(llmOptions?.temperature ?? this.temperature);
+       requestBody.top_p = Number(llmOptions?.topP || this.topP || 0.95);
+     }
+     if (aiService === 'anthropic') {
+       if (!apiKey) throw new Error("Anthropic API key is not set");
+       const { system, messages } = this.formatMessageForAnthropic(conversationHistory);
+       if (system) requestBody.system = system;
+       requestBody.messages = messages;
+       headers = {
+         'Content-Type': 'application/json',
+         'x-api-key': `${apiKey}`,
+         'anthropic-version': '2023-06-01',
+       };
+     } else if (aiService === 'openai' || aiService === 'ollama' || aiService === 'gemini') {
+       requestBody.messages = conversationHistory;
+       headers = {
+         'Content-Type': 'application/json',
+         'Authorization': `Bearer ${apiKey}`,
+       };
+     } else {
+       throw new Error('Invalid provider specified. Use "anthropic", "openai", "gemini", or "ollama".');
+     }
+     if (llmOptions?.tools) {
+       requestBody.tools = llmOptions.tools;
+     }
+     if (llmOptions?.response_format) {
+       requestBody.response_format = llmOptions.response_format;
+     }
+     try {
+       const { data, statusCode } = await this.requestWrapper(apiUrl, requestBody, headers);
+       /**
+        * Example OpenAI chat completion response:
+        * {
+        *   "id": "chatcmpl-123456",
+        *   "object": "chat.completion",
+        *   "created": 1728933352,
+        *   "model": "gpt-4o-2024-08-06",
+        *   "choices": [
+        *     {
+        *       "index": 0,
+        *       "message": {
+        *         "role": "assistant",
+        *         "content": "Hi there! How can I assist you today?",
+        *         "refusal": null
+        *       },
+        *       "logprobs": null,
+        *       "finish_reason": "stop"
+        *     }
+        *   ],
+        *   "usage": {
+        *     "prompt_tokens": 19,
+        *     "completion_tokens": 10,
+        *     "total_tokens": 29,
+        *     "prompt_tokens_details": {
+        *       "cached_tokens": 0
+        *     },
+        *     "completion_tokens_details": {
+        *       "reasoning_tokens": 0,
+        *       "accepted_prediction_tokens": 0,
+        *       "rejected_prediction_tokens": 0
+        *     }
+        *   },
+        *   "system_fingerprint": "fp_6b68a8204b"
+        * }
+        */
+       console.log("LLM chat status code:", statusCode, data?.error?.message);
+       if ([429, 529].includes(statusCode)) {
+         return await this.retryChatWithAlternateService(conversationHistory, llmOptions);
+       }
+       if (data?.error || (Array.isArray(data) && data[0]?.error)) {
+         throw new Error(data?.error?.message || data[0]?.error?.message);
+       }
+       if (statusCode !== 200) {
+         // TODO: Handle other status codes
+       }
+       let content = null;
+       switch (aiService) {
+         case 'anthropic':
+           content = this.parseAnthropicChatCompletion(data, llmOptions?.tools);
+           break;
+         case 'openai':
+           content = this.parseOpenAIChatCompletion(data, llmOptions?.tools);
+           break;
+         case 'gemini':
+           content = this.parseGeminiChatCompletion(data, llmOptions?.tools);
+           break;
+         case 'ollama':
+           content = this.parseOllamaChatCompletion(data, llmOptions?.tools);
+           break;
+       }
+       return content;
+     } catch (error) {
+       console.error(`Error calling ${aiService} API:`, error);
+       return await this.parseError(null, error);
+     }
+   }
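+   // Tool-call sketch (the get_weather tool is hypothetical; the shape follows
+   // the OpenAI chat-completions tools format, which this method forwards verbatim):
+   //   const out = await llm.chatWithoutTracing(history, {
+   //     aiService: "openai",
+   //     tools: [{ type: "function", function: { name: "get_weather",
+   //       parameters: { type: "object", properties: { city: { type: "string" } } } } }]
+   //   });
+   //   // With tools set, parseOpenAIChatCompletion returns { content, toolCalls }.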
+
+   /**
+    * Chat with the LLM, with telemetry support by default
+    * @param {Array<Object>} conversationHistory - The conversation history
+    * @param {Object} llmOptions - The LLM options
+    * @param {Object} telemetryOptions - The telemetry options; if not provided, the default telemetry options will be used
+    * @param {boolean} telemetryOptions.isEnabled - Whether to enable telemetry
+    * @param {string} telemetryOptions.functionId - The function ID to be used for telemetry
+    * @param {Object} telemetryOptions.metadata - Additional metadata for telemetry
+    * @returns {Promise<string|Object>} - The response from the LLM (an object when responseFormat is "json")
+    */
+   async chat(conversationHistory, llmOptions = {}, telemetryOptions) {
+     let aiService = llmOptions?.aiService || this.aiService;
+     let model = llmOptions?.model || this.model;
+     let modelProvider = this.getAIModelProvider(aiService, model);
+     let advancedModelConfig = {};
+     if (!modelProvider?.modelId?.startsWith("o")) {
+       advancedModelConfig = {
+         maxTokens: Number(llmOptions?.maxTokens || this.maxTokens),
+         temperature: Number(llmOptions?.temperature ?? this.temperature),
+         topP: Number(llmOptions?.topP || this.topP || 0.95)
+       }
+     }
+     const aiOptions = {
+       model: modelProvider,
+       messages: conversationHistory,
+       experimental_telemetry: telemetryOptions || {
+         isEnabled: true,
+         functionId: "docs-agent"
+       },
+       ...advancedModelConfig
+     }
+     if (llmOptions?.responseFormat === "json") {
+       aiOptions.output = "object";
+       aiOptions.schema = llmOptions?.schema;
+       const { object } = await generateObject(aiOptions);
+       return object;
+     }
+     const { text, usage } = await generateText(aiOptions);
+     if (typeof text !== 'string') {
+       console.error('generateText did not return a string:', text);
+     }
+     console.log('Usage:', usage);
+     return text;
+   }
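+   // Structured-output sketch: with responseFormat "json" the call goes through
+   // generateObject and resolves to a plain object. mySchema is a hypothetical
+   // zod schema the caller defines:
+   //   import { z } from "zod";
+   //   const mySchema = z.object({ title: z.string(), tags: z.array(z.string()) });
+   //   const doc = await llm.chat(messages, { responseFormat: "json", schema: mySchema });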
+
+   getAIModelProvider(aiService, model) {
+     let modelProvider;
+     if (aiService === "anthropic") {
+       modelProvider = anthropic(model);
+     } else if (aiService === "openai") {
+       modelProvider = openai(model);
+     } else if (aiService === "gemini") {
+       modelProvider = google(model);
+     } else {
+       throw new Error("Invalid AI service specified. Use 'anthropic', 'openai', or 'gemini'.");
+     }
+     return modelProvider;
+   }
+
+   async retryChatWithAlternateService(conversationHistory, llmOptions = {}) {
+     console.log("LLM out of service:", llmOptions.aiService || this.aiService);
+     this.llmOutOfService = this.llmOutOfService || [];
+     this.llmOutOfService.push(llmOptions.aiService || this.aiService);
+     for (let aiService in LLM.DEFAULT_MODELS) {
+       if (!this.llmOutOfService.includes(aiService)) {
+         console.log("Switching LLM service to:", aiService, LLM.DEFAULT_MODELS[aiService]);
+         // Copy into a new object so the caller's llmOptions are not mutated
+         let newLLMOptions = Object.assign({}, llmOptions, {
+           aiService: aiService,
+           model: LLM.DEFAULT_MODELS[aiService]
+         });
+         return this.chatWithoutTracing(conversationHistory, newLLMOptions);
+       }
+     }
+     throw new Error("No alternative model found");
+   }
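+   // Fallback order follows DEFAULT_MODELS insertion order
+   // (anthropic -> openai -> gemini -> ollama), skipping any service
+   // already recorded in llmOutOfService.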
+
+   /**
+    * A wrapper around fetch to cache and rate limit requests, with timeout support
+    * @param {string} apiUrl
+    * @param {Object} requestBody
+    * @param {Object} headers
+    * @param {number} retryCount - Number of retries attempted (internal use)
+    * @param {number} maxRetries - Maximum number of retries allowed
+    * @returns {Promise<{data: Object, statusCode: number}>}
+    */
+   async requestWrapper(apiUrl, requestBody, headers, retryCount = 0, maxRetries = 3) {
+     const startTime = Date.now();
+     const controller = new AbortController();
+     const timeoutMs = Number(this.timeout || 30000);
+     const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+     try {
+       const cacheKey = this.cache().createUniqueHashKey(apiUrl, requestBody, headers);
+       const cachedResponse = this.cache().get(cacheKey);
+       if (cachedResponse) {
+         return cachedResponse;
+       }
+
+       const response = await fetch(apiUrl, {
+         method: 'POST',
+         headers: headers,
+         body: JSON.stringify(requestBody),
+         signal: controller.signal
+       });
+
+       const data = await response.json();
+
+       // If we get a 429 or 529 error and haven't exceeded max retries, attempt retry with exponential backoff
+       if ([429, 529].includes(response.status) && retryCount < maxRetries) {
+         // Exponential backoff starting at 8s (2000 * 2^(retryCount + 2)), capped at 60 seconds
+         const baseDelay = Math.min(2000 * Math.pow(2, retryCount + 2), 60000);
+
+         // Add random jitter between 0.5x and 1.5x
+         const jitter = 0.5 + Math.random();
+         const backoffDelay = Math.round(baseDelay * jitter);
+
+         console.log(`API ${response.status === 429 ? 'rate limited' : 'overloaded'}. Retrying in ${backoffDelay / 1000} seconds... (Attempt ${retryCount + 1}/${maxRetries})`);
+
+         await new Promise(resolve => setTimeout(resolve, backoffDelay));
+         return this.requestWrapper(apiUrl, requestBody, headers, retryCount + 1, maxRetries);
+       }
+
+       const result = { data, statusCode: response.status };
+       // Cache the parsed result so cache hits return the same shape as fresh requests
+       this.cache().set(cacheKey, result);
+       const totalTime = Date.now() - startTime; // Calculate total time taken
+       console.log(`Request to ${apiUrl} completed in ${totalTime} ms`); // Log the total time
+       return result;
+     } catch (error) {
+       const totalTime = Date.now() - startTime; // Calculate total time taken
+       console.log(`Request to ${apiUrl} failed in ${totalTime} ms`);
+       console.log("Timeout was set to", timeoutMs + "ms");
+       if (error.name === 'AbortError') {
+         console.error(`Request to ${apiUrl} timed out`);
+       }
+       console.error(`Error in request to ${apiUrl}:`, error);
+       throw error;
+     } finally {
+       clearTimeout(timeoutId);
+     }
+   }
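+   // Backoff schedule produced above (before jitter): attempt 0 -> 8s,
+   // attempt 1 -> 16s, attempt 2 -> 32s, each scaled by a random factor
+   // in [0.5, 1.5) and capped at 60s.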
+
+   /**
+    * Parse errors from various LLM APIs to create uniform error communication
+    * @param {number|null} statusCode
+    * @param {*} error
+    * @reference https://platform.openai.com/docs/guides/error-codes/api-error-codes
+    * @reference https://docs.anthropic.com/en/api/errors
+    */
+   parseError(statusCode, error) {
+     switch (statusCode) {
+       case 400:
+         console.error("Bad request");
+         throw new Error(error?.message || "Bad request");
+       case 401:
+         console.error("Invalid API Key");
+         throw new Error(error?.message || "Invalid API Key");
+       case 403:
+         throw new Error(error?.message || "You are not authorized to access this resource");
+       case 404:
+         throw new Error(error?.message || "Not found");
+       case 429:
+         throw new Error(error?.message || "Rate limit exceeded");
+       case 500:
+         throw new Error(error?.message || "Internal server error");
+       case 503:
+         throw new Error(error?.message || "Service unavailable");
+       case 529:
+         throw new Error(error?.message || "API temporarily overloaded");
+       default:
+         throw new Error(error?.message || "Unknown error");
+     }
+   }
+
+   /**
+    * Converts the messages array to the format required by Anthropic
+    * @param {Array} messages
+    * @returns {Object}
+    * @example
+    * const { system, messages } = AI.formatMessageForAnthropic(originalMessages);
+    * // originalMessages: [{ role: "system", content: "You are a helpful assistant." }, { role: "user", content: "Hello, world!" }]
+    * // system: "You are a helpful assistant."
+    * // messages: [{ role: "user", content: "Hello, world!" }]
+    */
+   formatMessageForAnthropic(messages) {
+     let system;
+     let messagesWithoutSystemMessage = [];
+     for (let i = 0; i < messages.length; i++) {
+       if (messages[i].role === "system" && messages[i].content) {
+         system = messages[i].content;
+       } else {
+         messagesWithoutSystemMessage.push(messages[i]);
+       }
+     }
+     return { system, messages: messagesWithoutSystemMessage };
+   }
+
+   /**
+    * Extracts the assistant message from an OpenAI chat completion response
+    * @param {*} data - The parsed response body
+    * @param {Array} [tools] - If provided, tool calls are returned alongside the content
+    * @returns {string|{content: string, toolCalls: Array}}
+    */
+   parseOpenAIChatCompletion(data, tools) {
+     if (tools) {
+       return { content: data?.choices?.[0]?.message?.content, toolCalls: data?.choices?.[0]?.message?.tool_calls };
+     }
+     return data?.choices?.[0]?.message?.content;
+   }
+
+   /**
+    * Extracts the assistant message from an Anthropic messages response
+    * @param {*} data - The parsed response body
+    * @returns {string}
+    */
+   parseAnthropicChatCompletion(data, tools) {
+     return data?.content?.[0]?.text;
+   }
+
+   /**
+    * Extracts the response text from an Ollama generate response
+    * @param {*} data - The parsed response body
+    * @returns {string}
+    */
+   parseOllamaChatCompletion(data, tools) {
+     return data?.response;
+   }
+
+   parseGeminiChatCompletion(data, tools) {
+     // Assuming we're calling the OpenAI-compatible endpoint https://ai.google.dev/gemini-api/docs/openai
+     return data?.choices?.[0]?.message?.content;
+   }
+
+   cache() {
+     return {
+       get: (key) => {
+         return this.cacheStore[key] || null;
+       },
+       set: (key, value) => {
+         this.cacheStore[key] = value;
+       },
+       clear: () => {
+         this.cacheStore = {};
+       },
+       createUniqueHashKey(apiUrl, requestBody, headers) {
+         const hash = createHash('sha256');
+         hash.update(apiUrl);
+         if (requestBody) hash.update(JSON.stringify(requestBody));
+         if (headers) hash.update(JSON.stringify(headers));
+         return hash.digest('hex');
+       }
+     }
+   }
+
+   /**
+    * Estimate the number of tokens in a text
+    * @param {string} text
+    * @returns {number}
+    */
+   estimateTokens(text) {
+     const enc = new Tiktoken(o200k_base);
+     return enc.encode(text).length;
+   }
+ }
+ export default LLM;
@@ -0,0 +1,190 @@
+ /**
+  * URL validation utility for SSRF protection using allowlist approach
+  * Only allows URLs that are explicitly whitelisted via environment variable
+  */
+
+ /**
+  * Validates a webhook URL against the allowlist
+  * @param {string} url - The URL to validate
+  * @returns {string} - The validated URL
+  * @throws {Error} - If the URL is not in the allowlist
+  */
+ export function validateWebhookUrl(url) {
+   if (!url || typeof url !== 'string') {
+     throw new Error('Invalid URL: URL must be a non-empty string');
+   }
+
+   let parsedUrl;
+   try {
+     parsedUrl = new URL(url);
+   } catch (error) {
+     throw new Error(`Invalid URL format: ${error.message}`);
+   }
+
+   // Only allow HTTP and HTTPS protocols
+   if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
+     throw new Error(`Blocked protocol: Only HTTP and HTTPS are allowed, got ${parsedUrl.protocol}`);
+   }
+
+   // Disallow embedded credentials in the URL
+   if (parsedUrl.username || parsedUrl.password) {
+     throw new Error('Blocked URL: Credentials in URL are not allowed');
+   }
+
+   // Basic path traversal guard (normalize and compare)
+   // Note: the URL API already normalizes, but we defensively reject ".." segments
+   if (parsedUrl.pathname.split('/').some(seg => seg === '..')) {
+     throw new Error('Blocked URL: Path traversal (..) is not allowed');
+   }
+
+   // Get allowlist from environment variable
+   const allowlist = getAllowlist();
+
+   if (allowlist.length === 0) {
+     throw new Error('No webhook URLs are allowed. Set ALLOWED_WEBHOOK_URLS environment variable to enable webhooks.');
+   }
+
+   // Exact match only: URL must exactly match one of the allowlisted URLs
+   const isAllowed = allowlist.some(allowedUrl => {
+     try {
+       const allowedParsedUrl = new URL(allowedUrl);
+
+       // Exact match of the full normalized URL (scheme, host, path, query, fragment)
+       return parsedUrl.href === allowedParsedUrl.href;
+     } catch (error) {
+       // If allowedUrl is not a valid URL, skip it
+       return false;
+     }
+   });
+
+   if (!isAllowed) {
+     throw new Error(`URL not in allowlist: ${url}. Allowed URLs: ${allowlist.join(', ')}`);
+   }
+
+   // Return the canonical, validated URL (fragment removed)
+   parsedUrl.hash = "";
+   return parsedUrl.toString();
+ }
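+ // Usage sketch (hooks.example.com is illustrative):
+ //   ALLOWED_WEBHOOK_URLS="https://hooks.example.com/docs-agent" node app.js
+ //   validateWebhookUrl("https://hooks.example.com/docs-agent"); // returns the URL
+ //   validateWebhookUrl("https://evil.example.net/x");           // throws: URL not in allowlist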
+
+ /**
+  * Validates that a remote file URL is hosted on GitHub or GitLab
+  * @param {string} url - The URL to validate
+  * @returns {string} - The canonical validated URL (string)
+  * @throws {Error} - If the URL is not GitHub/GitLab or invalid
+  */
+ export function validateGithubOrGitlabUrl(url) {
+   if (!url || typeof url !== 'string') {
+     throw new Error('Invalid URL: URL must be a non-empty string');
+   }
+
+   let parsedUrl;
+   try {
+     parsedUrl = new URL(url);
+   } catch (error) {
+     throw new Error(`Invalid URL format: ${error.message}`);
+   }
+
+   if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
+     throw new Error(`Blocked protocol: Only HTTP and HTTPS are allowed, got ${parsedUrl.protocol}`);
+   }
+
+   if (parsedUrl.username || parsedUrl.password) {
+     throw new Error('Blocked URL: Credentials in URL are not allowed');
+   }
+
+   const hostname = parsedUrl.hostname.toLowerCase();
+   // Built-in allowed hosts
+   const builtinAllowedHosts = new Set([
+     'github.com',
+     'raw.githubusercontent.com',
+     'gist.github.com',
+     'gitlab.com',
+   ]);
+
+   // Only allow custom remote hosts if REMOTE_FILE_ALLOW_CUSTOM_HOSTS is "true"
+   const allowCustomHosts = process.env.REMOTE_FILE_ALLOW_CUSTOM_HOSTS === "true";
+   // Env-driven custom domains support: REMOTE_FILE_ALLOWED_HOSTS (comma-separated)
+   // Supports exact hosts and wildcard entries like *.example.com
+   const { exactHosts, wildcardHosts } = allowCustomHosts ? getCustomRemoteHostsFromEnv() : { exactHosts: new Set(), wildcardHosts: [] };
+   if (!allowCustomHosts && process.env.REMOTE_FILE_ALLOWED_HOSTS && process.env.REMOTE_FILE_ALLOWED_HOSTS.trim() !== "") {
+     console.warn("Custom remote hosts in REMOTE_FILE_ALLOWED_HOSTS are disabled because REMOTE_FILE_ALLOW_CUSTOM_HOSTS is not set to 'true'. Only builtin hosts are used for remote file reads.");
+   }
+
+   // Check exact host allowlist
+   const isExactAllowed = builtinAllowedHosts.has(hostname) || exactHosts.has(hostname);
+
+   // Check wildcard hosts (entries are stored without the leading '*.')
+   const isWildcardAllowed = !isExactAllowed && wildcardHosts.some((base) => {
+     return hostname === base || hostname.endsWith('.' + base);
+   });
+
+   if (!isExactAllowed && !isWildcardAllowed) {
+     const allAllowed = Array.from(builtinAllowedHosts);
+     if (allowCustomHosts) {
+       allAllowed.push(...Array.from(exactHosts));
+       allAllowed.push(...wildcardHosts.map(w => `*.${w}`));
+     }
+     throw new Error(`URL host not allowed: ${hostname}. Allowed: ${allAllowed.join(', ')}`);
+   }
+
+   parsedUrl.hash = '';
+   return parsedUrl.toString();
+ }
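+ // Custom-host sketch (the example.com values are illustrative): wildcard
+ // entries match the base domain and any subdomain.
+ //   REMOTE_FILE_ALLOW_CUSTOM_HOSTS=true \
+ //   REMOTE_FILE_ALLOWED_HOSTS="git.example.com,*.pages.example.org" node app.js
+ //   validateGithubOrGitlabUrl("https://raw.githubusercontent.com/o/r/main/README.md"); // ok (builtin)
+ //   validateGithubOrGitlabUrl("https://docs.pages.example.org/file.md");               // ok (wildcard)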
+
+ /**
+  * Reads custom allowed remote hosts from env var REMOTE_FILE_ALLOWED_HOSTS
+  * Returns exact hosts and wildcard base domains (without the leading '*.')
+  */
+ function getCustomRemoteHostsFromEnv() {
+   const envVal = process.env.REMOTE_FILE_ALLOWED_HOSTS || '';
+   const parts = envVal
+     .split(',')
+     .map(v => v.trim())
+     .filter(Boolean);
+
+   const exactHosts = new Set();
+   const wildcardHosts = [];
+
+   for (const entry of parts) {
+     // Normalize entry by parsing as URL if it looks like one, otherwise treat as hostname
+     let hostCandidate = entry;
+     try {
+       if (entry.startsWith('http://') || entry.startsWith('https://')) {
+         hostCandidate = new URL(entry).hostname;
+       }
+     } catch (_) {
+       // ignore parse errors, treat as raw host
+     }
+     const lower = hostCandidate.toLowerCase();
+     if (lower.startsWith('*.')) {
+       const base = lower.slice(2);
+       if (base) {
+         wildcardHosts.push(base);
+       }
+     } else if (lower) {
+       exactHosts.add(lower);
+     }
+   }
+
+   return { exactHosts, wildcardHosts };
+ }
+
+ /**
+  * Gets the allowlist from environment variable
+  * @returns {string[]} - Array of allowed URLs
+  */
+ function getAllowlist() {
+   const allowlistEnv = process.env.ALLOWED_WEBHOOK_URLS;
+
+   if (!allowlistEnv) {
+     return [];
+   }
+
+   // Split by comma and trim whitespace
+   return allowlistEnv
+     .split(',')
+     .map(url => url.trim())
+     .filter(url => url.length > 0);
+ }