docs-agent 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import { validateGithubOrGitlabUrl } from './UrlValidator.js';
4
+
5
+ // Default allowed directory: the ALLOWED_FILE_DIRECTORY environment variable when it is set to a non-empty value, otherwise the process's current working directory.
+ // NOTE(review): no function visible in this file reads this constant (readFile/writeFile derive their base path from process.cwd()) - confirm whether it was meant to be used there.
6
+ const DEFAULT_ALLOWED_DIRECTORY = process.env.ALLOWED_FILE_DIRECTORY || process.cwd();
7
+
8
/**
 * Report whether `candidate` is `directory` itself or located beneath it.
 * The comparison is done on whole path segments (via `path.relative`), so a
 * sibling such as `/app/public-secret` is NOT considered inside `/app/public`.
 * @param {string} candidate - Absolute path to test
 * @param {string} directory - Absolute path of the containing directory
 * @returns {boolean} - True when `candidate` is inside (or equal to) `directory`
 */
function isPathWithinDirectory(candidate, directory) {
  const relative = path.relative(directory, candidate);
  if (relative === '') {
    return true;
  }
  // On Windows a path on another drive yields an absolute "relative" path.
  if (path.isAbsolute(relative)) {
    return false;
  }
  return relative !== '..' && !relative.startsWith(`..${path.sep}`);
}

/**
 * Resolve symbolic links in `target`, returning null instead of throwing when
 * the path cannot be resolved (typically because it does not exist yet).
 * @param {string} target - Path to resolve
 * @returns {Promise<string|null>} - The real path, or null if resolution failed
 */
async function realpathOrNull(target) {
  try {
    return await fs.realpath(target);
  } catch {
    return null;
  }
}

/**
 * Safely resolve a file path to prevent directory traversal attacks
 *
 * In 'mcp' mode the path is only made absolute; no restriction is applied.
 * In 'api' mode the path must stay inside `basePath`, both lexically (after
 * ".." segments are collapsed) and after symbolic links are resolved. When the
 * target does not exist yet, its nearest existing ancestor directory is
 * checked instead so that a symlinked directory cannot be used to escape.
 * NOTE(review): a dangling symlink at the target itself is not detected.
 *
 * @param {string} filePath - The file path to resolve
 * @param {string} basePath - The base directory to resolve from
 * @param {string} accessMode - The access mode ('api' or 'mcp')
 * @returns {Promise<string>} - The safely resolved absolute path (the real path
 *   when the target exists, otherwise the normalized path)
 * @throws {Error} - If the path is outside the allowed directory
 */
async function safeResolvePath(filePath, basePath, accessMode = 'mcp') {
  // For MCP mode, allow access to any file in the system (no restrictions)
  if (accessMode === 'mcp') {
    return path.resolve(filePath);
  }

  const resolvedBase = path.resolve(basePath);
  const normalizedPath = path.resolve(resolvedBase, filePath);
  const traversalError = () =>
    new Error(`Path traversal detected: ${filePath} resolves outside allowed directory (${basePath})`);

  // Lexical check first: rejects "../" escapes without touching the file system.
  if (!isPathWithinDirectory(normalizedPath, resolvedBase)) {
    throw traversalError();
  }

  // Compare real paths against the REAL base, otherwise a base directory that
  // itself sits behind a symlink would never match.
  const realBase = await realpathOrNull(resolvedBase);
  if (realBase === null) {
    // The base cannot be resolved, so the lexical check above is all we can do.
    return normalizedPath;
  }

  const realTarget = await realpathOrNull(normalizedPath);
  if (realTarget !== null) {
    // This check lives outside any try/catch so the error cannot be swallowed.
    if (!isPathWithinDirectory(realTarget, realBase)) {
      throw traversalError();
    }
    return realTarget;
  }

  // Target does not exist (yet): walk up to the nearest existing ancestor and
  // make sure that directory still resolves inside the base.
  let ancestor = path.dirname(normalizedPath);
  let realAncestor = await realpathOrNull(ancestor);
  while (realAncestor === null && ancestor !== resolvedBase && path.dirname(ancestor) !== ancestor) {
    ancestor = path.dirname(ancestor);
    realAncestor = await realpathOrNull(ancestor);
  }
  if (realAncestor !== null && !isPathWithinDirectory(realAncestor, realBase)) {
    throw traversalError();
  }

  return normalizedPath;
}
45
+
46
/**
 * Read the content of the file
 *
 * Remote URLs (as decided by isRemoteFileUrl) are passed through
 * validateGithubOrGitlabUrl and then fetched over HTTP. A non-2xx response is
 * reported as an error instead of returning the error page as file content.
 * Local paths are resolved through safeResolvePath: in 'api' mode relative to
 * `<cwd>/public`, otherwise relative to the current working directory.
 *
 * @param {string} filePath - The path to the file, supports absolute paths, remote URLs and relative paths
 * @param {string} accessMode - The access mode ('api' or 'mcp')
 * @returns {Promise<string>} - The content of the file
 * @throws {Error} - If the remote request does not succeed, or if URL validation,
 *   path validation, or the file read fails
 */
async function readFile(filePath, accessMode = 'mcp') {
  if (isRemoteFileUrl(filePath)) {
    const safeUrl = validateGithubOrGitlabUrl(filePath);
    const response = await fetch(safeUrl);
    // fetch() only rejects on network failure; HTTP errors must be checked explicitly.
    if (!response.ok) {
      throw new Error(`Failed to fetch remote file ${safeUrl}: ${response.status} ${response.statusText}`);
    }
    return await response.text();
  }

  // Determine base path based on access mode
  const basePath = accessMode === 'api'
    ? path.join(process.cwd(), 'public')
    : process.cwd();

  // Validate paths to prevent unauthorized file access
  const resolvedPath = await safeResolvePath(filePath, basePath, accessMode);

  return await fs.readFile(resolvedPath, 'utf8');
}
73
+
74
/**
 * Write the content to the file
 *
 * Remote URLs are rejected. For local paths the target is resolved through
 * safeResolvePath (base: `<cwd>/public` in 'api' mode, the current working
 * directory otherwise), missing parent directories are created, and the
 * content is written.
 *
 * @param {string} filePath - The path to the file, supports absolute and relative paths
 * @param {string} content - The content to write to the file
 * @param {string} accessMode - The access mode ('api' or 'mcp')
 * @returns {Promise<void>} - The promise that resolves when the file is written
 * @throws {Error} - If `filePath` is a remote URL
 */
async function writeFile(filePath, content, accessMode = 'mcp') {
  if (isRemoteFileUrl(filePath)) {
    throw new Error("Cannot write to remote file");
  }

  // 'api' mode is confined to the public folder; any other mode uses the cwd as base
  const workingDirectory = process.cwd();
  const basePath = accessMode === 'api'
    ? path.join(workingDirectory, 'public')
    : workingDirectory;

  // Validate the path before touching the file system
  const targetPath = await safeResolvePath(filePath, basePath, accessMode);

  // Make sure the destination directory exists before writing
  const parentDirectory = path.dirname(targetPath);
  await fs.mkdir(parentDirectory, { recursive: true });

  return await fs.writeFile(targetPath, content);
}
101
+
102
/**
 * Tell whether a file path is absolute, as judged by Node's `path.isAbsolute`
 * @param {string} filePath - The path to inspect
 * @returns {boolean} - True for an absolute path, false for a relative one
 */
function isAbsoluteFilePath(filePath) {
  const absolute = path.isAbsolute(filePath);
  return absolute;
}
110
+
111
/**
 * Check if the file path is a remote URL
 *
 * Only values that begin with the `http://` or `https://` scheme (matched
 * case-insensitively) count as remote. A local path that merely starts with
 * the letters "http" (e.g. `http-server/readme.md`) is not remote, and
 * non-string values are reported as not remote rather than throwing.
 *
 * @param {string} url - The URL to check
 * @returns {boolean} - True if the URL is a remote URL, false otherwise
 */
function isRemoteFileUrl(url) {
  return typeof url === 'string' && /^https?:\/\//i.test(url);
}
119
+
120
/**
 * Turn a file path into an absolute one by resolving it against a base path
 * (an already-absolute `filePath` is returned normalized, ignoring the base)
 * @param {string} filePath - The relative file path
 * @param {string} basePath - The base path to resolve the relative file path from
 * @returns {string} - The absolute file path
 */
function toAbsoluteFilePath(filePath, basePath) {
  const segments = [basePath, filePath];
  return path.resolve(...segments);
}
129
+
130
+ export { readFile, writeFile, isAbsoluteFilePath, isRemoteFileUrl, toAbsoluteFilePath, safeResolvePath };
@@ -0,0 +1,337 @@
1
+ /**
2
+ * GitHub API
3
+ */
4
+
5
+ import { Octokit } from "@octokit/core";
6
+ import { retry } from "@octokit/plugin-retry";
7
+ import { throttling } from "@octokit/plugin-throttling";
8
+
9
/** Largest `per_page` value requested from the GitHub search endpoint. */
const GITHUB_MAX_PER_PAGE = 100;

/** How many times a rate-limited request may be retried before giving up. */
const MAX_RATE_LIMIT_RETRIES = 3;

/**
 * HTTP statuses the retry plugin must not retry. These are numbers because
 * they are compared against the numeric `error.status`. The list keeps the
 * usual non-retryable client errors and adds 429, which is left to the
 * throttling plugin.
 */
const NON_RETRYABLE_STATUSES = [400, 401, 403, 404, 410, 422, 429, 451];

/**
 * Thin client around the GitHub REST API (repository info, code search, file
 * content) with an in-memory cache shared by all operations of one instance.
 */
class GitHubApi {
  constructor() {
    this.MyOctokit = Octokit.plugin(retry, throttling);
    this.cache = new Map();
  }

  /**
   * Fetch (and cache) repository metadata.
   *
   * Failures of the API call are logged and swallowed on purpose: the method
   * then falls back to `{ owner, repo }` (both strings) so callers can
   * continue without metadata. The fallback is not cached.
   *
   * @param {string} repository - Repository URL or "owner/repo"
   * @param {string} [githubToken=process.env.GITHUB_TOKEN] - GitHub personal access token
   * @returns {Promise<object>} The API's repository object, or `{ owner, repo }` on failure
   * @throws {Error} If `repository` cannot be parsed
   */
  async getRepoInfo(repository, githubToken = process.env.GITHUB_TOKEN) {
    const { owner, repo } = this._parseRepoNameAndOwner(repository);
    const cacheKey = `repo:${owner}/${repo}`;

    // Check cache first
    if (this.cache.has(cacheKey)) {
      console.log(`Cache hit for repository: ${owner}/${repo}`);
      return this.cache.get(cacheKey);
    }

    try {
      const octokit = this._createOctokit(githubToken);
      const { data: repoInfo } = await octokit.request("GET /repos/{owner}/{repo}", {
        owner,
        repo
      });
      this.cache.set(cacheKey, repoInfo);
      console.log(`Fetched repository info for ${owner}/${repo} (default branch: ${repoInfo?.default_branch})`);
      return repoInfo;
    } catch (error) {
      console.error(`Error getting repository info for ${repository}: ${error.message}`);
      return { owner, repo };
    }
  }

  /**
   * Search for code in GitHub repositories
   * Limitations:
   * - Only default branch is searched
   * - Symbol search is not supported by GitHub API (a "symbol:" prefix is stripped)
   * - A single page is requested, so at most 100 results are returned
   * @param {string} query - The search query e.g. "keyword" | "keyword repo:owner/repo"
   * @param {object} context - Search context and options
   * @param {string} context.repository - GitHub repository URL e.g. "https://github.com/owner/repo" or "owner/repo"
   * @param {string} [context.githubToken=process.env.GITHUB_TOKEN] - GitHub personal access token
   * @param {number} context.scoreThreshold - Minimum score a result must have to be returned (default: 0)
   * @param {number} context.maxResults - Maximum number of results (default: 10, capped at 100)
   * @param {boolean} context.cache - Whether to use caching (default: true)
   * @returns {Promise<Array<Object>>} Search results, each with:
   *   `repository` (repository HTML URL), `path` (path relative to the repository root),
   *   `score`, `sha` (or null), `snippet` (first text-match fragment when the API
   *   returns one, otherwise undefined) and `filepath` (raw-content URL on the default branch)
   * @throws {Error} If the query is not a non-empty string, the repository cannot be
   *   parsed, or the search request fails
   * @example
   * const searchResults = await githubApi.searchCode('function main', {
   *   repository: 'https://github.com/owner/repo',
   *   scoreThreshold: 0,
   *   maxResults: 10,
   *   cache: true
   * });
   */
  async searchCode(query, context = {}) {
    const {
      repository,
      githubToken = process.env.GITHUB_TOKEN,
      scoreThreshold = 0,
      maxResults = 10,
      cache = true
    } = context;

    if (typeof query !== 'string' || query.trim() === '') {
      throw new Error("Search query must be a non-empty string");
    }

    let searchQuery = query;
    if (searchQuery.includes('symbol:')) {
      // Symbol search is not supported by GitHub API: drop every "symbol:" qualifier
      searchQuery = searchQuery.replace(/symbol:/g, '');
      console.log(`Symbol search is not supported by GitHub API, removing the symbol prefix from the query: "${searchQuery}"`);
    }

    let repoInfo;
    if (repository) {
      const { owner, repo } = this._parseRepoNameAndOwner(repository);
      if (!searchQuery.includes('repo:')) {
        searchQuery = `${searchQuery} repo:${owner}/${repo}`;
      }
      // Forward the caller's token so metadata lookup works without GITHUB_TOKEN in the environment
      repoInfo = await this.getRepoInfo(repository, githubToken);
    }

    if (repoInfo?.fork) {
      searchQuery = `${searchQuery} fork:true`;
    }

    // Build cache key
    const cacheKey = `search:${searchQuery}|${scoreThreshold}|${maxResults}`;

    // Check cache first
    if (cache && this.cache.has(cacheKey)) {
      console.log(`Cache hit for query: "${searchQuery}"`);
      return this.cache.get(cacheKey);
    }

    console.log(`Searching GitHub for: "${searchQuery}" (max ${maxResults} results, score threshold: ${scoreThreshold})`);

    const requestedResults = Number.isFinite(maxResults) ? Math.floor(maxResults) : 10;
    const perPage = Math.min(Math.max(requestedResults, 1), GITHUB_MAX_PER_PAGE);

    try {
      const octokit = this._createOctokit(githubToken);

      const searchResponse = await octokit.request("GET /search/code", {
        q: searchQuery,
        per_page: perPage,
        sort: "best-match"
      });

      const items = searchResponse?.data?.items ?? [];
      console.log(`Found ${items.length} initial search results`);

      const defaultBranch = repoInfo?.default_branch || "main";
      const results = items
        .filter((item) => (item.score ?? 0) >= scoreThreshold)
        .map((item) => ({
          repository: item.repository.html_url,
          path: item.path,
          score: item.score,
          snippet: item.text_matches?.[0]?.fragment,
          sha: item.sha || null,
          filepath: this._convertToAbsoluteFilePath(item.path, item.repository.html_url, defaultBranch)
        }));

      console.log(`Returning ${results.length} GitHub code search results`);

      // Cache the results
      if (cache) {
        this.cache.set(cacheKey, results);
      }

      return results;
    } catch (error) {
      // _handleError always throws
      this._handleError(error, "GitHub search");
    }
  }

  /**
   * Get file content from a GitHub repository
   * @param {string} repository - Repository GitHub URL e.g. "https://github.com/owner/repo" or "owner/repo"
   * @param {string} path - File path in the repository
   * @param {string} [sha] - Optional branch or tag name. A 40-character lowercase hex
   *   value is ignored and the default branch is used instead, because the SHA
   *   carried by search results is not a usable ref.
   * @param {object} [context={}] - Additional context
   * @param {string} [context.githubToken=process.env.GITHUB_TOKEN] - GitHub personal access token
   * @param {boolean} [context.cache=true] - Whether to use caching (default: true)
   * @returns {Promise<string>} File content as string
   * @throws {Error} If the repository cannot be parsed, the request fails, or the
   *   path does not yield Base64 file content (e.g. it is a directory)
   */
  async getFileContent(repository, path, sha = null, context = {}) {
    const {
      githubToken = process.env.GITHUB_TOKEN,
      cache = true
    } = context;

    const { owner, repo } = this._parseRepoNameAndOwner(repository);

    // Only branch/tag-like values are used as ref; a full 40-hex SHA is dropped
    const ref = typeof sha === 'string' && sha !== '' && !/^[a-f0-9]{40}$/.test(sha) ? sha : null;

    // The ref is part of the key so that different branches/tags never share an entry
    const cacheKey = ref === null
      ? `file:${owner}/${repo}/${path}`
      : `file:${owner}/${repo}/${path}@${ref}`;

    // Check cache first
    if (cache && this.cache.has(cacheKey)) {
      console.log(`Cache hit for file: ${owner}/${repo}/${path}`);
      return this.cache.get(cacheKey);
    }

    console.log(`Fetching file content: ${owner}/${repo}/${path}`);

    try {
      const octokit = this._createOctokit(githubToken);

      const requestParams = { owner, repo, path };
      if (ref !== null) {
        requestParams.ref = ref;
      }

      const contentResponse = await octokit.request("GET /repos/{owner}/{repo}/contents/{path}", requestParams);
      const data = contentResponse?.data;

      // A directory listing is an array; other non-file responses carry no Base64 content
      const hasBase64Content = !Array.isArray(data)
        && typeof data?.content === 'string'
        && (!data.encoding || data.encoding === 'base64');
      if (!hasBase64Content) {
        throw new Error(`"${path}" is not a file, or its content was not returned by the API`);
      }

      // Decode Base64 content
      const content = Buffer.from(data.content, 'base64').toString('utf-8');

      // Cache the content
      if (cache) {
        this.cache.set(cacheKey, content);
      }

      console.log(`Successfully fetched content for: ${owner}/${repo}/${path}`);
      return content;
    } catch (error) {
      // _handleError always throws
      this._handleError(error, `File content fetch for ${owner}/${repo}/${path}`);
    }
  }

  /**
   * Clear the cache
   */
  clearCache() {
    this.cache.clear();
    console.log("GitHub API cache cleared");
  }

  /**
   * Get cache statistics
   * @returns {object} Cache statistics: `size` (entry count) and `keys` (all cache keys)
   */
  getCacheStats() {
    return {
      size: this.cache.size,
      keys: Array.from(this.cache.keys())
    };
  }

  /**
   * Build the raw.githubusercontent.com URL of a file on a branch
   * @param {string} path - File path relative to the repository root
   * @param {string} repository - Repository URL or "owner/repo"
   * @param {string} [sha="main"] - Branch name (placed under `refs/heads/`)
   * @returns {string} Raw-content URL; path and branch segments are percent-encoded
   */
  _convertToAbsoluteFilePath(path, repository, sha = "main") {
    const { owner, repo } = this._parseRepoNameAndOwner(repository);
    // Encode segment by segment so "/" separators survive while spaces, "#", "?" etc. are escaped
    const encodeSegments = (value) => String(value).split('/').map(encodeURIComponent).join('/');
    return `https://raw.githubusercontent.com/${owner}/${repo}/refs/heads/${encodeSegments(sha)}/${encodeSegments(path)}`;
  }

  /**
   * Initialize Octokit instance with authentication and plugins
   * @param {string} githubToken - GitHub personal access token
   * @returns {Octokit} Configured Octokit instance
   * @throws {Error} If no token is available
   */
  _createOctokit(githubToken = process.env.GITHUB_TOKEN) {
    if (!githubToken) {
      throw new Error("GitHub token is required. Set GITHUB_TOKEN environment variable or pass githubToken parameter.");
    }

    // retryCount is the number of retries already made, so "< MAX" allows exactly MAX retries
    const retryUpToLimit = (label) => (retryAfter, options, octo, retryCount) => {
      console.log(`${label} hit, retry ${retryCount}/3 after ${retryAfter}s`);
      return retryCount < MAX_RATE_LIMIT_RETRIES;
    };

    return new this.MyOctokit({
      auth: githubToken,
      userAgent: "docs-agent/1.0.0",
      retry: {
        retries: 3,
        doNotRetry: [...NON_RETRYABLE_STATUSES]
      },
      throttle: {
        onRateLimit: retryUpToLimit("Rate limit"),
        onSecondaryRateLimit: retryUpToLimit("Secondary rate limit"),
        // Kept for older plugin-throttling versions that still use this name
        onAbuseLimit: retryUpToLimit("Abuse limit"),
      },
    });
  }

  /**
   * Handle GitHub API errors with specific error messages
   *
   * Always throws. The original error is attached as `cause`. A 403 whose
   * response headers show an exhausted rate limit is reported as a rate-limit
   * error rather than a permissions problem.
   *
   * @param {Error} error - The error to handle
   * @param {string} operation - The operation that failed
   * @throws {Error} Specific error message
   */
  _handleError(error, operation = "GitHub API operation") {
    console.error(`${operation} failed:`, error.message);

    const headers = error.response?.headers ?? {};
    const rateLimited = error.status === 429
      || (error.status === 403
        && (String(headers['x-ratelimit-remaining']) === '0' || headers['retry-after'] !== undefined));

    if (error.status === 401) {
      throw new Error("Invalid GitHub token. Please check your authentication.", { cause: error });
    }
    if (rateLimited) {
      throw new Error("GitHub rate limit exceeded. Please try again later.", { cause: error });
    }
    if (error.status === 403) {
      throw new Error("GitHub API access denied. Check your token permissions.", { cause: error });
    }
    throw new Error(`${operation} failed: ${error.message}`, { cause: error });
  }

  /**
   * Parses owner and repo from repository string or http url
   *
   * Accepted forms: "owner/repo", "https://github.com/owner/repo" (extra path
   * segments are ignored), scheme-less "github.com/owner/repo" and the SSH
   * remote "git@github.com:owner/repo.git". A trailing ".git" is removed.
   *
   * @param {string} repository - Repository string in format "owner/repo" or a GitHub URL
   * @returns {Object} Object with owner and repo properties
   * @throws {Error} If owner or repo cannot be determined
   * @example
   * const { owner, repo } = githubApi._parseRepoNameAndOwner('https://github.com/owner/repo');
   * console.log(owner); // 'owner'
   * console.log(repo); // 'repo'
   */
  _parseRepoNameAndOwner(repository) {
    if (typeof repository !== 'string' || repository.trim() === '') {
      throw new Error(`Invalid repository string: ${repository}`);
    }
    const input = repository.trim();

    let repoPath;
    try {
      // Handles URLs like https://github.com/owner/repo or http://github.com/owner/repo
      repoPath = new URL(input).pathname;
    } catch (e) {
      // Not a valid URL: shorthand, scheme-less host, or SSH remote
      repoPath = input.replace(/^(?:git@)?(?:www\.)?github\.com[:/]/i, '');
    }

    const [owner, rawRepo] = repoPath.replace(/^\/+|\/+$/g, '').split('/');
    const repo = rawRepo?.replace(/\.git$/i, '');

    if (!owner || !repo) {
      throw new Error(`Invalid repository string: ${repository}`);
    }
    return { owner, repo };
  }
}
336
+
337
+ export default GitHubApi;