agentic-knowledge-mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/README.md +530 -0
- package/package.json +94 -0
- package/packages/cli/dist/cli.d.ts +5 -0
- package/packages/cli/dist/cli.js +21 -0
- package/packages/cli/dist/commands/create.d.ts +5 -0
- package/packages/cli/dist/commands/create.js +90 -0
- package/packages/cli/dist/commands/init.d.ts +5 -0
- package/packages/cli/dist/commands/init.js +182 -0
- package/packages/cli/dist/commands/refresh.d.ts +5 -0
- package/packages/cli/dist/commands/refresh.js +322 -0
- package/packages/cli/dist/commands/status.d.ts +5 -0
- package/packages/cli/dist/commands/status.js +268 -0
- package/packages/cli/dist/index.d.ts +6 -0
- package/packages/cli/dist/index.js +6 -0
- package/packages/cli/package.json +57 -0
- package/packages/content-loader/dist/__tests__/debug-filtering.d.ts +1 -0
- package/packages/content-loader/dist/__tests__/debug-filtering.js +17 -0
- package/packages/content-loader/dist/__tests__/test-filtering.d.ts +1 -0
- package/packages/content-loader/dist/__tests__/test-filtering.js +19 -0
- package/packages/content-loader/dist/content/api-documentation-loader.d.ts +26 -0
- package/packages/content-loader/dist/content/api-documentation-loader.js +45 -0
- package/packages/content-loader/dist/content/content-processor.d.ts +44 -0
- package/packages/content-loader/dist/content/content-processor.js +86 -0
- package/packages/content-loader/dist/content/documentation-site-loader.d.ts +26 -0
- package/packages/content-loader/dist/content/documentation-site-loader.js +45 -0
- package/packages/content-loader/dist/content/git-repo-loader.d.ts +79 -0
- package/packages/content-loader/dist/content/git-repo-loader.js +368 -0
- package/packages/content-loader/dist/content/index.d.ts +9 -0
- package/packages/content-loader/dist/content/index.js +9 -0
- package/packages/content-loader/dist/content/loader.d.ts +47 -0
- package/packages/content-loader/dist/content/loader.js +8 -0
- package/packages/content-loader/dist/content/metadata-manager.d.ts +65 -0
- package/packages/content-loader/dist/content/metadata-manager.js +160 -0
- package/packages/content-loader/dist/index.d.ts +5 -0
- package/packages/content-loader/dist/index.js +5 -0
- package/packages/content-loader/dist/types.d.ts +127 -0
- package/packages/content-loader/dist/types.js +48 -0
- package/packages/content-loader/package.json +50 -0
- package/packages/core/dist/config/discovery.d.ts +15 -0
- package/packages/core/dist/config/discovery.js +65 -0
- package/packages/core/dist/config/loader.d.ts +22 -0
- package/packages/core/dist/config/loader.js +236 -0
- package/packages/core/dist/config/manager.d.ts +55 -0
- package/packages/core/dist/config/manager.js +180 -0
- package/packages/core/dist/content/api-documentation-loader.d.ts +26 -0
- package/packages/core/dist/content/api-documentation-loader.js +45 -0
- package/packages/core/dist/content/content-processor.d.ts +44 -0
- package/packages/core/dist/content/content-processor.js +81 -0
- package/packages/core/dist/content/documentation-site-loader.d.ts +26 -0
- package/packages/core/dist/content/documentation-site-loader.js +45 -0
- package/packages/core/dist/content/git-repo-loader.d.ts +54 -0
- package/packages/core/dist/content/git-repo-loader.js +264 -0
- package/packages/core/dist/content/index.d.ts +9 -0
- package/packages/core/dist/content/index.js +9 -0
- package/packages/core/dist/content/loader.d.ts +50 -0
- package/packages/core/dist/content/loader.js +7 -0
- package/packages/core/dist/content/metadata-manager.d.ts +65 -0
- package/packages/core/dist/content/metadata-manager.js +160 -0
- package/packages/core/dist/index.d.ts +12 -0
- package/packages/core/dist/index.js +30 -0
- package/packages/core/dist/paths/calculator.d.ts +46 -0
- package/packages/core/dist/paths/calculator.js +166 -0
- package/packages/core/dist/templates/processor.d.ts +40 -0
- package/packages/core/dist/templates/processor.js +111 -0
- package/packages/core/dist/types.d.ts +129 -0
- package/packages/core/dist/types.js +79 -0
- package/packages/core/package.json +50 -0
- package/packages/mcp-server/dist/bin.d.ts +5 -0
- package/packages/mcp-server/dist/bin.js +10 -0
- package/packages/mcp-server/dist/cli.d.ts +7 -0
- package/packages/mcp-server/dist/cli.js +17 -0
- package/packages/mcp-server/dist/index.d.ts +8 -0
- package/packages/mcp-server/dist/index.js +9 -0
- package/packages/mcp-server/dist/server.d.ts +35 -0
- package/packages/mcp-server/dist/server.js +244 -0
- package/packages/mcp-server/package.json +54 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git repository content loader
|
|
3
|
+
*/
|
|
4
|
+
import { ContentLoader, type LoadResult } from "./loader.js";
|
|
5
|
+
import { WebSourceConfig } from "../types.js";
|
|
6
|
+
/**
 * Content loader for Git repositories (GitHub, GitLab, any Git repo)
 */
export declare class GitRepoLoader extends ContentLoader {
    /**
     * Check if this loader can handle the given web source type
     * @param webSource - Web source configuration to inspect
     * @returns True when the source type is a Git repository
     */
    canHandle(webSource: WebSourceConfig): boolean;
    /**
     * Validate the web source configuration
     * @param webSource - Web source configuration to validate
     * @returns true when valid, otherwise a human-readable error message
     */
    validateConfig(webSource: WebSourceConfig): true | string;
    /**
     * Load content from a Git repository
     * @param webSource - Web source configuration (URL plus optional options)
     * @param targetPath - Local directory to store the extracted content
     * @returns Promise with the load result (files, content hash, error)
     */
    load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
    /**
     * Get content identifier for change detection
     * @param webSource - Web source configuration
     * @returns Promise with a stable identifier for the current remote content
     */
    getContentId(webSource: WebSourceConfig): Promise<string>;
    /**
     * Validate if URL is a valid Git repository URL
     */
    private isValidGitUrl;
    /**
     * Create a temporary directory for cloning
     */
    private createTempDirectory;
    /**
     * Clone the Git repository
     */
    private cloneRepository;
    /**
     * Extract content from cloned repository to target directory
     */
    private extractContent;
    /**
     * Copy directory recursively
     */
    private copyDirectory;
    /**
     * Generate content hash for change detection
     */
    private generateContentHash;
    /**
     * Clean up temporary directory
     */
    private cleanupTempDirectory;
    /**
     * Filter list of files to only include documentation-relevant files (REQ-18)
     * @param files - Array of file paths to filter
     * @returns Array of file paths that are considered documentation
     */
    private filterDocumentationFiles;
    /**
     * Determine if a file is considered documentation content (REQ-18)
     * @param filePath - Path to the file to check
     * @returns True if file should be included as documentation
     */
    private isDocumentationFile;
    /**
     * Extract only documentation files from source directory (REQ-18)
     * @param sourceDir - Source directory to scan
     * @param targetDir - Target directory to copy files to
     * @param extractedFiles - Array to track extracted file paths
     */
    private extractDocumentationFiles;
    /**
     * Recursively scan all files in a directory
     * @param dir - Directory to scan
     * @returns Array of absolute file paths
     */
    private scanAllFiles;
}
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git repository content loader
|
|
3
|
+
*/
|
|
4
|
+
import { promises as fs } from "node:fs";
|
|
5
|
+
import * as path from "node:path";
|
|
6
|
+
import { execSync } from "node:child_process";
|
|
7
|
+
import { ContentLoader } from "./loader.js";
|
|
8
|
+
import { WebSourceType, WebSourceError, WebSourceErrorType, } from "../types.js";
|
|
9
|
+
import * as crypto from "node:crypto";
|
|
10
|
+
/**
 * Content loader for Git repositories (GitHub, GitLab, any Git repo).
 *
 * Shallow-clones the repository into a temporary directory, extracts either
 * the explicitly configured paths or (by default) only documentation-relevant
 * files (REQ-18), hashes the extracted content for change detection, and
 * removes the temporary clone afterwards.
 */
export class GitRepoLoader extends ContentLoader {
    /**
     * Check if this loader can handle the given web source type
     */
    canHandle(webSource) {
        return webSource.type === WebSourceType.GIT_REPO;
    }
    /**
     * Validate the web source configuration
     * @returns true when valid, otherwise a human-readable error message
     */
    validateConfig(webSource) {
        if (!webSource.url) {
            return "Git repository URL is required";
        }
        // Basic URL validation for Git repos
        if (!this.isValidGitUrl(webSource.url)) {
            return "Invalid Git repository URL";
        }
        return true;
    }
    /**
     * Load content from a Git repository
     * @param webSource - Web source configuration (url + optional options)
     * @param targetPath - Local directory to store the extracted content
     * @returns LoadResult with extracted file list and content hash
     */
    async load(webSource, targetPath) {
        try {
            const options = webSource.options;
            const tempDir = await this.createTempDirectory();
            try {
                // Clone the repository
                await this.cloneRepository(webSource.url, tempDir, options);
                // Extract specified paths or all content
                const extractedFiles = await this.extractContent(tempDir, targetPath, options?.paths);
                // Generate content hash
                const contentHash = await this.generateContentHash(targetPath, extractedFiles);
                return {
                    success: true,
                    files: extractedFiles,
                    contentHash,
                };
            }
            finally {
                // Clean up temp directory even when cloning/extraction fails
                await this.cleanupTempDirectory(tempDir);
            }
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            return {
                success: false,
                files: [],
                contentHash: "",
                error: `Git repository loading failed: ${errorMessage}`,
            };
        }
    }
    /**
     * Get content identifier for change detection.
     * Uses the remote branch/HEAD commit hash when the remote is reachable;
     * falls back to a URL+paths hash otherwise.
     */
    async getContentId(webSource) {
        try {
            const options = webSource.options;
            const branch = options?.branch || "HEAD";
            // execSync runs through a shell; refuse to interpolate anything
            // that could smuggle shell metacharacters into the command line.
            // A validation failure drops us into the URL-hash fallback below.
            if (!this.isValidGitUrl(webSource.url) || !/^[\w\-./]+$/.test(branch)) {
                throw new Error("Unsafe URL or branch for git ls-remote");
            }
            // Get the latest commit hash from the remote repository
            const command = `git ls-remote ${webSource.url} ${branch}`;
            const output = execSync(command, { encoding: "utf8", timeout: 30000 });
            const commitHash = output.trim().split("\t")[0];
            // Combine with URL and paths for a unique identifier.
            // Copy before sorting so the caller's config array is not mutated.
            const paths = options?.paths ? options.paths.slice().sort().join(",") : "all";
            return crypto
                .createHash("sha256")
                .update(`${webSource.url}:${commitHash}:${paths}`)
                .digest("hex");
        }
        catch {
            // Fallback to URL-based hash if remote access fails
            const options = webSource.options;
            const paths = options?.paths ? options.paths.slice().sort().join(",") : "all";
            return crypto
                .createHash("sha256")
                .update(`${webSource.url}:${paths}`)
                .digest("hex");
        }
    }
    /**
     * Validate if URL is a valid Git repository URL.
     * Accepted URLs contain only word characters plus `-._/@:`, so they are
     * also safe to interpolate into the shell commands built below.
     */
    isValidGitUrl(url) {
        const gitUrlPatterns = [
            /^https:\/\/github\.com\/[\w\-._]+\/[\w\-._]+(?:\.git)?$/,
            /^https:\/\/gitlab\.com\/[\w\-._/]+(?:\.git)?$/,
            /^https:\/\/[\w\-._]+\/[\w\-._/]+\.git$/,
            /^git@[\w\-._]+:[\w\-._/]+\.git$/,
        ];
        return gitUrlPatterns.some((pattern) => pattern.test(url));
    }
    /**
     * Create a unique temporary directory for cloning
     */
    async createTempDirectory() {
        const tempDir = path.join(process.cwd(), ".tmp", `git-clone-${Date.now()}-${Math.random().toString(36).slice(2)}`);
        await fs.mkdir(tempDir, { recursive: true });
        return tempDir;
    }
    /**
     * Clone the Git repository (shallow clone; retries with "master" when the
     * default "main" branch does not exist on the remote).
     * @throws WebSourceError when cloning fails for all attempted branches
     */
    async cloneRepository(url, targetDir, options) {
        const branch = options?.branch || "main";
        // execSync runs through a shell — validate before interpolating.
        if (!this.isValidGitUrl(url)) {
            throw new WebSourceError(WebSourceErrorType.GIT_REPO_ERROR, `Refusing to clone unsafe repository URL: ${url}`, { url, branch });
        }
        if (!/^[\w\-./]+$/.test(branch)) {
            throw new WebSourceError(WebSourceErrorType.GIT_REPO_ERROR, `Refusing to clone unsafe branch name: ${branch}`, { url, branch });
        }
        const depth = "--depth 1"; // Shallow clone for efficiency
        // Quote targetDir: it derives from process.cwd(), which may contain spaces.
        let gitCommand = `git clone ${depth} --branch ${branch} ${url} "${targetDir}"`;
        // Add authentication if token is provided
        if (options?.token) {
            // For HTTPS URLs, inject token into the clone URL.
            // NOTE(review): the token is interpolated into a shell command;
            // assumed to be an alphanumeric PAT — confirm upstream validation.
            if (url.startsWith("https://")) {
                const urlWithToken = url.replace("https://", `https://${options.token}@`);
                gitCommand = `git clone ${depth} --branch ${branch} ${urlWithToken} "${targetDir}"`;
            }
        }
        try {
            execSync(gitCommand, {
                stdio: "pipe",
                timeout: 120000, // 2 minutes timeout
                cwd: process.cwd(),
            });
        }
        catch (error) {
            // Try with master branch if main fails
            if (branch === "main") {
                const masterCommand = gitCommand.replace("--branch main", "--branch master");
                try {
                    execSync(masterCommand, {
                        stdio: "pipe",
                        timeout: 120000,
                        cwd: process.cwd(),
                    });
                    return;
                }
                catch {
                    // If both fail, rethrow based on the original error below
                }
            }
            // Never leak the auth token through error details: git echoes the
            // clone URL (token included) in its messages, and the command
            // string contains it verbatim.
            let message = error instanceof Error ? error.message : String(error);
            let redactedCommand = gitCommand;
            if (options?.token) {
                message = message.split(options.token).join("***");
                redactedCommand = gitCommand.split(options.token).join("***");
            }
            throw new WebSourceError(WebSourceErrorType.GIT_REPO_ERROR, `Failed to clone repository: ${message}`, { url, branch, command: redactedCommand });
        }
    }
    /**
     * Extract content from cloned repository to target directory.
     * When explicit paths are configured only those are copied; otherwise the
     * smart documentation filter (REQ-18) selects the files.
     * @returns Array of extracted file paths relative to targetDir
     */
    async extractContent(sourceDir, targetDir, paths) {
        await fs.mkdir(targetDir, { recursive: true });
        const extractedFiles = [];
        if (paths && paths.length > 0) {
            // Extract only specified paths
            for (const relPath of paths) {
                const sourcePath = path.join(sourceDir, relPath);
                const targetPath = path.join(targetDir, relPath);
                try {
                    const stats = await fs.stat(sourcePath);
                    if (stats.isDirectory()) {
                        // Record copied files relative to targetDir so that
                        // generateContentHash can resolve them later.
                        await this.copyDirectory(sourcePath, targetPath, extractedFiles, [], targetDir);
                    }
                    else if (stats.isFile()) {
                        await fs.mkdir(path.dirname(targetPath), { recursive: true });
                        await fs.copyFile(sourcePath, targetPath);
                        extractedFiles.push(relPath);
                    }
                }
                catch (error) {
                    // Skip files that don't exist or can't be accessed
                    console.warn(`Warning: Could not extract ${relPath}: ${error instanceof Error ? error.message : String(error)}`);
                }
            }
        }
        else {
            // Use smart filtering to extract only documentation files (REQ-18)
            await this.extractDocumentationFiles(sourceDir, targetDir, extractedFiles);
        }
        return extractedFiles;
    }
    /**
     * Copy directory recursively.
     * @param rootTarget - Directory that recorded paths are made relative to.
     *   Defaults to `target` so existing call sites keep their behavior for
     *   top-level files.
     */
    async copyDirectory(source, target, fileList, excludeDirs = [], rootTarget = target) {
        await fs.mkdir(target, { recursive: true });
        const items = await fs.readdir(source);
        for (const item of items) {
            if (excludeDirs.includes(item)) {
                continue;
            }
            const sourcePath = path.join(source, item);
            const targetPath = path.join(target, item);
            const stats = await fs.stat(sourcePath);
            if (stats.isDirectory()) {
                // BUGFIX: propagate rootTarget instead of re-basing on each
                // subdirectory — previously nested files were recorded with
                // only their basename, losing the directory prefix.
                await this.copyDirectory(sourcePath, targetPath, fileList, excludeDirs, rootTarget);
            }
            else {
                await fs.copyFile(sourcePath, targetPath);
                // Calculate relative path from the initial target directory
                fileList.push(path.relative(rootTarget, targetPath));
            }
        }
    }
    /**
     * Generate content hash for change detection.
     * Hashes filename + content of every extracted file in sorted order so
     * the digest is stable across directory-listing order.
     */
    async generateContentHash(targetDir, files) {
        const hash = crypto.createHash("sha256");
        // Sort a copy for consistent hashing without mutating the input
        const sortedFiles = files.slice().sort();
        for (const file of sortedFiles) {
            const filePath = path.join(targetDir, file);
            try {
                const content = await fs.readFile(filePath);
                hash.update(file); // Include filename
                hash.update(content); // Include content
            }
            catch (error) {
                // Skip files that can't be read
                console.warn(`Warning: Could not hash ${file}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        return hash.digest("hex");
    }
    /**
     * Clean up temporary directory (best effort — a failure is only logged)
     */
    async cleanupTempDirectory(tempDir) {
        try {
            await fs.rm(tempDir, { recursive: true, force: true });
        }
        catch (error) {
            console.warn(`Warning: Could not clean up temp directory ${tempDir}: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Filter list of files to only include documentation-relevant files (REQ-18)
     * @param files - Array of file paths to filter
     * @returns Array of file paths that are considered documentation
     */
    filterDocumentationFiles(files) {
        return files.filter((file) => this.isDocumentationFile(file));
    }
    /**
     * Determine if a file is considered documentation content (REQ-18)
     * @param filePath - Path to the file to check
     * @returns True if file should be included as documentation
     */
    isDocumentationFile(filePath) {
        const filename = path.basename(filePath);
        const extension = path.extname(filePath).toLowerCase();
        const directory = path.dirname(filePath);
        // Exclude project metadata files (REQ-18)
        const metadataFiles = /^(CHANGELOG|LICENSE|CONTRIBUTING|AUTHORS|CODE_OF_CONDUCT)/i;
        if (metadataFiles.test(filename)) {
            return false;
        }
        // Exclude source code, build, and development directories (REQ-18)
        const excludedDirPatterns = [
            "node_modules",
            "vendor",
            ".git",
            "build",
            "dist",
            "target",
            ".cache",
            "src",
            "lib",
            "components",
            "__tests__",
            ".github",
            ".vscode",
            ".idea",
        ];
        // BUGFIX: compare whole path segments, not substrings — the old
        // `directory.includes("lib")` check wrongly excluded e.g. "library/"
        // or "distributions/" folders.
        const dirSegments = directory.split(/[\\/]/);
        for (const pattern of excludedDirPatterns) {
            if (dirSegments.includes(pattern)) {
                return false;
            }
        }
        // Include README files anywhere (REQ-18)
        if (/^README/i.test(filename)) {
            return true;
        }
        // Include documentation file extensions anywhere, regardless of directory (REQ-18)
        const docExtensions = [".md", ".mdx", ".rst", ".txt", ".adoc", ".asciidoc"];
        if (docExtensions.includes(extension)) {
            return true;
        }
        // Special case: examples directory - include other file types as they're often documentation (REQ-18)
        const isInExamples = /\b(examples?)\b/i.test(directory);
        if (isInExamples) {
            // In examples, exclude only binary files
            const excludedInExamples = [
                ".exe",
                ".bin",
                ".so",
                ".dll",
                ".dylib",
                ".a",
                ".o",
                ".obj",
            ];
            return !excludedInExamples.includes(extension);
        }
        return false;
    }
    /**
     * Extract only documentation files from source directory (REQ-18)
     * @param sourceDir - Source directory to scan
     * @param targetDir - Target directory to copy files to
     * @param extractedFiles - Array to track extracted file paths
     */
    async extractDocumentationFiles(sourceDir, targetDir, extractedFiles) {
        // First, scan all files in the repository
        const allFiles = await this.scanAllFiles(sourceDir);
        // Filter to only documentation files
        const docFiles = this.filterDocumentationFiles(allFiles);
        // Copy the filtered files, preserving their relative layout
        for (const filePath of docFiles) {
            const relativePath = path.relative(sourceDir, filePath);
            const targetPath = path.join(targetDir, relativePath);
            try {
                await fs.mkdir(path.dirname(targetPath), { recursive: true });
                await fs.copyFile(filePath, targetPath);
                extractedFiles.push(relativePath);
            }
            catch (error) {
                console.warn(`Warning: Could not copy ${relativePath}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
    }
    /**
     * Recursively scan all files in a directory
     * @param dir - Directory to scan
     * @returns Array of absolute file paths
     */
    async scanAllFiles(dir) {
        const files = [];
        async function scan(currentDir) {
            const items = await fs.readdir(currentDir);
            for (const item of items) {
                if (item === ".git")
                    continue; // Always skip .git
                const fullPath = path.join(currentDir, item);
                const stat = await fs.stat(fullPath);
                if (stat.isDirectory()) {
                    await scan(fullPath);
                }
                else if (stat.isFile()) {
                    files.push(fullPath);
                }
            }
        }
        await scan(dir);
        return files;
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Content loading and processing exports.
 *
 * Barrel module re-exporting the public content-layer API: the abstract
 * ContentLoader base class, its concrete loaders, and supporting utilities.
 */
export { ContentLoader } from "./loader.js";
export { GitRepoLoader } from "./git-repo-loader.js";
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
export { ContentProcessor } from "./content-processor.js";
export { MetadataManager } from "./metadata-manager.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Content loading and processing exports.
 *
 * Barrel module re-exporting the public content-layer API: the abstract
 * ContentLoader base class, its concrete loaders, and supporting utilities.
 */
export { ContentLoader } from "./loader.js";
export { GitRepoLoader } from "./git-repo-loader.js";
export { DocumentationSiteLoader } from "./documentation-site-loader.js";
export { ApiDocumentationLoader } from "./api-documentation-loader.js";
export { ContentProcessor } from "./content-processor.js";
export { MetadataManager } from "./metadata-manager.js";
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract base class for content loaders
|
|
3
|
+
*/
|
|
4
|
+
import type { WebSourceConfig } from "../types.js";
|
|
5
|
+
/**
 * Result of a content loading operation.
 * On failure, `success` is false, `files` is empty, `contentHash` is an
 * empty string, and `error` carries the reason.
 */
export interface LoadResult {
    /** Whether the operation was successful */
    success: boolean;
    /** Files that were created or updated */
    files: string[];
    /** Hash of the loaded content for change detection */
    contentHash: string;
    /** Error message if success is false */
    error?: string;
}
|
|
18
|
+
/**
 * Abstract base class for loading content from different web sources.
 * Concrete loaders implement one source type each (e.g. Git repositories,
 * documentation sites) and are selected via canHandle().
 */
export declare abstract class ContentLoader {
    /**
     * Load content from a web source to the specified target directory
     * @param _webSource - Configuration for the web source
     * @param _targetPath - Local directory to store the content
     * @returns Promise with load result
     */
    abstract load(_webSource: WebSourceConfig, _targetPath: string): Promise<LoadResult>;
    /**
     * Check if this loader can handle the given web source type
     * @param _webSource - Web source configuration
     * @returns True if this loader can handle the source type
     */
    abstract canHandle(_webSource: WebSourceConfig): boolean;
    /**
     * Validate the web source configuration for this loader
     * @param _webSource - Web source configuration
     * @returns True if configuration is valid, error message if not
     */
    abstract validateConfig(_webSource: WebSourceConfig): true | string;
    /**
     * Get a unique identifier for the content (used for change detection)
     * @param _webSource - Web source configuration
     * @returns Promise with content identifier
     */
    abstract getContentId(_webSource: WebSourceConfig): Promise<string>;
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata manager for tracking web source downloads
|
|
3
|
+
*/
|
|
4
|
+
import { DocsetMetadata, WebSourceMetadata } from "../types.js";
|
|
5
|
+
/**
 * Manager for docset metadata files.
 * Handles reading, writing, and per-source updates of the metadata stored
 * alongside each docset directory.
 */
export declare class MetadataManager {
    /**
     * Load metadata from a docset directory
     * @param docsetPath - Path to the docset directory
     * @returns Metadata object or null if not found
     */
    loadMetadata(docsetPath: string): Promise<DocsetMetadata | null>;
    /**
     * Save metadata to a docset directory
     * @param docsetPath - Path to the docset directory
     * @param metadata - Metadata to save
     */
    saveMetadata(docsetPath: string, metadata: DocsetMetadata): Promise<void>;
    /**
     * Create initial metadata for a docset
     * @param docsetId - ID of the docset
     * @returns Initial metadata structure
     */
    createInitialMetadata(docsetId: string): DocsetMetadata;
    /**
     * Update metadata for a specific web source
     * @param metadata - Current metadata
     * @param sourceUrl - URL of the web source
     * @param sourceMetadata - New metadata for the source
     * @returns Updated metadata
     */
    updateSourceMetadata(
        metadata: DocsetMetadata,
        sourceUrl: string,
        sourceMetadata: Partial<WebSourceMetadata>,
    ): DocsetMetadata;
    /**
     * Get metadata for a specific web source
     * @param metadata - Metadata to search
     * @param sourceUrl - URL of the web source
     * @returns Source metadata or null if not found
     */
    getSourceMetadata(
        metadata: DocsetMetadata,
        sourceUrl: string,
    ): WebSourceMetadata | null;
    /**
     * Check if metadata file exists
     * @param docsetPath - Path to the docset directory
     * @returns True if metadata file exists
     */
    metadataExists(docsetPath: string): Promise<boolean>;
    /**
     * Remove metadata for a specific web source
     * @param metadata - Current metadata
     * @param sourceUrl - URL of the web source to remove
     * @returns Updated metadata
     */
    removeSourceMetadata(
        metadata: DocsetMetadata,
        sourceUrl: string,
    ): DocsetMetadata;
}
|