agentic-knowledge-mcp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/LICENSE +674 -0
  2. package/README.md +530 -0
  3. package/package.json +94 -0
  4. package/packages/cli/dist/cli.d.ts +5 -0
  5. package/packages/cli/dist/cli.js +21 -0
  6. package/packages/cli/dist/commands/create.d.ts +5 -0
  7. package/packages/cli/dist/commands/create.js +90 -0
  8. package/packages/cli/dist/commands/init.d.ts +5 -0
  9. package/packages/cli/dist/commands/init.js +182 -0
  10. package/packages/cli/dist/commands/refresh.d.ts +5 -0
  11. package/packages/cli/dist/commands/refresh.js +322 -0
  12. package/packages/cli/dist/commands/status.d.ts +5 -0
  13. package/packages/cli/dist/commands/status.js +268 -0
  14. package/packages/cli/dist/index.d.ts +6 -0
  15. package/packages/cli/dist/index.js +6 -0
  16. package/packages/cli/package.json +57 -0
  17. package/packages/content-loader/dist/__tests__/debug-filtering.d.ts +1 -0
  18. package/packages/content-loader/dist/__tests__/debug-filtering.js +17 -0
  19. package/packages/content-loader/dist/__tests__/test-filtering.d.ts +1 -0
  20. package/packages/content-loader/dist/__tests__/test-filtering.js +19 -0
  21. package/packages/content-loader/dist/content/api-documentation-loader.d.ts +26 -0
  22. package/packages/content-loader/dist/content/api-documentation-loader.js +45 -0
  23. package/packages/content-loader/dist/content/content-processor.d.ts +44 -0
  24. package/packages/content-loader/dist/content/content-processor.js +86 -0
  25. package/packages/content-loader/dist/content/documentation-site-loader.d.ts +26 -0
  26. package/packages/content-loader/dist/content/documentation-site-loader.js +45 -0
  27. package/packages/content-loader/dist/content/git-repo-loader.d.ts +79 -0
  28. package/packages/content-loader/dist/content/git-repo-loader.js +368 -0
  29. package/packages/content-loader/dist/content/index.d.ts +9 -0
  30. package/packages/content-loader/dist/content/index.js +9 -0
  31. package/packages/content-loader/dist/content/loader.d.ts +47 -0
  32. package/packages/content-loader/dist/content/loader.js +8 -0
  33. package/packages/content-loader/dist/content/metadata-manager.d.ts +65 -0
  34. package/packages/content-loader/dist/content/metadata-manager.js +160 -0
  35. package/packages/content-loader/dist/index.d.ts +5 -0
  36. package/packages/content-loader/dist/index.js +5 -0
  37. package/packages/content-loader/dist/types.d.ts +127 -0
  38. package/packages/content-loader/dist/types.js +48 -0
  39. package/packages/content-loader/package.json +50 -0
  40. package/packages/core/dist/config/discovery.d.ts +15 -0
  41. package/packages/core/dist/config/discovery.js +65 -0
  42. package/packages/core/dist/config/loader.d.ts +22 -0
  43. package/packages/core/dist/config/loader.js +236 -0
  44. package/packages/core/dist/config/manager.d.ts +55 -0
  45. package/packages/core/dist/config/manager.js +180 -0
  46. package/packages/core/dist/content/api-documentation-loader.d.ts +26 -0
  47. package/packages/core/dist/content/api-documentation-loader.js +45 -0
  48. package/packages/core/dist/content/content-processor.d.ts +44 -0
  49. package/packages/core/dist/content/content-processor.js +81 -0
  50. package/packages/core/dist/content/documentation-site-loader.d.ts +26 -0
  51. package/packages/core/dist/content/documentation-site-loader.js +45 -0
  52. package/packages/core/dist/content/git-repo-loader.d.ts +54 -0
  53. package/packages/core/dist/content/git-repo-loader.js +264 -0
  54. package/packages/core/dist/content/index.d.ts +9 -0
  55. package/packages/core/dist/content/index.js +9 -0
  56. package/packages/core/dist/content/loader.d.ts +50 -0
  57. package/packages/core/dist/content/loader.js +7 -0
  58. package/packages/core/dist/content/metadata-manager.d.ts +65 -0
  59. package/packages/core/dist/content/metadata-manager.js +160 -0
  60. package/packages/core/dist/index.d.ts +12 -0
  61. package/packages/core/dist/index.js +30 -0
  62. package/packages/core/dist/paths/calculator.d.ts +46 -0
  63. package/packages/core/dist/paths/calculator.js +166 -0
  64. package/packages/core/dist/templates/processor.d.ts +40 -0
  65. package/packages/core/dist/templates/processor.js +111 -0
  66. package/packages/core/dist/types.d.ts +129 -0
  67. package/packages/core/dist/types.js +79 -0
  68. package/packages/core/package.json +50 -0
  69. package/packages/mcp-server/dist/bin.d.ts +5 -0
  70. package/packages/mcp-server/dist/bin.js +10 -0
  71. package/packages/mcp-server/dist/cli.d.ts +7 -0
  72. package/packages/mcp-server/dist/cli.js +17 -0
  73. package/packages/mcp-server/dist/index.d.ts +8 -0
  74. package/packages/mcp-server/dist/index.js +9 -0
  75. package/packages/mcp-server/dist/server.d.ts +35 -0
  76. package/packages/mcp-server/dist/server.js +244 -0
  77. package/packages/mcp-server/package.json +54 -0
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Status command - show status of web sources
3
+ */
4
+ import { Command } from "commander";
5
+ import chalk from "chalk";
6
+ import { promises as fs } from "node:fs";
7
+ import * as path from "node:path";
8
+ import {
9
+ findConfigPathSync,
10
+ loadConfigSync,
11
+ calculateLocalPath,
12
+ } from "@codemcp/knowledge-core";
13
+ export const statusCommand = new Command("status")
14
+ .description("Show status of web sources for docsets")
15
+ .option("-c, --config <path>", "Path to configuration file")
16
+ .option("-v, --verbose", "Show detailed status information", false)
17
+ .action(async (options) => {
18
+ try {
19
+ console.log(chalk.blue("📊 Agentic Knowledge Status\n"));
20
+ // Use the --config path when given; otherwise let findConfigPathSync locate one from the current working directory
21
+ const configPath = options.config || findConfigPathSync(process.cwd());
22
+ if (!configPath) {
23
+ throw new Error(
24
+ "No configuration file found. Run this command from a directory with .knowledge/config.yaml",
25
+ );
26
+ }
27
+ console.log(chalk.gray(`📄 Config: ${configPath}`));
28
+ const config = loadConfigSync(configPath);
29
+ // Keep only the docsets that declare at least one web source
30
+ const webDocsets = config.docsets.filter(
31
+ (d) => d.web_sources && d.web_sources.length > 0,
32
+ );
33
+ if (webDocsets.length === 0) {
34
+ console.log(chalk.yellow("\n⚠️ No docsets with web sources found."));
35
+ // Nothing to report on: list every configured docset for reference, then return
36
+ if (config.docsets.length > 0) {
37
+ console.log(chalk.gray("\nAvailable docsets (local only):"));
38
+ for (const docset of config.docsets) {
39
+ console.log(chalk.gray(` • ${docset.id} - ${docset.name}`));
40
+ }
41
+ }
42
+ return;
43
+ }
44
+ console.log(
45
+ chalk.green(
46
+ `\n✅ Found ${webDocsets.length} docset(s) with web sources\n`,
47
+ ),
48
+ );
49
+ // Collect the status of each docset, one at a time (sequential awaits)
50
+ const statuses = [];
51
+ for (const docset of webDocsets) {
52
+ const status = await getDocsetStatus(docset, configPath);
53
+ statuses.push(status);
54
+ }
55
+ // The per-docset summary is always printed
56
+ displaySummary(statuses);
57
+ // With --verbose, additionally print the detailed view of every docset
58
+ if (options.verbose) {
59
+ console.log(chalk.blue("\n📋 Detailed Status\n"));
60
+ for (const status of statuses) {
61
+ displayDetailedStatus(status);
62
+ }
63
+ }
64
+ } catch (error) {
65
+ console.error(chalk.red("❌ Failed to get status"));
66
+ console.error(
67
+ chalk.red(error instanceof Error ? error.message : String(error)),
68
+ );
69
+ process.exit(1);
70
+ }
71
+ });
72
+ async function getDocsetStatus(docset, configPath) {
73
+ try {
74
+ const localPath = calculateLocalPath(docset, configPath);
75
+ const metadataPath = path.join(localPath, ".agentic-metadata.json");
76
+ // Check if docset is initialized
77
+ let metadata = null;
78
+ try {
79
+ const metadataContent = await fs.readFile(metadataPath, "utf8");
80
+ metadata = JSON.parse(metadataContent);
81
+ } catch {
82
+ return {
83
+ docset,
84
+ initialized: false,
85
+ metadata: null,
86
+ sources: [],
87
+ };
88
+ }
89
+ // Load source metadata
90
+ const sources = [];
91
+ for (let i = 0; i < (docset.web_sources?.length || 0); i++) {
92
+ try {
93
+ const sourceMetadataPath = path.join(
94
+ localPath,
95
+ `.agentic-source-${i}.json`,
96
+ );
97
+ const sourceContent = await fs.readFile(sourceMetadataPath, "utf8");
98
+ const sourceMetadata = JSON.parse(sourceContent);
99
+ sources.push(sourceMetadata);
100
+ } catch {
101
+ // Source metadata missing - this might indicate an issue
102
+ }
103
+ }
104
+ return {
105
+ docset,
106
+ initialized: true,
107
+ metadata,
108
+ sources,
109
+ };
110
+ } catch (error) {
111
+ return {
112
+ docset,
113
+ initialized: false,
114
+ metadata: null,
115
+ sources: [],
116
+ error: error instanceof Error ? error.message : String(error),
117
+ };
118
+ }
119
+ }
120
+ function displaySummary(statuses) {
121
+ console.log(chalk.blue("📈 Summary"));
122
+ console.log("─".repeat(50));
123
+ for (const status of statuses) {
124
+ const { docset, initialized, metadata, sources, error } = status;
125
+ if (error) {
126
+ console.log(
127
+ `${chalk.red("❌")} ${chalk.bold(docset.id)} - ${chalk.red("Error: " + error)}`,
128
+ );
129
+ continue;
130
+ }
131
+ if (!initialized) {
132
+ console.log(
133
+ `${chalk.yellow("⚠️")} ${chalk.bold(docset.id)} - ${chalk.yellow("Not initialized")}`,
134
+ );
135
+ console.log(
136
+ chalk.gray(
137
+ ` ${docset.web_sources?.length || 0} web source(s) configured`,
138
+ ),
139
+ );
140
+ continue;
141
+ }
142
+ if (!metadata) {
143
+ console.log(
144
+ `${chalk.red("❌")} ${chalk.bold(docset.id)} - ${chalk.red("Metadata corrupted")}`,
145
+ );
146
+ continue;
147
+ }
148
+ // Calculate status
149
+ const lastActivity = metadata.last_refreshed || metadata.initialized_at;
150
+ const lastActivityTime = new Date(lastActivity);
151
+ const timeSince = Date.now() - lastActivityTime.getTime();
152
+ const hoursSince = timeSince / (1000 * 60 * 60);
153
+ const daysSince = timeSince / (1000 * 60 * 60 * 24);
154
+ let timeDisplay;
155
+ let statusIcon;
156
+ if (hoursSince < 1) {
157
+ timeDisplay = `${Math.round(hoursSince * 60)} minutes ago`;
158
+ statusIcon = chalk.green("✅");
159
+ } else if (hoursSince < 24) {
160
+ timeDisplay = `${Math.round(hoursSince)} hours ago`;
161
+ statusIcon = chalk.green("✅");
162
+ } else if (daysSince < 7) {
163
+ timeDisplay = `${Math.round(daysSince)} days ago`;
164
+ statusIcon = chalk.yellow("⚠️");
165
+ } else {
166
+ timeDisplay = `${Math.round(daysSince)} days ago`;
167
+ statusIcon = chalk.red("🔄");
168
+ }
169
+ console.log(
170
+ `${statusIcon} ${chalk.bold(docset.id)} - ${chalk.gray(metadata.total_files)} files`,
171
+ );
172
+ console.log(
173
+ chalk.gray(
174
+ ` Last updated: ${timeDisplay} | ${sources.length}/${metadata.web_sources_count} sources loaded`,
175
+ ),
176
+ );
177
+ }
178
+ }
179
+ function displayDetailedStatus(status) {
180
+ const { docset, initialized, metadata, sources, error } = status;
181
+ console.log(chalk.bold(`🔸 ${docset.id} (${docset.name})`));
182
+ console.log("─".repeat(40));
183
+ if (error) {
184
+ console.log(chalk.red(`❌ Error: ${error}`));
185
+ console.log();
186
+ return;
187
+ }
188
+ if (!initialized) {
189
+ console.log(chalk.yellow("⚠️ Status: Not initialized"));
190
+ console.log(
191
+ chalk.gray(`📝 Description: ${docset.description || "No description"}`),
192
+ );
193
+ console.log(
194
+ chalk.gray(
195
+ `🔗 Web sources configured: ${docset.web_sources?.length || 0}`,
196
+ ),
197
+ );
198
+ if (docset.web_sources && docset.web_sources.length > 0) {
199
+ console.log(chalk.gray(" Sources:"));
200
+ for (const [i, source] of docset.web_sources.entries()) {
201
+ console.log(
202
+ chalk.gray(` ${i + 1}. ${source.url} (${source.type})`),
203
+ );
204
+ }
205
+ console.log(
206
+ chalk.blue(
207
+ "\n 💡 Run 'agentic-knowledge init " +
208
+ docset.id +
209
+ "' to initialize",
210
+ ),
211
+ );
212
+ }
213
+ console.log();
214
+ return;
215
+ }
216
+ if (!metadata) {
217
+ console.log(chalk.red("❌ Status: Metadata corrupted"));
218
+ console.log();
219
+ return;
220
+ }
221
+ // Initialized with readable metadata: print the description and the counts stored in the metadata
222
+ console.log(chalk.green("✅ Status: Initialized"));
223
+ console.log(
224
+ chalk.gray(`📝 Description: ${docset.description || "No description"}`),
225
+ );
226
+ console.log(chalk.gray(`📄 Total files: ${metadata.total_files}`));
227
+ console.log(chalk.gray(`🔗 Web sources: ${metadata.web_sources_count}`));
228
+ // Timestamps are rendered with toLocaleString(); last_refreshed is only shown when present
229
+ const initTime = new Date(metadata.initialized_at);
230
+ const lastRefresh = metadata.last_refreshed
231
+ ? new Date(metadata.last_refreshed)
232
+ : null;
233
+ console.log(chalk.gray(`📅 Initialized: ${initTime.toLocaleString()}`));
234
+ if (lastRefresh) {
235
+ console.log(
236
+ chalk.gray(`🔄 Last refreshed: ${lastRefresh.toLocaleString()}`),
237
+ );
238
+ }
239
+ // One line per source metadata entry that was found, plus its commit prefix when recorded
240
+ if (sources.length > 0) {
241
+ console.log(chalk.gray("\n🔗 Sources:"));
242
+ for (const [i, source] of sources.entries()) {
243
+ const downloadTime = new Date(source.downloaded_at);
244
+ console.log(
245
+ chalk.gray(
246
+ ` ${i + 1}. ${source.source_url} (${source.files_count} files, ${downloadTime.toLocaleString()})`,
247
+ ),
248
+ );
249
+ if (source.last_commit) {
250
+ console.log(
251
+ chalk.gray(
252
+ ` Last commit: ${source.last_commit.substring(0, 8)}`,
253
+ ),
254
+ );
255
+ }
256
+ }
257
+ }
258
+ // Configured web sources without a loaded metadata entry are reported as missing
259
+ const missingSources = (docset.web_sources?.length || 0) - sources.length;
260
+ if (missingSources > 0) {
261
+ console.log(
262
+ chalk.yellow(
263
+ `⚠️ ${missingSources} source(s) missing metadata - run refresh`,
264
+ ),
265
+ );
266
+ }
267
+ console.log();
268
+ }
@@ -0,0 +1,6 @@
1
+ /**
2
+ * CLI exports for agentic-knowledge
3
+ */
4
+ export * from "./commands/init.js";
5
+ export * from "./commands/refresh.js";
6
+ export * from "./commands/status.js";
@@ -0,0 +1,6 @@
1
+ /**
2
+ * CLI exports for agentic-knowledge
3
+ */
4
+ export * from "./commands/init.js";
5
+ export * from "./commands/refresh.js";
6
+ export * from "./commands/status.js";
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "@codemcp/knowledge-cli",
3
+ "version": "0.0.1",
4
+ "description": "Command-line interface for agentic knowledge web content management",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "bin": {
9
+ "agentic-knowledge": "./dist/cli.js"
10
+ },
11
+ "exports": {
12
+ ".": {
13
+ "import": "./dist/index.js",
14
+ "types": "./dist/index.d.ts"
15
+ }
16
+ },
17
+ "files": [
18
+ "dist"
19
+ ],
20
+ "scripts": {
21
+ "build": "tsc -p tsconfig.build.json",
22
+ "build:watch": "tsc -p tsconfig.build.json --watch",
23
+ "clean": "rimraf dist",
24
+ "dev": "tsc -p tsconfig.build.json --watch",
25
+ "lint": "oxlint && eslint .",
26
+ "lint:fix": "oxlint --fix && eslint . --fix",
27
+ "format:check": "prettier --check .",
28
+ "format:fix": "prettier --write .",
29
+ "test": "vitest run",
30
+ "test:watch": "vitest",
31
+ "test:coverage": "vitest run --coverage",
32
+ "typecheck": "tsc --noEmit"
33
+ },
34
+ "dependencies": {
35
+ "@codemcp/knowledge-core": "workspace:*",
36
+ "@codemcp/knowledge-content-loader": "workspace:*",
37
+ "commander": "^12.0.0",
38
+ "chalk": "^5.3.0",
39
+ "ora": "^8.0.1"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^24.3.0",
43
+ "rimraf": "^6.0.1",
44
+ "typescript": "^5.9.2",
45
+ "vitest": "^3.2.4"
46
+ },
47
+ "keywords": [
48
+ "agentic",
49
+ "knowledge",
50
+ "cli",
51
+ "web-content",
52
+ "documentation",
53
+ "git"
54
+ ],
55
+ "author": "Oliver Jägle <github@beimir.net>",
56
+ "license": "MIT"
57
+ }
@@ -0,0 +1,17 @@
1
+ import { GitRepoLoader } from "../content/git-repo-loader.js";
2
+ import * as path from "node:path";
3
+ const loader = new GitRepoLoader();
4
+ const testFiles = [
5
+ ".github/commit-convention.md",
6
+ "docs/getting-started.md",
7
+ "src/index.js",
8
+ ];
9
+ console.log("Testing directory exclusion logic:");
10
+ for (const file of testFiles) {
11
+ const directory = path.dirname(file);
12
+ console.log(`File: ${file}`);
13
+ console.log(`Directory: "${directory}"`);
14
+ console.log(`Includes .github: ${directory.includes(".github")}`);
15
+ console.log(`Result: ${loader.isDocumentationFile(file)}`);
16
+ console.log("---");
17
+ }
@@ -0,0 +1,19 @@
1
+ // Manual smoke check: prints ✅ or ❌ per sample path based on GitRepoLoader.isDocumentationFile; trailing comments state the expected outcome
2
+ import { GitRepoLoader } from "../content/git-repo-loader.js";
3
+ const loader = new GitRepoLoader();
4
+ const testFiles = [
5
+ "README.md", // Should include: README anywhere
6
+ "docs/getting-started.md", // Should include: .md extension
7
+ "docs/.postcssrc.json", // Should exclude: hidden file
8
+ "docs/.vitepress/config.ts", // Should exclude: .ts extension (not in examples)
9
+ "docs/.vitepress/theme/index.ts", // Should exclude: .ts extension + hidden directory
10
+ "docs/.vitepress/theme/styles.css", // Should exclude: .css extension
11
+ "examples/config.json", // Should include: in examples directory
12
+ "examples/demo.js", // Should include: in examples directory
13
+ "src/main.ts", // Should exclude: in src directory
14
+ ];
15
+ console.log("Testing filtering logic:");
16
+ for (const file of testFiles) {
17
+ const result = loader.isDocumentationFile(file);
18
+ console.log(`${result ? "✅" : "❌"} ${file}`);
19
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * API documentation content loader (STUB - not implemented yet)
3
+ */
4
+ import { ContentLoader, type LoadResult } from "./loader.js";
5
+ import { WebSourceConfig } from "../types.js";
6
+ /**
7
+ * Content loader for API documentation (STUB IMPLEMENTATION)
8
+ */
9
+ export declare class ApiDocumentationLoader extends ContentLoader {
10
+ /**
11
+ * Check if this loader can handle the given web source type
12
+ */
13
+ canHandle(webSource: WebSourceConfig): boolean;
14
+ /**
15
+ * Validate the web source configuration; returns true when valid, otherwise an error message
16
+ */
17
+ validateConfig(webSource: WebSourceConfig): true | string;
18
+ /**
19
+ * Load content from API documentation (NOT IMPLEMENTED - the returned promise always rejects)
20
+ */
21
+ load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
22
+ /**
23
+ * Get content identifier (NOT IMPLEMENTED - the returned promise always rejects)
24
+ */
25
+ getContentId(webSource: WebSourceConfig): Promise<string>;
26
+ }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * API documentation content loader (STUB - not implemented yet)
3
+ */
4
+ import { ContentLoader } from "./loader.js";
5
+ import { WebSourceType, WebSourceError, WebSourceErrorType } from "../types.js";
6
+ /**
7
+ * Content loader for API documentation (STUB IMPLEMENTATION)
8
+ */
9
+ export class ApiDocumentationLoader extends ContentLoader {
10
+ /**
11
+ * True only for web sources whose type is WebSourceType.API_DOCUMENTATION
12
+ */
13
+ canHandle(webSource) {
14
+ return webSource.type === WebSourceType.API_DOCUMENTATION;
15
+ }
16
+ /**
17
+ * Validate the web source configuration: a URL is required; returns true or an error message
18
+ */
19
+ validateConfig(webSource) {
20
+ if (!webSource.url) {
21
+ return "API documentation URL is required";
22
+ }
23
+ return true;
24
+ }
25
+ /**
26
+ * Load content from API documentation (NOT IMPLEMENTED): always rejects with a NOT_IMPLEMENTED WebSourceError
27
+ */
28
+ async load(webSource, targetPath) {
29
+ throw new WebSourceError(
30
+ WebSourceErrorType.NOT_IMPLEMENTED,
31
+ "API documentation loading is not yet implemented. Use git_repo type for repositories with API documentation.",
32
+ { webSource: webSource.url, targetPath },
33
+ );
34
+ }
35
+ /**
36
+ * Get content identifier (NOT IMPLEMENTED): always rejects with a NOT_IMPLEMENTED WebSourceError
37
+ */
38
+ async getContentId(webSource) {
39
+ throw new WebSourceError(
40
+ WebSourceErrorType.NOT_IMPLEMENTED,
41
+ "API documentation content ID generation is not yet implemented.",
42
+ { webSource: webSource.url },
43
+ );
44
+ }
45
+ }
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Content processor for converting and preparing content
3
+ */
4
+ /**
5
+ * Options for content processing
6
+ */
7
+ export interface ProcessingOptions {
8
+ /** When true, markdown files (.md/.mdx) get YAML frontmatter prepended */
9
+ addFrontmatter?: boolean;
10
+ /** Source URL written to the frontmatter as source_url */
11
+ sourceUrl?: string;
12
+ /** Extra key/value pairs merged into the frontmatter */
13
+ metadata?: Record<string, unknown>;
14
+ }
15
+ /**
16
+ * Content processor for converting and preparing documentation content
17
+ */
18
+ export declare class ContentProcessor {
19
+ /**
20
+ * Process a file in place: markdown (.md/.mdx) files get frontmatter when options.addFrontmatter is set; other files are left as-is
21
+ * @param filePath - Path to the file to process
22
+ * @param options - Processing options
23
+ */
24
+ processFile(filePath: string, options?: ProcessingOptions): Promise<void>;
25
+ /**
26
+ * Add frontmatter to markdown content (content that already starts with frontmatter is returned unchanged)
27
+ * @param content - Original markdown content
28
+ * @param options - Processing options with metadata
29
+ * @returns Content with frontmatter added
30
+ */
31
+ private addFrontmatter;
32
+ /**
33
+ * Simple object to YAML conversion for frontmatter
34
+ * @param obj - Object to convert
35
+ * @returns YAML string
36
+ */
37
+ private objectToYaml;
38
+ /**
39
+ * Check if a file should be processed based on its extension (.md, .mdx, .txt, .rst, .adoc, .asciidoc)
40
+ * @param filePath - Path to check
41
+ * @returns True if file should be processed
42
+ */
43
+ shouldProcess(filePath: string): boolean;
44
+ }
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Content processor for converting and preparing content
3
+ */
4
+ import { promises as fs } from "node:fs";
5
+ import * as path from "node:path";
6
+ /**
7
+ * Content processor for converting and preparing documentation content
8
+ */
9
+ export class ContentProcessor {
10
+ /**
11
+ * Process a file, optionally adding frontmatter metadata
12
+ * @param filePath - Path to the file to process
13
+ * @param options - Processing options
14
+ */
15
+ async processFile(filePath, options = {}) {
16
+ const content = await fs.readFile(filePath, "utf-8");
17
+ const extension = path.extname(filePath).toLowerCase();
18
+ // For markdown files, optionally add frontmatter
19
+ if (extension === ".md" || extension === ".mdx") {
20
+ if (options.addFrontmatter) {
21
+ const processedContent = this.addFrontmatter(content, options);
22
+ await fs.writeFile(filePath, processedContent, "utf-8");
23
+ }
24
+ }
25
+ // For other file types, we keep them as-is for now
26
+ // Future: HTML to Markdown conversion would go here
27
+ }
28
+ /**
29
+ * Add frontmatter to markdown content
30
+ * @param content - Original markdown content
31
+ * @param options - Processing options with metadata
32
+ * @returns Content with frontmatter added
33
+ */
34
+ addFrontmatter(content, options) {
35
+ // Check if frontmatter already exists
36
+ if (content.startsWith("---\n")) {
37
+ return content; // Don't modify existing frontmatter
38
+ }
39
+ const frontmatter = {};
40
+ if (options.sourceUrl) {
41
+ frontmatter["source_url"] = options.sourceUrl;
42
+ }
43
+ if (options.metadata) {
44
+ Object.assign(frontmatter, options.metadata);
45
+ }
46
+ // Add processed timestamp
47
+ frontmatter["processed_at"] = new Date().toISOString();
48
+ // Convert to YAML frontmatter
49
+ const yamlFrontmatter = this.objectToYaml(frontmatter);
50
+ return `---\n${yamlFrontmatter}---\n\n${content}`;
51
+ }
52
+ /**
53
+ * Simple object to YAML conversion for frontmatter
54
+ * @param obj - Object to convert
55
+ * @returns YAML string
56
+ */
57
+ objectToYaml(obj) {
58
+ return (Object.entries(obj)
59
+ .map(([key, value]) => {
60
+ if (typeof value === "string") {
61
+ // Quote strings that contain special characters
62
+ const needsQuotes = /[:\n\r\t"']/.test(value);
63
+ return `${key}: ${needsQuotes ? `"${value.replace(/"/g, '\\"')}"` : value}`;
64
+ }
65
+ return `${key}: ${value}`;
66
+ })
67
+ .join("\n") + "\n");
68
+ }
69
+ /**
70
+ * Check if a file should be processed based on its extension
71
+ * @param filePath - Path to check
72
+ * @returns True if file should be processed
73
+ */
74
+ shouldProcess(filePath) {
75
+ const extension = path.extname(filePath).toLowerCase();
76
+ const processableExtensions = [
77
+ ".md",
78
+ ".mdx",
79
+ ".txt",
80
+ ".rst",
81
+ ".adoc",
82
+ ".asciidoc",
83
+ ];
84
+ return processableExtensions.includes(extension);
85
+ }
86
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Documentation site content loader (STUB - not implemented yet)
3
+ */
4
+ import { ContentLoader, type LoadResult } from "./loader.js";
5
+ import { WebSourceConfig } from "../types.js";
6
+ /**
7
+ * Content loader for documentation websites (STUB IMPLEMENTATION)
8
+ */
9
+ export declare class DocumentationSiteLoader extends ContentLoader {
10
+ /**
11
+ * Check if this loader can handle the given web source type
12
+ */
13
+ canHandle(webSource: WebSourceConfig): boolean;
14
+ /**
15
+ * Validate the web source configuration; returns true when valid, otherwise an error message
16
+ */
17
+ validateConfig(webSource: WebSourceConfig): true | string;
18
+ /**
19
+ * Load content from a documentation site (NOT IMPLEMENTED - the returned promise always rejects)
20
+ */
21
+ load(webSource: WebSourceConfig, targetPath: string): Promise<LoadResult>;
22
+ /**
23
+ * Get content identifier (NOT IMPLEMENTED - the returned promise always rejects)
24
+ */
25
+ getContentId(webSource: WebSourceConfig): Promise<string>;
26
+ }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Documentation site content loader (STUB - not implemented yet)
3
+ */
4
+ import { ContentLoader } from "./loader.js";
5
+ import { WebSourceType, WebSourceError, WebSourceErrorType } from "../types.js";
6
+ /**
7
+ * Content loader for documentation websites (STUB IMPLEMENTATION)
8
+ */
9
+ export class DocumentationSiteLoader extends ContentLoader {
10
+ /**
11
+ * True only for web sources whose type is WebSourceType.DOCUMENTATION_SITE
12
+ */
13
+ canHandle(webSource) {
14
+ return webSource.type === WebSourceType.DOCUMENTATION_SITE;
15
+ }
16
+ /**
17
+ * Validate the web source configuration: a URL is required; returns true or an error message
18
+ */
19
+ validateConfig(webSource) {
20
+ if (!webSource.url) {
21
+ return "Documentation site URL is required";
22
+ }
23
+ return true;
24
+ }
25
+ /**
26
+ * Load content from a documentation site (NOT IMPLEMENTED): always rejects with a NOT_IMPLEMENTED WebSourceError
27
+ */
28
+ async load(webSource, targetPath) {
29
+ throw new WebSourceError(
30
+ WebSourceErrorType.NOT_IMPLEMENTED,
31
+ "Documentation site loading is not yet implemented. Use git_repo type for repositories with documentation.",
32
+ { webSource: webSource.url, targetPath },
33
+ );
34
+ }
35
+ /**
36
+ * Get content identifier (NOT IMPLEMENTED): always rejects with a NOT_IMPLEMENTED WebSourceError
37
+ */
38
+ async getContentId(webSource) {
39
+ throw new WebSourceError(
40
+ WebSourceErrorType.NOT_IMPLEMENTED,
41
+ "Documentation site content ID generation is not yet implemented.",
42
+ { webSource: webSource.url },
43
+ );
44
+ }
45
+ }