agentic-knowledge-mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/README.md +530 -0
- package/package.json +94 -0
- package/packages/cli/dist/cli.d.ts +5 -0
- package/packages/cli/dist/cli.js +21 -0
- package/packages/cli/dist/commands/create.d.ts +5 -0
- package/packages/cli/dist/commands/create.js +90 -0
- package/packages/cli/dist/commands/init.d.ts +5 -0
- package/packages/cli/dist/commands/init.js +182 -0
- package/packages/cli/dist/commands/refresh.d.ts +5 -0
- package/packages/cli/dist/commands/refresh.js +322 -0
- package/packages/cli/dist/commands/status.d.ts +5 -0
- package/packages/cli/dist/commands/status.js +268 -0
- package/packages/cli/dist/index.d.ts +6 -0
- package/packages/cli/dist/index.js +6 -0
- package/packages/cli/package.json +57 -0
- package/packages/content-loader/dist/__tests__/debug-filtering.d.ts +1 -0
- package/packages/content-loader/dist/__tests__/debug-filtering.js +17 -0
- package/packages/content-loader/dist/__tests__/test-filtering.d.ts +1 -0
- package/packages/content-loader/dist/__tests__/test-filtering.js +19 -0
- package/packages/content-loader/dist/content/api-documentation-loader.d.ts +26 -0
- package/packages/content-loader/dist/content/api-documentation-loader.js +45 -0
- package/packages/content-loader/dist/content/content-processor.d.ts +44 -0
- package/packages/content-loader/dist/content/content-processor.js +86 -0
- package/packages/content-loader/dist/content/documentation-site-loader.d.ts +26 -0
- package/packages/content-loader/dist/content/documentation-site-loader.js +45 -0
- package/packages/content-loader/dist/content/git-repo-loader.d.ts +79 -0
- package/packages/content-loader/dist/content/git-repo-loader.js +368 -0
- package/packages/content-loader/dist/content/index.d.ts +9 -0
- package/packages/content-loader/dist/content/index.js +9 -0
- package/packages/content-loader/dist/content/loader.d.ts +47 -0
- package/packages/content-loader/dist/content/loader.js +8 -0
- package/packages/content-loader/dist/content/metadata-manager.d.ts +65 -0
- package/packages/content-loader/dist/content/metadata-manager.js +160 -0
- package/packages/content-loader/dist/index.d.ts +5 -0
- package/packages/content-loader/dist/index.js +5 -0
- package/packages/content-loader/dist/types.d.ts +127 -0
- package/packages/content-loader/dist/types.js +48 -0
- package/packages/content-loader/package.json +50 -0
- package/packages/core/dist/config/discovery.d.ts +15 -0
- package/packages/core/dist/config/discovery.js +65 -0
- package/packages/core/dist/config/loader.d.ts +22 -0
- package/packages/core/dist/config/loader.js +236 -0
- package/packages/core/dist/config/manager.d.ts +55 -0
- package/packages/core/dist/config/manager.js +180 -0
- package/packages/core/dist/content/api-documentation-loader.d.ts +26 -0
- package/packages/core/dist/content/api-documentation-loader.js +45 -0
- package/packages/core/dist/content/content-processor.d.ts +44 -0
- package/packages/core/dist/content/content-processor.js +81 -0
- package/packages/core/dist/content/documentation-site-loader.d.ts +26 -0
- package/packages/core/dist/content/documentation-site-loader.js +45 -0
- package/packages/core/dist/content/git-repo-loader.d.ts +54 -0
- package/packages/core/dist/content/git-repo-loader.js +264 -0
- package/packages/core/dist/content/index.d.ts +9 -0
- package/packages/core/dist/content/index.js +9 -0
- package/packages/core/dist/content/loader.d.ts +50 -0
- package/packages/core/dist/content/loader.js +7 -0
- package/packages/core/dist/content/metadata-manager.d.ts +65 -0
- package/packages/core/dist/content/metadata-manager.js +160 -0
- package/packages/core/dist/index.d.ts +12 -0
- package/packages/core/dist/index.js +30 -0
- package/packages/core/dist/paths/calculator.d.ts +46 -0
- package/packages/core/dist/paths/calculator.js +166 -0
- package/packages/core/dist/templates/processor.d.ts +40 -0
- package/packages/core/dist/templates/processor.js +111 -0
- package/packages/core/dist/types.d.ts +129 -0
- package/packages/core/dist/types.js +79 -0
- package/packages/core/package.json +50 -0
- package/packages/mcp-server/dist/bin.d.ts +5 -0
- package/packages/mcp-server/dist/bin.js +10 -0
- package/packages/mcp-server/dist/cli.d.ts +7 -0
- package/packages/mcp-server/dist/cli.js +17 -0
- package/packages/mcp-server/dist/index.d.ts +8 -0
- package/packages/mcp-server/dist/index.js +9 -0
- package/packages/mcp-server/dist/server.d.ts +35 -0
- package/packages/mcp-server/dist/server.js +244 -0
- package/packages/mcp-server/package.json +54 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Create command - add new docsets using presets
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
import { promises as fs } from "node:fs";
|
|
7
|
+
import * as path from "node:path";
|
|
8
|
+
import { ConfigManager } from "@codemcp/knowledge-core";
|
|
9
|
+
/**
 * Create command - add new docsets using presets.
 *
 * Supported presets:
 *   git-repo     -> requires --url; stored as a web_sources entry
 *   local-folder -> requires --path; stored as local_path
 */
export const createCommand = new Command("create")
  .description("Create a new docset using presets")
  .requiredOption("--preset <type>", "Preset type: git-repo or local-folder")
  .requiredOption("--id <id>", "Unique docset ID")
  .requiredOption("--name <name>", "Human-readable docset name")
  .option("--description <desc>", "Docset description")
  .option("--url <url>", "Git repository URL (required for git-repo preset)")
  .option("--path <path>", "Local folder path (required for local-folder preset)")
  .option("--branch <branch>", "Git branch (default: main)", "main")
  .action(async (options) => {
    try {
      console.log(chalk.blue("🚀 Creating new docset..."));
      const configManager = new ConfigManager();
      const { config, configPath } = await configManager.loadConfig(process.cwd());
      // Reject duplicate IDs so an existing docset is never silently replaced.
      if (config.docsets.find(d => d.id === options.id)) {
        throw new Error(`Docset with ID '${options.id}' already exists`);
      }
      // Build the docset from the selected preset.
      let newDocset;
      if (options.preset === "git-repo") {
        newDocset = await createGitRepoDocset(options);
      }
      else if (options.preset === "local-folder") {
        newDocset = await createLocalFolderDocset(options);
      }
      else {
        throw new Error(`Unknown preset: ${options.preset}. Use 'git-repo' or 'local-folder'`);
      }
      // Add to config and persist it back to disk.
      config.docsets.push(newDocset);
      await configManager.saveConfig(config, configPath);
      console.log(chalk.green(`✅ Created docset '${options.id}' successfully`));
      console.log(chalk.gray(` Config saved to: ${configPath}`));
    }
    catch (error) {
      // Guard with instanceof like the sibling init/refresh commands do:
      // a non-Error throw would otherwise print "undefined" via error.message.
      console.error(
        chalk.red("❌ Error creating docset:"),
        error instanceof Error ? error.message : String(error),
      );
      process.exit(1);
    }
  });
|
|
48
|
+
/**
 * Build a docset entry for the "git-repo" preset.
 *
 * @param {{id: string, name: string, description?: string, url?: string, branch: string}} options
 *   Parsed CLI options; `url` is mandatory for this preset.
 * @returns {Promise<object>} Docset config object with a single git_repo web source.
 * @throws {Error} When --url is missing or not a recognizable git URL.
 */
async function createGitRepoDocset(options) {
  if (!options.url) {
    throw new Error("--url is required for git-repo preset");
  }
  // Accept http(s) and ssh-style (git@) URLs. The ".git" suffix is optional:
  // hosts like GitHub/GitLab accept clone URLs without it, so the previous
  // pattern (which required the suffix) rejected perfectly valid repos.
  if (!options.url.match(/^https?:\/\/\S+$|^git@\S+$/)) {
    throw new Error("Invalid git URL format. Expected an http(s) or git@ repository URL");
  }
  return {
    id: options.id,
    name: options.name,
    description: options.description || `Git repository: ${options.url}`,
    web_sources: [{
      url: options.url,
      type: "git_repo",
      options: {
        branch: options.branch
      }
    }]
  };
}
|
|
69
|
+
/**
 * Build a docset entry for the "local-folder" preset.
 *
 * @param {{id: string, name: string, description?: string, path?: string}} options
 *   Parsed CLI options; `path` is mandatory for this preset.
 * @returns {Promise<object>} Docset config object pointing at the local folder.
 * @throws {Error} When --path is missing, does not exist, or is not a directory.
 */
async function createLocalFolderDocset(options) {
  if (!options.path) {
    throw new Error("--path is required for local-folder preset");
  }
  const fullPath = path.resolve(options.path);
  // Validate path exists. fs.stat rejects (e.g. ENOENT) when nothing is there.
  let stat;
  try {
    stat = await fs.stat(fullPath);
  }
  catch (error) {
    throw new Error(`Path does not exist: ${options.path}`, { cause: error });
  }
  // Checked OUTSIDE the try block: the previous version caught its own
  // "not a directory" error and re-reported it as "does not exist",
  // which was misleading when the path was an existing file.
  if (!stat.isDirectory()) {
    throw new Error(`Path is not a directory: ${options.path}`);
  }
  return {
    id: options.id,
    name: options.name,
    description: options.description || `Local documentation: ${options.path}`,
    local_path: options.path
  };
}
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Initialize command - set up web sources for a docset using GitRepoLoader
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
import { promises as fs } from "node:fs";
|
|
7
|
+
import * as path from "node:path";
|
|
8
|
+
import {
|
|
9
|
+
ConfigManager,
|
|
10
|
+
calculateLocalPath,
|
|
11
|
+
ensureKnowledgeGitignoreSync,
|
|
12
|
+
} from "@codemcp/knowledge-core";
|
|
13
|
+
import {
|
|
14
|
+
GitRepoLoader,
|
|
15
|
+
WebSourceType,
|
|
16
|
+
} from "@codemcp/knowledge-content-loader";
|
|
17
|
+
// Init command: materializes a docset's web_sources into the local docsets
// directory via GitRepoLoader, then writes per-source and overall metadata
// files used later by the refresh command.
export const initCommand = new Command("init")
  .description("Initialize web sources for a docset from configuration")
  .argument("<docset-id>", "ID of the docset to initialize")
  .option("-c, --config <path>", "Path to configuration file")
  .option("--force", "Force re-initialization even if already exists", false)
  .action(async (docsetId, options) => {
    // NOTE(review): this banner reads "Integration Test" — looks like a
    // leftover from testing; confirm intended user-facing text.
    console.log(chalk.blue("🚀 Agentic Knowledge Integration Test"));
    try {
      // Use ConfigManager for all config operations
      const configManager = new ConfigManager();
      const { config, configPath } = await configManager.loadConfig(
        process.cwd(),
      );
      // Ensure .knowledge/.gitignore exists and contains docsets/ ignore rule
      ensureKnowledgeGitignoreSync(configPath);
      // Look up the requested docset; list available IDs on failure.
      const docset = config.docsets.find((d) => d.id === docsetId);
      if (!docset) {
        throw new Error(
          `Docset '${docsetId}' not found in configuration. Available: ${config.docsets.map((d) => d.id).join(", ")}`,
        );
      }
      if (!docset.web_sources || docset.web_sources.length === 0) {
        throw new Error(`Docset '${docsetId}' has no web sources configured`);
      }
      console.log(chalk.green(`✅ Found docset: ${docset.name}`));
      console.log(chalk.gray(`📝 Description: ${docset.description}`));
      console.log(chalk.gray(`🔗 Web sources: ${docset.web_sources.length}`));
      // Calculate the local path for this docset
      const localPath = calculateLocalPath(docset, configPath);
      console.log(chalk.yellow(`\n📁 Target directory: ${localPath}`));
      // Check if already exists
      let existsAlready = false;
      try {
        const stat = await fs.stat(localPath);
        if (stat.isDirectory()) {
          existsAlready = true;
        }
      } catch {
        // Directory doesn't exist, which is fine
      }
      // Without --force, an existing directory is left untouched; show a
      // short preview of its contents and bail out.
      if (existsAlready && !options.force) {
        console.log(
          chalk.yellow(
            "⚠️ Directory already exists. Use --force to overwrite.",
          ),
        );
        const files = await fs.readdir(localPath);
        console.log(
          chalk.gray(
            `Existing files: ${files.slice(0, 5).join(", ")}${files.length > 5 ? "..." : ""}`,
          ),
        );
        return;
      }
      // Create target directory
      await fs.mkdir(localPath, { recursive: true });
      let totalFiles = 0;
      const allDiscoveredPaths = [];
      // Process each web source
      for (const [index, webSource] of docset.web_sources.entries()) {
        console.log(
          chalk.yellow(
            `\n🔄 Loading source ${index + 1}/${docset.web_sources.length}: ${webSource.url}`,
          ),
        );
        if (webSource.type === "git_repo") {
          // Use GitRepoLoader for all Git operations (REQ-19)
          const loader = new GitRepoLoader();
          console.log(
            chalk.gray(` Using GitRepoLoader for smart content filtering`),
          );
          const webSourceConfig = {
            url: webSource.url,
            type: WebSourceType.GIT_REPO,
            options: webSource.options || {},
          };
          // Validate configuration
          // NOTE(review): validateConfig appears to return `true` on success
          // and an error description otherwise — confirm against the
          // content-loader API.
          const validation = loader.validateConfig(webSourceConfig);
          if (validation !== true) {
            throw new Error(
              `Invalid Git repository configuration: ${validation}`,
            );
          }
          // Load content using GitRepoLoader
          const result = await loader.load(webSourceConfig, localPath);
          if (!result.success) {
            throw new Error(`Git repository loading failed: ${result.error}`);
          }
          // Collect discovered paths for config update
          allDiscoveredPaths.push(...result.files);
          totalFiles += result.files.length;
          console.log(
            chalk.green(
              ` ✅ Copied ${result.files.length} files using smart filtering`,
            ),
          );
          // Create source metadata (one JSON file per web source; consumed
          // by the refresh command for change detection).
          const metadata = {
            source_url: webSource.url,
            source_type: webSource.type,
            downloaded_at: new Date().toISOString(),
            files_count: result.files.length,
            files: result.files,
            docset_id: docsetId,
            content_hash: result.contentHash,
          };
          await fs.writeFile(
            path.join(localPath, `.agentic-source-${index}.json`),
            JSON.stringify(metadata, null, 2),
          );
        } else {
          // Non-git source types are reported but do not abort the run.
          console.log(
            chalk.red(
              ` ❌ Web source type '${webSource.type}' not yet supported`,
            ),
          );
        }
      }
      // Create overall metadata
      const overallMetadata = {
        docset_id: docsetId,
        docset_name: docset.name,
        initialized_at: new Date().toISOString(),
        total_files: totalFiles,
        web_sources_count: docset.web_sources.length,
      };
      await fs.writeFile(
        path.join(localPath, ".agentic-metadata.json"),
        JSON.stringify(overallMetadata, null, 2),
      );
      // Update configuration with discovered paths (only if paths were discovered and force flag used)
      // NOTE(review): gating the config update on --force seems surprising
      // for a first-time init — confirm this is intentional.
      if (allDiscoveredPaths.length > 0 && options.force) {
        console.log(
          chalk.yellow(`\n📝 Updating configuration with discovered paths...`),
        );
        try {
          await configManager.updateDocsetPaths(docsetId, allDiscoveredPaths);
          console.log(
            chalk.green(
              ` ✅ Updated config with ${allDiscoveredPaths.length} discovered paths`,
            ),
          );
        } catch (configError) {
          // A failed config update is non-fatal; the content itself landed.
          console.log(
            chalk.yellow(
              ` ⚠️ Could not update config: ${configError instanceof Error ? configError.message : String(configError)}`,
            ),
          );
        }
      }
      console.log(
        chalk.green(`\n🎉 Successfully initialized docset '${docsetId}'`),
      );
      console.log(chalk.gray(`📁 Location: ${localPath}`));
      console.log(chalk.gray(`📄 Total files: ${totalFiles}`));
      console.log(
        chalk.gray(`🔗 Sources processed: ${docset.web_sources.length}`),
      );
    } catch (error) {
      console.error(chalk.red("\n❌ Error:"));
      console.error(
        chalk.red(error instanceof Error ? error.message : String(error)),
      );
      process.exit(1);
    }
  });
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Refresh command - update web sources for docsets
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from "commander";
import chalk from "chalk";
import ora from "ora";
import { promises as fs } from "node:fs";
import * as path from "node:path";
import { execSync, execFileSync } from "node:child_process";
import {
  findConfigPathSync,
  loadConfigSync,
  calculateLocalPath,
  ensureKnowledgeGitignoreSync,
} from "@codemcp/knowledge-core";
|
|
16
|
+
// Refresh command: re-downloads web sources for one docset (when an ID is
// given) or for every docset that has web sources configured.
export const refreshCommand = new Command("refresh")
  .description("Refresh web sources for docsets")
  .argument(
    "[docset-id]",
    "ID of specific docset to refresh (refresh all if not specified)",
  )
  .option("-c, --config <path>", "Path to configuration file")
  .option("-f, --force", "Force refresh even if content unchanged", false)
  .action(async (docsetId, options) => {
    console.log(chalk.blue("🔄 Agentic Knowledge Refresh"));
    try {
      // An explicit --config wins; otherwise discover from the working dir.
      const configPath = options.config || findConfigPathSync(process.cwd());
      if (!configPath) {
        throw new Error(
          "No configuration file found. Run this command from a directory with .knowledge/config.yaml",
        );
      }
      console.log(chalk.gray(`📄 Loading config: ${configPath}`));
      const config = loadConfigSync(configPath);
      // Ensure .knowledge/.gitignore exists
      ensureKnowledgeGitignoreSync(configPath);
      // Pick refresh targets: exactly the named docset, or every docset
      // that has at least one web source.
      const hasWebSources = (d) => d.web_sources && d.web_sources.length > 0;
      const targets = docsetId
        ? config.docsets.filter((d) => d.id === docsetId)
        : config.docsets.filter(hasWebSources);
      if (targets.length === 0) {
        if (!docsetId) {
          console.log(chalk.yellow("⚠️ No docsets with web sources found."));
          return;
        }
        const available =
          config.docsets
            .filter(hasWebSources)
            .map((d) => d.id)
            .join(", ") || "none";
        throw new Error(
          `Docset '${docsetId}' not found or has no web sources. Available docsets with web sources: ${available}`,
        );
      }
      console.log(
        chalk.green(
          `✅ Found ${targets.length} docset(s) to refresh: ${targets.map((d) => d.id).join(", ")}`,
        ),
      );
      // Refresh sequentially so the per-docset spinner output stays readable.
      for (const docset of targets) {
        await refreshDocset(docset, configPath, options.force);
      }
      console.log(chalk.green("\n🎉 All docsets refreshed successfully!"));
    } catch (error) {
      console.error(chalk.red("\n❌ Error:"));
      console.error(
        chalk.red(error instanceof Error ? error.message : String(error)),
      );
      process.exit(1);
    }
  });
|
|
77
|
+
/**
 * Refresh a single docset's web sources with a metadata backup/restore guard.
 *
 * Flow: read .agentic-metadata.json (skip if never initialized) -> skip if
 * refreshed within the last hour and not forced -> back up the metadata ->
 * refresh each git_repo source -> rewrite metadata and drop the backup.
 * On any failure the backup is restored and the error is rethrown.
 *
 * @param {object} docset - docset config entry (id, name, web_sources, ...).
 * @param {string} configPath - path to the loaded config file (anchors localPath).
 * @param {boolean} force - bypass the 1-hour freshness check and commit check.
 */
async function refreshDocset(docset, configPath, force) {
  const spinner = ora(`Refreshing ${docset.id}...`).start();
  try {
    const localPath = calculateLocalPath(docset, configPath);
    spinner.text = `Checking ${docset.id} metadata...`;
    // Check if docset has been initialized
    const metadataPath = path.join(localPath, ".agentic-metadata.json");
    let metadata = null;
    try {
      const metadataContent = await fs.readFile(metadataPath, "utf8");
      metadata = JSON.parse(metadataContent);
    } catch {
      // Missing/unreadable metadata means `init` never ran; warn, don't fail.
      spinner.warn(`${docset.id}: Not initialized, use 'init' command first`);
      return;
    }
    // Check if forced or if we should check for updates
    if (!force && metadata) {
      const lastRefresh = metadata.last_refreshed || metadata.initialized_at;
      const lastRefreshTime = new Date(lastRefresh);
      const timeSinceRefresh = Date.now() - lastRefreshTime.getTime();
      const hoursSince = timeSinceRefresh / (1000 * 60 * 60);
      // Throttle: anything refreshed less than an hour ago is skipped.
      if (hoursSince < 1) {
        spinner.succeed(
          `${docset.id}: Recently refreshed (${Math.round(hoursSince * 60)} minutes ago), skipping`,
        );
        return;
      }
    }
    spinner.text = `Refreshing ${docset.id} web sources...`;
    // Create backup of current metadata (restored in the catch block below).
    const backupPath = path.join(localPath, `.agentic-metadata.backup.json`);
    await fs.copyFile(metadataPath, backupPath);
    let totalFiles = 0;
    const refreshedSources = [];
    // Process each web source
    for (const [index, webSource] of (docset.web_sources || []).entries()) {
      spinner.text = `${docset.id}: Refreshing source ${index + 1}/${docset.web_sources.length}...`;
      if (webSource.type === "git_repo") {
        const sourceFiles = await refreshGitSource(
          webSource,
          localPath,
          index,
          docset.id,
          force,
        );
        totalFiles += sourceFiles.files_count;
        refreshedSources.push(sourceFiles);
      } else {
        // Unsupported source types are skipped, not fatal.
        console.log(
          chalk.yellow(
            ` ⚠️ Web source type '${webSource.type}' not yet supported, skipping`,
          ),
        );
      }
    }
    // Update metadata
    if (!metadata) {
      // Defensive: the early-return above guarantees metadata is set here.
      throw new Error("Metadata is null - this should not happen");
    }
    const updatedMetadata = {
      docset_id: metadata.docset_id,
      docset_name: metadata.docset_name,
      initialized_at: metadata.initialized_at,
      last_refreshed: new Date().toISOString(),
      total_files: totalFiles,
      web_sources_count: docset.web_sources?.length || 0,
    };
    await fs.writeFile(metadataPath, JSON.stringify(updatedMetadata, null, 2));
    // Remove backup if successful
    await fs.unlink(backupPath);
    spinner.succeed(
      `${docset.id}: Refreshed successfully (${totalFiles} files from ${refreshedSources.length} sources)`,
    );
  } catch (error) {
    spinner.fail(
      `${docset.id}: Failed to refresh - ${error instanceof Error ? error.message : String(error)}`,
    );
    // Try to restore from backup (paths recomputed because localPath was
    // scoped to the try block above).
    const backupPath = path.join(
      calculateLocalPath(docset, configPath),
      `.agentic-metadata.backup.json`,
    );
    try {
      const metadataPath = path.join(
        calculateLocalPath(docset, configPath),
        ".agentic-metadata.json",
      );
      await fs.copyFile(backupPath, metadataPath);
      await fs.unlink(backupPath);
      console.log(chalk.gray(` Restored metadata from backup`));
    } catch {
      // Backup restore failed, but don't throw
    }
    // Rethrow so the caller's error handling sees the original failure.
    throw error;
  }
}
|
|
173
|
+
/**
 * Refresh one git_repo web source: shallow-clone the repo, compare the HEAD
 * commit against the previously recorded one, and re-copy content when it
 * changed (or when forced).
 *
 * @param {object} webSource - web_sources entry ({ url, type, options }).
 * @param {string} localPath - docset target directory.
 * @param {number} index - position of this source in the docset's list
 *   (names the per-source metadata file).
 * @param {string} docsetId - owning docset ID, recorded in metadata.
 * @param {boolean} force - re-copy even when the commit hash is unchanged.
 * @returns {Promise<object>} Updated per-source metadata (has files_count).
 */
async function refreshGitSource(webSource, localPath, index, docsetId, force) {
  // Check existing source metadata
  const sourceMetadataPath = path.join(
    localPath,
    `.agentic-source-${index}.json`,
  );
  let existingSourceMetadata = null;
  try {
    const content = await fs.readFile(sourceMetadataPath, "utf8");
    existingSourceMetadata = JSON.parse(content);
  } catch {
    // No existing metadata, will do full refresh
  }
  // Create temp directory for cloning
  const tempDir = path.join(localPath, ".tmp", `git-refresh-${Date.now()}`);
  await fs.mkdir(tempDir, { recursive: true });
  try {
    const options = webSource.options || {};
    const branch = options.branch || "main";
    const paths = options.paths || [];
    // Clone repository. Uses execFileSync with an argument vector instead of
    // interpolating the config-supplied URL/branch into a shell string,
    // which was a command-injection risk.
    execFileSync(
      "git",
      ["clone", "--depth", "1", "--branch", branch, webSource.url, tempDir],
      {
        stdio: "pipe",
        timeout: 60000,
      },
    );
    // Get latest commit hash for change detection
    const latestCommit = execFileSync("git", ["rev-parse", "HEAD"], {
      cwd: tempDir,
      encoding: "utf8",
    }).trim();
    // Check if we need to update (compare with last known commit if available)
    if (!force && existingSourceMetadata) {
      const lastCommit = existingSourceMetadata.last_commit;
      if (lastCommit === latestCommit) {
        // No changes, update timestamp only
        const updatedMetadata = {
          ...existingSourceMetadata,
          downloaded_at: new Date().toISOString(),
        };
        await fs.writeFile(
          sourceMetadataPath,
          JSON.stringify(updatedMetadata, null, 2),
        );
        return updatedMetadata;
      }
    }
    // Remove old files from this source (if we have metadata). `files` may
    // be missing from hand-edited metadata, so default to an empty list.
    if (existingSourceMetadata) {
      for (const file of existingSourceMetadata.files || []) {
        const filePath = path.join(localPath, file);
        try {
          await fs.unlink(filePath);
        } catch {
          // File might already be deleted, ignore
        }
      }
    }
    // Copy new files
    const filesToCopy = [];
    if (paths.length > 0) {
      // Copy specified paths
      for (const relPath of paths) {
        const sourcePath = path.join(tempDir, relPath);
        const targetPath = path.join(localPath, relPath);
        try {
          const stat = await fs.stat(sourcePath);
          if (stat.isDirectory()) {
            const dirFiles = await copyDirectory(sourcePath, targetPath);
            // Record files WITH the relPath prefix: copyDirectory returns
            // paths relative to targetPath, and the un-prefixed names made
            // the stale-file deletion above unlink the wrong paths.
            filesToCopy.push(...dirFiles.map((f) => path.join(relPath, f)));
          } else {
            await fs.mkdir(path.dirname(targetPath), { recursive: true });
            await fs.copyFile(sourcePath, targetPath);
            filesToCopy.push(relPath);
          }
        } catch (error) {
          console.log(
            chalk.yellow(
              ` ⚠️ Skipping ${relPath}: ${error instanceof Error ? error.message : String(error)}`,
            ),
          );
        }
      }
    } else {
      // Copy all markdown files
      const allFiles = await findMarkdownFiles(tempDir);
      for (const file of allFiles) {
        const relativePath = path.relative(tempDir, file);
        const targetPath = path.join(localPath, relativePath);
        await fs.mkdir(path.dirname(targetPath), { recursive: true });
        await fs.copyFile(file, targetPath);
        filesToCopy.push(relativePath);
      }
    }
    // Create updated source metadata
    const metadata = {
      source_url: webSource.url,
      source_type: webSource.type,
      downloaded_at: new Date().toISOString(),
      files_count: filesToCopy.length,
      files: filesToCopy,
      docset_id: docsetId,
      last_commit: latestCommit,
    };
    await fs.writeFile(sourceMetadataPath, JSON.stringify(metadata, null, 2));
    return metadata;
  } finally {
    // Cleanup temp directory
    await fs.rm(tempDir, { recursive: true, force: true });
  }
}
|
|
286
|
+
// Reuse utility functions from init.ts
|
|
287
|
+
/**
 * Recursively collect absolute paths of all .md / .mdx files under a
 * directory, skipping any entry whose name starts with ".git".
 *
 * @param {string} rootDir - directory to scan.
 * @returns {Promise<string[]>} Absolute file paths in depth-first readdir order.
 */
async function findMarkdownFiles(rootDir) {
  const collected = [];
  // Depth-first walk; recursion keeps the original traversal order.
  async function walk(dirPath) {
    for (const entry of await fs.readdir(dirPath)) {
      // Skip .git, .gitignore, .github, etc.
      if (entry.startsWith(".git")) continue;
      const entryPath = path.join(dirPath, entry);
      const info = await fs.stat(entryPath);
      if (info.isDirectory()) {
        await walk(entryPath);
        continue;
      }
      if (entry.endsWith(".md") || entry.endsWith(".mdx")) {
        collected.push(entryPath);
      }
    }
  }
  await walk(rootDir);
  return collected;
}
|
|
305
|
+
/**
 * Recursively copy a directory tree.
 *
 * @param {string} sourceDir - directory to copy from.
 * @param {string} targetDir - directory to copy into (created if absent).
 * @returns {Promise<string[]>} Copied file paths relative to targetDir,
 *   in readdir order.
 */
async function copyDirectory(sourceDir, targetDir) {
  const copied = [];
  await fs.mkdir(targetDir, { recursive: true });
  for (const entry of await fs.readdir(sourceDir)) {
    const from = path.join(sourceDir, entry);
    const to = path.join(targetDir, entry);
    const info = await fs.stat(from);
    if (info.isDirectory()) {
      // Recurse, then re-root the nested paths under this entry's name.
      for (const nested of await copyDirectory(from, to)) {
        copied.push(path.join(entry, nested));
      }
    } else {
      await fs.copyFile(from, to);
      copied.push(entry);
    }
  }
  return copied;
}
|