solo-doc 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -1
- package/dist/bin/solo-doc.js +6 -46
- package/dist/src/commands/VSCommand.js +2 -32
- package/dist/src/utils/multiUrlCrawler.js +118 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -44,6 +44,16 @@ solo-doc "https://docs.alauda.io/container_platform/4.2/developer/building_appli
|
|
|
44
44
|
solo-doc "https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html-single/building_applications/index"
|
|
45
45
|
```
|
|
46
46
|
|
|
47
|
+
### 🔗 多地址聚合 (Multi-URL Aggregation)
|
|
48
|
+
|
|
49
|
+
支持一次性爬取多个 URL 并将其合并为一个 Markdown 文件。多个地址之间使用逗号分隔。
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# 爬取多个 URL 并合并
|
|
53
|
+
# 将依次爬取 url1 和 url2,合并输出到 combined.md
|
|
54
|
+
solo-doc "https://url1/...,https://url2/..." -o combined.md
|
|
55
|
+
```
|
|
56
|
+
|
|
47
57
|
### 📝 自定义输出文件名
|
|
48
58
|
|
|
49
59
|
使用 `-o` 参数指定自定义输出路径。
|
|
@@ -112,8 +122,33 @@ solo-doc vs \
|
|
|
112
122
|
solo-doc vs <url1> <url2>
|
|
113
123
|
```
|
|
114
124
|
|
|
125
|
+
#### 📂 本地文件与多源聚合
|
|
126
|
+
|
|
127
|
+
VS 模式支持直接使用本地 Markdown 文件进行对比,无需重复爬取。同时也支持将多个源(URL 或本地文件)聚合后再进行对比。
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# 1. 对比多个在线文档 (聚合对比)
|
|
131
|
+
# 场景:将 v1.0 的多个模块文档聚合,与 v2.0 的对应模块进行对比
|
|
132
|
+
solo-doc vs \
|
|
133
|
+
"https://docs.site/v1/module-a,https://docs.site/v1/module-b" \
|
|
134
|
+
"https://docs.site/v2/module-a,https://docs.site/v2/module-b"
|
|
135
|
+
|
|
136
|
+
# 2. 对比本地文件与在线文档
|
|
137
|
+
solo-doc vs ./local-draft.md "https://docs.prod.com/..."
|
|
138
|
+
|
|
139
|
+
# 3. 多文件聚合对比
|
|
140
|
+
# 将 part1.md 和 part2.md 合并作为基准,与 target.md 对比
|
|
141
|
+
solo-doc vs ./part1.md,./part2.md ./target.md
|
|
142
|
+
|
|
143
|
+
# 4. 混合使用 (URL + 本地文件)
|
|
144
|
+
solo-doc vs ./intro.md,"https://docs.site/chapter1" ./v2-draft.md
|
|
145
|
+
```
|
|
146
|
+
|
|
115
147
|
此命令将按顺序执行:
|
|
116
|
-
1.
|
|
148
|
+
1. **数据准备**:
|
|
149
|
+
- 如果是 URL:自动爬取并保存为 Markdown。
|
|
150
|
+
- 如果是本地文件:直接读取内容。
|
|
151
|
+
- 如果是多个源:按顺序合并为一个聚合文件。
|
|
117
152
|
2. **提取目录**: 提取两个文档的目录树结构。
|
|
118
153
|
3. **AI 分析**: 调用配置的 AI 模型,根据 `solo-doc-prompt.md` 定义的提示词进行两步分析:
|
|
119
154
|
> **提示**: `solo-doc` 会优先使用当前执行目录下的 `solo-doc-prompt.md` 文件。你可以复制默认模板到当前目录进行自定义修改。若当前目录不存在该文件,则使用内置默认模板。
|
package/dist/bin/solo-doc.js
CHANGED
|
@@ -5,12 +5,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
};
|
|
6
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
7
|
const commander_1 = require("commander");
|
|
8
|
-
const CrawlerContext_1 = require("../src/CrawlerContext");
|
|
9
|
-
const OCPStrategy_1 = require("../src/strategies/OCPStrategy");
|
|
10
|
-
const ACPStrategy_1 = require("../src/strategies/ACPStrategy");
|
|
11
|
-
const StrategyDetector_1 = require("../src/utils/StrategyDetector");
|
|
12
|
-
const filename_1 = require("../src/utils/filename");
|
|
13
8
|
const VSCommand_1 = require("../src/commands/VSCommand");
|
|
9
|
+
const multiUrlCrawler_1 = require("../src/utils/multiUrlCrawler");
|
|
14
10
|
const chalk_1 = __importDefault(require("chalk"));
|
|
15
11
|
const path_1 = __importDefault(require("path"));
|
|
16
12
|
const fs_1 = __importDefault(require("fs"));
|
|
@@ -63,48 +59,12 @@ program
|
|
|
63
59
|
.option('-f, --force', 'Force overwrite existing file')
|
|
64
60
|
.action(async (url, options) => {
|
|
65
61
|
try {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
const detected = StrategyDetector_1.StrategyDetector.detect(url);
|
|
70
|
-
if (detected !== StrategyDetector_1.StrategyType.UNKNOWN) {
|
|
71
|
-
type = detected;
|
|
72
|
-
console.log(chalk_1.default.blue(`[Solo-Doc] Auto-detected strategy: ${type.toUpperCase()}`));
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
if (!type || (type !== 'ocp' && type !== 'acp')) {
|
|
76
|
-
console.error(chalk_1.default.red('Error: Could not detect documentation type.'));
|
|
77
|
-
console.error(chalk_1.default.yellow('Please use --type <ocp|acp> to specify the documentation type manually.'));
|
|
78
|
-
process.exit(1);
|
|
79
|
-
}
|
|
80
|
-
// 2. Instantiate Strategy
|
|
81
|
-
let strategy;
|
|
82
|
-
let defaultOutput;
|
|
83
|
-
if (type === 'ocp' || type === StrategyDetector_1.StrategyType.OCP) {
|
|
84
|
-
strategy = new OCPStrategy_1.OCPStrategy();
|
|
85
|
-
defaultOutput = (0, filename_1.generateDefaultFilename)(url, 'ocp');
|
|
86
|
-
}
|
|
87
|
-
else {
|
|
88
|
-
strategy = new ACPStrategy_1.ACPStrategy();
|
|
89
|
-
defaultOutput = (0, filename_1.generateDefaultFilename)(url, 'acp');
|
|
90
|
-
}
|
|
91
|
-
// 3. Prepare Context
|
|
92
|
-
const context = new CrawlerContext_1.CrawlerContext(strategy);
|
|
93
|
-
const outputPath = path_1.default.resolve(process.cwd(), options.output || defaultOutput);
|
|
94
|
-
// Check if file exists
|
|
95
|
-
if (fs_1.default.existsSync(outputPath) && !options.force) {
|
|
96
|
-
console.log(chalk_1.default.yellow('--------------------------------------------------'));
|
|
97
|
-
console.log(chalk_1.default.yellow(`ℹ File already exists: ${outputPath}`));
|
|
98
|
-
console.log(chalk_1.default.yellow(' Skipping crawl to save time.'));
|
|
99
|
-
console.log(chalk_1.default.gray(' Use --force or -f to overwrite.'));
|
|
100
|
-
console.log(chalk_1.default.yellow('--------------------------------------------------'));
|
|
101
|
-
return;
|
|
102
|
-
}
|
|
103
|
-
// 4. Run
|
|
104
|
-
await context.run(url, {
|
|
105
|
-
output: outputPath,
|
|
62
|
+
await (0, multiUrlCrawler_1.crawlAndAggregate)(url, {
|
|
63
|
+
output: options.output,
|
|
64
|
+
type: options.type,
|
|
106
65
|
limit: options.limit,
|
|
107
|
-
headless: options.headless
|
|
66
|
+
headless: options.headless,
|
|
67
|
+
force: options.force
|
|
108
68
|
});
|
|
109
69
|
}
|
|
110
70
|
catch (error) {
|
|
@@ -8,14 +8,10 @@ const fs_1 = __importDefault(require("fs"));
|
|
|
8
8
|
const path_1 = __importDefault(require("path"));
|
|
9
9
|
const chalk_1 = __importDefault(require("chalk"));
|
|
10
10
|
const ora_1 = __importDefault(require("ora"));
|
|
11
|
-
const CrawlerContext_1 = require("../CrawlerContext");
|
|
12
|
-
const OCPStrategy_1 = require("../strategies/OCPStrategy");
|
|
13
|
-
const ACPStrategy_1 = require("../strategies/ACPStrategy");
|
|
14
|
-
const StrategyDetector_1 = require("../utils/StrategyDetector");
|
|
15
|
-
const filename_1 = require("../utils/filename");
|
|
16
11
|
const TocExtractor_1 = require("../utils/TocExtractor");
|
|
17
12
|
const AIClient_1 = require("../ai/AIClient");
|
|
18
13
|
const config_1 = require("../utils/config");
|
|
14
|
+
const multiUrlCrawler_1 = require("../utils/multiUrlCrawler");
|
|
19
15
|
class VSCommand {
|
|
20
16
|
static async run(baselineUrl, targetUrl, options) {
|
|
21
17
|
console.log(chalk_1.default.blue(`[VS Mode] Starting comparison between:`));
|
|
@@ -124,33 +120,7 @@ ${result1}
|
|
|
124
120
|
console.log(chalk_1.default.green(`[VS Mode] All tasks finished.`));
|
|
125
121
|
}
|
|
126
122
|
static async crawlUrl(url, prefix) {
|
|
127
|
-
|
|
128
|
-
let type = StrategyDetector_1.StrategyDetector.detect(url);
|
|
129
|
-
let strategy;
|
|
130
|
-
// Simple logic: if detects OCP, use OCP. Else ACP (more generic).
|
|
131
|
-
if (type === StrategyDetector_1.StrategyType.OCP) {
|
|
132
|
-
strategy = new OCPStrategy_1.OCPStrategy();
|
|
133
|
-
}
|
|
134
|
-
else {
|
|
135
|
-
// Default to ACP which uses Puppeteer
|
|
136
|
-
strategy = new ACPStrategy_1.ACPStrategy();
|
|
137
|
-
}
|
|
138
|
-
const filename = (0, filename_1.generateDefaultFilename)(url, prefix);
|
|
139
|
-
const outputPath = path_1.default.resolve(process.cwd(), filename);
|
|
140
|
-
// Check if file exists
|
|
141
|
-
if (fs_1.default.existsSync(outputPath)) {
|
|
142
|
-
console.log(chalk_1.default.yellow('--------------------------------------------------'));
|
|
143
|
-
console.log(chalk_1.default.yellow(`ℹ File already exists: ${outputPath}`));
|
|
144
|
-
console.log(chalk_1.default.yellow(' Using cached version for comparison.'));
|
|
145
|
-
console.log(chalk_1.default.yellow('--------------------------------------------------'));
|
|
146
|
-
return outputPath;
|
|
147
|
-
}
|
|
148
|
-
console.log(chalk_1.default.blue(`[VS Mode] Crawling ${url} -> ${filename}...`));
|
|
149
|
-
const context = new CrawlerContext_1.CrawlerContext(strategy);
|
|
150
|
-
// Suppress console log from crawler to keep output clean?
|
|
151
|
-
// Or keep it to show progress. Keep it.
|
|
152
|
-
await context.run(url, { output: outputPath, headless: true });
|
|
153
|
-
return outputPath;
|
|
123
|
+
return await (0, multiUrlCrawler_1.crawlAndAggregate)(url, { prefix });
|
|
154
124
|
}
|
|
155
125
|
static parsePrompts(content) {
|
|
156
126
|
// Match content inside ``` ... ``` blocks that follow "Prompt模板"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.crawlAndAggregate = crawlAndAggregate;
|
|
7
|
+
const fs_1 = __importDefault(require("fs"));
|
|
8
|
+
const path_1 = __importDefault(require("path"));
|
|
9
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
10
|
+
const StrategyDetector_1 = require("./StrategyDetector");
|
|
11
|
+
const OCPStrategy_1 = require("../strategies/OCPStrategy");
|
|
12
|
+
const ACPStrategy_1 = require("../strategies/ACPStrategy");
|
|
13
|
+
const filename_1 = require("./filename");
|
|
14
|
+
/**
 * Crawl and/or read one or more sources and merge them into a single Markdown file.
 *
 * `urlInput` is a comma-separated list whose entries are either paths to existing
 * local files (read verbatim) or URLs (crawled with the OCP or ACP strategy).
 * Sections are joined in input order with a Markdown horizontal rule.
 *
 * Limitation: entries are split on every comma, so a URL that itself contains a
 * comma cannot be passed through this function.
 *
 * @param {string} urlInput - Comma-separated URLs and/or local file paths.
 * @param {object} [options]
 * @param {string} [options.output] - Output path, resolved against the current working directory.
 * @param {string} [options.prefix] - Prefix for the generated default file name.
 * @param {string} [options.type] - Forced strategy, `ocp` or `acp` (case-insensitive).
 * @param {*} [options.limit] - Forwarded unchanged to the strategy.
 * @param {*} [options.headless] - Forwarded unchanged to the strategy.
 * @param {boolean} [options.force] - Regenerate even when the output file already exists.
 * @returns {Promise<string>} Absolute path of the aggregated Markdown file.
 * @throws {Error} When no usable input is given, when `options.type` is not
 *   `ocp`/`acp`, or when a strategy fails (the strategy's error is rethrown).
 */
async function crawlAndAggregate(urlInput, options = {}) {
    const inputs = String(urlInput == null ? '' : urlInput)
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0);
    if (inputs.length === 0) {
        throw new Error("No valid inputs provided");
    }
    // Validate up front so a typo in --type fails before any crawling starts.
    const forcedType = normalizeForcedType(options.type);
    const outputPath = resolveAggregateOutputPath(inputs[0], options, forcedType);
    // An existing output file is treated as a cache unless --force is given.
    if (fs_1.default.existsSync(outputPath) && !options.force) {
        console.log(chalk_1.default.yellow('--------------------------------------------------'));
        console.log(chalk_1.default.yellow(`ℹ File already exists: ${outputPath}`));
        console.log(chalk_1.default.yellow('  Skipping crawl/read to save time. (Use --force to overwrite)'));
        console.log(chalk_1.default.yellow('--------------------------------------------------'));
        return outputPath;
    }
    const sections = [];
    console.log(chalk_1.default.blue(`[Multi-URL] Found ${inputs.length} inputs to process.`));
    for (const [index, input] of inputs.entries()) {
        console.log(chalk_1.default.blue(`\n[Multi-URL] Processing ${index + 1}/${inputs.length}: ${input}`));
        if (isLocalFile(input)) {
            console.log(chalk_1.default.green(`[Solo-Doc] Detected local file. Reading content...`));
            sections.push(fs_1.default.readFileSync(input, 'utf-8'));
            continue;
        }
        // Anything that is not an existing file is assumed to be a URL.
        const strategy = createStrategy(input, forcedType);
        try {
            sections.push(await strategy.execute(input, {
                output: outputPath,
                limit: options.limit,
                headless: options.headless
            }));
        }
        catch (e) {
            console.error(chalk_1.default.red(`[Solo-Doc] Error during crawl of ${input}: ${e.message}`));
            // Fail hard: a partially aggregated document would silently skew later comparisons.
            throw e;
        }
    }
    // Create the parent directory so `-o some/new/dir/file.md` does not fail after a long crawl.
    fs_1.default.mkdirSync(path_1.default.dirname(outputPath), { recursive: true });
    fs_1.default.writeFileSync(outputPath, sections.join('\n\n---\n\n'), 'utf-8');
    console.log(chalk_1.default.green(`\n✔ Aggregated content written to: ${outputPath}`));
    return outputPath;
}
/** Returns true when `candidate` is a path to an existing regular file. */
function isLocalFile(candidate) {
    return fs_1.default.existsSync(candidate) && fs_1.default.statSync(candidate).isFile();
}
/** Lower-cases a user-forced strategy type; returns undefined when none was given and throws on unsupported values. */
function normalizeForcedType(type) {
    if (!type) {
        return undefined;
    }
    const normalized = String(type).toLowerCase();
    if (normalized !== 'ocp' && normalized !== 'acp') {
        throw new Error(`Unsupported documentation type "${type}". Please use --type <ocp|acp>.`);
    }
    return normalized;
}
/** Picks the output path: the explicit `options.output`, else a default name derived from the first input. */
function resolveAggregateOutputPath(firstInput, options, forcedType) {
    if (options.output) {
        return path_1.default.resolve(process.cwd(), options.output);
    }
    const fromFile = isLocalFile(firstInput);
    let prefix = options.prefix;
    if (!prefix) {
        if (fromFile) {
            prefix = 'local';
        }
        else if (forcedType) {
            prefix = forcedType;
        }
        else {
            const detected = StrategyDetector_1.StrategyDetector.detect(firstInput);
            prefix = detected !== StrategyDetector_1.StrategyType.UNKNOWN ? detected : 'acp';
        }
    }
    if (fromFile) {
        const basename = path_1.default.basename(firstInput, path_1.default.extname(firstInput));
        return path_1.default.resolve(process.cwd(), `${prefix}-${basename}.md`);
    }
    return path_1.default.resolve(process.cwd(), (0, filename_1.generateDefaultFilename)(firstInput, prefix));
}
/** Instantiates the crawl strategy for one URL: the forced type if given, else auto-detected, else ACP. */
function createStrategy(input, forcedType) {
    let strategyType = forcedType;
    if (strategyType) {
        console.log(chalk_1.default.blue(`[Solo-Doc] Using forced strategy: ${strategyType.toUpperCase()}`));
    }
    else {
        const detected = StrategyDetector_1.StrategyDetector.detect(input);
        if (detected !== StrategyDetector_1.StrategyType.UNKNOWN) {
            strategyType = detected;
            console.log(chalk_1.default.blue(`[Solo-Doc] Auto-detected strategy: ${strategyType.toUpperCase()}`));
        }
        else {
            strategyType = 'acp'; // Default
            console.log(chalk_1.default.yellow(`[Solo-Doc] Could not detect strategy, defaulting to ACP`));
        }
    }
    if (strategyType === 'ocp' || strategyType === StrategyDetector_1.StrategyType.OCP) {
        return new OCPStrategy_1.OCPStrategy();
    }
    return new ACPStrategy_1.ACPStrategy();
}
|