solo-doc 0.2.0 → 0.2.1

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/README.md CHANGED
@@ -75,12 +75,23 @@ solo-doc "http://10.1.2.3/docs/index.html" --type acp
 
 ### 🤖 Document Comparison (AI VS Mode)
 
-Uses a local AI model to compare the content and structural differences of two documents. This feature is in Beta.
+Uses an AI model to compare the content and structural differences of two documents. This feature is in Beta.
 
 > **⚠️ Prerequisites**:
-> 1. [Ollama](https://ollama.com/) is installed and running locally.
-> 2. The required model has been pulled (`qwen3-vl:8b` or a similar multimodal/long-context model is recommended).
-> 3. The Ollama service is listening at `http://127.0.0.1:11434`.
+> 1. A `.solodocrc.yml` config file must be created in the project root or your home directory.
+> 2. An AI service with an OpenAI-compatible API is configured (Azure OpenAI, DeepSeek, etc. are supported).
+
+#### ⚙️ AI Configuration (.solodocrc.yml)
+
+Before running the comparison command, create a `.solodocrc.yml` file to configure the AI service.
+
+**Example: using Azure OpenAI**
+```yaml
+# Azure-format URLs are detected automatically and the api-version is handled for you
+MODEL_BASE_URL: "https://your-resource.openai.azure.com/2024-12-01-preview"
+MODEL_API_KEY: "your-azure-api-key"
+MODEL_NAME: "gpt-4o-mini"
+```
 
 #### Usage
 
@@ -92,16 +103,19 @@ solo-doc vs <baseline-url> <target-url> [options]
 
 ```bash
 # Compare the OpenShift and Alauda documentation
+# Uses the default model from .solodocrc.yml
 solo-doc vs \
   "https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html-single/building_applications/index" \
-  "https://docs.alauda.io/container_platform/4.2/developer/building_application/index.html" \
-  --model qwen3-vl:8b
+  "https://docs.alauda.io/container_platform/4.2/developer/building_application/index.html"
+
+# Temporarily override the model in use
+solo-doc vs <url1> <url2> --model gpt-4
 ```
 
 This command performs the following steps in order:
 1. **Auto-crawl**: Crawls both URLs and saves each as a Markdown file (skipped if the file already exists).
 2. **Extract TOC**: Extracts the table-of-contents tree of both documents.
-3. **AI analysis**: Calls the local Ollama model and runs a two-step analysis using the prompts defined in `solo-doc-prompt.md`:
+3. **AI analysis**: Calls the configured AI model and runs a two-step analysis using the prompts defined in `solo-doc-prompt.md`:
    - Generates `vs-result.md`: a detailed analysis of content and structural differences.
    - Generates `vs-tree.md`: a merged TOC tree annotated with the differences.
 
@@ -109,7 +123,7 @@ solo-doc vs \
 
 | Option | Description | Default |
 |--------|-------------|---------|
-| `--model <name>` | The Ollama model name to use. | `qwen3-vl:8b` |
+| `--model <name>` | Temporarily overrides the model name from the config file. | `MODEL_NAME` from config |
 | `-f, --force` | Force re-crawling the docs, even if the files already exist. | false |
 
 ## ✅ Requirements
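The README only documents the Azure shape of the config, but the new client also speaks the standard OpenAI chat-completions format. A minimal sketch for a local Ollama server reached through its OpenAI-compatible `/v1` API — these values just restate the fallbacks hard-coded in `AIClient` (added below), so treat them as an assumption rather than documented behavior:

```yaml
# Hypothetical .solodocrc.yml for a plain (non-Azure) OpenAI-compatible endpoint.
# Values mirror AIClient's built-in defaults.
MODEL_BASE_URL: "http://127.0.0.1:11434/v1"  # no trailing date segment → standard mode
MODEL_API_KEY: "ollama"                      # sent as "Authorization: Bearer ollama"
MODEL_NAME: "qwen3-vl:8b"
```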
package/dist/bin/solo-doc.js CHANGED
@@ -43,10 +43,10 @@ program
     .description('Compare two documentation sites using AI (Beta)')
     .argument('<baseline>', 'Baseline documentation URL')
     .argument('<target>', 'Target documentation URL')
-    .option('--model <model>', 'Ollama model to use', 'qwen3-vl:8b')
+    .option('--model <model>', 'AI Model name (overrides .solodocrc.yml)')
     .action(async (baseline, target, options) => {
     try {
-        console.log(chalk_1.default.yellow('⚠️ [Beta Feature] This feature requires a local Ollama instance running at http://127.0.0.1:11434'));
+        console.log(chalk_1.default.yellow('⚠️ [Beta Feature] This feature requires AI configuration in .solodocrc.yml or --model argument'));
         await VSCommand_1.VSCommand.run(baseline, target, options);
     }
     catch (error) {
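Note that omitting both the config file and `--model` does not fail at startup: judging by the defaults in `AIClient` below, the client falls back to the pre-0.2.1 Ollama setup, now reached via Ollama's OpenAI-compatible `/v1` API. A hypothetical smoke test (example URLs only):

```bash
# No .solodocrc.yml anywhere and no --model: AIClient falls back to
# http://127.0.0.1:11434/v1 with model qwen3-vl:8b.
solo-doc vs "https://example.com/docs/a" "https://example.com/docs/b"
```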
package/dist/ai/AIClient.js ADDED
@@ -0,0 +1,185 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.AIClient = void 0;
+const axios_1 = __importDefault(require("axios"));
+class AIClient {
+    constructor(options) {
+        this.isAzure = false;
+        this.apiVersion = '';
+        const { config, modelOverride } = options;
+        // Ensure baseURL doesn't end with a slash
+        let url = config.MODEL_BASE_URL || 'http://127.0.0.1:11434/v1';
+        if (url.endsWith('/')) {
+            url = url.slice(0, -1);
+        }
+        // Check for the Azure URL pattern (heuristic based on the user config format)
+        // User config: https://.../2024-12-01-preview
+        const azureDatePattern = /\/\d{4}-\d{2}-\d{2}(-preview)?$/;
+        if (azureDatePattern.test(url)) {
+            this.isAzure = true;
+            // Extract the api-version from the end of the URL
+            const match = url.match(azureDatePattern);
+            if (match) {
+                this.apiVersion = match[0].substring(1); // Remove leading slash
+                this.baseURL = url.substring(0, match.index); // Remove version from base
+            }
+            else {
+                this.baseURL = url;
+            }
+        }
+        else {
+            this.baseURL = url;
+        }
+        this.apiKey = config.MODEL_API_KEY || 'ollama';
+        // Priority: CLI override > config > default
+        this.model = modelOverride || config.MODEL_NAME || 'qwen3-vl:8b';
+    }
+    async generate(prompt, onToken) {
+        try {
+            let requestUrl;
+            let headers = {
+                'Content-Type': 'application/json'
+            };
+            if (this.isAzure) {
+                // Azure format: {endpoint}/openai/deployments/{model}/chat/completions?api-version={version}
+                requestUrl = `${this.baseURL}/openai/deployments/${this.model}/chat/completions?api-version=${this.apiVersion}`;
+                headers['api-key'] = this.apiKey;
+            }
+            else {
+                // Standard OpenAI format
+                requestUrl = `${this.baseURL}/chat/completions`;
+                headers['Authorization'] = `Bearer ${this.apiKey}`;
+            }
+            const body = {
+                messages: [{ role: 'user', content: prompt }],
+                stream: true
+            };
+            if (!this.isAzure) {
+                body.model = this.model;
+            }
+            const response = await axios_1.default.post(requestUrl, body, {
+                headers,
+                responseType: 'stream'
+            });
+            let fullResponse = '';
+            return new Promise((resolve, reject) => {
+                const stream = response.data;
+                // console.log('DEBUG: Stream started');
+                let buffer = '';
+                // State for decoding a JSON-stringified stream
+                let isJsonStringMode = false;
+                let isEscaping = false;
+                let hasStarted = false;
+                stream.on('data', (chunk) => {
+                    let str = chunk.toString();
+                    // Detect JSON string wrapping on the first chunk
+                    if (!hasStarted) {
+                        const trimmed = str.trimStart();
+                        if (trimmed.startsWith('"')) {
+                            isJsonStringMode = true;
+                            // Remove the leading quote and anything before it
+                            const quoteIndex = str.indexOf('"');
+                            str = str.slice(quoteIndex + 1);
+                        }
+                        hasStarted = true;
+                    }
+                    if (isJsonStringMode) {
+                        let decoded = '';
+                        for (let i = 0; i < str.length; i++) {
+                            const char = str[i];
+                            if (isEscaping) {
+                                if (char === 'n')
+                                    decoded += '\n';
+                                else if (char === 'r')
+                                    decoded += '\r';
+                                else if (char === 't')
+                                    decoded += '\t';
+                                else if (char === '"')
+                                    decoded += '"';
+                                else
+                                    decoded += '\\' + char; // Keep other escapes (e.g. \u, \/)
+                                isEscaping = false;
+                            }
+                            else {
+                                if (char === '\\') {
+                                    isEscaping = true;
+                                }
+                                else if (char === '"') {
+                                    // End of the JSON string (or segment); ignore
+                                    // In a stream, this likely means EOF or the end of the wrapper
+                                }
+                                else {
+                                    decoded += char;
+                                }
+                            }
+                        }
+                        buffer += decoded;
+                    }
+                    else {
+                        buffer += str;
+                    }
+                    const lines = buffer.split('\n');
+                    // console.log('DEBUG First line:', lines[0].substring(0, 100));
+                    // Keep the last line in the buffer if it's incomplete
+                    buffer = lines.pop() || '';
+                    for (const line of lines) {
+                        const trimmed = line.trim();
+                        if (!trimmed || !trimmed.startsWith('data: '))
+                            continue;
+                        const data = trimmed.slice(6).trim(); // Remove 'data: '
+                        if (data === '[DONE]')
+                            continue;
+                        try {
+                            const json = JSON.parse(data);
+                            const content = json.choices?.[0]?.delta?.content || '';
+                            if (content) {
+                                fullResponse += content;
+                                if (onToken) {
+                                    onToken(content);
+                                }
+                            }
+                        }
+                        catch (e) {
+                            // Ignore parse errors for partial lines
+                        }
+                    }
+                });
+                stream.on('end', () => {
+                    // console.log('DEBUG Stream ended. Full response length:', fullResponse.length);
+                    // Process the remaining buffer, if any
+                    if (buffer) {
+                        const trimmed = buffer.trim();
+                        if (trimmed.startsWith('data: ') && trimmed !== 'data: [DONE]') {
+                            try {
+                                const data = trimmed.slice(6).trim();
+                                const json = JSON.parse(data);
+                                const content = json.choices?.[0]?.delta?.content || '';
+                                if (content) {
+                                    fullResponse += content;
+                                    if (onToken)
+                                        onToken(content);
+                                }
+                            }
+                            catch (e) {
+                                // ignore
+                            }
+                        }
+                    }
+                    resolve(fullResponse);
+                });
+                stream.on('error', (err) => {
+                    reject(err);
+                });
+            });
+        }
+        catch (error) {
+            const msg = error.response?.data ? JSON.stringify(error.response.data) : error.message;
+            // Surface the server's response body in the error message when available
+            throw new Error(`AI API call failed: ${msg}.`);
+        }
+    }
+}
+exports.AIClient = AIClient;
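The Azure detection above hinges on a single regex. A standalone sketch of how a configured URL gets split, traced from the constructor (example values only):

```js
// Mirrors the heuristic in AIClient's constructor: a trailing date segment
// (optionally suffixed "-preview") marks an Azure endpoint.
const azureDatePattern = /\/\d{4}-\d{2}-\d{2}(-preview)?$/;

const url = 'https://your-resource.openai.azure.com/2024-12-01-preview';
const match = url.match(azureDatePattern);
if (match) {
    const apiVersion = match[0].substring(1);      // '2024-12-01-preview'
    const baseURL = url.substring(0, match.index); // 'https://your-resource.openai.azure.com'
    // Requests are then sent to:
    //   `${baseURL}/openai/deployments/${model}/chat/completions?api-version=${apiVersion}`
    // with the key in an 'api-key' header instead of 'Authorization: Bearer'.
    console.log(baseURL, apiVersion);
}
```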
package/dist/commands/VSCommand.js CHANGED
@@ -14,13 +14,18 @@ const ACPStrategy_1 = require("../strategies/ACPStrategy");
 const StrategyDetector_1 = require("../utils/StrategyDetector");
 const filename_1 = require("../utils/filename");
 const TocExtractor_1 = require("../utils/TocExtractor");
-const OllamaClient_1 = require("../ai/OllamaClient");
+const AIClient_1 = require("../ai/AIClient");
+const config_1 = require("../utils/config");
 class VSCommand {
     static async run(baselineUrl, targetUrl, options) {
         console.log(chalk_1.default.blue(`[VS Mode] Starting comparison between:`));
         console.log(chalk_1.default.gray(`Baseline: ${baselineUrl}`));
         console.log(chalk_1.default.gray(`Target: ${targetUrl}`));
-        console.log(chalk_1.default.gray(`Model: ${options.model}`));
+        const config = config_1.ConfigLoader.load();
+        const client = new AIClient_1.AIClient({
+            config,
+            modelOverride: options.model
+        });
         // 1. Crawl Baseline
         const baselineFile = await VSCommand.crawlUrl(baselineUrl, 'baseline');
         // 2. Crawl Target
@@ -55,7 +60,6 @@ class VSCommand {
         if (prompts.length < 2) {
             throw new Error('Found fewer than 2 prompt templates in solo-doc-prompt.md');
         }
-        const client = new OllamaClient_1.OllamaClient({ model: options.model });
         // 5. Step 1: Independent Comparison
         console.log(chalk_1.default.blue(`[VS Mode] Step 1: Analyzing differences...`));
         // Replace placeholders
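For orientation, a hypothetical standalone use of the new wiring — the require paths are assumptions based on the relative imports above, not documented entry points:

```js
// Hypothetical: drive AIClient outside the CLI, mirroring VSCommand's setup.
const { ConfigLoader } = require('solo-doc/dist/utils/config'); // assumed path
const { AIClient } = require('solo-doc/dist/ai/AIClient');      // assumed path

const config = ConfigLoader.load(); // {} when no .solodocrc.yml exists
const client = new AIClient({
    config,
    modelOverride: process.env.SOLO_DOC_MODEL // hypothetical stand-in for --model
});

client.generate('Compare these two TOC trees: ...', token => process.stdout.write(token))
    .then(full => console.error(`\nreceived ${full.length} characters`))
    .catch(err => console.error(err.message));
```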
package/dist/utils/config.js ADDED
@@ -0,0 +1,42 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ConfigLoader = void 0;
+const fs_1 = __importDefault(require("fs"));
+const path_1 = __importDefault(require("path"));
+const os_1 = __importDefault(require("os"));
+const js_yaml_1 = __importDefault(require("js-yaml"));
+const CONFIG_FILENAME = '.solodocrc.yml';
+class ConfigLoader {
+    static load() {
+        const locations = [
+            path_1.default.join(process.cwd(), CONFIG_FILENAME),
+            path_1.default.join(os_1.default.homedir(), CONFIG_FILENAME)
+        ];
+        for (const location of locations) {
+            if (fs_1.default.existsSync(location)) {
+                try {
+                    const fileContents = fs_1.default.readFileSync(location, 'utf8');
+                    const config = js_yaml_1.default.load(fileContents);
+                    if (config) {
+                        // Trim string values to avoid issues like "model "
+                        Object.keys(config).forEach(key => {
+                            if (typeof config[key] === 'string') {
+                                config[key] = config[key].trim();
+                            }
+                        });
+                        return config;
+                    }
+                    return {};
+                }
+                catch (e) {
+                    console.error(`Failed to load config from ${location}:`, e);
+                }
+            }
+        }
+        return {};
+    }
+}
+exports.ConfigLoader = ConfigLoader;
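One behavior worth spelling out: the two locations form a search order, not a merge — the first file found is returned as-is. A hypothetical illustration:

```js
// Given both files exist (hypothetical contents):
//   ~/.solodocrc.yml  → MODEL_NAME: "gpt-4o"
//   ./.solodocrc.yml  → MODEL_NAME: "gpt-4o-mini"
// the project file shadows the home file completely:
const { ConfigLoader } = require('solo-doc/dist/utils/config'); // assumed path
console.log(ConfigLoader.load().MODEL_NAME); // "gpt-4o-mini"
```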
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "solo-doc",
-  "version": "0.2.0",
+  "version": "0.2.1",
   "main": "dist/bin/solo-doc.js",
   "bin": {
     "solo-doc": "dist/bin/solo-doc.js"
@@ -12,8 +12,7 @@
   "scripts": {
     "clean": "rm -rf dist",
     "build": "tsc",
-    "prepublishOnly": "npm run clean && npm run build",
-    "release": "npm run clean && npm run build && npm version patch --force && npm publish --access=public",
+    "publish": "npm run clean && npm run build && npm version patch --force && npm publish --access=public",
     "start": "node dist/bin/solo-doc.js",
     "dev": "ts-node bin/solo-doc.ts"
   },
@@ -22,10 +21,13 @@
   "license": "ISC",
   "description": "A CLI tool to crawl documentation sites (OCP, ACP) and convert them to a single Markdown file preserving hierarchy.",
   "dependencies": {
+    "@types/js-yaml": "^4.0.9",
     "axios": "^1.6.0",
     "chalk": "^4.1.2",
     "cheerio": "^1.0.0-rc.12",
     "commander": "^12.0.0",
+    "js-yaml": "^4.1.1",
+    "openai": "^6.15.0",
     "ora": "^5.4.1",
     "puppeteer-core": "^24.1.0",
     "turndown": "^7.2.0"
package/dist/ai/OllamaClient.js DELETED
@@ -1,59 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.OllamaClient = void 0;
-const axios_1 = __importDefault(require("axios"));
-class OllamaClient {
-    constructor(options) {
-        // Use 127.0.0.1 instead of localhost to avoid IPv6 issues (ECONNREFUSED ::1)
-        this.endpoint = options.endpoint || 'http://127.0.0.1:11434';
-        this.model = options.model;
-    }
-    async generate(prompt, onToken) {
-        try {
-            const response = await axios_1.default.post(`${this.endpoint}/api/generate`, {
-                model: this.model,
-                prompt: prompt,
-                stream: true
-            }, {
-                responseType: 'stream'
-            });
-            let fullResponse = '';
-            return new Promise((resolve, reject) => {
-                const stream = response.data;
-                stream.on('data', (chunk) => {
-                    const lines = chunk.toString().split('\n').filter(Boolean);
-                    for (const line of lines) {
-                        try {
-                            const json = JSON.parse(line);
-                            if (json.response) {
-                                fullResponse += json.response;
-                                if (onToken) {
-                                    onToken(json.response);
-                                }
-                            }
-                            if (json.done) {
-                                // stream ended
-                            }
-                        }
-                        catch (e) {
-                            // ignore partial JSON
-                        }
-                    }
-                });
-                stream.on('end', () => {
-                    resolve(fullResponse);
-                });
-                stream.on('error', (err) => {
-                    reject(err);
-                });
-            });
-        }
-        catch (error) {
-            throw new Error(`Ollama API call failed: ${error.message}. Is Ollama running at ${this.endpoint}?`);
-        }
-    }
-}
-exports.OllamaClient = OllamaClient;