solo-doc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/package.json +39 -0
package/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Solo-Doc CLI
|
|
2
|
+
|
|
3
|
+
Solo-Doc is a powerful Node.js CLI tool designed to crawl complex documentation sites and convert them into a single, hierarchically structured Markdown file.
|
|
4
|
+
|
|
5
|
+
**Name Origin**: "Solo" represents the capability to consolidate multiple documentation pages into a "single" (solo) file, and "Doc" stands for documentation.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Multi-Strategy Support**: Specialized strategies for different documentation frameworks:
|
|
10
|
+
- **OCP (Red Hat OpenShift)**: Optimized for static single-page HTML documentation.
|
|
11
|
+
- **ACP (Alauda Container Platform)**: Optimized for dynamic, client-side rendered (Rspress-based) documentation using Puppeteer.
|
|
12
|
+
- **Hierarchy Preservation**: Maintains the original directory structure (1, 1.1, 1.1.1...) of the documentation.
|
|
13
|
+
- **Clean Output**: Removes navigation bars, sidebars, headers, and footers, keeping only the relevant content.
|
|
14
|
+
- **Single File Output**: Merges all crawled pages into one comprehensive Markdown file.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
### From NPM (Recommended)
|
|
19
|
+
|
|
20
|
+
Install the tool globally from the npm registry:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install -g solo-doc
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
Once installed globally, you can run the `solo-doc` command from any terminal window.
|
|
29
|
+
|
|
30
|
+
### 1. Crawl OpenShift (OCP) Docs
|
|
31
|
+
|
|
32
|
+
For Red Hat OpenShift documentation (single-page HTML format, i.e. `html-single` URLs):
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
solo-doc ocp "https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html-single/building_applications/index" -o openshift_docs.md
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 2. Crawl Alauda (ACP) Docs
|
|
39
|
+
|
|
40
|
+
For Alauda Container Platform documentation (Rspress format):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
solo-doc acp "https://docs.alauda.io/container_platform/4.2/developer/building_application/index.html" -o alauda_docs.md
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Options:**
|
|
47
|
+
- `-o, --output <path>`: Specify output file path (default: `[strategy]-docs.md`).
|
|
48
|
+
- `--limit <number>`: Limit the number of pages to crawl (useful for testing).
|
|
49
|
+
- `--no-headless`: Run the browser in visible (non-headless) mode (for ACP debugging).
|
|
50
|
+
|
|
51
|
+
## Requirements
|
|
52
|
+
|
|
53
|
+
- Node.js >= 18
|
|
54
|
+
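- Google Chrome, installed locally (required for ACP crawling; the tool uses `puppeteer-core`, which does not download a browser itself)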
|
|
55
|
+
|
|
56
|
+
## Development
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Run in development mode
|
|
60
|
+
npm run dev -- acp "url" ...
|
|
61
|
+
```
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "solo-doc",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"main": "dist/bin/solo-doc.js",
|
|
5
|
+
"bin": {
|
|
6
|
+
"solo-doc": "dist/bin/solo-doc.js"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"dist/**/*",
|
|
10
|
+
"README.md"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"build": "tsc",
|
|
14
|
+
"package": "npm run build && cp README.md package.json dist/",
|
|
15
|
+
"prepublishOnly": "npm run build",
|
|
16
|
+
"start": "node dist/bin/solo-doc.js",
|
|
17
|
+
"dev": "ts-node bin/solo-doc.ts"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [],
|
|
20
|
+
"author": "",
|
|
21
|
+
"license": "ISC",
|
|
22
|
+
"description": "A CLI tool to crawl documentation sites (OCP, ACP) and convert them to a single Markdown file preserving hierarchy.",
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"axios": "^1.6.0",
|
|
25
|
+
"chalk": "^4.1.2",
|
|
26
|
+
"cheerio": "^1.0.0-rc.12",
|
|
27
|
+
"commander": "^12.0.0",
|
|
28
|
+
"ora": "^5.4.1",
|
|
29
|
+
"puppeteer-core": "^24.1.0",
|
|
30
|
+
"turndown": "^7.2.0"
|
|
31
|
+
},
|
|
32
|
+
"devDependencies": {
|
|
33
|
+
"@types/node": "^22.10.5",
|
|
34
|
+
"@types/turndown": "^5.0.5",
|
|
35
|
+
"ts-node": "^10.9.2",
|
|
36
|
+
"typescript": "^5.7.3"
|
|
37
|
+
},
|
|
38
|
+
"packageManager": "yarn@4.9.4+sha512.7b1cb0b62abba6a537b3a2ce00811a843bea02bcf53138581a6ae5b1bf563f734872bd47de49ce32a9ca9dcaff995aa789577ffb16811da7c603dcf69e73750b"
|
|
39
|
+
}
|