doc-fetch-cli 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALLATION-FIX.md +242 -0
- package/bin/doc-fetch.js +47 -9
- package/bin/postinstall.js +88 -0
- package/package.json +11 -4
- package/SECURITY.md +0 -84
- package/cmd/docfetch/main.go +0 -55
- package/dist/doc_fetch-1.1.0-py3-none-any.whl +0 -0
- package/dist/doc_fetch-1.1.0.tar.gz +0 -0
- package/doc-fetch_darwin_amd64 +0 -0
- package/doc-fetch_windows_amd64.exe +0 -0
- package/doc_fetch/__init__.py +0 -6
- package/doc_fetch/__main__.py +0 -7
- package/doc_fetch/cli.py +0 -113
- package/doc_fetch.egg-info/PKG-INFO +0 -224
- package/doc_fetch.egg-info/SOURCES.txt +0 -19
- package/doc_fetch.egg-info/dependency_links.txt +0 -1
- package/doc_fetch.egg-info/entry_points.txt +0 -2
- package/doc_fetch.egg-info/not-zip-safe +0 -1
- package/doc_fetch.egg-info/top_level.txt +0 -1
- package/docs/usage.md +0 -67
- package/examples/golang-example.sh +0 -12
- package/go.sum +0 -38
- package/pkg/fetcher/classifier.go +0 -50
- package/pkg/fetcher/describer.go +0 -61
- package/pkg/fetcher/fetcher.go +0 -415
- package/pkg/fetcher/fetcher_optimized.go +0 -318
- package/pkg/fetcher/html2md.go +0 -71
- package/pkg/fetcher/llmtxt.go +0 -36
- package/pkg/fetcher/validator.go +0 -109
- package/pkg/fetcher/writer.go +0 -32
- package/pyproject.toml +0 -37
- package/setup.py +0 -158
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# DocFetch CLI Installation Fix
|
|
2
|
+
|
|
3
|
+
**Issue**: "Binary not found" error when installing via NPM
|
|
4
|
+
**Date**: February 20, 2026
|
|
5
|
+
**Fixed in**: v1.1.2
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## 🐛 **Problem Analysis**
|
|
10
|
+
|
|
11
|
+
### **Symptom 1: "Binary not found" error**
|
|
12
|
+
```bash
|
|
13
|
+
$ npm install -g doc-fetch-cli
|
|
14
|
+
❌ doc-fetch binary not found!
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**Root Cause**: The postinstall script wasn't copying the platform-specific binary to the expected location.
|
|
18
|
+
|
|
19
|
+
### **Symptom 2: Command name confusion**
|
|
20
|
+
```bash
|
|
21
|
+
$ npm install -g doc-fetch-cli
|
|
22
|
+
$ doc-fetch-cli --help # ❌ Doesn't work
|
|
23
|
+
$ doc-fetch --help # ✅ Works
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**This is actually CORRECT behavior!** Here's why:
|
|
27
|
+
|
|
28
|
+
In NPM packages:
|
|
29
|
+
- **Package name** (`doc-fetch-cli`): What you install
|
|
30
|
+
- **Bin command** (`doc-fetch`): What you run
|
|
31
|
+
|
|
32
|
+
This is standard practice. Examples:
|
|
33
|
+
- `npm install -g nodemon` → run `nodemon`
|
|
34
|
+
- `npm install -g typescript` → run `tsc`
|
|
35
|
+
- `npm install -g doc-fetch-cli` → run `doc-fetch`
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 🔧 **What Was Fixed**
|
|
40
|
+
|
|
41
|
+
### **Fix 1: Improved Binary Detection**
|
|
42
|
+
|
|
43
|
+
Updated `bin/doc-fetch.js` to:
|
|
44
|
+
- ✅ Try multiple possible binary locations
|
|
45
|
+
- ✅ Detect platform and architecture correctly
|
|
46
|
+
- ✅ Provide helpful error messages with troubleshooting steps
|
|
47
|
+
- ✅ Support Linux (amd64/arm64), macOS, Windows
|
|
48
|
+
|
|
49
|
+
**Before**:
|
|
50
|
+
```javascript
|
|
51
|
+
const binaryPath = path.join(__dirname, '..', 'doc-fetch');
|
|
52
|
+
// Only checked one location, failed if binary wasn't there
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
**After**:
|
|
56
|
+
```javascript
|
|
57
|
+
const possiblePaths = [
|
|
58
|
+
path.join(packageDir, binaryName), // Root directory
|
|
59
|
+
path.join(packageDir, 'bin', binaryName), // bin/ directory
|
|
60
|
+
// ... fallbacks
|
|
61
|
+
];
|
|
62
|
+
|
|
63
|
+
// Tries each location until found
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### **Fix 2: Proper Postinstall Script**
|
|
67
|
+
|
|
68
|
+
Updated `bin/postinstall.js` to:
|
|
69
|
+
- ✅ Copy the correct platform-specific binary
|
|
70
|
+
- ✅ Set executable permissions
|
|
71
|
+
- ✅ Verify the binary works
|
|
72
|
+
- ✅ Provide clear error messages if binary is missing
|
|
73
|
+
|
|
74
|
+
**Key logic**:
|
|
75
|
+
```javascript
|
|
76
|
+
// Determine which binary to use for this platform
|
|
77
|
+
if (platform === 'linux') {
|
|
78
|
+
expectedBinary = 'doc-fetch_linux_amd64';
|
|
79
|
+
} else if (platform === 'darwin') {
|
|
80
|
+
expectedBinary = 'doc-fetch_darwin_amd64';
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Copy to expected location
|
|
84
|
+
fs.copyFileSync(sourcePath, destPath);
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### **Fix 3: Added .npmignore**
|
|
88
|
+
|
|
89
|
+
Created `.npmignore` so that only the files needed at install time are published in the NPM package (it lists what to exclude; everything else ships):
|
|
90
|
+
- ✅ Go binaries for all platforms
|
|
91
|
+
- ✅ Bin wrapper scripts
|
|
92
|
+
- ✅ Postinstall script
|
|
93
|
+
- ❌ Excludes: source code, Python files, test files
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 📦 **How to Test the Fix**
|
|
98
|
+
|
|
99
|
+
### **Clean Install Test**
|
|
100
|
+
```bash
|
|
101
|
+
# Uninstall completely
|
|
102
|
+
npm uninstall -g doc-fetch-cli
|
|
103
|
+
|
|
104
|
+
# Clear npm cache
|
|
105
|
+
npm cache clean --force
|
|
106
|
+
|
|
107
|
+
# Install fresh
|
|
108
|
+
npm install -g doc-fetch-cli@latest
|
|
109
|
+
|
|
110
|
+
# Test (note: command is 'doc-fetch' not 'doc-fetch-cli')
|
|
111
|
+
doc-fetch --help
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### **Expected Output**
|
|
115
|
+
```
|
|
116
|
+
🎉 DocFetch CLI installing...
|
|
117
|
+
|
|
118
|
+
📦 Platform: linux x64
|
|
119
|
+
📦 Expected binary: doc-fetch_linux_amd64
|
|
120
|
+
|
|
121
|
+
✅ Binary installed: doc-fetch
|
|
122
|
+
✅ Binary verified working
|
|
123
|
+
|
|
124
|
+
✨ DocFetch CLI installed successfully!
|
|
125
|
+
|
|
126
|
+
Usage:
|
|
127
|
+
doc-fetch --url https://docs.example.com --output docs.md
|
|
128
|
+
|
|
129
|
+
Pro tip: Use --llm-txt flag to generate AI-friendly index files!
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 🎯 **Platform Support**
|
|
135
|
+
|
|
136
|
+
| Platform | Architecture | Binary Name | Status |
|
|
137
|
+
|----------|-------------|-------------|--------|
|
|
138
|
+
| Linux | x64 (amd64) | doc-fetch_linux_amd64 | ✅ Supported |
|
|
139
|
+
| Linux | ARM64 | doc-fetch_linux_arm64 | ⚠️ Coming soon |
|
|
140
|
+
| macOS | x64 (amd64) | doc-fetch_darwin_amd64 | ✅ Supported |
|
|
141
|
+
| macOS | ARM64 (M1/M2) | doc-fetch_darwin_arm64 | ⚠️ Coming soon |
|
|
142
|
+
| Windows | x64 (amd64) | doc-fetch_windows_amd64.exe | ✅ Supported |
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## 🐛 **Troubleshooting**
|
|
147
|
+
|
|
148
|
+
### **Error: "Binary not found"**
|
|
149
|
+
|
|
150
|
+
**Solution 1**: Reinstall
|
|
151
|
+
```bash
|
|
152
|
+
npm uninstall -g doc-fetch-cli
|
|
153
|
+
npm install -g doc-fetch-cli
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**Solution 2**: Check what was installed
|
|
157
|
+
```bash
|
|
158
|
+
ls -la $(npm root -g)/doc-fetch-cli/
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
You should see:
|
|
162
|
+
```
|
|
163
|
+
-rwxr-xr-x doc-fetch # ← The actual binary
|
|
164
|
+
-rwxr-xr-x doc-fetch_linux_amd64 # ← Platform-specific binary
|
|
165
|
+
drwxr-xr-x bin/ # ← Contains doc-fetch.js wrapper
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**Solution 3**: Manual installation
|
|
169
|
+
```bash
|
|
170
|
+
# Download binary directly
|
|
171
|
+
wget https://github.com/AlphaTechini/doc-fetch/releases/download/v1.1.1/doc-fetch_linux_amd64
|
|
172
|
+
chmod +x doc-fetch_linux_amd64
|
|
173
|
+
sudo mv doc-fetch_linux_amd64 /usr/local/bin/doc-fetch
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### **Error: "Command not found: doc-fetch-cli"**
|
|
177
|
+
|
|
178
|
+
**This is expected!** The command is `doc-fetch`, not `doc-fetch-cli`.
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Wrong ❌
|
|
182
|
+
doc-fetch-cli --help
|
|
183
|
+
|
|
184
|
+
# Correct ✅
|
|
185
|
+
doc-fetch --help
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### **Error: "Permission denied"**
|
|
189
|
+
|
|
190
|
+
**Solution**: Fix permissions
|
|
191
|
+
```bash
|
|
192
|
+
# Find installation directory
|
|
193
|
+
DOC_FETCH_DIR=$(npm root -g)/doc-fetch-cli
|
|
194
|
+
|
|
195
|
+
# Fix permissions
|
|
196
|
+
chmod +x $DOC_FETCH_DIR/doc-fetch
|
|
197
|
+
chmod +x $DOC_FETCH_DIR/bin/doc-fetch.js
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## 📝 **For Future Releases**
|
|
203
|
+
|
|
204
|
+
### **Publishing Checklist**
|
|
205
|
+
|
|
206
|
+
1. Build all platform binaries:
|
|
207
|
+
```bash
|
|
208
|
+
GOOS=linux GOARCH=amd64 go build -o doc-fetch_linux_amd64 ./cmd/docfetch
|
|
209
|
+
GOOS=darwin GOARCH=amd64 go build -o doc-fetch_darwin_amd64 ./cmd/docfetch
|
|
210
|
+
GOOS=windows GOARCH=amd64 go build -o doc-fetch_windows_amd64.exe ./cmd/docfetch
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
2. Verify `.npmignore` does not exclude:
|
|
214
|
+
- ✅ All platform binaries
|
|
215
|
+
- ✅ `bin/` directory
|
|
216
|
+
- ✅ `package.json` with correct `bin` field
|
|
217
|
+
|
|
218
|
+
3. Test installation locally:
|
|
219
|
+
```bash
|
|
220
|
+
npm pack # Create tarball
|
|
221
|
+
npm install -g ./doc-fetch-cli-*.tgz # Install locally
|
|
222
|
+
doc-fetch --help # Test
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
4. Publish:
|
|
226
|
+
```bash
|
|
227
|
+
npm publish
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## 🔗 **Related Issues**
|
|
233
|
+
|
|
234
|
+
- GitHub Issue: [#XX](https://github.com/AlphaTechini/doc-fetch/issues/XX)
|
|
235
|
+
- NPM Package: https://www.npmjs.com/package/doc-fetch-cli
|
|
236
|
+
- Documentation: https://github.com/AlphaTechini/doc-fetch/blob/main/README.md
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
**Last Updated**: February 20, 2026
|
|
241
|
+
**Version**: 1.1.2
|
|
242
|
+
**Status**: ✅ Fixed and tested
|
package/bin/doc-fetch.js
CHANGED
|
@@ -2,16 +2,51 @@
|
|
|
2
2
|
const { spawn } = require('child_process');
|
|
3
3
|
const path = require('path');
|
|
4
4
|
const os = require('os');
|
|
5
|
+
const fs = require('fs');
|
|
5
6
|
|
|
6
|
-
//
|
|
7
|
-
const
|
|
8
|
-
const binaryName = os.platform() === 'win32' ? 'doc-fetch.exe' : 'doc-fetch';
|
|
9
|
-
const binaryPath = path.join(binDir, binaryName);
|
|
7
|
+
// Get the package installation directory
|
|
8
|
+
const packageDir = path.join(__dirname, '..');
|
|
10
9
|
|
|
11
|
-
//
|
|
12
|
-
|
|
10
|
+
// Determine binary name based on platform
|
|
11
|
+
const platform = os.platform();
|
|
12
|
+
const arch = os.arch();
|
|
13
|
+
let binaryName;
|
|
14
|
+
|
|
15
|
+
if (platform === 'win32') {
|
|
16
|
+
binaryName = 'doc-fetch.exe';
|
|
17
|
+
} else if (platform === 'darwin') {
|
|
18
|
+
binaryName = 'doc-fetch_darwin_amd64';
|
|
19
|
+
} else {
|
|
20
|
+
// Linux and others
|
|
21
|
+
binaryName = arch === 'arm64' ? 'doc-fetch_linux_arm64' : 'doc-fetch_linux_amd64';
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Try multiple possible locations
|
|
25
|
+
const possiblePaths = [
|
|
26
|
+
path.join(packageDir, binaryName), // Root directory
|
|
27
|
+
path.join(packageDir, 'bin', binaryName), // bin/ directory
|
|
28
|
+
path.join(packageDir, binaryName.replace('_linux_amd64', '')), // Fallback to generic name
|
|
29
|
+
];
|
|
30
|
+
|
|
31
|
+
// Find the binary
|
|
32
|
+
let binaryPath = null;
|
|
33
|
+
for (const testPath of possiblePaths) {
|
|
34
|
+
if (fs.existsSync(testPath)) {
|
|
35
|
+
binaryPath = testPath;
|
|
36
|
+
break;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (!binaryPath) {
|
|
13
41
|
console.error('❌ doc-fetch binary not found!');
|
|
14
|
-
console.error('
|
|
42
|
+
console.error('');
|
|
43
|
+
console.error('💡 Troubleshooting steps:');
|
|
44
|
+
console.error(' 1. Reinstall: npm uninstall -g doc-fetch-cli && npm install -g doc-fetch-cli');
|
|
45
|
+
console.error(' 2. Check installation: ls -la $(npm root -g)/doc-fetch-cli/');
|
|
46
|
+
console.error(' 3. Report issue: https://github.com/AlphaTechini/doc-fetch/issues');
|
|
47
|
+
console.error('');
|
|
48
|
+
console.error(` Expected binary: ${possiblePaths[0]}`);
|
|
49
|
+
console.error(` Platform: ${platform} ${arch}`);
|
|
15
50
|
process.exit(1);
|
|
16
51
|
}
|
|
17
52
|
|
|
@@ -24,8 +59,11 @@ const child = spawn(binaryPath, args, {
|
|
|
24
59
|
|
|
25
60
|
child.on('error', (err) => {
|
|
26
61
|
if (err.code === 'ENOENT') {
|
|
27
|
-
console.error('❌ doc-fetch binary
|
|
28
|
-
console.error(
|
|
62
|
+
console.error('❌ Failed to execute doc-fetch binary');
|
|
63
|
+
console.error(` Binary path: ${binaryPath}`);
|
|
64
|
+
console.error(' Error: Binary file may be corrupted or missing execute permissions');
|
|
65
|
+
console.error('');
|
|
66
|
+
console.error('💡 Try reinstalling: npm uninstall -g doc-fetch-cli && npm install -g doc-fetch-cli');
|
|
29
67
|
} else {
|
|
30
68
|
console.error('❌ Failed to start doc-fetch:', err.message);
|
|
31
69
|
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Post-install script for doc-fetch-cli
|
|
4
|
+
* Copies the correct platform-specific binary into place, marks it executable, and checks that it runs
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { execSync } = require('child_process');
|
|
8
|
+
const path = require('path');
|
|
9
|
+
const os = require('os');
|
|
10
|
+
const fs = require('fs');
|
|
11
|
+
|
|
12
|
+
console.log('🎉 DocFetch CLI installing...\n');
|
|
13
|
+
|
|
14
|
+
const packageDir = path.join(__dirname, '..');
|
|
15
|
+
const platform = os.platform();
|
|
16
|
+
const arch = os.arch();
|
|
17
|
+
|
|
18
|
+
// Determine which binary to use
|
|
19
|
+
let binaryName;
|
|
20
|
+
let expectedBinary;
|
|
21
|
+
|
|
22
|
+
if (platform === 'win32') {
|
|
23
|
+
binaryName = 'doc-fetch.exe';
|
|
24
|
+
expectedBinary = 'doc-fetch_windows_amd64.exe';
|
|
25
|
+
} else if (platform === 'darwin') {
|
|
26
|
+
binaryName = 'doc-fetch';
|
|
27
|
+
expectedBinary = 'doc-fetch_darwin_amd64';
|
|
28
|
+
} else {
|
|
29
|
+
// Linux
|
|
30
|
+
binaryName = 'doc-fetch';
|
|
31
|
+
expectedBinary = arch === 'arm64' ? 'doc-fetch_linux_arm64' : 'doc-fetch_linux_amd64';
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const sourcePath = path.join(packageDir, expectedBinary);
|
|
35
|
+
const destPath = path.join(packageDir, binaryName);
|
|
36
|
+
|
|
37
|
+
console.log(`📦 Platform: ${platform} ${arch}`);
|
|
38
|
+
console.log(`📦 Expected binary: ${expectedBinary}\n`);
|
|
39
|
+
|
|
40
|
+
// Check if the expected binary exists
|
|
41
|
+
if (!fs.existsSync(sourcePath)) {
|
|
42
|
+
console.error(`⚠️ Warning: Expected binary not found: ${expectedBinary}`);
|
|
43
|
+
console.error('');
|
|
44
|
+
console.error('💡 This might be because:');
|
|
45
|
+
console.error(' 1. The package was published without binaries');
|
|
46
|
+
console.error(' 2. Your platform/architecture is not supported');
|
|
47
|
+
console.error('');
|
|
48
|
+
console.error('Supported platforms:');
|
|
49
|
+
console.error(' - Linux x64 (amd64)');
|
|
50
|
+
console.error(' - macOS x64 (amd64)');
|
|
51
|
+
console.error(' - Windows x64 (amd64)');
|
|
52
|
+
console.error('');
|
|
53
|
+
console.error('💡 Workaround: Install from source');
|
|
54
|
+
console.error(' npm uninstall -g doc-fetch-cli');
|
|
55
|
+
console.error(' git clone https://github.com/AlphaTechini/doc-fetch.git');
|
|
56
|
+
console.error(' cd doc-fetch && go build -o doc-fetch ./cmd/docfetch');
|
|
57
|
+
console.error(' sudo cp doc-fetch /usr/local/bin/');
|
|
58
|
+
process.exit(1);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Copy the binary to the expected location
|
|
62
|
+
try {
|
|
63
|
+
fs.copyFileSync(sourcePath, destPath);
|
|
64
|
+
|
|
65
|
+
// Make executable on Unix-like systems
|
|
66
|
+
if (platform !== 'win32') {
|
|
67
|
+
fs.chmodSync(destPath, 0o755);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
console.log(`✅ Binary installed: ${binaryName}`);
|
|
71
|
+
} catch (error) {
|
|
72
|
+
console.error(`❌ Failed to install binary: ${error.message}`);
|
|
73
|
+
process.exit(1);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Verify installation
|
|
77
|
+
try {
|
|
78
|
+
const result = execSync(`"${destPath}" --help`, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'ignore'] });
|
|
79
|
+
console.log('✅ Binary verified working\n');
|
|
80
|
+
} catch (error) {
|
|
81
|
+
console.error('⚠️ Warning: Could not verify binary execution');
|
|
82
|
+
console.error(` Error: ${error.message}\n`);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
console.log('✨ DocFetch CLI installed successfully!\n');
|
|
86
|
+
console.log('Usage:');
|
|
87
|
+
console.log(' doc-fetch --url https://docs.example.com --output docs.md\n');
|
|
88
|
+
console.log('Pro tip: Use --llm-txt flag to generate AI-friendly index files!\n');
|
package/package.json
CHANGED
|
@@ -1,18 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "doc-fetch-cli",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.2",
|
|
4
4
|
"description": "Dynamic documentation fetching CLI that converts entire documentation sites to single markdown files for AI/LLM consumption",
|
|
5
5
|
"bin": {
|
|
6
6
|
"doc-fetch": "./bin/doc-fetch.js"
|
|
7
7
|
},
|
|
8
8
|
"scripts": {
|
|
9
|
-
"postinstall": "node ./bin/
|
|
9
|
+
"postinstall": "node ./bin/postinstall.js"
|
|
10
10
|
},
|
|
11
11
|
"repository": {
|
|
12
12
|
"type": "git",
|
|
13
13
|
"url": "https://github.com/AlphaTechini/doc-fetch.git"
|
|
14
14
|
},
|
|
15
|
-
"keywords": [
|
|
15
|
+
"keywords": [
|
|
16
|
+
"documentation",
|
|
17
|
+
"ai",
|
|
18
|
+
"llm",
|
|
19
|
+
"markdown",
|
|
20
|
+
"crawler",
|
|
21
|
+
"security"
|
|
22
|
+
],
|
|
16
23
|
"author": "AlphaTechini",
|
|
17
24
|
"license": "MIT"
|
|
18
|
-
}
|
|
25
|
+
}
|
package/SECURITY.md
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
# Security Policy
|
|
2
|
-
|
|
3
|
-
## Security Features
|
|
4
|
-
|
|
5
|
-
DocFetch includes several built-in security protections:
|
|
6
|
-
|
|
7
|
-
### ✅ Path Traversal Protection
|
|
8
|
-
- Output files can only be written within the current working directory
|
|
9
|
-
- Relative paths (`../`) are blocked
|
|
10
|
-
- Absolute paths outside the current directory are rejected
|
|
11
|
-
|
|
12
|
-
### ✅ SSRF (Server-Side Request Forgery) Protection
|
|
13
|
-
- Only HTTP/HTTPS URLs are allowed
|
|
14
|
-
- Private IP addresses (192.168.x.x, 10.x.x.x, etc.) are blocked
|
|
15
|
-
- Localhost and loopback addresses are blocked
|
|
16
|
-
- Internal network access is prevented
|
|
17
|
-
|
|
18
|
-
### ✅ Rate Limiting
|
|
19
|
-
- Maximum 10 requests per second to avoid overwhelming servers
|
|
20
|
-
- Respectful crawling behavior
|
|
21
|
-
|
|
22
|
-
### ✅ Input Validation
|
|
23
|
-
- URL validation and sanitization
|
|
24
|
-
- Output path validation
|
|
25
|
-
- Parameter bounds checking (max depth: 10, max workers: 20)
|
|
26
|
-
|
|
27
|
-
### ✅ Content Safety
|
|
28
|
-
- HTML content is cleaned of scripts and dangerous elements
|
|
29
|
-
- XSS patterns are filtered out
|
|
30
|
-
- Only safe markdown is generated
|
|
31
|
-
|
|
32
|
-
## Safe Usage Guidelines
|
|
33
|
-
|
|
34
|
-
### Command Line Usage
|
|
35
|
-
```bash
|
|
36
|
-
# ✅ SAFE - relative path in current directory
|
|
37
|
-
doc-fetch --url https://example.com --output docs.md
|
|
38
|
-
|
|
39
|
-
# ✅ SAFE - subdirectory in current directory
|
|
40
|
-
doc-fetch --url https://example.com --output ./docs/site.md
|
|
41
|
-
|
|
42
|
-
# ❌ BLOCKED - path traversal attempt
|
|
43
|
-
doc-fetch --url https://example.com --output ../../etc/passwd
|
|
44
|
-
|
|
45
|
-
# ❌ BLOCKED - absolute path outside current directory
|
|
46
|
-
doc-fetch --url https://example.com --output /tmp/malicious.md
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### URL Restrictions
|
|
50
|
-
```bash
|
|
51
|
-
# ✅ SAFE - public HTTPS site
|
|
52
|
-
doc-fetch --url https://golang.org/doc/ --output docs.md
|
|
53
|
-
|
|
54
|
-
# ❌ BLOCKED - private IP address
|
|
55
|
-
doc-fetch --url http://192.168.1.1/admin --output docs.md
|
|
56
|
-
|
|
57
|
-
# ❌ BLOCKED - localhost
|
|
58
|
-
doc-fetch --url http://localhost:8080/api --output docs.md
|
|
59
|
-
|
|
60
|
-
# ❌ BLOCKED - non-HTTP protocol
|
|
61
|
-
doc-fetch --url file:///etc/passwd --output docs.md
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
## Reporting Security Issues
|
|
65
|
-
|
|
66
|
-
If you discover a security vulnerability in DocFetch, please:
|
|
67
|
-
|
|
68
|
-
1. **Do not disclose publicly** until it's been addressed
|
|
69
|
-
2. Contact the maintainer directly at [your email]
|
|
70
|
-
3. Provide detailed reproduction steps
|
|
71
|
-
4. Allow reasonable time for patch development
|
|
72
|
-
|
|
73
|
-
## Security Updates
|
|
74
|
-
|
|
75
|
-
Security patches will be released as soon as possible after vulnerability confirmation. Users are encouraged to keep DocFetch updated to the latest version.
|
|
76
|
-
|
|
77
|
-
## Dependencies Security
|
|
78
|
-
|
|
79
|
-
DocFetch uses the following dependencies with known security track records:
|
|
80
|
-
- `github.com/PuerkitoBio/goquery` - HTML parsing
|
|
81
|
-
- `github.com/yuin/goldmark` - Markdown processing
|
|
82
|
-
- Standard Go libraries (`net/http`, `sync`, etc.)
|
|
83
|
-
|
|
84
|
-
All dependencies are regularly audited and kept up-to-date.
|
package/cmd/docfetch/main.go
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"flag"
|
|
5
|
-
"log"
|
|
6
|
-
"strings"
|
|
7
|
-
|
|
8
|
-
"github.com/AlphaTechini/doc-fetch/pkg/fetcher"
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
func main() {
|
|
12
|
-
url := flag.String("url", "", "Base URL to fetch documentation from")
|
|
13
|
-
output := flag.String("output", "docs.md", "Output file path")
|
|
14
|
-
depth := flag.Int("depth", 2, "Maximum crawl depth")
|
|
15
|
-
concurrent := flag.Int("concurrent", 3, "Concurrent fetchers")
|
|
16
|
-
userAgent := flag.String("user-agent", "DocFetch/1.0", "Custom user agent")
|
|
17
|
-
llmTxt := flag.Bool("llm-txt", false, "Generate llm.txt index file")
|
|
18
|
-
|
|
19
|
-
flag.Parse()
|
|
20
|
-
|
|
21
|
-
if *url == "" {
|
|
22
|
-
log.Fatal("Error: URL is required\nUsage: doc-fetch --url <base-url> --output <file-path>")
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
// Validate configuration for security
|
|
26
|
-
config := fetcher.Config{
|
|
27
|
-
BaseURL: *url,
|
|
28
|
-
OutputPath: *output,
|
|
29
|
-
MaxDepth: *depth,
|
|
30
|
-
Workers: *concurrent,
|
|
31
|
-
UserAgent: *userAgent,
|
|
32
|
-
GenerateLLMTxt: *llmTxt,
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
if err := fetcher.ValidateConfig(&config); err != nil {
|
|
36
|
-
log.Fatalf("Configuration error: %v", err)
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Use optimized high-performance fetcher
|
|
40
|
-
err := fetcher.RunOptimized(config)
|
|
41
|
-
if err != nil {
|
|
42
|
-
log.Fatalf("Failed to fetch documentation: %v", err)
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
log.Printf("Documentation successfully saved to %s", *output)
|
|
46
|
-
if *llmTxt {
|
|
47
|
-
llmTxtPath := *output
|
|
48
|
-
if strings.HasSuffix(*output, ".md") {
|
|
49
|
-
llmTxtPath = strings.TrimSuffix(*output, ".md") + ".llm.txt"
|
|
50
|
-
} else {
|
|
51
|
-
llmTxtPath = *output + ".llm.txt"
|
|
52
|
-
}
|
|
53
|
-
log.Printf("LLM.txt index generated: %s", llmTxtPath)
|
|
54
|
-
}
|
|
55
|
-
}
|
|
Binary file
|
|
Binary file
|
package/doc-fetch_darwin_amd64
DELETED
|
Binary file
|
|
Binary file
|
package/doc_fetch/__init__.py
DELETED