@imenam/simple-scraper 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -2
- package/dist/index.js +1 -0
- package/dist/logger.js +2 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -29,6 +29,17 @@ npm install -g @imenam/simple-scraper
|
|
|
29
29
|
simple-scraper
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
+
## Environment Variables
|
|
33
|
+
|
|
34
|
+
| Variable | Required | Default | Description |
|
|
35
|
+
|----------|----------|---------|-------------|
|
|
36
|
+
| `PUPPETEER_HEADLESS` | No | `true` | Run Chromium in headless mode. Set to `false` to display the browser window. |
|
|
37
|
+
| `PUPPETEER_TIMEOUT` | No | `30000` | Default timeout in milliseconds for page navigation and waits. |
|
|
38
|
+
| `COOKIES_DIR` | No | — | Absolute path to a folder containing Netscape-format `.txt` cookie files. All files are loaded and merged automatically before each request. |
|
|
39
|
+
| `PROXY_URL` | No | — | Base URL of the [MCP HTTP Gateway](https://www.npmjs.com/package/@imenam/mcp-http-gateway). Required to enable the GUI. |
|
|
40
|
+
| `PROXY_APP_PATH` | No | `/simple-scraper-mcp` | URL path under which the GUI is registered on the proxy. |
|
|
41
|
+
| `PROXY_APP_NAME` | No | `Simple Scraper MCP` | Display name shown in the proxy's app list. |
|
|
42
|
+
|
|
32
43
|
## Configuration
|
|
33
44
|
|
|
34
45
|
Copy `.env.example` to `.env` and configure the variables:
|
|
@@ -50,7 +61,7 @@ PUPPETEER_TIMEOUT=30000
|
|
|
50
61
|
|
|
51
62
|
## Usage with Claude Desktop
|
|
52
63
|
|
|
53
|
-
Add the following to your `claude_desktop_config.json`:
|
|
64
|
+
Add the following to your `claude_desktop_config.json`. Full example with all available options:
|
|
54
65
|
|
|
55
66
|
```json
|
|
56
67
|
{
|
|
@@ -60,7 +71,12 @@ Add the following to your `claude_desktop_config.json`:
|
|
|
60
71
|
"args": ["@imenam/simple-scraper"],
|
|
61
72
|
"env": {
|
|
62
73
|
"PUPPETEER_HEADLESS": "true",
|
|
63
|
-
"PUPPETEER_TIMEOUT": "30000"
|
|
74
|
+
"PUPPETEER_TIMEOUT": "30000",
|
|
75
|
+
"COOKIES_DIR": "/path/to/your/cookies",
|
|
76
|
+
"MCP_LOG_DIR": "/path/to/your/logs",
|
|
77
|
+
"PROXY_URL": "http://localhost:4500",
|
|
78
|
+
"PROXY_APP_PATH": "/simple-scraper",
|
|
79
|
+
"PROXY_APP_NAME": "Simple Scraper"
|
|
64
80
|
}
|
|
65
81
|
}
|
|
66
82
|
}
|
|
@@ -84,6 +100,31 @@ To load cookies automatically, add `COOKIES_DIR` pointing to a folder containing
|
|
|
84
100
|
}
|
|
85
101
|
```
|
|
86
102
|
|
|
103
|
+
## Usage with Cursor
|
|
104
|
+
|
|
105
|
+
In Cursor, MCP servers are configured in `.cursor/mcp.json`. You can pass environment variables directly in the config. Full example with all available options:
|
|
106
|
+
|
|
107
|
+
```json
|
|
108
|
+
{
|
|
109
|
+
"mcpServers": {
|
|
110
|
+
"simple-scraper": {
|
|
111
|
+
"command": "npx",
|
|
112
|
+
"args": ["-y", "@imenam/simple-scraper"],
|
|
113
|
+
"env": {
|
|
114
|
+
"PUPPETEER_HEADLESS": "true",
|
|
115
|
+
"PUPPETEER_TIMEOUT": "30000",
|
|
116
|
+
"COOKIES_DIR": "/path/to/your/cookies",
|
|
117
|
+
"PROXY_URL": "http://localhost:4500",
|
|
118
|
+
"PROXY_APP_PATH": "/simple-scraper",
|
|
119
|
+
"PROXY_APP_NAME": "Simple Scraper"
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
> **Note:** The `-y` flag in `args` avoids the interactive confirmation prompt when using `npx`.
|
|
127
|
+
|
|
87
128
|
## MCP Tools
|
|
88
129
|
|
|
89
130
|
### `scrape_page`
|
package/dist/index.js
CHANGED
package/dist/logger.js
CHANGED
|
@@ -2,9 +2,10 @@ import fs from "fs";
|
|
|
2
2
|
import path from "path";
|
|
3
3
|
let currentPrefix = "";
|
|
4
4
|
export function logToFile(message, prefix = "") {
|
|
5
|
-
const
|
|
5
|
+
const defaultLogDir = process.platform === 'win32'
|
|
6
6
|
? 'C:\\var\\log\\simple-scraper-mcp'
|
|
7
7
|
: '/var/log/simple-scraper-mcp';
|
|
8
|
+
const logDir = process.env.MCP_LOG_DIR ?? defaultLogDir;
|
|
8
9
|
const logFile = path.join(logDir, 'server.log');
|
|
9
10
|
const now = new Date().toLocaleString('fr-FR', {
|
|
10
11
|
year: 'numeric',
|