scraperapi-mcp-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scraperapi_mcp_server-0.1.0/PKG-INFO +207 -0
- scraperapi_mcp_server-0.1.0/README.md +188 -0
- scraperapi_mcp_server-0.1.0/pyproject.toml +35 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/__init__.py +27 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/__main__.py +5 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/config.py +25 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/scrape/models.py +31 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/scrape/scrape.py +45 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/server.py +45 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/utils/country_codes.py +416 -0
- scraperapi_mcp_server-0.1.0/src/scraperapi_mcp_server/utils/exceptions.py +49 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: scraperapi-mcp-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ScraperAPI MCP server
|
|
5
|
+
Keywords: http,mcp,llm,automation,scraping
|
|
6
|
+
Author: ScraperAPI
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Classifier: Development Status :: 1 - Planning
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Requires-Dist: mcp[cli] (>=1.6.0)
|
|
15
|
+
Requires-Dist: pydantic (>=2.11.3)
|
|
16
|
+
Requires-Dist: pydantic-settings (>=2.9.1)
|
|
17
|
+
Requires-Dist: requests (>=2.32.3)
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# ScraperAPI MCP server
|
|
21
|
+
|
|
22
|
+
The ScraperAPI MCP server enables LLM clients to retrieve and process web scraping requests using the ScraperAPI services.
|
|
23
|
+
|
|
24
|
+
## Table of Contents
|
|
25
|
+
|
|
26
|
+
- [Features](#features)
|
|
27
|
+
- [Architecture](#architecture)
|
|
28
|
+
- [Installation](#installation)
|
|
29
|
+
- [API Reference](#api-reference)
|
|
30
|
+
- [Configuration](#configuration)
|
|
31
|
+
- [Development](#development)
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- Full implementation of the Model Context Protocol specification
|
|
36
|
+
- Seamless integration with ScraperAPI for web scraping
|
|
37
|
+
- Simple setup with Docker or Python
|
|
38
|
+
|
|
39
|
+
## Architecture
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
┌───────────────┐ ┌───────────────────────┐ ┌───────────────┐
|
|
43
|
+
│ LLM Client │────▶│ Scraper MCP Server │────▶│ AI Model │
|
|
44
|
+
└───────────────┘ └───────────────────────┘ └───────────────┘
|
|
45
|
+
│
|
|
46
|
+
▼
|
|
47
|
+
┌──────────────────┐
|
|
48
|
+
│ ScraperAPI API │
|
|
49
|
+
└──────────────────┘
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
The ScraperAPI MCP Server is designed to run as a local server on your machine, your LLM client will launch it automatically when configured.
|
|
55
|
+
|
|
56
|
+
### Prerequisites
|
|
57
|
+
- Python 3.11+
|
|
58
|
+
- Docker (optional)
|
|
59
|
+
|
|
60
|
+
### Using Python
|
|
61
|
+
|
|
62
|
+
Add this to your client configuration file:
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"mcpServers": {
|
|
67
|
+
"ScraperAPI": {
|
|
68
|
+
"command": "python",
|
|
69
|
+
"args": ["-m", "scraperapi_mcp_server"],
|
|
70
|
+
"env": {
|
|
71
|
+
"API_KEY": "<YOUR_SCRAPERAPI_API_KEY>"
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Using Docker
|
|
79
|
+
|
|
80
|
+
Add this to your client configuration file:
|
|
81
|
+
|
|
82
|
+
```json
|
|
83
|
+
{
|
|
84
|
+
"mcpServers": {
|
|
85
|
+
"ScraperAPI": {
|
|
86
|
+
"command": "docker",
|
|
87
|
+
"args": [
|
|
88
|
+
"run",
|
|
89
|
+
"-i",
|
|
90
|
+
"-e",
|
|
91
|
+
"API_KEY=${API_KEY}",
|
|
92
|
+
"--rm",
|
|
93
|
+
"scraperapi-mcp-server"]
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
</br>
|
|
100
|
+
|
|
101
|
+
> [!TIP]
|
|
102
|
+
>
|
|
103
|
+
> If your command is not working (for example, you see a `package not found` error when trying to start the server), double-check the path you are using. To find the correct path, activate your virtual environment first, then run:
|
|
104
|
+
> ```bash
|
|
105
|
+
> which <YOUR_COMMAND>
|
|
106
|
+
> ```
|
|
107
|
+
|
|
108
|
+
## API Reference
|
|
109
|
+
|
|
110
|
+
### Available Tools
|
|
111
|
+
|
|
112
|
+
- `scrape`
|
|
113
|
+
- Scrape a URL from the internet using ScraperAPI
|
|
114
|
+
- Parameters:
|
|
115
|
+
- `url` (string, required): URL to scrape
|
|
116
|
+
- `render` (boolean, optional): Whether to render the page using JavaScript. Defaults to `False`. Set to `True` only if the page requires JavaScript rendering to display its content.
|
|
117
|
+
- `country_code` (string, optional): Activate country geotargeting (ISO 2-letter code)
|
|
118
|
+
- `premium` (boolean, optional): Activate premium residential and mobile IPs
|
|
119
|
+
- `ultra_premium` (boolean, optional): Activate advanced bypass mechanisms. Can not combine with `premium`
|
|
120
|
+
- `device_type` (string, optional): Set request to use `mobile` or `desktop` user agents
|
|
121
|
+
- Returns: The scraped content as a string
|
|
122
|
+
|
|
123
|
+
### Prompt templates
|
|
124
|
+
|
|
125
|
+
- Please scrape this URL `<URL>`. If you receive a 500 server error, identify the website's geo-targeting and add the corresponding country_code to overcome geo-restrictions. If errors continue, upgrade the request to use premium proxies by adding premium=true. For persistent failures, activate ultra_premium=true to use enhanced anti-blocking measures.
|
|
126
|
+
- Can you scrape URL `<URL>` to extract `<SPECIFIC_DATA>`? If the request returns missing/incomplete `<SPECIFIC_DATA>`, set render=true to enable JS Rendering.
|
|
127
|
+
|
|
128
|
+
## Configuration
|
|
129
|
+
|
|
130
|
+
### Settings
|
|
131
|
+
|
|
132
|
+
- `API_KEY`: Your ScraperAPI API key.
|
|
133
|
+
|
|
134
|
+
### Configure for Claude Desktop App
|
|
135
|
+
|
|
136
|
+
1. Open Claude Desktop Application
|
|
137
|
+
2. Access the Settings Menu
|
|
138
|
+
3. Click on the settings icon (typically a gear or three dots in the upper right corner)
|
|
139
|
+
4. Select the "Developer" tab
|
|
140
|
+
5. Click on "Edit Config" and paste [the JSON configuration file](#installation).
|
|
141
|
+
|
|
142
|
+
## Development
|
|
143
|
+
|
|
144
|
+
### Local setup
|
|
145
|
+
|
|
146
|
+
1. **Clone the repository:**
|
|
147
|
+
```bash
|
|
148
|
+
git clone https://github.com/scraperapi/scraperapi-mcp
|
|
149
|
+
cd scraperapi-mcp
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
2. **Install dependencies and run the package locally:**
|
|
153
|
+
- **Using Python:**
|
|
154
|
+
```bash
|
|
155
|
+
# Create virtual environment and activate it
|
|
156
|
+
python -m venv .venv
|
|
157
|
+
source .venv/bin/activate # MacOS/Linux
|
|
158
|
+
# OR
|
|
159
|
+
.venv/Scripts/activate # Windows
|
|
160
|
+
|
|
161
|
+
# Install the local package in editable mode
|
|
162
|
+
pip install -e .
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
- **Using Docker:**
|
|
166
|
+
```bash
|
|
167
|
+
# Build the Docker image locally
|
|
168
|
+
docker build -t scraperapi-mcp-server .
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Run the server
|
|
172
|
+
- **Using Python:**
|
|
173
|
+
```bash
|
|
174
|
+
python -m scraperapi_mcp_server
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
- **Using Docker:**
|
|
178
|
+
```bash
|
|
179
|
+
# Run the Docker container with your API key
|
|
180
|
+
docker run -e API_KEY=<YOUR_SCRAPERAPI_API_KEY> scraperapi-mcp-server
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Debug
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
python3 -m scraperapi_mcp_server --debug
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Testing
|
|
190
|
+
|
|
191
|
+
This project uses [pytest](https://docs.pytest.org/en/stable/) for testing.
|
|
192
|
+
|
|
193
|
+
#### Install Test Dependencies
|
|
194
|
+
```bash
|
|
195
|
+
# Install the package with test dependencies
|
|
196
|
+
pip install -e ".[test]"
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
#### Running Tests
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Run All Tests
|
|
203
|
+
pytest
|
|
204
|
+
|
|
205
|
+
# Run Specific Test
|
|
206
|
+
pytest <TEST_FILE_PATH>
|
|
207
|
+
```
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# ScraperAPI MCP server
|
|
2
|
+
|
|
3
|
+
The ScraperAPI MCP server enables LLM clients to retrieve and process web scraping requests using the ScraperAPI services.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Features](#features)
|
|
8
|
+
- [Architecture](#architecture)
|
|
9
|
+
- [Installation](#installation)
|
|
10
|
+
- [API Reference](#api-reference)
|
|
11
|
+
- [Configuration](#configuration)
|
|
12
|
+
- [Development](#development)
|
|
13
|
+
|
|
14
|
+
## Features
|
|
15
|
+
|
|
16
|
+
- Full implementation of the Model Context Protocol specification
|
|
17
|
+
- Seamless integration with ScraperAPI for web scraping
|
|
18
|
+
- Simple setup with Docker or Python
|
|
19
|
+
|
|
20
|
+
## Architecture
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
┌───────────────┐ ┌───────────────────────┐ ┌───────────────┐
|
|
24
|
+
│ LLM Client │────▶│ Scraper MCP Server │────▶│ AI Model │
|
|
25
|
+
└───────────────┘ └───────────────────────┘ └───────────────┘
|
|
26
|
+
│
|
|
27
|
+
▼
|
|
28
|
+
┌──────────────────┐
|
|
29
|
+
│ ScraperAPI API │
|
|
30
|
+
└──────────────────┘
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
The ScraperAPI MCP Server is designed to run as a local server on your machine, your LLM client will launch it automatically when configured.
|
|
36
|
+
|
|
37
|
+
### Prerequisites
|
|
38
|
+
- Python 3.11+
|
|
39
|
+
- Docker (optional)
|
|
40
|
+
|
|
41
|
+
### Using Python
|
|
42
|
+
|
|
43
|
+
Add this to your client configuration file:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"mcpServers": {
|
|
48
|
+
"ScraperAPI": {
|
|
49
|
+
"command": "python",
|
|
50
|
+
"args": ["-m", "scraperapi_mcp_server"],
|
|
51
|
+
"env": {
|
|
52
|
+
"API_KEY": "<YOUR_SCRAPERAPI_API_KEY>"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Using Docker
|
|
60
|
+
|
|
61
|
+
Add this to your client configuration file:
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"ScraperAPI": {
|
|
67
|
+
"command": "docker",
|
|
68
|
+
"args": [
|
|
69
|
+
"run",
|
|
70
|
+
"-i",
|
|
71
|
+
"-e",
|
|
72
|
+
"API_KEY=${API_KEY}",
|
|
73
|
+
"--rm",
|
|
74
|
+
"scraperapi-mcp-server"]
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
</br>
|
|
81
|
+
|
|
82
|
+
> [!TIP]
|
|
83
|
+
>
|
|
84
|
+
> If your command is not working (for example, you see a `package not found` error when trying to start the server), double-check the path you are using. To find the correct path, activate your virtual environment first, then run:
|
|
85
|
+
> ```bash
|
|
86
|
+
> which <YOUR_COMMAND>
|
|
87
|
+
> ```
|
|
88
|
+
|
|
89
|
+
## API Reference
|
|
90
|
+
|
|
91
|
+
### Available Tools
|
|
92
|
+
|
|
93
|
+
- `scrape`
|
|
94
|
+
- Scrape a URL from the internet using ScraperAPI
|
|
95
|
+
- Parameters:
|
|
96
|
+
- `url` (string, required): URL to scrape
|
|
97
|
+
- `render` (boolean, optional): Whether to render the page using JavaScript. Defaults to `False`. Set to `True` only if the page requires JavaScript rendering to display its content.
|
|
98
|
+
- `country_code` (string, optional): Activate country geotargeting (ISO 2-letter code)
|
|
99
|
+
- `premium` (boolean, optional): Activate premium residential and mobile IPs
|
|
100
|
+
- `ultra_premium` (boolean, optional): Activate advanced bypass mechanisms. Can not combine with `premium`
|
|
101
|
+
- `device_type` (string, optional): Set request to use `mobile` or `desktop` user agents
|
|
102
|
+
- Returns: The scraped content as a string
|
|
103
|
+
|
|
104
|
+
### Prompt templates
|
|
105
|
+
|
|
106
|
+
- Please scrape this URL `<URL>`. If you receive a 500 server error, identify the website's geo-targeting and add the corresponding country_code to overcome geo-restrictions. If errors continue, upgrade the request to use premium proxies by adding premium=true. For persistent failures, activate ultra_premium=true to use enhanced anti-blocking measures.
|
|
107
|
+
- Can you scrape URL `<URL>` to extract `<SPECIFIC_DATA>`? If the request returns missing/incomplete `<SPECIFIC_DATA>`, set render=true to enable JS Rendering.
|
|
108
|
+
|
|
109
|
+
## Configuration
|
|
110
|
+
|
|
111
|
+
### Settings
|
|
112
|
+
|
|
113
|
+
- `API_KEY`: Your ScraperAPI API key.
|
|
114
|
+
|
|
115
|
+
### Configure for Claude Desktop App
|
|
116
|
+
|
|
117
|
+
1. Open Claude Desktop Application
|
|
118
|
+
2. Access the Settings Menu
|
|
119
|
+
3. Click on the settings icon (typically a gear or three dots in the upper right corner)
|
|
120
|
+
4. Select the "Developer" tab
|
|
121
|
+
5. Click on "Edit Config" and paste [the JSON configuration file](#installation).
|
|
122
|
+
|
|
123
|
+
## Development
|
|
124
|
+
|
|
125
|
+
### Local setup
|
|
126
|
+
|
|
127
|
+
1. **Clone the repository:**
|
|
128
|
+
```bash
|
|
129
|
+
git clone https://github.com/scraperapi/scraperapi-mcp
|
|
130
|
+
cd scraperapi-mcp
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
2. **Install dependencies and run the package locally:**
|
|
134
|
+
- **Using Python:**
|
|
135
|
+
```bash
|
|
136
|
+
# Create virtual environment and activate it
|
|
137
|
+
python -m venv .venv
|
|
138
|
+
source .venv/bin/activate # MacOS/Linux
|
|
139
|
+
# OR
|
|
140
|
+
.venv/Scripts/activate # Windows
|
|
141
|
+
|
|
142
|
+
# Install the local package in editable mode
|
|
143
|
+
pip install -e .
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
- **Using Docker:**
|
|
147
|
+
```bash
|
|
148
|
+
# Build the Docker image locally
|
|
149
|
+
docker build -t scraperapi-mcp-server .
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Run the server
|
|
153
|
+
- **Using Python:**
|
|
154
|
+
```bash
|
|
155
|
+
python -m scraperapi_mcp_server
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
- **Using Docker:**
|
|
159
|
+
```bash
|
|
160
|
+
# Run the Docker container with your API key
|
|
161
|
+
docker run -e API_KEY=<YOUR_SCRAPERAPI_API_KEY> scraperapi-mcp-server
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Debug
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
python3 -m scraperapi_mcp_server --debug
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Testing
|
|
171
|
+
|
|
172
|
+
This project uses [pytest](https://docs.pytest.org/en/stable/) for testing.
|
|
173
|
+
|
|
174
|
+
#### Install Test Dependencies
|
|
175
|
+
```bash
|
|
176
|
+
# Install the package with test dependencies
|
|
177
|
+
pip install -e ".[test]"
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
#### Running Tests
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
# Run All Tests
|
|
184
|
+
pytest
|
|
185
|
+
|
|
186
|
+
# Run Specific Test
|
|
187
|
+
pytest <TEST_FILE_PATH>
|
|
188
|
+
```
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[tool.poetry]
name = "scraperapi-mcp-server"
version = "0.1.0"
description = "ScraperAPI MCP server"
readme = "README.md"
authors = ["ScraperAPI"]
keywords = ["http", "mcp", "llm", "automation", "scraping"]
classifiers = [
    "Development Status :: 1 - Planning",
    # Keep the version classifiers consistent with `python = ">=3.11"` below
    # (the previous "Python :: 3.10" classifier contradicted the requirement).
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
packages = [{include = "scraperapi_mcp_server", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.11"
mcp = {extras = ["cli"], version = ">=1.6.0"}
pydantic = ">=2.11.3"
pydantic-settings = ">=2.9.1"
requests = ">=2.32.3"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.4.0"
pytest-mock = ">=3.10.0"

[tool.poetry.scripts]
scraperapi-mcp-server = "scraperapi_mcp_server:main"

[tool.pytest.ini_options]
markers = [
    "integration: marks tests as integration tests",
]

[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""ScraperAPI MCP server package entry point (console-script target)."""
import logging

from scraperapi_mcp_server.server import mcp

__version__ = "0.1.0"


def main():
    """Run the ScraperAPI MCP server until it exits.

    Any unhandled exception from the server loop is logged with its full
    traceback and re-raised so the process terminates with a non-zero status.
    """
    logging.info("Starting ScraperAPI MCP server main module.")
    # Run the server
    try:
        logging.debug("Running ScraperAPI MCP server...")
        mcp.run()
    except Exception:
        # logging.exception records the traceback itself; lazy %-formatting
        # (no f-string) avoids building the message unless it is emitted.
        logging.exception("Unhandled exception in ScraperAPI MCP server main loop")
        raise


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )
    main()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import os
from dotenv import load_dotenv
from pydantic_settings import BaseSettings
from scraperapi_mcp_server.utils.exceptions import ApiKeyEnvVarNotSetError


# Load variables from a local .env file (if present) into the process
# environment before Settings reads them.
load_dotenv()


class Settings(BaseSettings):
    """ScraperAPI MCP Server settings.

    Values come from the process environment (including any .env file loaded
    above). Construction fails fast with ApiKeyEnvVarNotSetError when no API
    key is available.
    """

    # ScraperAPI credential; the os.getenv default is evaluated once at
    # class-definition time, falling back to an empty string so the check in
    # __init__ (not pydantic validation) reports the missing key.
    API_KEY: str = os.getenv("API_KEY", "")
    # Base endpoint all scrape requests are sent to.
    API_URL: str = "https://api.scraperapi.com"
    # Per-request timeout (seconds) passed through to the HTTP client.
    API_TIMEOUT_SECONDS: int = 70

    def __init__(self, **kwargs):
        """Build the settings, then verify an API key was provided."""
        super().__init__(**kwargs)
        if not self.API_KEY:
            raise ApiKeyEnvVarNotSetError(
                "API_KEY environment variable is not set. Please set it when installing the MCP server. Check the README for more information."
            )


# Module-level singleton imported by the rest of the package.
settings = Settings()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Annotated, Optional

from pydantic import BaseModel, Field, AnyUrl


class Scrape(BaseModel):
    """Parameters for scraping a URL.

    Mirrors the ScraperAPI query parameters accepted by the `scrape` tool.
    All fields except `url` are optional.
    """

    # Target URL; pydantic validates it is a well-formed URL.
    url: Annotated[AnyUrl, Field(description="URL to scrape")]
    render: Annotated[
        bool,
        Field(
            default=False,
            description="Whether to render the page using JavaScript. Set to `True` only if the page requires JavaScript rendering to display its content.",
        ),
    ]
    # Annotated Optional[str]: the previous bare `str` annotation contradicted
    # its `None` default.
    country_code: Annotated[
        Optional[str], Field(default=None, description="Country code to scrape from")
    ]
    premium: Annotated[
        bool, Field(default=False, description="Whether to use premium scraping")
    ]
    ultra_premium: Annotated[
        bool, Field(default=False, description="Whether to use ultra premium scraping")
    ]
    # Optional for the same reason as country_code above.
    device_type: Annotated[
        Optional[str],
        Field(
            default=None,
            description="Device type to scrape from. Set request to use `mobile` or `desktop` user agents",
        ),
    ]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import logging
from typing import Any, Dict, Optional

import requests

from scraperapi_mcp_server.config import settings
from scraperapi_mcp_server.utils.exceptions import handle_scraper_error


def _loggable(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *payload* with the API key masked, safe for logging."""
    return {k: ("***REDACTED***" if k == "api_key" else v) for k, v in payload.items()}


def basic_scrape(
    url: str,
    render: Optional[bool] = None,
    country_code: Optional[str] = None,
    premium: Optional[bool] = None,
    ultra_premium: Optional[bool] = None,
    device_type: Optional[str] = None,
) -> str:
    """Scrape *url* through the ScraperAPI HTTP endpoint.

    Args:
        url: Target URL to scrape.
        render: Enable JavaScript rendering when set.
        country_code: ISO 2-letter geotargeting code.
        premium: Use premium residential/mobile proxies.
        ultra_premium: Use advanced bypass mechanisms.
        device_type: 'mobile' or 'desktop' user agents.

    Returns:
        The scraped content as a string (markdown output format).

    Raises:
        Exception: Wraps any request/HTTP failure with a descriptive message
            produced by handle_scraper_error.
    """
    logging.info("Starting scrape for URL: %s", url)
    payload: Dict[str, Any] = {
        "api_key": settings.API_KEY,
        "url": url,
        "output_format": "markdown",
        "scraper_sdk": "mcp-server",
    }
    logging.debug("Initial payload: %s", _loggable(payload))
    # Booleans must be serialized as lowercase strings ("true"/"false") for
    # the ScraperAPI query string; string params pass through unchanged.
    optional_params = {
        "render": (render, lambda v: str(v).lower()),
        "country_code": (country_code, str),
        "premium": (premium, lambda v: str(v).lower()),
        "ultra_premium": (ultra_premium, lambda v: str(v).lower()),
        "device_type": (device_type, str),
    }
    for key, (value, formatter) in optional_params.items():
        if value is not None:
            payload[key] = formatter(value)
            logging.debug("Added optional param: %s=%s", key, payload[key])
    try:
        # SECURITY: never log the raw payload -- it contains the API key.
        # The previous implementation leaked the key into the log stream.
        logging.info(
            "Sending request to %s with params: %s",
            settings.API_URL,
            _loggable(payload),
        )
        response = requests.get(
            settings.API_URL, params=payload, timeout=settings.API_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        logging.info("Scrape successful for URL: %s", url)
        return response.text
    except Exception as e:
        logging.error("Error during scrape for URL: %s", url, exc_info=True)
        error_obj = handle_scraper_error(e, url, payload)
        raise Exception(error_obj.error.message) from e
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import logging

from mcp.server.fastmcp import FastMCP
from scraperapi_mcp_server.scrape.models import Scrape
from scraperapi_mcp_server.scrape.scrape import basic_scrape

# FastMCP server instance; tools registered below are exposed to MCP clients.
mcp = FastMCP("ScraperAPI")


@mcp.tool()
def scrape(params: Scrape) -> str:
    """
    Execute a web scrape using ScraperAPI with the specified parameters.

    Parameters:
        params: Scrape model containing:
            url: Target URL to scrape (required)
            render: Enable JavaScript rendering only when needed for dynamic content (default: False)
                Set to True ONLY if the content you need is missing from the initial HTML response and is loaded dynamically by JavaScript.
                For most websites, including many modern ones, the main content is available without JavaScript rendering.
            country_code: Two-letter country code for geo-specific scraping
            premium: Use premium residential/mobile proxies for higher success rate (costs more, incompatible with ultra_premium)
            ultra_premium: Activate advanced bypass mechanisms (costs more, incompatible with premium)
            device_type: 'mobile' or 'desktop' for device-specific user agents

    Returns:
        Scraped content as a string
    """
    # NOTE: the docstring above doubles as the tool description shown to the
    # LLM client -- keep it accurate when changing parameters.
    # Lazy %-style logging args avoid formatting cost when the level is off.
    logging.info("Invoking scrape tool with params: %s", params)
    try:
        result = basic_scrape(
            url=str(params.url),
            render=params.render,
            country_code=params.country_code,
            premium=params.premium,
            ultra_premium=params.ultra_premium,
            device_type=params.device_type,
        )
        logging.info("Scrape tool completed for URL: %s", params.url)
        return result
    except Exception as e:
        logging.error(
            "Scrape tool failed for URL: %s. Error: %s", params.url, e, exc_info=True
        )
        raise
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mapping of country names to their ISO 3166-1 alpha-2 country codes.
|
|
3
|
+
Used for scraping from specific geographic locations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
COUNTRY_CODES = {
|
|
7
|
+
# North America
|
|
8
|
+
"us": "us",
|
|
9
|
+
"usa": "us",
|
|
10
|
+
"united states": "us",
|
|
11
|
+
"united states of america": "us",
|
|
12
|
+
"ca": "ca",
|
|
13
|
+
"canada": "ca",
|
|
14
|
+
"mx": "mx",
|
|
15
|
+
"mexico": "mx",
|
|
16
|
+
"cu": "cu",
|
|
17
|
+
"cuba": "cu",
|
|
18
|
+
"do": "do",
|
|
19
|
+
"dominican republic": "do",
|
|
20
|
+
"ht": "ht",
|
|
21
|
+
"haiti": "ht",
|
|
22
|
+
"jm": "jm",
|
|
23
|
+
"jamaica": "jm",
|
|
24
|
+
"pr": "pr",
|
|
25
|
+
"puerto rico": "pr",
|
|
26
|
+
# Central America
|
|
27
|
+
"bz": "bz",
|
|
28
|
+
"belize": "bz",
|
|
29
|
+
"cr": "cr",
|
|
30
|
+
"costa rica": "cr",
|
|
31
|
+
"sv": "sv",
|
|
32
|
+
"el salvador": "sv",
|
|
33
|
+
"gt": "gt",
|
|
34
|
+
"guatemala": "gt",
|
|
35
|
+
"hn": "hn",
|
|
36
|
+
"honduras": "hn",
|
|
37
|
+
"ni": "ni",
|
|
38
|
+
"nicaragua": "ni",
|
|
39
|
+
"pa": "pa",
|
|
40
|
+
"panama": "pa",
|
|
41
|
+
# South America
|
|
42
|
+
"ar": "ar",
|
|
43
|
+
"argentina": "ar",
|
|
44
|
+
"bo": "bo",
|
|
45
|
+
"bolivia": "bo",
|
|
46
|
+
"br": "br",
|
|
47
|
+
"brazil": "br",
|
|
48
|
+
"cl": "cl",
|
|
49
|
+
"chile": "cl",
|
|
50
|
+
"co": "co",
|
|
51
|
+
"colombia": "co",
|
|
52
|
+
"ec": "ec",
|
|
53
|
+
"ecuador": "ec",
|
|
54
|
+
"gy": "gy",
|
|
55
|
+
"guyana": "gy",
|
|
56
|
+
"py": "py",
|
|
57
|
+
"paraguay": "py",
|
|
58
|
+
"pe": "pe",
|
|
59
|
+
"peru": "pe",
|
|
60
|
+
"sr": "sr",
|
|
61
|
+
"suriname": "sr",
|
|
62
|
+
"uy": "uy",
|
|
63
|
+
"uruguay": "uy",
|
|
64
|
+
"ve": "ve",
|
|
65
|
+
"venezuela": "ve",
|
|
66
|
+
# Europe
|
|
67
|
+
"al": "al",
|
|
68
|
+
"albania": "al",
|
|
69
|
+
"ad": "ad",
|
|
70
|
+
"andorra": "ad",
|
|
71
|
+
"at": "at",
|
|
72
|
+
"austria": "at",
|
|
73
|
+
"by": "by",
|
|
74
|
+
"belarus": "by",
|
|
75
|
+
"be": "be",
|
|
76
|
+
"belgium": "be",
|
|
77
|
+
"ba": "ba",
|
|
78
|
+
"bosnia and herzegovina": "ba",
|
|
79
|
+
"bg": "bg",
|
|
80
|
+
"bulgaria": "bg",
|
|
81
|
+
"hr": "hr",
|
|
82
|
+
"croatia": "hr",
|
|
83
|
+
"cz": "cz",
|
|
84
|
+
"czech republic": "cz",
|
|
85
|
+
"dk": "dk",
|
|
86
|
+
"denmark": "dk",
|
|
87
|
+
"ee": "ee",
|
|
88
|
+
"estonia": "ee",
|
|
89
|
+
"fi": "fi",
|
|
90
|
+
"finland": "fi",
|
|
91
|
+
"fr": "fr",
|
|
92
|
+
"france": "fr",
|
|
93
|
+
"de": "de",
|
|
94
|
+
"germany": "de",
|
|
95
|
+
"gr": "gr",
|
|
96
|
+
"greece": "gr",
|
|
97
|
+
"hu": "hu",
|
|
98
|
+
"hungary": "hu",
|
|
99
|
+
"is": "is",
|
|
100
|
+
"iceland": "is",
|
|
101
|
+
"ie": "ie",
|
|
102
|
+
"ireland": "ie",
|
|
103
|
+
"it": "it",
|
|
104
|
+
"italy": "it",
|
|
105
|
+
"lv": "lv",
|
|
106
|
+
"latvia": "lv",
|
|
107
|
+
"li": "li",
|
|
108
|
+
"liechtenstein": "li",
|
|
109
|
+
"lt": "lt",
|
|
110
|
+
"lithuania": "lt",
|
|
111
|
+
"lu": "lu",
|
|
112
|
+
"luxembourg": "lu",
|
|
113
|
+
"mt": "mt",
|
|
114
|
+
"malta": "mt",
|
|
115
|
+
"md": "md",
|
|
116
|
+
"moldova": "md",
|
|
117
|
+
"mc": "mc",
|
|
118
|
+
"monaco": "mc",
|
|
119
|
+
"me": "me",
|
|
120
|
+
"montenegro": "me",
|
|
121
|
+
"nl": "nl",
|
|
122
|
+
"netherlands": "nl",
|
|
123
|
+
"mk": "mk",
|
|
124
|
+
"north macedonia": "mk",
|
|
125
|
+
"no": "no",
|
|
126
|
+
"norway": "no",
|
|
127
|
+
"pl": "pl",
|
|
128
|
+
"poland": "pl",
|
|
129
|
+
"pt": "pt",
|
|
130
|
+
"portugal": "pt",
|
|
131
|
+
"ro": "ro",
|
|
132
|
+
"romania": "ro",
|
|
133
|
+
"ru": "ru",
|
|
134
|
+
"russia": "ru",
|
|
135
|
+
"russian federation": "ru",
|
|
136
|
+
"sm": "sm",
|
|
137
|
+
"san marino": "sm",
|
|
138
|
+
"rs": "rs",
|
|
139
|
+
"serbia": "rs",
|
|
140
|
+
"sk": "sk",
|
|
141
|
+
"slovakia": "sk",
|
|
142
|
+
"si": "si",
|
|
143
|
+
"slovenia": "si",
|
|
144
|
+
"es": "es",
|
|
145
|
+
"spain": "es",
|
|
146
|
+
"se": "se",
|
|
147
|
+
"sweden": "se",
|
|
148
|
+
"ch": "ch",
|
|
149
|
+
"switzerland": "ch",
|
|
150
|
+
"ua": "ua",
|
|
151
|
+
"ukraine": "ua",
|
|
152
|
+
"uk": "gb",
|
|
153
|
+
"united kingdom": "gb",
|
|
154
|
+
"great britain": "gb",
|
|
155
|
+
"gb": "gb",
|
|
156
|
+
"va": "va",
|
|
157
|
+
"vatican city": "va",
|
|
158
|
+
# Asia
|
|
159
|
+
"af": "af",
|
|
160
|
+
"afghanistan": "af",
|
|
161
|
+
"am": "am",
|
|
162
|
+
"armenia": "am",
|
|
163
|
+
"az": "az",
|
|
164
|
+
"azerbaijan": "az",
|
|
165
|
+
"bh": "bh",
|
|
166
|
+
"bahrain": "bh",
|
|
167
|
+
"bd": "bd",
|
|
168
|
+
"bangladesh": "bd",
|
|
169
|
+
"bt": "bt",
|
|
170
|
+
"bhutan": "bt",
|
|
171
|
+
"bn": "bn",
|
|
172
|
+
"brunei": "bn",
|
|
173
|
+
"kh": "kh",
|
|
174
|
+
"cambodia": "kh",
|
|
175
|
+
"cn": "cn",
|
|
176
|
+
"china": "cn",
|
|
177
|
+
"peoples republic of china": "cn",
|
|
178
|
+
"cy": "cy",
|
|
179
|
+
"cyprus": "cy",
|
|
180
|
+
"ge": "ge",
|
|
181
|
+
"georgia": "ge",
|
|
182
|
+
"in": "in",
|
|
183
|
+
"india": "in",
|
|
184
|
+
"id": "id",
|
|
185
|
+
"indonesia": "id",
|
|
186
|
+
"ir": "ir",
|
|
187
|
+
"iran": "ir",
|
|
188
|
+
"iq": "iq",
|
|
189
|
+
"iraq": "iq",
|
|
190
|
+
"il": "il",
|
|
191
|
+
"israel": "il",
|
|
192
|
+
"jp": "jp",
|
|
193
|
+
"japan": "jp",
|
|
194
|
+
"jo": "jo",
|
|
195
|
+
"jordan": "jo",
|
|
196
|
+
"kz": "kz",
|
|
197
|
+
"kazakhstan": "kz",
|
|
198
|
+
"kw": "kw",
|
|
199
|
+
"kuwait": "kw",
|
|
200
|
+
"kg": "kg",
|
|
201
|
+
"kyrgyzstan": "kg",
|
|
202
|
+
"la": "la",
|
|
203
|
+
"laos": "la",
|
|
204
|
+
"lb": "lb",
|
|
205
|
+
"lebanon": "lb",
|
|
206
|
+
"my": "my",
|
|
207
|
+
"malaysia": "my",
|
|
208
|
+
"mv": "mv",
|
|
209
|
+
"maldives": "mv",
|
|
210
|
+
"mn": "mn",
|
|
211
|
+
"mongolia": "mn",
|
|
212
|
+
"mm": "mm",
|
|
213
|
+
"myanmar": "mm",
|
|
214
|
+
"burma": "mm",
|
|
215
|
+
"np": "np",
|
|
216
|
+
"nepal": "np",
|
|
217
|
+
"kp": "kp",
|
|
218
|
+
"north korea": "kp",
|
|
219
|
+
"dprk": "kp",
|
|
220
|
+
"om": "om",
|
|
221
|
+
"oman": "om",
|
|
222
|
+
"pk": "pk",
|
|
223
|
+
"pakistan": "pk",
|
|
224
|
+
"ph": "ph",
|
|
225
|
+
"philippines": "ph",
|
|
226
|
+
"qa": "qa",
|
|
227
|
+
"qatar": "qa",
|
|
228
|
+
"sa": "sa",
|
|
229
|
+
"saudi arabia": "sa",
|
|
230
|
+
"sg": "sg",
|
|
231
|
+
"singapore": "sg",
|
|
232
|
+
"kr": "kr",
|
|
233
|
+
"south korea": "kr",
|
|
234
|
+
"korea": "kr",
|
|
235
|
+
"lk": "lk",
|
|
236
|
+
"sri lanka": "lk",
|
|
237
|
+
"sy": "sy",
|
|
238
|
+
"syria": "sy",
|
|
239
|
+
"tw": "tw",
|
|
240
|
+
"taiwan": "tw",
|
|
241
|
+
"tj": "tj",
|
|
242
|
+
"tajikistan": "tj",
|
|
243
|
+
"th": "th",
|
|
244
|
+
"thailand": "th",
|
|
245
|
+
"tr": "tr",
|
|
246
|
+
"turkey": "tr",
|
|
247
|
+
"tm": "tm",
|
|
248
|
+
"turkmenistan": "tm",
|
|
249
|
+
"ae": "ae",
|
|
250
|
+
"uae": "ae",
|
|
251
|
+
"united arab emirates": "ae",
|
|
252
|
+
"uz": "uz",
|
|
253
|
+
"uzbekistan": "uz",
|
|
254
|
+
"vn": "vn",
|
|
255
|
+
"vietnam": "vn",
|
|
256
|
+
"ye": "ye",
|
|
257
|
+
"yemen": "ye",
|
|
258
|
+
# Africa
|
|
259
|
+
"dz": "dz",
|
|
260
|
+
"algeria": "dz",
|
|
261
|
+
"ao": "ao",
|
|
262
|
+
"angola": "ao",
|
|
263
|
+
"bj": "bj",
|
|
264
|
+
"benin": "bj",
|
|
265
|
+
"bw": "bw",
|
|
266
|
+
"botswana": "bw",
|
|
267
|
+
"bf": "bf",
|
|
268
|
+
"burkina faso": "bf",
|
|
269
|
+
"bi": "bi",
|
|
270
|
+
"burundi": "bi",
|
|
271
|
+
"cm": "cm",
|
|
272
|
+
"cameroon": "cm",
|
|
273
|
+
"cv": "cv",
|
|
274
|
+
"cape verde": "cv",
|
|
275
|
+
"cf": "cf",
|
|
276
|
+
"central african republic": "cf",
|
|
277
|
+
"td": "td",
|
|
278
|
+
"chad": "td",
|
|
279
|
+
"km": "km",
|
|
280
|
+
"comoros": "km",
|
|
281
|
+
"cg": "cg",
|
|
282
|
+
"congo": "cg",
|
|
283
|
+
"cd": "cd",
|
|
284
|
+
"drc": "cd",
|
|
285
|
+
"democratic republic of the congo": "cd",
|
|
286
|
+
"ci": "ci",
|
|
287
|
+
"cote divoire": "ci",
|
|
288
|
+
"ivory coast": "ci",
|
|
289
|
+
"dj": "dj",
|
|
290
|
+
"djibouti": "dj",
|
|
291
|
+
"eg": "eg",
|
|
292
|
+
"egypt": "eg",
|
|
293
|
+
"gq": "gq",
|
|
294
|
+
"equatorial guinea": "gq",
|
|
295
|
+
"er": "er",
|
|
296
|
+
"eritrea": "er",
|
|
297
|
+
"et": "et",
|
|
298
|
+
"ethiopia": "et",
|
|
299
|
+
"ga": "ga",
|
|
300
|
+
"gabon": "ga",
|
|
301
|
+
"gm": "gm",
|
|
302
|
+
"gambia": "gm",
|
|
303
|
+
"gh": "gh",
|
|
304
|
+
"ghana": "gh",
|
|
305
|
+
"gn": "gn",
|
|
306
|
+
"guinea": "gn",
|
|
307
|
+
"gw": "gw",
|
|
308
|
+
"guinea bissau": "gw",
|
|
309
|
+
"ke": "ke",
|
|
310
|
+
"kenya": "ke",
|
|
311
|
+
"ls": "ls",
|
|
312
|
+
"lesotho": "ls",
|
|
313
|
+
"lr": "lr",
|
|
314
|
+
"liberia": "lr",
|
|
315
|
+
"ly": "ly",
|
|
316
|
+
"libya": "ly",
|
|
317
|
+
"mg": "mg",
|
|
318
|
+
"madagascar": "mg",
|
|
319
|
+
"mw": "mw",
|
|
320
|
+
"malawi": "mw",
|
|
321
|
+
"ml": "ml",
|
|
322
|
+
"mali": "ml",
|
|
323
|
+
"mr": "mr",
|
|
324
|
+
"mauritania": "mr",
|
|
325
|
+
"mu": "mu",
|
|
326
|
+
"mauritius": "mu",
|
|
327
|
+
"ma": "ma",
|
|
328
|
+
"morocco": "ma",
|
|
329
|
+
"mz": "mz",
|
|
330
|
+
"mozambique": "mz",
|
|
331
|
+
"na": "na",
|
|
332
|
+
"namibia": "na",
|
|
333
|
+
"ne": "ne",
|
|
334
|
+
"niger": "ne",
|
|
335
|
+
"ng": "ng",
|
|
336
|
+
"nigeria": "ng",
|
|
337
|
+
"rw": "rw",
|
|
338
|
+
"rwanda": "rw",
|
|
339
|
+
"st": "st",
|
|
340
|
+
"sao tome and principe": "st",
|
|
341
|
+
"sn": "sn",
|
|
342
|
+
"senegal": "sn",
|
|
343
|
+
"sc": "sc",
|
|
344
|
+
"seychelles": "sc",
|
|
345
|
+
"sl": "sl",
|
|
346
|
+
"sierra leone": "sl",
|
|
347
|
+
"so": "so",
|
|
348
|
+
"somalia": "so",
|
|
349
|
+
"za": "za",
|
|
350
|
+
"south africa": "za",
|
|
351
|
+
"ss": "ss",
|
|
352
|
+
"south sudan": "ss",
|
|
353
|
+
"sd": "sd",
|
|
354
|
+
"sudan": "sd",
|
|
355
|
+
"sz": "sz",
|
|
356
|
+
"swaziland": "sz",
|
|
357
|
+
"eswatini": "sz",
|
|
358
|
+
"tz": "tz",
|
|
359
|
+
"tanzania": "tz",
|
|
360
|
+
"tg": "tg",
|
|
361
|
+
"togo": "tg",
|
|
362
|
+
"tn": "tn",
|
|
363
|
+
"tunisia": "tn",
|
|
364
|
+
"ug": "ug",
|
|
365
|
+
"uganda": "ug",
|
|
366
|
+
"zm": "zm",
|
|
367
|
+
"zambia": "zm",
|
|
368
|
+
"zw": "zw",
|
|
369
|
+
"zimbabwe": "zw",
|
|
370
|
+
# Oceania
|
|
371
|
+
"au": "au",
|
|
372
|
+
"australia": "au",
|
|
373
|
+
"fj": "fj",
|
|
374
|
+
"fiji": "fj",
|
|
375
|
+
"ki": "ki",
|
|
376
|
+
"kiribati": "ki",
|
|
377
|
+
"mh": "mh",
|
|
378
|
+
"marshall islands": "mh",
|
|
379
|
+
"fm": "fm",
|
|
380
|
+
"micronesia": "fm",
|
|
381
|
+
"nr": "nr",
|
|
382
|
+
"nauru": "nr",
|
|
383
|
+
"nz": "nz",
|
|
384
|
+
"new zealand": "nz",
|
|
385
|
+
"pw": "pw",
|
|
386
|
+
"palau": "pw",
|
|
387
|
+
"pg": "pg",
|
|
388
|
+
"papua new guinea": "pg",
|
|
389
|
+
"ws": "ws",
|
|
390
|
+
"samoa": "ws",
|
|
391
|
+
"sb": "sb",
|
|
392
|
+
"solomon islands": "sb",
|
|
393
|
+
"to": "to",
|
|
394
|
+
"tonga": "to",
|
|
395
|
+
"tv": "tv",
|
|
396
|
+
"tuvalu": "tv",
|
|
397
|
+
"vu": "vu",
|
|
398
|
+
"vanuatu": "vu",
|
|
399
|
+
# Territories and Special Cases
|
|
400
|
+
"hk": "hk",
|
|
401
|
+
"hong kong": "hk",
|
|
402
|
+
"mo": "mo",
|
|
403
|
+
"macau": "mo",
|
|
404
|
+
"ps": "ps",
|
|
405
|
+
"palestine": "ps",
|
|
406
|
+
"re": "re",
|
|
407
|
+
"reunion": "re",
|
|
408
|
+
"tf": "tf",
|
|
409
|
+
"french southern territories": "tf",
|
|
410
|
+
"um": "um",
|
|
411
|
+
"united states minor outlying islands": "um",
|
|
412
|
+
"wf": "wf",
|
|
413
|
+
"wallis and futuna": "wf",
|
|
414
|
+
"yt": "yt",
|
|
415
|
+
"mayotte": "yt",
|
|
416
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import logging
from typing import Any, Dict, Optional

from requests.exceptions import RequestException, HTTPError as RequestsHTTPError
from mcp.shared.exceptions import McpError
from mcp.types import ErrorData, INTERNAL_ERROR


class ApiKeyEnvVarNotSetError(Exception):
    """Raised when the API key environment variable is not set."""


def handle_scraper_error(
    e: Exception, url: str, payload: Optional[Dict[str, Any]] = None
) -> McpError:
    """
    Handle all scraper errors with simple error messages.

    Args:
        e: The exception that occurred
        url: The URL that was being scraped
        payload: Optional parameters used in the API call

    Returns:
        An McpError with error information
    """
    # HTTPError always has a `response` attribute but it may be None (e.g.
    # when raised without an attached response); guard before dereferencing
    # status_code instead of only checking hasattr.
    if isinstance(e, RequestsHTTPError) and getattr(e, "response", None) is not None:
        status_code = e.response.status_code
        error_message = f"HTTP error {status_code} when scraping '{url}': {str(e)}"
    elif isinstance(e, RequestException):
        error_message = f"Connection error when scraping '{url}': {str(e)}"
    else:
        error_message = f"Error when scraping '{url}': {str(e)}"

    # Include the parameters we tried in the error message if available,
    # excluding the secret api_key.
    if payload and isinstance(e, RequestsHTTPError):
        param_summary = " ".join(
            f"{k}={v}" for k, v in payload.items() if k != "api_key"
        )
        error_message += f" Parameters used: {param_summary}"

    # Lazy %-args: the message is only formatted when the record is emitted.
    logging.error("handle_scraper_error: %s", error_message, exc_info=True)
    return McpError(
        ErrorData(
            code=INTERNAL_ERROR,
            message=error_message,
        )
    )
|