metalift-sdk 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metalift_sdk-1.0.0/LICENSE +74 -0
- metalift_sdk-1.0.0/PKG-INFO +164 -0
- metalift_sdk-1.0.0/README.md +136 -0
- metalift_sdk-1.0.0/metalift_sdk/__init__.py +12 -0
- metalift_sdk-1.0.0/metalift_sdk/client.py +108 -0
- metalift_sdk-1.0.0/metalift_sdk/py.typed +0 -0
- metalift_sdk-1.0.0/metalift_sdk.egg-info/PKG-INFO +164 -0
- metalift_sdk-1.0.0/metalift_sdk.egg-info/SOURCES.txt +11 -0
- metalift_sdk-1.0.0/metalift_sdk.egg-info/dependency_links.txt +1 -0
- metalift_sdk-1.0.0/metalift_sdk.egg-info/requires.txt +1 -0
- metalift_sdk-1.0.0/metalift_sdk.egg-info/top_level.txt +1 -0
- metalift_sdk-1.0.0/pyproject.toml +40 -0
- metalift_sdk-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
Proprietary License — Maximum restriction
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Metalift. All rights reserved.
|
|
4
|
+
|
|
5
|
+
NOTICE: This software and all associated materials (source code, object code,
|
|
6
|
+
documentation, configuration, designs, and data) are proprietary and confidential.
|
|
7
|
+
No license or right is granted except as expressly set forth below.
|
|
8
|
+
|
|
9
|
+
1. NO GRANT OF RIGHTS
|
|
10
|
+
|
|
11
|
+
Except as expressly authorized in a separate written agreement signed by
|
|
12
|
+
Metalift, you are granted NO rights to use, copy, modify, merge, publish,
|
|
13
|
+
distribute, sublicense, sell, rent, lease, lend, disclose, or otherwise
|
|
14
|
+
transfer this software or any part thereof.
|
|
15
|
+
|
|
16
|
+
2. RESTRICTED ACTIVITIES
|
|
17
|
+
|
|
18
|
+
Without prior written consent from Metalift, you may NOT:
|
|
19
|
+
|
|
20
|
+
(a) access, use, or run the software in any environment;
|
|
21
|
+
(b) copy, reproduce, or duplicate the software or documentation;
|
|
22
|
+
(c) modify, adapt, translate, or create derivative works;
|
|
23
|
+
(d) reverse engineer, decompile, disassemble, or attempt to derive source
|
|
24
|
+
code, algorithms, or trade secrets;
|
|
25
|
+
(e) remove, alter, or obscure proprietary notices or labels;
|
|
26
|
+
(f) distribute, publish, or make the software available to any third party;
|
|
27
|
+
(g) use the software to develop, train, or improve competing products or
|
|
28
|
+
services;
|
|
29
|
+
(h) benchmark, scrape, or systematically extract outputs for redistribution;
|
|
30
|
+
(i) sublicense, assign, or transfer any rights under this notice.
|
|
31
|
+
|
|
32
|
+
3. CONFIDENTIALITY
|
|
33
|
+
|
|
34
|
+
The software constitutes confidential information of Metalift. You must
|
|
35
|
+
protect it using at least the same degree of care you use for your own
|
|
36
|
+
confidential information, and in no event less than reasonable care.
|
|
37
|
+
|
|
38
|
+
4. THIRD-PARTY COMPONENTS
|
|
39
|
+
|
|
40
|
+
Third-party open-source components, if any, remain subject to their
|
|
41
|
+
respective licenses. This notice does not restrict use of those components
|
|
42
|
+
as permitted by their licenses, but does not grant additional rights to
|
|
43
|
+
Metalift proprietary code.
|
|
44
|
+
|
|
45
|
+
5. TERMINATION
|
|
46
|
+
|
|
47
|
+
Any unauthorized use automatically terminates any permission that may have
|
|
48
|
+
been granted. Upon termination, you must destroy all copies of the software
|
|
49
|
+
and certify destruction if requested.
|
|
50
|
+
|
|
51
|
+
6. DISCLAIMER OF WARRANTY
|
|
52
|
+
|
|
53
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
54
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
55
|
+
FOR A PARTICULAR PURPOSE, TITLE, AND NON-INFRINGEMENT.
|
|
56
|
+
|
|
57
|
+
7. LIMITATION OF LIABILITY
|
|
58
|
+
|
|
59
|
+
IN NO EVENT SHALL METALIFT BE LIABLE FOR ANY INDIRECT, INCIDENTAL, SPECIAL,
|
|
60
|
+
CONSEQUENTIAL, OR PUNITIVE DAMAGES, OR ANY LOSS OF PROFITS, DATA, GOODWILL,
|
|
61
|
+
OR BUSINESS INTERRUPTION, ARISING FROM OR RELATED TO THE SOFTWARE, WHETHER
|
|
62
|
+
IN CONTRACT, TORT, OR OTHERWISE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
|
63
|
+
DAMAGES. METALIFT'S TOTAL LIABILITY SHALL NOT EXCEED THE AMOUNT PAID BY YOU
|
|
64
|
+
TO METALIFT FOR AUTHORIZED ACCESS IN THE TWELVE (12) MONTHS PRECEDING THE
|
|
65
|
+
CLAIM, OR ONE HUNDRED U.S. DOLLARS (USD $100) IF NO SUCH PAYMENT WAS MADE.
|
|
66
|
+
|
|
67
|
+
8. GOVERNING LAW
|
|
68
|
+
|
|
69
|
+
This notice is governed by the laws applicable where Metalift is organized,
|
|
70
|
+
without regard to conflict-of-law principles.
|
|
71
|
+
|
|
72
|
+
9. CONTACT
|
|
73
|
+
|
|
74
|
+
For licensing inquiries: legal@metalift.ai
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: metalift-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Official Python SDK for the Metalift web context API
|
|
5
|
+
Author-email: Metalift <support@metalift.ai>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://metalift.ai
|
|
8
|
+
Project-URL: Documentation, https://metalift.ai/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/Endacoder/scraper-mcp
|
|
10
|
+
Project-URL: Issues, https://github.com/Endacoder/scraper-mcp/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/Endacoder/scraper-mcp/releases
|
|
12
|
+
Keywords: metalift,scraping,web-scraping,crawl,llm,agents,markdown
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: httpx>=0.27.0
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# metalift-sdk
|
|
30
|
+
|
|
31
|
+
Official Python SDK for [Metalift](https://app.metalift.ai) — turn any URL into agent-ready markdown, HTML, or structured data.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install metalift-sdk
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires Python 3.11+.
|
|
40
|
+
|
|
41
|
+
## Quick start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from metalift_sdk import Metalift
|
|
45
|
+
|
|
46
|
+
client = Metalift(api_key="YOUR_API_KEY")
|
|
47
|
+
|
|
48
|
+
result = client.scrape(
|
|
49
|
+
"https://example.com",
|
|
50
|
+
formats=["markdown"],
|
|
51
|
+
)
|
|
52
|
+
print(result["data"]["markdown"])
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Get an API key from the [Metalift dashboard](https://app.metalift.ai/dashboard/keys). New accounts receive **1,000 free credits/month**.
|
|
56
|
+
|
|
57
|
+
## Configuration
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from metalift_sdk import Metalift
|
|
61
|
+
|
|
62
|
+
client = Metalift(
|
|
63
|
+
api_url="https://api.metalift.ai", # default
|
|
64
|
+
api_key="YOUR_API_KEY",
|
|
65
|
+
timeout=60.0,
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Environment variables are also supported:
|
|
70
|
+
|
|
71
|
+
| Variable | Description |
|
|
72
|
+
|----------|-------------|
|
|
73
|
+
| `METALIFT_API_URL` | API base URL (default: `https://api.metalift.ai`) |
|
|
74
|
+
| `METALIFT_API_KEY` | Bearer token for authenticated requests |
|
|
75
|
+
|
|
76
|
+
## API
|
|
77
|
+
|
|
78
|
+
### Scrape
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
client.scrape(
|
|
82
|
+
"https://example.com",
|
|
83
|
+
formats=["markdown"],
|
|
84
|
+
render="auto",
|
|
85
|
+
only_main_content=True,
|
|
86
|
+
strategy="default",
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Batch scrape
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
client.batch(
|
|
94
|
+
["https://example.com", "https://example.org"],
|
|
95
|
+
async_=True,
|
|
96
|
+
scrape_options={"formats": ["markdown"]},
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Crawl and map
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
crawl_job = client.crawl("https://example.com", limit=50)
|
|
104
|
+
map_result = client.map("https://example.com", limit=100)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Async jobs
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
job = client.crawl("https://example.com", limit=20)
|
|
111
|
+
completed = client.wait_for_job(job["job_id"])
|
|
112
|
+
print(completed["status"], completed.get("data"))
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Strategies and protected sites
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
strategies = client.list_strategies()
|
|
119
|
+
protection = client.list_protection_types()
|
|
120
|
+
|
|
121
|
+
client.scrape(
|
|
122
|
+
"https://www.walmart.com/ip/EXAMPLE",
|
|
123
|
+
strategy="retail",
|
|
124
|
+
formats=["markdown"],
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
client.fetch_session(url="https://www.walmart.com/", domain="www.walmart.com")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Session cookies
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
client.seed_session(
|
|
134
|
+
domain="example.com",
|
|
135
|
+
cookies=[{"name": "session", "value": "...", "domain": "example.com", "path": "/"}],
|
|
136
|
+
)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Errors
|
|
140
|
+
|
|
141
|
+
API failures raise `MetaliftError`:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from metalift_sdk import Metalift, MetaliftError
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
client.scrape("https://example.com")
|
|
148
|
+
except MetaliftError as exc:
|
|
149
|
+
print(exc.status_code, exc.detail)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Links
|
|
153
|
+
|
|
154
|
+
- [Documentation](https://app.metalift.ai/docs)
|
|
155
|
+
- [API reference](https://app.metalift.ai/openapi/metalift.v1.yaml)
|
|
156
|
+
- [Support](mailto:support@metalift.ai)
|
|
157
|
+
|
|
158
|
+
## License & distribution
|
|
159
|
+
|
|
160
|
+
`metalift-sdk` is **proprietary software** licensed under the terms in [LICENSE](./LICENSE). Use, copying, modification, and redistribution are not permitted except as expressly authorized by Metalift.
|
|
161
|
+
|
|
162
|
+
The package is published on the public [Python Package Index (PyPI)](https://pypi.org/project/metalift-sdk/) so customers can install it with `pip install metalift-sdk`. Publishing on PyPI does **not** make the software open source: the wheel and source archive are publicly downloadable, but legal use remains governed by the proprietary license.
|
|
163
|
+
|
|
164
|
+
Do not commit API keys or other credentials into your application code. Pass them at runtime via the `api_key` argument or the `METALIFT_API_KEY` environment variable.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# metalift-sdk
|
|
2
|
+
|
|
3
|
+
Official Python SDK for [Metalift](https://app.metalift.ai) — turn any URL into agent-ready markdown, HTML, or structured data.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install metalift-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Requires Python 3.11+.
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from metalift_sdk import Metalift
|
|
17
|
+
|
|
18
|
+
client = Metalift(api_key="YOUR_API_KEY")
|
|
19
|
+
|
|
20
|
+
result = client.scrape(
|
|
21
|
+
"https://example.com",
|
|
22
|
+
formats=["markdown"],
|
|
23
|
+
)
|
|
24
|
+
print(result["data"]["markdown"])
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Get an API key from the [Metalift dashboard](https://app.metalift.ai/dashboard/keys). New accounts receive **1,000 free credits/month**.
|
|
28
|
+
|
|
29
|
+
## Configuration
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from metalift_sdk import Metalift
|
|
33
|
+
|
|
34
|
+
client = Metalift(
|
|
35
|
+
api_url="https://api.metalift.ai", # default
|
|
36
|
+
api_key="YOUR_API_KEY",
|
|
37
|
+
timeout=60.0,
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Environment variables are also supported:
|
|
42
|
+
|
|
43
|
+
| Variable | Description |
|
|
44
|
+
|----------|-------------|
|
|
45
|
+
| `METALIFT_API_URL` | API base URL (default: `https://api.metalift.ai`) |
|
|
46
|
+
| `METALIFT_API_KEY` | Bearer token for authenticated requests |
|
|
47
|
+
|
|
48
|
+
## API
|
|
49
|
+
|
|
50
|
+
### Scrape
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
client.scrape(
|
|
54
|
+
"https://example.com",
|
|
55
|
+
formats=["markdown"],
|
|
56
|
+
render="auto",
|
|
57
|
+
only_main_content=True,
|
|
58
|
+
strategy="default",
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Batch scrape
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
client.batch(
|
|
66
|
+
["https://example.com", "https://example.org"],
|
|
67
|
+
async_=True,
|
|
68
|
+
scrape_options={"formats": ["markdown"]},
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Crawl and map
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
crawl_job = client.crawl("https://example.com", limit=50)
|
|
76
|
+
map_result = client.map("https://example.com", limit=100)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Async jobs
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
job = client.crawl("https://example.com", limit=20)
|
|
83
|
+
completed = client.wait_for_job(job["job_id"])
|
|
84
|
+
print(completed["status"], completed.get("data"))
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Strategies and protected sites
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
strategies = client.list_strategies()
|
|
91
|
+
protection = client.list_protection_types()
|
|
92
|
+
|
|
93
|
+
client.scrape(
|
|
94
|
+
"https://www.walmart.com/ip/EXAMPLE",
|
|
95
|
+
strategy="retail",
|
|
96
|
+
formats=["markdown"],
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
client.fetch_session(url="https://www.walmart.com/", domain="www.walmart.com")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Session cookies
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
client.seed_session(
|
|
106
|
+
domain="example.com",
|
|
107
|
+
cookies=[{"name": "session", "value": "...", "domain": "example.com", "path": "/"}],
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Errors
|
|
112
|
+
|
|
113
|
+
API failures raise `MetaliftError`:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from metalift_sdk import Metalift, MetaliftError
|
|
117
|
+
|
|
118
|
+
try:
|
|
119
|
+
client.scrape("https://example.com")
|
|
120
|
+
except MetaliftError as exc:
|
|
121
|
+
print(exc.status_code, exc.detail)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Links
|
|
125
|
+
|
|
126
|
+
- [Documentation](https://app.metalift.ai/docs)
|
|
127
|
+
- [API reference](https://app.metalift.ai/openapi/metalift.v1.yaml)
|
|
128
|
+
- [Support](mailto:support@metalift.ai)
|
|
129
|
+
|
|
130
|
+
## License & distribution
|
|
131
|
+
|
|
132
|
+
`metalift-sdk` is **proprietary software** licensed under the terms in [LICENSE](./LICENSE). Use, copying, modification, and redistribution are not permitted except as expressly authorized by Metalift.
|
|
133
|
+
|
|
134
|
+
The package is published on the public [Python Package Index (PyPI)](https://pypi.org/project/metalift-sdk/) so customers can install it with `pip install metalift-sdk`. Publishing on PyPI does **not** make the software open source: the wheel and source archive are publicly downloadable, but legal use remains governed by the proprietary license.
|
|
135
|
+
|
|
136
|
+
Do not commit API keys or other credentials into your application code. Pass them at runtime via the `api_key` argument or the `METALIFT_API_KEY` environment variable.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Metalift Python SDK."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from metalift_sdk.client import DEFAULT_API_URL, Metalift, MetaliftError
|
|
6
|
+
|
|
7
|
+
# Prefer the version recorded in the installed distribution's metadata so the
# attribute always matches what the installer actually put on disk.
try:
    __version__ = version("metalift-sdk")
except PackageNotFoundError:
    # No distribution metadata named "metalift-sdk" could be found; fall back
    # to a hard-coded version string.
    __version__ = "1.0.0"

# Names re-exported as the package's public surface.
__all__ = [
    "DEFAULT_API_URL",
    "Metalift",
    "MetaliftError",
    "__version__",
]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
DEFAULT_API_URL = "https://api.metalift.ai"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MetaliftError(RuntimeError):
    """Error raised by the client when an API call comes back as a failure.

    Attributes:
        detail: Human-readable error message; also used as the exception text.
        status_code: HTTP status code of the failing response, or ``None``
            when no status code was supplied.
        body: Parsed response body as a dict; an empty dict when none was
            supplied.
    """

    def __init__(
        self,
        detail: str,
        *,
        status_code: int | None = None,
        body: dict[str, Any] | None = None,
    ) -> None:
        """Store the message, status code and response body on the exception.

        Args:
            detail: Error message shown by ``str(exc)``.
            status_code: Optional HTTP status code (keyword-only).
            body: Optional decoded response body (keyword-only). A missing or
                empty value is stored as ``{}``.
        """
        super().__init__(detail)
        self.detail = detail
        self.status_code = status_code
        # Normalise "no body" to an empty dict so callers can always use
        # ``exc.body.get(...)`` without a None check.
        self.body = body if body else {}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Metalift:
    """Synchronous client for the Metalift HTTP API.

    Each public method issues a single JSON request against ``api_url`` and
    returns the decoded response body as a dict. Responses with an error
    status raise ``MetaliftError``.
    """

    def __init__(
        self,
        api_url: str | None = None,
        api_key: str | None = None,
        timeout: float = 60.0,
    ) -> None:
        """Configure the client.

        Args:
            api_url: Base URL of the API. Falls back to the
                ``METALIFT_API_URL`` environment variable, then to
                ``DEFAULT_API_URL``. Trailing slashes are stripped.
            api_key: Bearer token. Falls back to the ``METALIFT_API_KEY``
                environment variable; may end up ``None``, in which case no
                ``Authorization`` header is sent.
            timeout: Timeout passed to ``httpx.Client`` for every request.
        """
        self.api_url = (api_url or os.getenv("METALIFT_API_URL") or DEFAULT_API_URL).rstrip("/")
        self.api_key = api_key or os.getenv("METALIFT_API_KEY")
        self.timeout = timeout

    def _headers(self) -> dict[str, str]:
        """Build the request headers, adding ``Authorization`` when a key is set."""
        headers = {"Content-Type": "application/json", "Accept": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    def _parse_body(self, response: httpx.Response) -> dict[str, Any]:
        """Decode a response into a dict.

        Returns the JSON payload when it is a JSON object. For anything else
        (invalid JSON, or valid JSON that is not an object) returns
        ``{"detail": <response text or reason phrase>}``.
        """
        try:
            body = response.json()
            if isinstance(body, dict):
                return body
        except ValueError:
            # Body is not valid JSON; fall through to the text wrapper below.
            pass
        return {"detail": response.text or response.reason_phrase}

    def _request(self, method: str, path: str, json: dict[str, Any] | None = None) -> dict[str, Any]:
        """Send one request and return its decoded body.

        Args:
            method: HTTP method name.
            path: Path appended verbatim to ``self.api_url``.
            json: Optional JSON-serialisable request body.

        Raises:
            MetaliftError: If the response has an error status. The message is
                taken from the body's ``detail`` or ``error`` key, then the raw
                text, then the reason phrase.
        """
        # A fresh httpx.Client is opened (and closed) for every call.
        with httpx.Client(timeout=self.timeout) as client:
            response = client.request(method, f"{self.api_url}{path}", headers=self._headers(), json=json)
        body = self._parse_body(response)
        if response.is_error:
            detail = body.get("detail") or body.get("error") or response.text or response.reason_phrase
            raise MetaliftError(str(detail), status_code=response.status_code, body=body)
        return body

    def scrape(self, url: str, **kwargs: Any) -> dict[str, Any]:
        """POST ``/v1/scrape`` with ``url`` plus any extra keyword options."""
        payload = {"url": url, **kwargs}
        return self._request("POST", "/v1/scrape", payload)

    def batch(self, urls: list[str], **kwargs: Any) -> dict[str, Any]:
        """POST ``/v1/batch`` with ``urls`` plus any extra keyword options.

        ``async`` is a reserved word in Python, so callers pass ``async_``;
        it is renamed to ``async`` in the request payload.
        """
        if "async_" in kwargs:
            kwargs["async"] = kwargs.pop("async_")
        payload = {"urls": urls, **kwargs}
        return self._request("POST", "/v1/batch", payload)

    def crawl(self, url: str, **kwargs: Any) -> dict[str, Any]:
        """POST ``/v1/crawl`` with ``url`` plus any extra keyword options."""
        payload = {"url": url, **kwargs}
        return self._request("POST", "/v1/crawl", payload)

    def map(self, url: str, **kwargs: Any) -> dict[str, Any]:
        """POST ``/v1/map`` with ``url`` plus any extra keyword options."""
        payload = {"url": url, **kwargs}
        return self._request("POST", "/v1/map", payload)

    def list_strategies(self) -> dict[str, Any]:
        """GET ``/v1/strategies``."""
        return self._request("GET", "/v1/strategies")

    def list_protection_types(self) -> dict[str, Any]:
        """GET ``/v1/protection-types``."""
        return self._request("GET", "/v1/protection-types")

    def seed_session(self, **kwargs: Any) -> dict[str, Any]:
        """PUT ``/v1/sessions`` with the keyword arguments as the JSON body."""
        return self._request("PUT", "/v1/sessions", kwargs)

    def fetch_session(self, **kwargs: Any) -> dict[str, Any]:
        """POST ``/v1/sessions/fetch`` with the keyword arguments as the JSON body."""
        return self._request("POST", "/v1/sessions/fetch", kwargs)

    def get_job(self, job_id: str) -> dict[str, Any]:
        """GET ``/v1/jobs/{job_id}``.

        The id is percent-encoded as a single path segment so that characters
        such as ``/``, ``?`` or ``#`` cannot redirect the request to a
        different route or inject a query string.
        """
        from urllib.parse import quote

        return self._request("GET", f"/v1/jobs/{quote(str(job_id), safe='')}")

    def wait_for_job(
        self,
        job_id: str,
        interval_seconds: float = 2.0,
        timeout_seconds: float = 300.0,
    ) -> dict[str, Any]:
        """Poll ``get_job`` until the job reports ``completed`` or ``failed``.

        Args:
            job_id: Identifier passed to ``get_job``.
            interval_seconds: Pause between polls.
            timeout_seconds: Maximum total time to keep polling.

        Returns:
            The job dict from the first poll whose ``status`` is
            ``"completed"`` or ``"failed"``.

        Raises:
            TimeoutError: If no terminal status is seen within
                ``timeout_seconds``.
        """
        # Use the monotonic clock: wall-clock adjustments (NTP, DST, manual
        # changes) must not shorten or extend the wait.
        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            job = self.get_job(job_id)
            if job.get("status") in {"completed", "failed"}:
                return job
            # Never sleep past the deadline, so the caller's timeout is honoured
            # even when the polling interval is larger than the time remaining.
            remaining = max(0.0, deadline - time.monotonic())
            time.sleep(min(interval_seconds, remaining))
        raise TimeoutError(f"Job {job_id} timed out after {timeout_seconds}s")
|
|
File without changes
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: metalift-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Official Python SDK for the Metalift web context API
|
|
5
|
+
Author-email: Metalift <support@metalift.ai>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://metalift.ai
|
|
8
|
+
Project-URL: Documentation, https://metalift.ai/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/Endacoder/scraper-mcp
|
|
10
|
+
Project-URL: Issues, https://github.com/Endacoder/scraper-mcp/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/Endacoder/scraper-mcp/releases
|
|
12
|
+
Keywords: metalift,scraping,web-scraping,crawl,llm,agents,markdown
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: httpx>=0.27.0
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# metalift-sdk
|
|
30
|
+
|
|
31
|
+
Official Python SDK for [Metalift](https://app.metalift.ai) — turn any URL into agent-ready markdown, HTML, or structured data.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install metalift-sdk
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires Python 3.11+.
|
|
40
|
+
|
|
41
|
+
## Quick start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from metalift_sdk import Metalift
|
|
45
|
+
|
|
46
|
+
client = Metalift(api_key="YOUR_API_KEY")
|
|
47
|
+
|
|
48
|
+
result = client.scrape(
|
|
49
|
+
"https://example.com",
|
|
50
|
+
formats=["markdown"],
|
|
51
|
+
)
|
|
52
|
+
print(result["data"]["markdown"])
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Get an API key from the [Metalift dashboard](https://app.metalift.ai/dashboard/keys). New accounts receive **1,000 free credits/month**.
|
|
56
|
+
|
|
57
|
+
## Configuration
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from metalift_sdk import Metalift
|
|
61
|
+
|
|
62
|
+
client = Metalift(
|
|
63
|
+
api_url="https://api.metalift.ai", # default
|
|
64
|
+
api_key="YOUR_API_KEY",
|
|
65
|
+
timeout=60.0,
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Environment variables are also supported:
|
|
70
|
+
|
|
71
|
+
| Variable | Description |
|
|
72
|
+
|----------|-------------|
|
|
73
|
+
| `METALIFT_API_URL` | API base URL (default: `https://api.metalift.ai`) |
|
|
74
|
+
| `METALIFT_API_KEY` | Bearer token for authenticated requests |
|
|
75
|
+
|
|
76
|
+
## API
|
|
77
|
+
|
|
78
|
+
### Scrape
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
client.scrape(
|
|
82
|
+
"https://example.com",
|
|
83
|
+
formats=["markdown"],
|
|
84
|
+
render="auto",
|
|
85
|
+
only_main_content=True,
|
|
86
|
+
strategy="default",
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Batch scrape
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
client.batch(
|
|
94
|
+
["https://example.com", "https://example.org"],
|
|
95
|
+
async_=True,
|
|
96
|
+
scrape_options={"formats": ["markdown"]},
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Crawl and map
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
crawl_job = client.crawl("https://example.com", limit=50)
|
|
104
|
+
map_result = client.map("https://example.com", limit=100)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Async jobs
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
job = client.crawl("https://example.com", limit=20)
|
|
111
|
+
completed = client.wait_for_job(job["job_id"])
|
|
112
|
+
print(completed["status"], completed.get("data"))
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Strategies and protected sites
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
strategies = client.list_strategies()
|
|
119
|
+
protection = client.list_protection_types()
|
|
120
|
+
|
|
121
|
+
client.scrape(
|
|
122
|
+
"https://www.walmart.com/ip/EXAMPLE",
|
|
123
|
+
strategy="retail",
|
|
124
|
+
formats=["markdown"],
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
client.fetch_session(url="https://www.walmart.com/", domain="www.walmart.com")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Session cookies
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
client.seed_session(
|
|
134
|
+
domain="example.com",
|
|
135
|
+
cookies=[{"name": "session", "value": "...", "domain": "example.com", "path": "/"}],
|
|
136
|
+
)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Errors
|
|
140
|
+
|
|
141
|
+
API failures raise `MetaliftError`:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from metalift_sdk import Metalift, MetaliftError
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
client.scrape("https://example.com")
|
|
148
|
+
except MetaliftError as exc:
|
|
149
|
+
print(exc.status_code, exc.detail)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Links
|
|
153
|
+
|
|
154
|
+
- [Documentation](https://app.metalift.ai/docs)
|
|
155
|
+
- [API reference](https://app.metalift.ai/openapi/metalift.v1.yaml)
|
|
156
|
+
- [Support](mailto:support@metalift.ai)
|
|
157
|
+
|
|
158
|
+
## License & distribution
|
|
159
|
+
|
|
160
|
+
`metalift-sdk` is **proprietary software** licensed under the terms in [LICENSE](./LICENSE). Use, copying, modification, and redistribution are not permitted except as expressly authorized by Metalift.
|
|
161
|
+
|
|
162
|
+
The package is published on the public [Python Package Index (PyPI)](https://pypi.org/project/metalift-sdk/) so customers can install it with `pip install metalift-sdk`. Publishing on PyPI does **not** make the software open source: the wheel and source archive are publicly downloadable, but legal use remains governed by the proprietary license.
|
|
163
|
+
|
|
164
|
+
Do not commit API keys or other credentials into your application code. Pass them at runtime via the `api_key` argument or the `METALIFT_API_KEY` environment variable.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
metalift_sdk/__init__.py
|
|
5
|
+
metalift_sdk/client.py
|
|
6
|
+
metalift_sdk/py.typed
|
|
7
|
+
metalift_sdk.egg-info/PKG-INFO
|
|
8
|
+
metalift_sdk.egg-info/SOURCES.txt
|
|
9
|
+
metalift_sdk.egg-info/dependency_links.txt
|
|
10
|
+
metalift_sdk.egg-info/requires.txt
|
|
11
|
+
metalift_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
httpx>=0.27.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
metalift_sdk
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
# `wheel` is intentionally not listed: the setuptools backend handles wheel
# builds itself, and the setuptools documentation advises against adding it.
# NOTE(review): the >=77 floor is presumably tied to the SPDX `license` string
# and `license-files` key used in [project] — confirm before lowering it.
requires = ["setuptools>=77"]
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
name = "metalift-sdk"
version = "1.0.0"
description = "Official Python SDK for the Metalift web context API"
readme = "README.md"
requires-python = ">=3.11"
# SPDX license expression; the full terms live in the file listed below.
license = "LicenseRef-Proprietary"
license-files = ["LICENSE"]
authors = [{ name = "Metalift", email = "support@metalift.ai" }]
keywords = [
    "metalift",
    "scraping",
    "web-scraping",
    "crawl",
    "llm",
    "agents",
    "markdown",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
# Runtime requirements (PEP 508 strings, one per line).
dependencies = [
    "httpx>=0.27.0",
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
# Product site and documentation.
Homepage = "https://metalift.ai"
Documentation = "https://metalift.ai/docs"
# Source hosting, issue tracker and release notes.
Repository = "https://github.com/Endacoder/scraper-mcp"
Issues = "https://github.com/Endacoder/scraper-mcp/issues"
Changelog = "https://github.com/Endacoder/scraper-mcp/releases"
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.packages.find]
# Search the project root, but only pick up the SDK package and its
# subpackages.
where = ["."]
include = ["metalift_sdk*"]
|