coreclaw-client 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coreclaw/__init__.py +16 -0
- coreclaw/__main__.py +5 -0
- coreclaw/cli.py +26 -0
- coreclaw/client.py +109 -0
- coreclaw/exceptions.py +24 -0
- coreclaw/pagination.py +9 -0
- coreclaw/resources.py +118 -0
- coreclaw_client-1.0.0.dist-info/METADATA +104 -0
- coreclaw_client-1.0.0.dist-info/RECORD +12 -0
- coreclaw_client-1.0.0.dist-info/WHEEL +5 -0
- coreclaw_client-1.0.0.dist-info/entry_points.txt +2 -0
- coreclaw_client-1.0.0.dist-info/top_level.txt +1 -0
coreclaw/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""CoreClaw Python SDK."""
|
|
2
|
+
|
|
3
|
+
from .client import CoreClawAsyncClient, CoreClawClient
|
|
4
|
+
from .exceptions import CoreClawAPIError, CoreClawError, CoreClawHTTPError, CoreClawResponseError
|
|
5
|
+
|
|
6
|
+
__version__ = "1.0.0"
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"CoreClawAPIError",
|
|
10
|
+
"CoreClawAsyncClient",
|
|
11
|
+
"CoreClawClient",
|
|
12
|
+
"CoreClawError",
|
|
13
|
+
"CoreClawHTTPError",
|
|
14
|
+
"CoreClawResponseError",
|
|
15
|
+
"__version__",
|
|
16
|
+
]
|
coreclaw/__main__.py
ADDED
coreclaw/cli.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from collections.abc import Sequence
|
|
3
|
+
|
|
4
|
+
from . import __version__
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``coreclaw`` command.

    The parser defines a single boolean ``--version`` flag. It only records
    whether the flag was given; acting on it is left to the caller.
    """
    cli_parser = argparse.ArgumentParser(prog="coreclaw")
    version_flag = {
        "action": "store_true",
        "help": "print the package version and exit",
    }
    cli_parser.add_argument("--version", **version_flag)
    return cli_parser
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the ``coreclaw`` command-line interface.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Always ``0``. With ``--version`` the package version is printed,
        otherwise the help text is printed.
    """
    cli_parser = build_parser()
    options = cli_parser.parse_args(argv)

    if not options.version:
        cli_parser.print_help()
    else:
        print(f"coreclaw {__version__}")
    return 0
|
coreclaw/client.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
from .exceptions import CoreClawAPIError, CoreClawHTTPError, CoreClawResponseError
|
|
6
|
+
from .resources import AsyncRunClient, AsyncScraperClient, RunClient, ScraperClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
DEFAULT_BASE_URL = "https://openapi.cafescraper.com"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CoreClawClient:
    """Synchronous CoreClaw API client backed by ``httpx.Client``.

    Can be used as a context manager; leaving the ``with`` block closes the
    underlying HTTP client.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str | None = None,
        timeout: float = 30.0,
        transport: httpx.BaseTransport | None = None,
    ) -> None:
        """Configure the underlying HTTP client.

        Args:
            api_key: Value placed in the ``api-key`` request header.
            base_url: API host; a missing or empty value falls back to
                ``DEFAULT_BASE_URL``. ``/api/v1/`` is appended to it.
            timeout: Timeout forwarded to ``httpx.Client``.
            transport: Optional httpx transport forwarded to ``httpx.Client``.
        """
        api_root = _normalize_base_url(base_url or DEFAULT_BASE_URL)
        auth_headers = {"api-key": api_key}
        self._client = httpx.Client(
            base_url=api_root,
            headers=auth_headers,
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> ScraperClient:
        """Return a ``ScraperClient`` bound to this client and *scraper_slug*."""
        return ScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> RunClient:
        """Return a ``RunClient`` bound to this client and *run_slug*."""
        return RunClient(self, run_slug)

    def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The HTTP response is handed to ``_parse_response``, which raises the
        SDK's exceptions on failure.
        """
        return _parse_response(self._client.post(path, json=payload))

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._client.close()

    def __enter__(self) -> "CoreClawClient":
        return self

    def __exit__(self, *args: object) -> None:
        # Exceptions are not suppressed; the client is simply closed.
        self.close()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class CoreClawAsyncClient:
    """Asynchronous CoreClaw API client backed by ``httpx.AsyncClient``.

    Can be used as an async context manager; leaving the ``async with``
    block closes the underlying HTTP client.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str | None = None,
        timeout: float = 30.0,
        transport: httpx.AsyncBaseTransport | httpx.BaseTransport | None = None,
    ) -> None:
        """Configure the underlying async HTTP client.

        Args:
            api_key: Value placed in the ``api-key`` request header.
            base_url: API host; a missing or empty value falls back to
                ``DEFAULT_BASE_URL``. ``/api/v1/`` is appended to it.
            timeout: Timeout forwarded to ``httpx.AsyncClient``.
            transport: Optional httpx transport forwarded to
                ``httpx.AsyncClient``.
        """
        api_root = _normalize_base_url(base_url or DEFAULT_BASE_URL)
        auth_headers = {"api-key": api_key}
        self._client = httpx.AsyncClient(
            base_url=api_root,
            headers=auth_headers,
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> AsyncScraperClient:
        """Return an ``AsyncScraperClient`` bound to this client and *scraper_slug*."""
        return AsyncScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> AsyncRunClient:
        """Return an ``AsyncRunClient`` bound to this client and *run_slug*."""
        return AsyncRunClient(self, run_slug)

    async def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The HTTP response is handed to ``_parse_response``, which raises the
        SDK's exceptions on failure.
        """
        http_response = await self._client.post(path, json=payload)
        return _parse_response(http_response)

    async def close(self) -> None:
        """Close the underlying ``httpx.AsyncClient``."""
        await self._client.aclose()

    async def __aenter__(self) -> "CoreClawAsyncClient":
        return self

    async def __aexit__(self, *args: object) -> None:
        # Exceptions are not suppressed; the client is simply closed.
        await self.close()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _normalize_base_url(base_url: str) -> str:
|
|
85
|
+
return base_url.rstrip("/") + "/api/v1/"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _parse_response(response: httpx.Response) -> Any:
    """Validate a CoreClaw API response and return its ``data`` field.

    The body is expected to be a JSON object carrying ``code`` and
    ``message`` keys; a ``code`` of ``0`` is treated as success.

    Args:
        response: The HTTP response to inspect.

    Returns:
        The value of the ``data`` key, or ``None`` when it is absent.

    Raises:
        CoreClawHTTPError: The status code is outside the 2xx range.
        CoreClawResponseError: The body is not valid JSON, is not a JSON
            object, lacks ``code`` or ``message``, or carries a ``code``
            that cannot be converted to an integer.
        CoreClawAPIError: The body reports a non-zero ``code``.
    """
    if not 200 <= response.status_code < 300:
        raise CoreClawHTTPError(response.status_code, response.text)

    try:
        payload = response.json()
    except ValueError as exc:
        raise CoreClawResponseError("response body is not valid JSON") from exc

    if not isinstance(payload, dict):
        raise CoreClawResponseError("response body must be a JSON object")

    if "code" not in payload or "message" not in payload:
        raise CoreClawResponseError("response body is missing code or message")

    code = payload["code"]
    message = str(payload["message"])

    if code != 0:
        # A non-numeric code (e.g. a string label or null) must surface as an
        # SDK error rather than a bare ValueError/TypeError from int().
        try:
            numeric_code = int(code)
        except (TypeError, ValueError, OverflowError) as exc:
            raise CoreClawResponseError("response code is not an integer") from exc
        raise CoreClawAPIError(numeric_code, message, payload.get("data"))

    return payload.get("data")
|
coreclaw/exceptions.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class CoreClawError(Exception):
    """Root of the CoreClaw SDK exception hierarchy.

    Catch this type to handle any error raised by the SDK itself.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CoreClawHTTPError(CoreClawError):
    """Raised when the API answers with a non-2xx HTTP status.

    Attributes are set in ``__init__``:
        status_code: The HTTP status code of the response.
        message: The text passed alongside the status code, kept as an
            attribute so callers do not have to parse ``str(error)``.
    """

    def __init__(self, status_code: int, message: str) -> None:
        self.status_code = status_code
        # Exposed for parity with CoreClawAPIError, which also keeps ``message``.
        self.message = message
        super().__init__(f"CoreClaw HTTP error {status_code}: {message}")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CoreClawResponseError(CoreClawError):
    """Raised when a response body does not have the expected structure."""

    def __init__(self, message: str) -> None:
        rendered = f"CoreClaw response error: {message}"
        super().__init__(rendered)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CoreClawAPIError(CoreClawError):
    """Raised when the API reports a failure through its ``code`` field.

    The ``code``, ``message`` and ``data`` constructor arguments are kept as
    attributes of the same names.
    """

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        rendered = f"CoreClaw API error {code}: {message}"
        super().__init__(rendered)
        self.code, self.message, self.data = code, message, data
|
coreclaw/pagination.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
def limit_offset_to_page(limit: int, offset: int) -> tuple[int, int]:
    """Translate a limit/offset window into ``(page_index, page_size)``.

    The page index is 1-based and the page size equals *limit*.

    Raises:
        ValueError: If *limit* is below 1, *offset* is negative, or *offset*
            is not a multiple of *limit*.
    """
    if limit < 1:
        raise ValueError("limit must be greater than or equal to 1")
    if offset < 0:
        raise ValueError("offset must be greater than or equal to 0")

    # Only offsets that land exactly on a page boundary can be expressed.
    full_pages, leftover = divmod(offset, limit)
    if leftover:
        raise ValueError("offset must be divisible by limit")

    return full_pages + 1, limit
|
coreclaw/resources.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from .pagination import limit_offset_to_page
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ScraperClient:
    """Synchronous handle for one scraper, identified by its slug."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        """Remember the parent client and the scraper slug to operate on."""
        self._scraper_slug = scraper_slug
        self._client = client

    def run(
        self,
        *,
        input: dict[str, Any],
        version: str | None = None,
        callback_url: str | None = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """Start a run of this scraper via a POST to ``scraper/run``.

        Args:
            input: Scraper input, sent under the ``input`` key.
            version: Optional scraper version; omitted from the request
                when ``None``.
            callback_url: Optional callback URL; omitted when ``None``.
            wait_for_finish: Sent inverted as the ``is_async`` flag.
            limit: Page size; must be at least 1.
            offset: Starting offset; must be a non-negative multiple of
                *limit*.

        Returns:
            Whatever the parent client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* cannot be mapped to a page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = _build_scraper_run_payload(
            scraper_slug=self._scraper_slug,
            input=input,
            version=version,
            callback_url=callback_url,
            wait_for_finish=wait_for_finish,
            page_index=page_index,
            page_size=page_size,
        )
        return self._client.post("scraper/run", request_body)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AsyncScraperClient:
    """Asynchronous handle for one scraper, identified by its slug."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        """Remember the parent client and the scraper slug to operate on."""
        self._scraper_slug = scraper_slug
        self._client = client

    async def run(
        self,
        *,
        input: dict[str, Any],
        version: str | None = None,
        callback_url: str | None = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """Start a run of this scraper via an awaited POST to ``scraper/run``.

        Args:
            input: Scraper input, sent under the ``input`` key.
            version: Optional scraper version; omitted from the request
                when ``None``.
            callback_url: Optional callback URL; omitted when ``None``.
            wait_for_finish: Sent inverted as the ``is_async`` flag.
            limit: Page size; must be at least 1.
            offset: Starting offset; must be a non-negative multiple of
                *limit*.

        Returns:
            Whatever awaiting the parent client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* cannot be mapped to a page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = _build_scraper_run_payload(
            scraper_slug=self._scraper_slug,
            input=input,
            version=version,
            callback_url=callback_url,
            wait_for_finish=wait_for_finish,
            page_index=page_index,
            page_size=page_size,
        )
        return await self._client.post("scraper/run", request_body)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class RunClient:
    """Synchronous handle for one run, identified by its slug."""

    def __init__(self, client: Any, run_slug: str) -> None:
        """Remember the parent client and the run slug to operate on."""
        self._run_slug = run_slug
        self._client = client

    def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """Fetch one page of this run's results via ``run/result/list``.

        Args:
            limit: Page size; must be at least 1.
            offset: Starting offset; must be a non-negative multiple of
                *limit*.

        Returns:
            Whatever the parent client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* cannot be mapped to a page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = {
            "run_slug": self._run_slug,
            "page_index": page_index,
            "page_size": page_size,
        }
        return self._client.post("run/result/list", request_body)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class AsyncRunClient:
    """Asynchronous handle for one run, identified by its slug."""

    def __init__(self, client: Any, run_slug: str) -> None:
        """Remember the parent client and the run slug to operate on."""
        self._run_slug = run_slug
        self._client = client

    async def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """Fetch one page of this run's results via ``run/result/list``.

        Args:
            limit: Page size; must be at least 1.
            offset: Starting offset; must be a non-negative multiple of
                *limit*.

        Returns:
            Whatever awaiting the parent client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* cannot be mapped to a page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = {
            "run_slug": self._run_slug,
            "page_index": page_index,
            "page_size": page_size,
        }
        return await self._client.post("run/result/list", request_body)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _build_scraper_run_payload(
|
|
97
|
+
scraper_slug: str,
|
|
98
|
+
input: dict[str, Any],
|
|
99
|
+
version: str | None,
|
|
100
|
+
callback_url: str | None,
|
|
101
|
+
wait_for_finish: bool,
|
|
102
|
+
page_index: int,
|
|
103
|
+
page_size: int,
|
|
104
|
+
) -> dict[str, Any]:
|
|
105
|
+
payload: dict[str, Any] = {
|
|
106
|
+
"scraper_slug": scraper_slug,
|
|
107
|
+
"input": input,
|
|
108
|
+
"is_async": not wait_for_finish,
|
|
109
|
+
"page_index": page_index,
|
|
110
|
+
"page_size": page_size,
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
if version is not None:
|
|
114
|
+
payload["version"] = version
|
|
115
|
+
if callback_url is not None:
|
|
116
|
+
payload["callback_url"] = callback_url
|
|
117
|
+
|
|
118
|
+
return payload
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: coreclaw-client
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: CoreClaw Python SDK client.
|
|
5
|
+
Author: CoreClaw maintainers
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: httpx>=0.24
|
|
9
|
+
|
|
10
|
+
# CoreClaw Python SDK
|
|
11
|
+
|
|
12
|
+
CoreClaw Python SDK for running scraper scripts and reading run results.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install coreclaw-client
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Run a Scraper
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from coreclaw import CoreClawClient
|
|
24
|
+
|
|
25
|
+
client = CoreClawClient(api_key="YOUR_API_KEY")
|
|
26
|
+
|
|
27
|
+
run = client.scraper("SCRAPER_SLUG").run(
|
|
28
|
+
input={
|
|
29
|
+
"parameters": {
|
|
30
|
+
"system": {
|
|
31
|
+
"proxy_region": "US",
|
|
32
|
+
"cpus": 0.125,
|
|
33
|
+
"memory": 512,
|
|
34
|
+
"execute_limit_time_seconds": 1800,
|
|
35
|
+
"max_total_charge": 0,
|
|
36
|
+
"max_total_traffic": 0,
|
|
37
|
+
},
|
|
38
|
+
"custom": {
|
|
39
|
+
"keyword": "python",
|
|
40
|
+
},
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
print(run["run_slug"])
|
|
46
|
+
client.close()
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
To run a specific version:
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
run = client.scraper("SCRAPER_SLUG").run(
|
|
53
|
+
input={...},
|
|
54
|
+
version="v1.0.1",
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
If `version` is not provided, CoreClaw uses the latest available version.
|
|
59
|
+
|
|
60
|
+
## Get Run Results
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from coreclaw import CoreClawClient
|
|
64
|
+
|
|
65
|
+
client = CoreClawClient(api_key="YOUR_API_KEY")
|
|
66
|
+
|
|
67
|
+
results = client.run("RUN_SLUG").list_results(limit=10, offset=0)
|
|
68
|
+
|
|
69
|
+
print(results["count"])
|
|
70
|
+
for item in results["list"]:
|
|
71
|
+
print(item)
|
|
72
|
+
|
|
73
|
+
client.close()
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Async Usage
|
|
77
|
+
|
|
78
|
+
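Run a scraper (the `await` calls below must run inside an `async def` coroutine, for example one started with `asyncio.run`):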
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from coreclaw import CoreClawAsyncClient
|
|
82
|
+
|
|
83
|
+
client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
|
|
84
|
+
run = await client.scraper("SCRAPER_SLUG").run(input={...})
|
|
85
|
+
await client.close()
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Get run results:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from coreclaw import CoreClawAsyncClient
|
|
92
|
+
|
|
93
|
+
client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
|
|
94
|
+
results = await client.run("RUN_SLUG").list_results(limit=10, offset=0)
|
|
95
|
+
await client.close()
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## More
|
|
99
|
+
|
|
100
|
+
- Chinese README: [README.zh-CN.md](README.zh-CN.md)
|
|
101
|
+
- Run scraper demo: [examples/run_scraper.py](examples/run_scraper.py)
|
|
102
|
+
- Get results demo: [examples/get_results.py](examples/get_results.py)
|
|
103
|
+
- Async run scraper demo: [examples/async_run_scraper.py](examples/async_run_scraper.py)
|
|
104
|
+
- Async get results demo: [examples/async_get_results.py](examples/async_get_results.py)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
coreclaw/__init__.py,sha256=XdJf3xchUA5qYMMaktm1Hi9x1uBOl3gpKbU2FoUb-A0,387
|
|
2
|
+
coreclaw/__main__.py,sha256=nBzPa5T3-3BOCBetr5YfKJdwvp8R15bXsL0tFKTTolI,93
|
|
3
|
+
coreclaw/cli.py,sha256=qFQzC338cK9N4SX9ijyeoxcQY3ArRsP4Fu8SJMlelxs,568
|
|
4
|
+
coreclaw/client.py,sha256=GabkL5AlZl9FumkmC8bOKMQc84psGYJCI7VCQlBwIQU,3254
|
|
5
|
+
coreclaw/exceptions.py,sha256=RYZOpdZI1fmvbAZobd1UNhF4dhdRrhaRaLVPAVBBFYY,745
|
|
6
|
+
coreclaw/pagination.py,sha256=boGofnnsdKf_Cx3HggxnduRZn3omfsHpXK9PAIZtq9E,375
|
|
7
|
+
coreclaw/resources.py,sha256=dEZIRkVZnSsvrkT6tfI7_I3te2dl0msoHnZVg-BoJBE,3260
|
|
8
|
+
coreclaw_client-1.0.0.dist-info/METADATA,sha256=nluSQaphR2L5fssx7N0nyNPynAuyuJx8bjzQZDHL5dE,2371
|
|
9
|
+
coreclaw_client-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
coreclaw_client-1.0.0.dist-info/entry_points.txt,sha256=NxJt0wGfwPyMWFpMN-RMF3Tc0zIwbt1CrBJD0_tTyP0,47
|
|
11
|
+
coreclaw_client-1.0.0.dist-info/top_level.txt,sha256=_AUKKysucAwL_T-39bXO0ml5dLxGf7kY37wd0zvc7Lk,9
|
|
12
|
+
coreclaw_client-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
coreclaw
|