coreclaw-client 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
coreclaw/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """CoreClaw Python SDK."""
2
+
3
+ from .client import CoreClawAsyncClient, CoreClawClient
4
+ from .exceptions import CoreClawAPIError, CoreClawError, CoreClawHTTPError, CoreClawResponseError
5
+
6
+ __version__ = "1.0.0"
7
+
8
+ __all__ = [
9
+ "CoreClawAPIError",
10
+ "CoreClawAsyncClient",
11
+ "CoreClawClient",
12
+ "CoreClawError",
13
+ "CoreClawHTTPError",
14
+ "CoreClawResponseError",
15
+ "__version__",
16
+ ]
coreclaw/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .cli import main
2
+
3
+
4
if __name__ == "__main__":
    # Entry point for ``python -m coreclaw``.
    import sys

    # Forward the real command-line arguments instead of discarding them.
    # With no arguments, keep the historical behavior of printing the version.
    raise SystemExit(main(sys.argv[1:] or ["--version"]))
coreclaw/cli.py ADDED
@@ -0,0 +1,26 @@
1
+ import argparse
2
+ from collections.abc import Sequence
3
+
4
+ from . import __version__
5
+
6
+
7
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``coreclaw`` command.

    The parser knows a single boolean ``--version`` flag, stored on the
    parsed namespace as ``version``.
    """
    version_flag_options = {
        "action": "store_true",
        "help": "print the package version and exit",
    }
    cli_parser = argparse.ArgumentParser(prog="coreclaw")
    cli_parser.add_argument("--version", **version_flag_options)
    return cli_parser
15
+
16
+
17
def main(argv: "Sequence[str] | None" = None) -> int:
    """Run the ``coreclaw`` command-line interface.

    Args:
        argv: Arguments to parse. ``None`` is handed to
            ``ArgumentParser.parse_args`` unchanged, which then falls back
            to the process arguments.

    Returns:
        ``0`` after printing either the version or the help text.
    """
    # NOTE: the annotation is quoted because ``X | None`` is evaluated when the
    # function is defined and raises TypeError on Python 3.9.
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.version:
        print(f"coreclaw {__version__}")
    else:
        parser.print_help()
    return 0
coreclaw/client.py ADDED
@@ -0,0 +1,109 @@
1
+ from typing import Any
2
+
3
+ import httpx
4
+
5
+ from .exceptions import CoreClawAPIError, CoreClawHTTPError, CoreClawResponseError
6
+ from .resources import AsyncRunClient, AsyncScraperClient, RunClient, ScraperClient
7
+
8
+
9
+ DEFAULT_BASE_URL = "https://openapi.cafescraper.com"
10
+
11
+
12
class CoreClawClient:
    """Synchronous CoreClaw API client backed by an ``httpx.Client``.

    Instances can be used as a context manager; leaving the ``with`` block
    closes the underlying HTTP client.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: "str | None" = None,
        timeout: float = 30.0,
        transport: "httpx.BaseTransport | None" = None,
    ) -> None:
        """Create the underlying HTTP client.

        Args:
            api_key: Value sent in the ``api-key`` header of every request.
            base_url: API host; ``DEFAULT_BASE_URL`` is used when this is
                ``None`` or empty. The value is passed through
                ``_normalize_base_url`` before use.
            timeout: Timeout handed to ``httpx.Client``.
            transport: Optional transport handed to ``httpx.Client``.
        """
        # NOTE: union annotations are quoted because ``X | None`` is evaluated
        # at definition time and raises TypeError on Python 3.9.
        self._client = httpx.Client(
            base_url=_normalize_base_url(base_url or DEFAULT_BASE_URL),
            headers={"api-key": api_key},
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> ScraperClient:
        """Return a ``ScraperClient`` bound to this client and *scraper_slug*."""
        return ScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> RunClient:
        """Return a ``RunClient`` bound to this client and *run_slug*."""
        return RunClient(self, run_slug)

    def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The response is handed to ``_parse_response``; whatever that helper
        returns or raises is propagated to the caller.
        """
        response = self._client.post(path, json=payload)
        return _parse_response(response)

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._client.close()

    def __enter__(self) -> "CoreClawClient":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
46
+
47
+
48
class CoreClawAsyncClient:
    """Asynchronous CoreClaw API client backed by an ``httpx.AsyncClient``.

    Instances can be used as an async context manager; leaving the
    ``async with`` block closes the underlying HTTP client.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: "str | None" = None,
        timeout: float = 30.0,
        transport: "httpx.AsyncBaseTransport | httpx.BaseTransport | None" = None,
    ) -> None:
        """Create the underlying async HTTP client.

        Args:
            api_key: Value sent in the ``api-key`` header of every request.
            base_url: API host; ``DEFAULT_BASE_URL`` is used when this is
                ``None`` or empty. The value is passed through
                ``_normalize_base_url`` before use.
            timeout: Timeout handed to ``httpx.AsyncClient``.
            transport: Optional transport handed to ``httpx.AsyncClient``.
        """
        # NOTE: union annotations are quoted because ``X | None`` is evaluated
        # at definition time and raises TypeError on Python 3.9.
        self._client = httpx.AsyncClient(
            base_url=_normalize_base_url(base_url or DEFAULT_BASE_URL),
            headers={"api-key": api_key},
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> AsyncScraperClient:
        """Return an ``AsyncScraperClient`` bound to this client and *scraper_slug*."""
        return AsyncScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> AsyncRunClient:
        """Return an ``AsyncRunClient`` bound to this client and *run_slug*."""
        return AsyncRunClient(self, run_slug)

    async def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The response is handed to ``_parse_response``; whatever that helper
        returns or raises is propagated to the caller.
        """
        response = await self._client.post(path, json=payload)
        return _parse_response(response)

    async def close(self) -> None:
        """Close the underlying async HTTP client."""
        await self._client.aclose()

    async def __aenter__(self) -> "CoreClawAsyncClient":
        return self

    async def __aexit__(self, *args: object) -> None:
        await self.close()
82
+
83
+
84
+ def _normalize_base_url(base_url: str) -> str:
85
+ return base_url.rstrip("/") + "/api/v1/"
86
+
87
+
88
def _parse_response(response: httpx.Response) -> Any:
    """Validate a CoreClaw HTTP response and return its ``data`` field.

    The body is expected to be a JSON object containing ``code`` and
    ``message`` keys; a ``code`` equal to ``0`` signals success.

    Args:
        response: The HTTP response to inspect.

    Returns:
        The value stored under ``data`` in the JSON object, or ``None`` when
        that key is absent.

    Raises:
        CoreClawHTTPError: The status code is outside the 200-299 range.
        CoreClawResponseError: The body is not valid JSON, is not a JSON
            object, lacks ``code`` or ``message``, or carries a non-zero
            ``code`` that cannot be converted to an integer.
        CoreClawAPIError: The body carries a non-zero ``code``.
    """
    if not 200 <= response.status_code < 300:
        raise CoreClawHTTPError(response.status_code, response.text)

    try:
        payload = response.json()
    except ValueError as exc:
        raise CoreClawResponseError("response body is not valid JSON") from exc

    if not isinstance(payload, dict):
        raise CoreClawResponseError("response body must be a JSON object")

    if "code" not in payload or "message" not in payload:
        raise CoreClawResponseError("response body is missing code or message")

    code = payload["code"]
    data = payload.get("data")

    if code == 0:
        return data

    message = str(payload["message"])
    try:
        numeric_code = int(code)
    except (TypeError, ValueError, OverflowError) as exc:
        # A non-numeric error code used to escape as a bare TypeError or
        # ValueError; keep every failure inside the CoreClawError hierarchy.
        raise CoreClawResponseError(
            f"response code is not an integer: {code!r}"
        ) from exc

    raise CoreClawAPIError(numeric_code, message, data)
coreclaw/exceptions.py ADDED
@@ -0,0 +1,24 @@
1
+ from typing import Any
2
+
3
+
4
class CoreClawError(Exception):
    """Base class for all CoreClaw SDK errors."""


class CoreClawHTTPError(CoreClawError):
    """Error carrying an HTTP status code and the accompanying message text."""

    def __init__(self, status_code: int, message: str) -> None:
        self.status_code = status_code
        # Stored for parity with CoreClawAPIError, which also exposes ``message``.
        self.message = message
        super().__init__(f"CoreClaw HTTP error {status_code}: {message}")

    def __reduce__(self) -> Any:
        # Default exception pickling/copying replays ``self.args`` (the single
        # formatted string) into ``__init__``, which needs two arguments.
        return (type(self), (self.status_code, self.message))


class CoreClawResponseError(CoreClawError):
    """Error describing a response body that could not be interpreted."""

    def __init__(self, message: str) -> None:
        # Keep the raw message so a copy can be rebuilt without re-prefixing.
        self.message = message
        super().__init__(f"CoreClaw response error: {message}")

    def __reduce__(self) -> Any:
        # Rebuild from the raw message; replaying ``self.args`` would add the
        # "CoreClaw response error: " prefix a second time.
        return (type(self), (self.message,))


class CoreClawAPIError(CoreClawError):
    """Error carrying an API result code, its message and optional data."""

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        self.code = code
        self.message = message
        self.data = data
        super().__init__(f"CoreClaw API error {code}: {message}")

    def __reduce__(self) -> Any:
        # Rebuild from the original constructor arguments rather than the
        # single formatted string held in ``self.args``.
        return (type(self), (self.code, self.message, self.data))
coreclaw/pagination.py ADDED
@@ -0,0 +1,9 @@
1
def limit_offset_to_page(limit: int, offset: int) -> tuple[int, int]:
    """Translate a limit/offset pair into a 1-based page index and page size.

    Args:
        limit: Number of items per page; must be at least 1.
        offset: Number of items to skip; must be non-negative and an exact
            multiple of ``limit``.

    Returns:
        ``(page_index, page_size)`` where ``page_index`` starts at 1 and
        ``page_size`` equals ``limit``.

    Raises:
        ValueError: If either argument violates the constraints above.
    """
    if limit < 1:
        raise ValueError("limit must be greater than or equal to 1")
    if offset < 0:
        raise ValueError("offset must be greater than or equal to 0")

    pages_skipped, leftover = divmod(offset, limit)
    if leftover != 0:
        raise ValueError("offset must be divisible by limit")

    return pages_skipped + 1, limit
coreclaw/resources.py ADDED
@@ -0,0 +1,118 @@
1
+ from typing import Any
2
+
3
+ from .pagination import limit_offset_to_page
4
+
5
+
6
class ScraperClient:
    """Synchronous helper bound to one scraper slug."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        """Store the parent client and the scraper slug used by ``run``."""
        self._client = client
        self._scraper_slug = scraper_slug

    def run(
        self,
        *,
        input: dict[str, Any],
        version: "str | None" = None,
        callback_url: "str | None" = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """POST a run request for this scraper to ``scraper/run``.

        Args:
            input: Sent as the ``input`` field of the request.
            version: Sent as ``version`` only when not ``None``.
            callback_url: Sent as ``callback_url`` only when not ``None``.
            wait_for_finish: The request's ``is_async`` field is the negation
                of this flag.
            limit: Page size; converted together with ``offset`` by
                ``limit_offset_to_page``.
            offset: Item offset; must be a non-negative multiple of ``limit``.

        Returns:
            Whatever the parent client's ``post`` returns.

        Raises:
            ValueError: If ``limit``/``offset`` are rejected by
                ``limit_offset_to_page``.
        """
        # NOTE: union annotations are quoted because ``X | None`` is evaluated
        # at definition time and raises TypeError on Python 3.9.
        page_index, page_size = limit_offset_to_page(limit, offset)
        payload = _build_scraper_run_payload(
            self._scraper_slug,
            input,
            version,
            callback_url,
            wait_for_finish,
            page_index,
            page_size,
        )
        return self._client.post("scraper/run", payload)
32
+
33
+
34
class AsyncScraperClient:
    """Asynchronous helper bound to one scraper slug."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        """Store the parent client and the scraper slug used by ``run``."""
        self._client = client
        self._scraper_slug = scraper_slug

    async def run(
        self,
        *,
        input: dict[str, Any],
        version: "str | None" = None,
        callback_url: "str | None" = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """POST a run request for this scraper to ``scraper/run``.

        Args:
            input: Sent as the ``input`` field of the request.
            version: Sent as ``version`` only when not ``None``.
            callback_url: Sent as ``callback_url`` only when not ``None``.
            wait_for_finish: The request's ``is_async`` field is the negation
                of this flag.
            limit: Page size; converted together with ``offset`` by
                ``limit_offset_to_page``.
            offset: Item offset; must be a non-negative multiple of ``limit``.

        Returns:
            Whatever awaiting the parent client's ``post`` returns.

        Raises:
            ValueError: If ``limit``/``offset`` are rejected by
                ``limit_offset_to_page``.
        """
        # NOTE: union annotations are quoted because ``X | None`` is evaluated
        # at definition time and raises TypeError on Python 3.9.
        page_index, page_size = limit_offset_to_page(limit, offset)
        payload = _build_scraper_run_payload(
            self._scraper_slug,
            input,
            version,
            callback_url,
            wait_for_finish,
            page_index,
            page_size,
        )
        return await self._client.post("scraper/run", payload)
60
+
61
+
62
class RunClient:
    """Synchronous helper bound to one run slug."""

    def __init__(self, client: Any, run_slug: str) -> None:
        """Remember the parent client and the run slug used by ``list_results``."""
        self._run_slug = run_slug
        self._client = client

    def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """POST a paged result query for this run to ``run/result/list``.

        ``limit`` and ``offset`` are converted to a page index and page size
        by ``limit_offset_to_page`` before being sent.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = {
            "run_slug": self._run_slug,
            "page_index": page_index,
            "page_size": page_size,
        }
        return self._client.post("run/result/list", request_body)
77
+
78
+
79
class AsyncRunClient:
    """Asynchronous helper bound to one run slug."""

    def __init__(self, client: Any, run_slug: str) -> None:
        """Remember the parent client and the run slug used by ``list_results``."""
        self._run_slug = run_slug
        self._client = client

    async def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """POST a paged result query for this run to ``run/result/list``.

        ``limit`` and ``offset`` are converted to a page index and page size
        by ``limit_offset_to_page`` before being sent.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = {
            "run_slug": self._run_slug,
            "page_index": page_index,
            "page_size": page_size,
        }
        return await self._client.post("run/result/list", request_body)
94
+
95
+
96
+ def _build_scraper_run_payload(
97
+ scraper_slug: str,
98
+ input: dict[str, Any],
99
+ version: str | None,
100
+ callback_url: str | None,
101
+ wait_for_finish: bool,
102
+ page_index: int,
103
+ page_size: int,
104
+ ) -> dict[str, Any]:
105
+ payload: dict[str, Any] = {
106
+ "scraper_slug": scraper_slug,
107
+ "input": input,
108
+ "is_async": not wait_for_finish,
109
+ "page_index": page_index,
110
+ "page_size": page_size,
111
+ }
112
+
113
+ if version is not None:
114
+ payload["version"] = version
115
+ if callback_url is not None:
116
+ payload["callback_url"] = callback_url
117
+
118
+ return payload
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreclaw-client
3
+ Version: 1.0.0
4
+ Summary: CoreClaw Python SDK client.
5
+ Author: CoreClaw maintainers
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: httpx>=0.24
9
+
10
+ # CoreClaw Python SDK
11
+
12
+ CoreClaw Python SDK for running scraper scripts and reading run results.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install coreclaw-client
18
+ ```
19
+
20
+ ## Run a Scraper
21
+
22
+ ```python
23
+ from coreclaw import CoreClawClient
24
+
25
+ client = CoreClawClient(api_key="YOUR_API_KEY")
26
+
27
+ run = client.scraper("SCRAPER_SLUG").run(
28
+ input={
29
+ "parameters": {
30
+ "system": {
31
+ "proxy_region": "US",
32
+ "cpus": 0.125,
33
+ "memory": 512,
34
+ "execute_limit_time_seconds": 1800,
35
+ "max_total_charge": 0,
36
+ "max_total_traffic": 0,
37
+ },
38
+ "custom": {
39
+ "keyword": "python",
40
+ },
41
+ }
42
+ },
43
+ )
44
+
45
+ print(run["run_slug"])
46
+ client.close()
47
+ ```
48
+
49
+ To run a specific version:
50
+
51
+ ```python
52
+ run = client.scraper("SCRAPER_SLUG").run(
53
+ input={...},
54
+ version="v1.0.1",
55
+ )
56
+ ```
57
+
58
+ If `version` is not provided, CoreClaw uses the latest available version.
59
+
60
+ ## Get Run Results
61
+
62
+ ```python
63
+ from coreclaw import CoreClawClient
64
+
65
+ client = CoreClawClient(api_key="YOUR_API_KEY")
66
+
67
+ results = client.run("RUN_SLUG").list_results(limit=10, offset=0)
68
+
69
+ print(results["count"])
70
+ for item in results["list"]:
71
+ print(item)
72
+
73
+ client.close()
74
+ ```
75
+
76
+ ## Async Usage
77
+
78
+ Run a scraper:
79
+
80
+ ```python
81
+ from coreclaw import CoreClawAsyncClient
82
+
83
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
84
+ run = await client.scraper("SCRAPER_SLUG").run(input={...})
85
+ await client.close()
86
+ ```
87
+
88
+ Get run results:
89
+
90
+ ```python
91
+ from coreclaw import CoreClawAsyncClient
92
+
93
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
94
+ results = await client.run("RUN_SLUG").list_results(limit=10, offset=0)
95
+ await client.close()
96
+ ```
97
+
98
+ ## More
99
+
100
+ - Chinese README: [README.zh-CN.md](README.zh-CN.md)
101
+ - Run scraper demo: [examples/run_scraper.py](examples/run_scraper.py)
102
+ - Get results demo: [examples/get_results.py](examples/get_results.py)
103
+ - Async run scraper demo: [examples/async_run_scraper.py](examples/async_run_scraper.py)
104
+ - Async get results demo: [examples/async_get_results.py](examples/async_get_results.py)
@@ -0,0 +1,12 @@
1
+ coreclaw/__init__.py,sha256=XdJf3xchUA5qYMMaktm1Hi9x1uBOl3gpKbU2FoUb-A0,387
2
+ coreclaw/__main__.py,sha256=nBzPa5T3-3BOCBetr5YfKJdwvp8R15bXsL0tFKTTolI,93
3
+ coreclaw/cli.py,sha256=qFQzC338cK9N4SX9ijyeoxcQY3ArRsP4Fu8SJMlelxs,568
4
+ coreclaw/client.py,sha256=GabkL5AlZl9FumkmC8bOKMQc84psGYJCI7VCQlBwIQU,3254
5
+ coreclaw/exceptions.py,sha256=RYZOpdZI1fmvbAZobd1UNhF4dhdRrhaRaLVPAVBBFYY,745
6
+ coreclaw/pagination.py,sha256=boGofnnsdKf_Cx3HggxnduRZn3omfsHpXK9PAIZtq9E,375
7
+ coreclaw/resources.py,sha256=dEZIRkVZnSsvrkT6tfI7_I3te2dl0msoHnZVg-BoJBE,3260
8
+ coreclaw_client-1.0.0.dist-info/METADATA,sha256=nluSQaphR2L5fssx7N0nyNPynAuyuJx8bjzQZDHL5dE,2371
9
+ coreclaw_client-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ coreclaw_client-1.0.0.dist-info/entry_points.txt,sha256=NxJt0wGfwPyMWFpMN-RMF3Tc0zIwbt1CrBJD0_tTyP0,47
11
+ coreclaw_client-1.0.0.dist-info/top_level.txt,sha256=_AUKKysucAwL_T-39bXO0ml5dLxGf7kY37wd0zvc7Lk,9
12
+ coreclaw_client-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ coreclaw = coreclaw.cli:main
@@ -0,0 +1 @@
1
+ coreclaw