coreclaw-client 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreclaw-client
3
+ Version: 1.0.0
4
+ Summary: CoreClaw Python SDK client.
5
+ Author: CoreClaw maintainers
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: httpx>=0.24
9
+
10
+ # CoreClaw Python SDK
11
+
12
+ CoreClaw Python SDK for running scraper scripts and reading run results.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install coreclaw-client
18
+ ```
19
+
20
+ ## Run a Scraper
21
+
22
+ ```python
23
+ from coreclaw import CoreClawClient
24
+
25
+ client = CoreClawClient(api_key="YOUR_API_KEY")
26
+
27
+ run = client.scraper("SCRAPER_SLUG").run(
28
+ input={
29
+ "parameters": {
30
+ "system": {
31
+ "proxy_region": "US",
32
+ "cpus": 0.125,
33
+ "memory": 512,
34
+ "execute_limit_time_seconds": 1800,
35
+ "max_total_charge": 0,
36
+ "max_total_traffic": 0,
37
+ },
38
+ "custom": {
39
+ "keyword": "python",
40
+ },
41
+ }
42
+ },
43
+ )
44
+
45
+ print(run["run_slug"])
46
+ client.close()
47
+ ```
48
+
49
+ To run a specific version:
50
+
51
+ ```python
52
+ run = client.scraper("SCRAPER_SLUG").run(
53
+ input={...},
54
+ version="v1.0.1",
55
+ )
56
+ ```
57
+
58
+ If `version` is not provided, CoreClaw uses the latest available version.
59
+
60
+ ## Get Run Results
61
+
62
+ ```python
63
+ from coreclaw import CoreClawClient
64
+
65
+ client = CoreClawClient(api_key="YOUR_API_KEY")
66
+
67
+ results = client.run("RUN_SLUG").list_results(limit=10, offset=0)
68
+
69
+ print(results["count"])
70
+ for item in results["list"]:
71
+ print(item)
72
+
73
+ client.close()
74
+ ```
75
+
76
+ ## Async Usage
77
+
78
+ Run a scraper:
79
+
80
+ ```python
81
+ from coreclaw import CoreClawAsyncClient
82
+
83
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
84
+ run = await client.scraper("SCRAPER_SLUG").run(input={...})
85
+ await client.close()
86
+ ```
87
+
88
+ Get run results:
89
+
90
+ ```python
91
+ from coreclaw import CoreClawAsyncClient
92
+
93
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
94
+ results = await client.run("RUN_SLUG").list_results(limit=10, offset=0)
95
+ await client.close()
96
+ ```
97
+
98
+ ## More
99
+
100
+ - Chinese README: [README.zh-CN.md](README.zh-CN.md)
101
+ - Run scraper demo: [examples/run_scraper.py](examples/run_scraper.py)
102
+ - Get results demo: [examples/get_results.py](examples/get_results.py)
103
+ - Async run scraper demo: [examples/async_run_scraper.py](examples/async_run_scraper.py)
104
+ - Async get results demo: [examples/async_get_results.py](examples/async_get_results.py)
@@ -0,0 +1,95 @@
1
+ # CoreClaw Python SDK
2
+
3
+ CoreClaw Python SDK for running scraper scripts and reading run results.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install coreclaw-client
9
+ ```
10
+
11
+ ## Run a Scraper
12
+
13
+ ```python
14
+ from coreclaw import CoreClawClient
15
+
16
+ client = CoreClawClient(api_key="YOUR_API_KEY")
17
+
18
+ run = client.scraper("SCRAPER_SLUG").run(
19
+ input={
20
+ "parameters": {
21
+ "system": {
22
+ "proxy_region": "US",
23
+ "cpus": 0.125,
24
+ "memory": 512,
25
+ "execute_limit_time_seconds": 1800,
26
+ "max_total_charge": 0,
27
+ "max_total_traffic": 0,
28
+ },
29
+ "custom": {
30
+ "keyword": "python",
31
+ },
32
+ }
33
+ },
34
+ )
35
+
36
+ print(run["run_slug"])
37
+ client.close()
38
+ ```
39
+
40
+ To run a specific version:
41
+
42
+ ```python
43
+ run = client.scraper("SCRAPER_SLUG").run(
44
+ input={...},
45
+ version="v1.0.1",
46
+ )
47
+ ```
48
+
49
+ If `version` is not provided, CoreClaw uses the latest available version.
50
+
51
+ ## Get Run Results
52
+
53
+ ```python
54
+ from coreclaw import CoreClawClient
55
+
56
+ client = CoreClawClient(api_key="YOUR_API_KEY")
57
+
58
+ results = client.run("RUN_SLUG").list_results(limit=10, offset=0)
59
+
60
+ print(results["count"])
61
+ for item in results["list"]:
62
+ print(item)
63
+
64
+ client.close()
65
+ ```
66
+
67
+ ## Async Usage
68
+
69
+ Run a scraper:
70
+
71
+ ```python
72
+ from coreclaw import CoreClawAsyncClient
73
+
74
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
75
+ run = await client.scraper("SCRAPER_SLUG").run(input={...})
76
+ await client.close()
77
+ ```
78
+
79
+ Get run results:
80
+
81
+ ```python
82
+ from coreclaw import CoreClawAsyncClient
83
+
84
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
85
+ results = await client.run("RUN_SLUG").list_results(limit=10, offset=0)
86
+ await client.close()
87
+ ```
88
+
89
+ ## More
90
+
91
+ - Chinese README: [README.zh-CN.md](README.zh-CN.md)
92
+ - Run scraper demo: [examples/run_scraper.py](examples/run_scraper.py)
93
+ - Get results demo: [examples/get_results.py](examples/get_results.py)
94
+ - Async run scraper demo: [examples/async_run_scraper.py](examples/async_run_scraper.py)
95
+ - Async get results demo: [examples/async_get_results.py](examples/async_get_results.py)
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "coreclaw-client"
7
+ version = "1.0.0"
8
+ description = "CoreClaw Python SDK client."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ authors = [
12
+ { name = "CoreClaw maintainers" }
13
+ ]
14
+ dependencies = [
15
+ "httpx>=0.24",
16
+ ]
17
+
18
+ [project.scripts]
19
+ coreclaw = "coreclaw.cli:main"
20
+
21
+ [tool.setuptools.packages.find]
22
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ """CoreClaw Python SDK."""
2
+
3
+ from .client import CoreClawAsyncClient, CoreClawClient
4
+ from .exceptions import CoreClawAPIError, CoreClawError, CoreClawHTTPError, CoreClawResponseError
5
+
6
+ __version__ = "1.0.0"
7
+
8
+ __all__ = [
9
+ "CoreClawAPIError",
10
+ "CoreClawAsyncClient",
11
+ "CoreClawClient",
12
+ "CoreClawError",
13
+ "CoreClawHTTPError",
14
+ "CoreClawResponseError",
15
+ "__version__",
16
+ ]
@@ -0,0 +1,5 @@
1
"""Allow ``python -m coreclaw`` to run the command-line interface."""

import sys

from .cli import main


if __name__ == "__main__":
    # Forward the user's arguments to the CLI instead of discarding them.
    # With no arguments the module keeps printing the version, which is what
    # a bare ``python -m coreclaw`` did before.
    raise SystemExit(main(sys.argv[1:] or ["--version"]))
@@ -0,0 +1,26 @@
1
+ import argparse
2
+ from collections.abc import Sequence
3
+
4
+ from . import __version__
5
+
6
+
7
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``coreclaw`` command.

    The parser knows a single optional ``--version`` flag, stored as a
    boolean on the parsed namespace.
    """
    cli_parser = argparse.ArgumentParser(prog="coreclaw")
    version_flag_options = {
        "action": "store_true",
        "help": "print the package version and exit",
    }
    cli_parser.add_argument("--version", **version_flag_options)
    return cli_parser
15
+
16
+
17
def main(argv: "Sequence[str] | None" = None) -> int:
    """Run the ``coreclaw`` command-line interface.

    Args:
        argv: Arguments to parse. ``None`` lets ``argparse`` read
            ``sys.argv[1:]``.

    Returns:
        The process exit status, always ``0``.
    """
    # The union annotation above is quoted on purpose: evaluating
    # ``Sequence[str] | None`` at definition time needs Python 3.10+, while
    # the package metadata declares support for Python 3.9.
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.version:
        print(f"coreclaw {__version__}")
    else:
        # No recognised action was requested: show usage instead.
        parser.print_help()
    return 0
@@ -0,0 +1,109 @@
1
+ from typing import Any
2
+
3
+ import httpx
4
+
5
+ from .exceptions import CoreClawAPIError, CoreClawHTTPError, CoreClawResponseError
6
+ from .resources import AsyncRunClient, AsyncScraperClient, RunClient, ScraperClient
7
+
8
+
9
+ DEFAULT_BASE_URL = "https://openapi.cafescraper.com"
10
+
11
+
12
class CoreClawClient:
    """Synchronous CoreClaw API client built on ``httpx.Client``.

    Usable as a context manager; leaving the ``with`` block closes the
    underlying HTTP client.
    """

    # Union annotations are quoted so they are not evaluated at definition
    # time: ``X | None`` on runtime types needs Python 3.10+, while the
    # package metadata declares support for Python 3.9.
    def __init__(
        self,
        *,
        api_key: str,
        base_url: "str | None" = None,
        timeout: float = 30.0,
        transport: "httpx.BaseTransport | None" = None,
    ) -> None:
        """Configure the underlying HTTP client.

        Args:
            api_key: Value sent in the ``api-key`` header of every request.
            base_url: API host; ``DEFAULT_BASE_URL`` is used when this is
                ``None`` or empty.
            timeout: Timeout value handed to ``httpx.Client``.
            transport: Optional custom httpx transport (the tests pass a
                mock transport here).
        """
        self._client = httpx.Client(
            base_url=_normalize_base_url(base_url or DEFAULT_BASE_URL),
            headers={"api-key": api_key},
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> ScraperClient:
        """Return a helper bound to the scraper identified by *scraper_slug*."""
        return ScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> RunClient:
        """Return a helper bound to the run identified by *run_slug*."""
        return RunClient(self, run_slug)

    def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The response is validated by ``_parse_response``, which raises the
        SDK's error types for HTTP, format, and API-level failures.
        """
        response = self._client.post(path, json=payload)
        return _parse_response(response)

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._client.close()

    def __enter__(self) -> "CoreClawClient":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
46
+
47
+
48
class CoreClawAsyncClient:
    """Asynchronous CoreClaw API client built on ``httpx.AsyncClient``.

    Usable as an async context manager; leaving the ``async with`` block
    closes the underlying HTTP client.
    """

    # Union annotations are quoted so they are not evaluated at definition
    # time: ``X | None`` on runtime types needs Python 3.10+, while the
    # package metadata declares support for Python 3.9.
    def __init__(
        self,
        *,
        api_key: str,
        base_url: "str | None" = None,
        timeout: float = 30.0,
        transport: "httpx.AsyncBaseTransport | httpx.BaseTransport | None" = None,
    ) -> None:
        """Configure the underlying async HTTP client.

        Args:
            api_key: Value sent in the ``api-key`` header of every request.
            base_url: API host; ``DEFAULT_BASE_URL`` is used when this is
                ``None`` or empty.
            timeout: Timeout value handed to ``httpx.AsyncClient``.
            transport: Optional custom httpx transport (the tests pass a
                mock transport here).
        """
        self._client = httpx.AsyncClient(
            base_url=_normalize_base_url(base_url or DEFAULT_BASE_URL),
            headers={"api-key": api_key},
            timeout=timeout,
            transport=transport,
        )

    def scraper(self, scraper_slug: str) -> AsyncScraperClient:
        """Return an async helper bound to the scraper *scraper_slug*."""
        return AsyncScraperClient(self, scraper_slug)

    def run(self, run_slug: str) -> AsyncRunClient:
        """Return an async helper bound to the run *run_slug*."""
        return AsyncRunClient(self, run_slug)

    async def post(self, path: str, payload: dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* and return the parsed result.

        The response is validated by ``_parse_response``, which raises the
        SDK's error types for HTTP, format, and API-level failures.
        """
        response = await self._client.post(path, json=payload)
        return _parse_response(response)

    async def close(self) -> None:
        """Close the underlying ``httpx.AsyncClient``."""
        await self._client.aclose()

    async def __aenter__(self) -> "CoreClawAsyncClient":
        return self

    async def __aexit__(self, *args: object) -> None:
        await self.close()
82
+
83
+
84
+ def _normalize_base_url(base_url: str) -> str:
85
+ return base_url.rstrip("/") + "/api/v1/"
86
+
87
+
88
def _parse_response(response: httpx.Response) -> Any:
    """Validate a CoreClaw HTTP response and return its ``data`` field.

    The body must be a JSON object carrying ``code`` and ``message`` keys;
    a ``code`` of ``0`` means success.

    Args:
        response: The httpx response to inspect.

    Returns:
        The value of the ``data`` key, or ``None`` when it is absent.

    Raises:
        CoreClawHTTPError: The status code is outside the 2xx range.
        CoreClawResponseError: The body is not valid JSON, is not an object,
            lacks ``code``/``message``, or carries a non-integer error code.
        CoreClawAPIError: The body reports a non-zero ``code``.
    """
    if not 200 <= response.status_code < 300:
        raise CoreClawHTTPError(response.status_code, response.text)

    try:
        payload = response.json()
    except ValueError as exc:
        raise CoreClawResponseError("response body is not valid JSON") from exc

    if not isinstance(payload, dict):
        raise CoreClawResponseError("response body must be a JSON object")

    if "code" not in payload or "message" not in payload:
        raise CoreClawResponseError("response body is missing code or message")

    code = payload["code"]
    if code == 0:
        return payload.get("data")

    # A malformed code (e.g. ``null`` or a non-numeric string) must surface
    # as an SDK error rather than as a bare ValueError/TypeError from int().
    try:
        numeric_code = int(code)
    except (TypeError, ValueError, OverflowError) as exc:
        raise CoreClawResponseError("response code is not an integer") from exc

    raise CoreClawAPIError(numeric_code, str(payload["message"]), payload.get("data"))
@@ -0,0 +1,24 @@
1
+ from typing import Any
2
+
3
+
4
class CoreClawError(Exception):
    """Base class for all CoreClaw SDK errors."""


class CoreClawHTTPError(CoreClawError):
    """HTTP-level failure carrying the response status code.

    Attributes:
        status_code: The HTTP status code passed to the constructor.
        message: The unformatted message passed to the constructor.
    """

    def __init__(self, status_code: int, message: str) -> None:
        self.status_code = status_code
        self.message = message
        super().__init__(f"CoreClaw HTTP error {status_code}: {message}")

    def __reduce__(self) -> tuple:
        # Default exception pickling/copying replays ``self.args`` (the
        # formatted text) into ``__init__``, which does not match this
        # signature; rebuild from the original arguments instead.
        return (type(self), (self.status_code, self.message))


class CoreClawResponseError(CoreClawError):
    """The response body did not have the expected format.

    Attributes:
        message: The unformatted message passed to the constructor.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(f"CoreClaw response error: {message}")

    def __reduce__(self) -> tuple:
        # Without this, a pickle/copy round trip would feed the already
        # formatted text back into ``__init__`` and duplicate the prefix.
        return (type(self), (self.message,))


class CoreClawAPIError(CoreClawError):
    """The API answered with a non-zero result code.

    Attributes:
        code: The numeric code passed to the constructor.
        message: The unformatted message passed to the constructor.
        data: Optional extra payload passed to the constructor.
    """

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        self.code = code
        self.message = message
        self.data = data
        super().__init__(f"CoreClaw API error {code}: {message}")

    def __reduce__(self) -> tuple:
        # Rebuild from the original arguments so pickle/copy round trips
        # work despite the multi-argument constructor.
        return (type(self), (self.code, self.message, self.data))
@@ -0,0 +1,9 @@
1
def limit_offset_to_page(limit: int, offset: int) -> tuple[int, int]:
    """Translate a limit/offset window into ``(page_index, page_size)``.

    Page indexes start at 1 and the page size equals *limit*, so *offset*
    has to fall exactly on a page boundary.

    Raises:
        ValueError: If *limit* is below 1, *offset* is negative, or
            *offset* is not a multiple of *limit*.
    """
    if limit < 1:
        raise ValueError("limit must be greater than or equal to 1")
    if offset < 0:
        raise ValueError("offset must be greater than or equal to 0")

    pages_skipped, leftover = divmod(offset, limit)
    if leftover != 0:
        raise ValueError("offset must be divisible by limit")

    return pages_skipped + 1, limit
@@ -0,0 +1,118 @@
1
+ from typing import Any
2
+
3
+ from .pagination import limit_offset_to_page
4
+
5
+
6
class ScraperClient:
    """Synchronous helper for starting runs of one scraper."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        self._client = client
        self._scraper_slug = scraper_slug

    # ``"str | None"`` is quoted so it is not evaluated at definition time:
    # the ``|`` union on runtime types needs Python 3.10+, while the package
    # metadata declares support for Python 3.9.
    def run(
        self,
        *,
        input: dict[str, Any],
        version: "str | None" = None,
        callback_url: "str | None" = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """Start a run of the scraper via ``POST scraper/run``.

        Args:
            input: Run input, sent under the ``input`` key.
            version: Scraper version; left out of the request when ``None``.
            callback_url: Callback URL; left out of the request when ``None``.
            wait_for_finish: Sent inverted as the ``is_async`` request field.
            limit: Page size; sent as ``page_size``.
            offset: Item offset, converted to a 1-based ``page_index``; must
                be a multiple of *limit*.

        Returns:
            Whatever the owning client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* do not describe a whole page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        payload = _build_scraper_run_payload(
            self._scraper_slug,
            input,
            version,
            callback_url,
            wait_for_finish,
            page_index,
            page_size,
        )
        return self._client.post("scraper/run", payload)
32
+
33
+
34
class AsyncScraperClient:
    """Asynchronous helper for starting runs of one scraper."""

    def __init__(self, client: Any, scraper_slug: str) -> None:
        self._client = client
        self._scraper_slug = scraper_slug

    # ``"str | None"`` is quoted so it is not evaluated at definition time:
    # the ``|`` union on runtime types needs Python 3.10+, while the package
    # metadata declares support for Python 3.9.
    async def run(
        self,
        *,
        input: dict[str, Any],
        version: "str | None" = None,
        callback_url: "str | None" = None,
        wait_for_finish: bool = False,
        limit: int = 10,
        offset: int = 0,
    ) -> Any:
        """Start a run of the scraper via ``POST scraper/run``.

        Args:
            input: Run input, sent under the ``input`` key.
            version: Scraper version; left out of the request when ``None``.
            callback_url: Callback URL; left out of the request when ``None``.
            wait_for_finish: Sent inverted as the ``is_async`` request field.
            limit: Page size; sent as ``page_size``.
            offset: Item offset, converted to a 1-based ``page_index``; must
                be a multiple of *limit*.

        Returns:
            Whatever awaiting the owning client's ``post`` returns.

        Raises:
            ValueError: If *limit*/*offset* do not describe a whole page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        payload = _build_scraper_run_payload(
            self._scraper_slug,
            input,
            version,
            callback_url,
            wait_for_finish,
            page_index,
            page_size,
        )
        return await self._client.post("scraper/run", payload)
60
+
61
+
62
class RunClient:
    """Synchronous helper for reading the results of one run."""

    def __init__(self, client: Any, run_slug: str) -> None:
        self._client = client
        self._run_slug = run_slug

    def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """Fetch one page of results via ``POST run/result/list``.

        *limit* and *offset* are converted to the ``page_size`` and 1-based
        ``page_index`` request fields; a ``ValueError`` is raised when they
        do not describe a whole page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = dict(
            run_slug=self._run_slug,
            page_index=page_index,
            page_size=page_size,
        )
        return self._client.post("run/result/list", request_body)
77
+
78
+
79
class AsyncRunClient:
    """Asynchronous helper for reading the results of one run."""

    def __init__(self, client: Any, run_slug: str) -> None:
        self._client = client
        self._run_slug = run_slug

    async def list_results(self, *, limit: int = 10, offset: int = 0) -> Any:
        """Fetch one page of results via ``POST run/result/list``.

        *limit* and *offset* are converted to the ``page_size`` and 1-based
        ``page_index`` request fields; a ``ValueError`` is raised when they
        do not describe a whole page.
        """
        page_index, page_size = limit_offset_to_page(limit, offset)
        request_body = dict(
            run_slug=self._run_slug,
            page_index=page_index,
            page_size=page_size,
        )
        return await self._client.post("run/result/list", request_body)
94
+
95
+
96
+ def _build_scraper_run_payload(
97
+ scraper_slug: str,
98
+ input: dict[str, Any],
99
+ version: str | None,
100
+ callback_url: str | None,
101
+ wait_for_finish: bool,
102
+ page_index: int,
103
+ page_size: int,
104
+ ) -> dict[str, Any]:
105
+ payload: dict[str, Any] = {
106
+ "scraper_slug": scraper_slug,
107
+ "input": input,
108
+ "is_async": not wait_for_finish,
109
+ "page_index": page_index,
110
+ "page_size": page_size,
111
+ }
112
+
113
+ if version is not None:
114
+ payload["version"] = version
115
+ if callback_url is not None:
116
+ payload["callback_url"] = callback_url
117
+
118
+ return payload
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: coreclaw-client
3
+ Version: 1.0.0
4
+ Summary: CoreClaw Python SDK client.
5
+ Author: CoreClaw maintainers
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: httpx>=0.24
9
+
10
+ # CoreClaw Python SDK
11
+
12
+ CoreClaw Python SDK for running scraper scripts and reading run results.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install coreclaw-client
18
+ ```
19
+
20
+ ## Run a Scraper
21
+
22
+ ```python
23
+ from coreclaw import CoreClawClient
24
+
25
+ client = CoreClawClient(api_key="YOUR_API_KEY")
26
+
27
+ run = client.scraper("SCRAPER_SLUG").run(
28
+ input={
29
+ "parameters": {
30
+ "system": {
31
+ "proxy_region": "US",
32
+ "cpus": 0.125,
33
+ "memory": 512,
34
+ "execute_limit_time_seconds": 1800,
35
+ "max_total_charge": 0,
36
+ "max_total_traffic": 0,
37
+ },
38
+ "custom": {
39
+ "keyword": "python",
40
+ },
41
+ }
42
+ },
43
+ )
44
+
45
+ print(run["run_slug"])
46
+ client.close()
47
+ ```
48
+
49
+ To run a specific version:
50
+
51
+ ```python
52
+ run = client.scraper("SCRAPER_SLUG").run(
53
+ input={...},
54
+ version="v1.0.1",
55
+ )
56
+ ```
57
+
58
+ If `version` is not provided, CoreClaw uses the latest available version.
59
+
60
+ ## Get Run Results
61
+
62
+ ```python
63
+ from coreclaw import CoreClawClient
64
+
65
+ client = CoreClawClient(api_key="YOUR_API_KEY")
66
+
67
+ results = client.run("RUN_SLUG").list_results(limit=10, offset=0)
68
+
69
+ print(results["count"])
70
+ for item in results["list"]:
71
+ print(item)
72
+
73
+ client.close()
74
+ ```
75
+
76
+ ## Async Usage
77
+
78
+ Run a scraper:
79
+
80
+ ```python
81
+ from coreclaw import CoreClawAsyncClient
82
+
83
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
84
+ run = await client.scraper("SCRAPER_SLUG").run(input={...})
85
+ await client.close()
86
+ ```
87
+
88
+ Get run results:
89
+
90
+ ```python
91
+ from coreclaw import CoreClawAsyncClient
92
+
93
+ client = CoreClawAsyncClient(api_key="YOUR_API_KEY")
94
+ results = await client.run("RUN_SLUG").list_results(limit=10, offset=0)
95
+ await client.close()
96
+ ```
97
+
98
+ ## More
99
+
100
+ - Chinese README: [README.zh-CN.md](README.zh-CN.md)
101
+ - Run scraper demo: [examples/run_scraper.py](examples/run_scraper.py)
102
+ - Get results demo: [examples/get_results.py](examples/get_results.py)
103
+ - Async run scraper demo: [examples/async_run_scraper.py](examples/async_run_scraper.py)
104
+ - Async get results demo: [examples/async_get_results.py](examples/async_get_results.py)
@@ -0,0 +1,18 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/coreclaw/__init__.py
4
+ src/coreclaw/__main__.py
5
+ src/coreclaw/cli.py
6
+ src/coreclaw/client.py
7
+ src/coreclaw/exceptions.py
8
+ src/coreclaw/pagination.py
9
+ src/coreclaw/resources.py
10
+ src/coreclaw_client.egg-info/PKG-INFO
11
+ src/coreclaw_client.egg-info/SOURCES.txt
12
+ src/coreclaw_client.egg-info/dependency_links.txt
13
+ src/coreclaw_client.egg-info/entry_points.txt
14
+ src/coreclaw_client.egg-info/requires.txt
15
+ src/coreclaw_client.egg-info/top_level.txt
16
+ tests/test_client.py
17
+ tests/test_packaging.py
18
+ tests/test_version.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ coreclaw = coreclaw.cli:main
@@ -0,0 +1,268 @@
1
+ import asyncio
2
+ import json
3
+ import sys
4
+ import unittest
5
+ from pathlib import Path
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+
10
+
11
+ ROOT = Path(__file__).resolve().parents[1]
12
+ SRC = ROOT / "src"
13
+
14
+ if str(SRC) not in sys.path:
15
+ sys.path.insert(0, str(SRC))
16
+
17
+
18
class RecordingTransport(httpx.MockTransport):
    """Mock transport that stores every request and replies with fixed JSON."""

    def __init__(self, payload):
        self.payload = payload
        self.requests = []
        super().__init__(self._handler)

    def _handler(self, request):
        """Remember *request* and answer 200 with the configured payload."""
        self.requests.append(request)
        canned_response = httpx.Response(200, json=self.payload)
        return canned_response
27
+
28
+
29
class CoreClawClientTests(unittest.TestCase):
    """Exercise the synchronous client against a recording mock transport."""

    def _open_client(self, transport, **options):
        """Build a sync client that is closed even when the test fails.

        The close is registered with ``addCleanup`` so a failing assertion
        cannot leave the underlying HTTP client open.
        """
        from coreclaw import CoreClawClient

        client = CoreClawClient(api_key="key-123", transport=transport, **options)
        self.addCleanup(client.close)
        return client

    def test_sync_client_uses_official_base_url_by_default(self):
        transport = RecordingTransport(
            {"code": 0, "message": "success", "data": {"run_slug": "RUN123"}}
        )
        client = self._open_client(transport)

        client.scraper("SCRAPER123").run(input={"parameters": {}})

        request_url = transport.requests[0].url

        self.assertEqual(request_url.scheme, "https")
        self.assertEqual(request_url.host, "openapi.cafescraper.com")
        self.assertEqual(request_url.path, "/api/v1/scraper/run")

    def test_sync_scraper_run_maps_request_fields(self):
        transport = RecordingTransport(
            {"code": 0, "message": "success", "data": {"run_slug": "RUN123"}}
        )
        client = self._open_client(transport, base_url="https://api.coreclaw.example")

        result = client.scraper("SCRAPER123").run(
            input={"parameters": {"custom": {"keyword": "python"}}},
            version="v1.2.3",
            callback_url="https://example.com/callback",
            wait_for_finish=True,
            limit=25,
            offset=50,
        )

        request = transport.requests[0]
        body = json.loads(request.content)

        self.assertEqual(result, {"run_slug": "RUN123"})
        self.assertEqual(request.method, "POST")
        self.assertEqual(urlparse(str(request.url)).path, "/api/v1/scraper/run")
        self.assertEqual(request.headers["api-key"], "key-123")
        self.assertEqual(body["scraper_slug"], "SCRAPER123")
        self.assertEqual(body["input"], {"parameters": {"custom": {"keyword": "python"}}})
        self.assertEqual(body["version"], "v1.2.3")
        self.assertEqual(body["callback_url"], "https://example.com/callback")
        self.assertFalse(body["is_async"])
        self.assertEqual(body["page_index"], 3)
        self.assertEqual(body["page_size"], 25)

    def test_sync_scraper_run_omits_version_when_not_provided(self):
        transport = RecordingTransport(
            {"code": 0, "message": "success", "data": {"run_slug": "RUN123"}}
        )
        client = self._open_client(transport, base_url="https://api.coreclaw.example")

        client.scraper("SCRAPER123").run(input={"parameters": {}})

        body = json.loads(transport.requests[0].content)

        self.assertNotIn("version", body)
        self.assertTrue(body["is_async"])

    def test_sync_run_lists_results_with_limit_offset(self):
        payload = {
            "code": 0,
            "message": "success",
            "data": {
                "count": 2,
                "headers": [{"label": "Title", "key": "title", "format": "text"}],
                "list": [{"title": "A"}, {"title": "B"}],
                "page_index": 2,
                "page_size": 10,
            },
        }
        transport = RecordingTransport(payload)
        # The trailing slash checks that the base URL is normalised.
        client = self._open_client(transport, base_url="https://api.coreclaw.example/")

        result = client.run("RUN123").list_results(limit=10, offset=10)

        request = transport.requests[0]
        body = json.loads(request.content)

        self.assertEqual(result, payload["data"])
        self.assertEqual(urlparse(str(request.url)).path, "/api/v1/run/result/list")
        self.assertEqual(body, {"run_slug": "RUN123", "page_index": 2, "page_size": 10})

    def test_limit_offset_requires_aligned_offset(self):
        client = self._open_client(
            RecordingTransport({"code": 0, "message": "success", "data": {}}),
            base_url="https://api.coreclaw.example",
        )

        with self.assertRaisesRegex(ValueError, "offset must be divisible by limit"):
            client.run("RUN123").list_results(limit=10, offset=5)

    def test_api_error_is_raised_for_nonzero_code(self):
        from coreclaw import CoreClawAPIError

        transport = RecordingTransport(
            {"code": 50003, "message": "The script version is not available", "data": None}
        )
        client = self._open_client(transport, base_url="https://api.coreclaw.example")

        with self.assertRaises(CoreClawAPIError) as raised:
            client.scraper("SCRAPER123").run(
                input={"parameters": {}},
                version="v9.9.9",
            )

        self.assertEqual(raised.exception.code, 50003)
        self.assertEqual(
            str(raised.exception),
            "CoreClaw API error 50003: The script version is not available",
        )
175
+
176
+
177
class CoreClawAsyncClientTests(unittest.TestCase):
    """Exercise the asynchronous client against a recording mock transport.

    Each scenario opens the client with ``async with`` so it is closed even
    when an assertion fails part-way through.
    """

    def test_async_client_uses_official_base_url_by_default(self):
        from coreclaw import CoreClawAsyncClient

        async def scenario():
            transport = RecordingTransport(
                {"code": 0, "message": "success", "data": {"run_slug": "RUN123"}}
            )

            async with CoreClawAsyncClient(api_key="key-123", transport=transport) as client:
                await client.scraper("SCRAPER123").run(input={"parameters": {}})

                request_url = transport.requests[0].url

                self.assertEqual(request_url.scheme, "https")
                self.assertEqual(request_url.host, "openapi.cafescraper.com")
                self.assertEqual(request_url.path, "/api/v1/scraper/run")

        asyncio.run(scenario())

    def test_async_scraper_run_maps_request_fields(self):
        from coreclaw import CoreClawAsyncClient

        async def scenario():
            transport = RecordingTransport(
                {"code": 0, "message": "success", "data": {"run_slug": "RUN123"}}
            )

            async with CoreClawAsyncClient(
                api_key="key-123",
                base_url="https://api.coreclaw.example",
                transport=transport,
            ) as client:
                result = await client.scraper("SCRAPER123").run(
                    input={"parameters": {"custom": {"keyword": "python"}}},
                    limit=20,
                    offset=40,
                )

                request = transport.requests[0]
                body = json.loads(request.content)

                self.assertEqual(result, {"run_slug": "RUN123"})
                self.assertEqual(urlparse(str(request.url)).path, "/api/v1/scraper/run")
                self.assertEqual(request.headers["api-key"], "key-123")
                self.assertEqual(body["scraper_slug"], "SCRAPER123")
                self.assertTrue(body["is_async"])
                self.assertEqual(body["page_index"], 3)
                self.assertEqual(body["page_size"], 20)

        asyncio.run(scenario())

    def test_async_run_lists_results(self):
        from coreclaw import CoreClawAsyncClient

        async def scenario():
            payload = {
                "code": 0,
                "message": "success",
                "data": {
                    "count": 1,
                    "headers": [],
                    "list": [{"title": "A"}],
                    "page_index": 1,
                    "page_size": 50,
                },
            }
            transport = RecordingTransport(payload)

            async with CoreClawAsyncClient(
                api_key="key-123",
                base_url="https://api.coreclaw.example",
                transport=transport,
            ) as client:
                result = await client.run("RUN123").list_results(limit=50, offset=0)

                request = transport.requests[0]
                body = json.loads(request.content)

                self.assertEqual(result, payload["data"])
                self.assertEqual(urlparse(str(request.url)).path, "/api/v1/run/result/list")
                self.assertEqual(body, {"run_slug": "RUN123", "page_index": 1, "page_size": 50})

        asyncio.run(scenario())
265
+
266
+
267
+ if __name__ == "__main__":
268
+ unittest.main()
@@ -0,0 +1,27 @@
1
+ import sys
2
+ import tomllib
3
+ import unittest
4
+ from pathlib import Path
5
+
6
+
7
+ ROOT = Path(__file__).resolve().parents[1]
8
+ SRC = ROOT / "src"
9
+
10
+ if str(SRC) not in sys.path:
11
+ sys.path.insert(0, str(SRC))
12
+
13
+
14
class PackagingTests(unittest.TestCase):
    """Check the distribution name and the import package name."""

    def test_distribution_name_is_coreclaw_client(self):
        raw_toml = (ROOT / "pyproject.toml").read_text(encoding="utf-8")
        project_table = tomllib.loads(raw_toml)["project"]

        self.assertEqual(project_table["name"], "coreclaw-client")

    def test_import_package_remains_coreclaw(self):
        import coreclaw as imported_package

        self.assertEqual(imported_package.__name__, "coreclaw")
24
+
25
+
26
+ if __name__ == "__main__":
27
+ unittest.main()
@@ -0,0 +1,48 @@
1
+ import io
2
+ import subprocess
3
+ import sys
4
+ import unittest
5
+ from contextlib import redirect_stdout
6
+ from pathlib import Path
7
+
8
+
9
+ ROOT = Path(__file__).resolve().parents[1]
10
+ SRC = ROOT / "src"
11
+
12
+ if str(SRC) not in sys.path:
13
+ sys.path.insert(0, str(SRC))
14
+
15
+
16
class VersionTests(unittest.TestCase):
    """Verify the version exposed by the package, the CLI, and ``python -m``."""

    def test_package_exposes_version(self):
        import coreclaw

        self.assertEqual(coreclaw.__version__, "1.0.0")

    def test_cli_prints_version(self):
        from coreclaw.cli import main

        output = io.StringIO()

        with redirect_stdout(output):
            exit_code = main(["--version"])

        self.assertEqual(exit_code, 0)
        self.assertEqual(output.getvalue().strip(), "coreclaw 1.0.0")

    def test_module_prints_version(self):
        import os

        # Extend the inherited environment instead of replacing it: a child
        # started with only PYTHONPATH set can fail to launch on platforms
        # that need other variables (for example SYSTEMROOT on Windows).
        child_env = {**os.environ, "PYTHONPATH": str(SRC)}

        result = subprocess.run(
            [sys.executable, "-m", "coreclaw"],
            check=False,
            capture_output=True,
            cwd=ROOT,
            env=child_env,
            text=True,
        )

        self.assertEqual(result.returncode, 0, result.stderr)
        self.assertEqual(result.stdout.strip(), "coreclaw 1.0.0")
45
+
46
+
47
+ if __name__ == "__main__":
48
+ unittest.main()