hyperbrowser 0.6.0__tar.gz → 0.8.0__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of hyperbrowser has been flagged as potentially problematic.
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/PKG-INFO +7 -7
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/README.md +6 -6
- hyperbrowser-0.8.0/hyperbrowser/client/async_client.py +31 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/crawl.py +60 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/scrape.py +36 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/session.py +47 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/crawl.py +60 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/scrape.py +36 -0
- hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/session.py +45 -0
- hyperbrowser-0.8.0/hyperbrowser/client/sync.py +25 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/models/consts.py +2 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/models/crawl.py +21 -28
- hyperbrowser-0.8.0/hyperbrowser/models/scrape.py +82 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/models/session.py +22 -4
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/pyproject.toml +1 -1
- hyperbrowser-0.6.0/hyperbrowser/client/async_client.py +0 -97
- hyperbrowser-0.6.0/hyperbrowser/client/sync.py +0 -83
- hyperbrowser-0.6.0/hyperbrowser/models/scrape.py +0 -74
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/LICENSE +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/__init__.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/client/base.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/config.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/exceptions.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/async_transport.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/base.py +0 -0
- {hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/sync.py +0 -0
{hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hyperbrowser
-Version: 0.6.0
+Version: 0.8.0
 Summary: Python SDK for hyperbrowser
 Home-page: https://github.com/hyperbrowserai/python-sdk
 License: MIT
@@ -52,9 +52,9 @@ HYPERBROWSER_API_KEY = "test-key"
 
 async def main():
     async with AsyncHyperbrowser(api_key=HYPERBROWSER_API_KEY) as client:
-        session = await client.
+        session = await client.sessions.create()
 
-        ws_endpoint = session.
+        ws_endpoint = session.ws_endpoint
         browser = await connect(browserWSEndpoint=ws_endpoint, defaultViewport=None)
 
         # Get pages
@@ -72,7 +72,7 @@ async def main():
 
         await page.close()
         await browser.disconnect()
-        await client.
+        await client.sessions.stop(session.id)
         print("Session completed!")
 
 # Run the asyncio event loop
@@ -88,9 +88,9 @@ HYPERBROWSER_API_KEY = "test-key"
 
 def main():
     client = Hyperbrowser(api_key=HYPERBROWSER_API_KEY)
-    session = client.
+    session = client.sessions.create()
 
-    ws_endpoint = session.
+    ws_endpoint = session.ws_endpoint
 
     # Launch Playwright and connect to the remote browser
     with sync_playwright() as p:
@@ -112,7 +112,7 @@ def main():
         page.close()
         browser.close()
         print("Session completed!")
-        client.
+        client.sessions.stop(session.id)
 
 # Run the asyncio event loop
 main()
```
{hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/README.md

```diff
@@ -31,9 +31,9 @@ HYPERBROWSER_API_KEY = "test-key"
 
 async def main():
     async with AsyncHyperbrowser(api_key=HYPERBROWSER_API_KEY) as client:
-        session = await client.
+        session = await client.sessions.create()
 
-        ws_endpoint = session.
+        ws_endpoint = session.ws_endpoint
         browser = await connect(browserWSEndpoint=ws_endpoint, defaultViewport=None)
 
         # Get pages
@@ -51,7 +51,7 @@ async def main():
 
         await page.close()
         await browser.disconnect()
-        await client.
+        await client.sessions.stop(session.id)
         print("Session completed!")
 
 # Run the asyncio event loop
@@ -67,9 +67,9 @@ HYPERBROWSER_API_KEY = "test-key"
 
 def main():
     client = Hyperbrowser(api_key=HYPERBROWSER_API_KEY)
-    session = client.
+    session = client.sessions.create()
 
-    ws_endpoint = session.
+    ws_endpoint = session.ws_endpoint
 
     # Launch Playwright and connect to the remote browser
     with sync_playwright() as p:
@@ -91,7 +91,7 @@ def main():
         page.close()
         browser.close()
         print("Session completed!")
-        client.
+        client.sessions.stop(session.id)
 
 # Run the asyncio event loop
 main()
```
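The README snippets change because the flat 0.6.0 client methods were split into resource managers. A rough mapping, inferred from the deleted and added client modules later in this diff; `client`, `params`, `id`, and `job_id` are placeholders:

```python
# 0.6.0 flat client                      # 0.8.0 resource managers
# client.create_session(params)      ->  client.sessions.create(params)
# client.get_session(id)             ->  client.sessions.get(id)
# client.stop_session(id)            ->  client.sessions.stop(id)
# client.get_session_list(params)    ->  client.sessions.list(params)
# client.start_scrape_job(params)    ->  client.scrape.start(params)
# client.get_scrape_job(job_id)      ->  client.scrape.get(job_id)
# client.start_crawl_job(params)     ->  client.crawl.start(params)
# client.get_crawl_job(job_id, p)    ->  client.crawl.get(job_id, p)
# new in 0.8.0: client.scrape.start_and_wait(params),
#               client.crawl.start_and_wait(params),
#               client.sessions.get_recording(id)
```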
hyperbrowser-0.8.0/hyperbrowser/client/async_client.py

```diff
@@ -0,0 +1,31 @@
+from typing import Optional
+from .managers.async_manager.session import SessionManager
+from .managers.async_manager.scrape import ScrapeManager
+from .managers.async_manager.crawl import CrawlManager
+from .base import HyperbrowserBase
+from ..transport.async_transport import AsyncTransport
+from ..config import ClientConfig
+
+
+class AsyncHyperbrowser(HyperbrowserBase):
+    """Asynchronous Hyperbrowser client"""
+
+    def __init__(
+        self,
+        config: Optional[ClientConfig] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+    ):
+        super().__init__(AsyncTransport, config, api_key, base_url)
+        self.sessions = SessionManager(self)
+        self.scrape = ScrapeManager(self)
+        self.crawl = CrawlManager(self)
+
+    async def close(self) -> None:
+        await self.transport.close()
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
```
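A minimal usage sketch for the new async client, importing it by the module path shown above (the package may also re-export it at the top level); the API key is a placeholder. The client is an async context manager, so the transport is closed on exit:

```python
import asyncio

from hyperbrowser.client.async_client import AsyncHyperbrowser
from hyperbrowser.models.session import CreateSessionParams


async def main() -> None:
    # __aexit__ calls close(), which shuts down the underlying AsyncTransport.
    async with AsyncHyperbrowser(api_key="YOUR_API_KEY") as client:  # placeholder key
        session = await client.sessions.create(CreateSessionParams())
        try:
            print(session.id, session.ws_endpoint)
        finally:
            await client.sessions.stop(session.id)


asyncio.run(main())
```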
hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/crawl.py

```diff
@@ -0,0 +1,60 @@
+import asyncio
+from typing import Optional
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    async def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    async def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = await self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        while True:
+            job_response = await self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                break
+            await asyncio.sleep(2)
+
+        if not return_all_pages:
+            return job_response
+
+        while job_response.current_page_batch < job_response.total_page_batches:
+            tmp_job_response = await self.get(
+                job_start_resp.job_id,
+                GetCrawlJobParams(page=job_response.current_page_batch + 1),
+            )
+            if tmp_job_response.data:
+                job_response.data.extend(tmp_job_response.data)
+            job_response.current_page_batch = tmp_job_response.current_page_batch
+            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+            job_response.total_page_batches = tmp_job_response.total_page_batches
+            job_response.batch_size = tmp_job_response.batch_size
+            await asyncio.sleep(0.5)
+        return job_response
```
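The new `start_and_wait` helper polls `/crawl/{job_id}` every two seconds until the job reports `completed` or `failed`, then (unless `return_all_pages=False`) walks the remaining page batches and merges them into a single `CrawlJobResponse`. A sketch of calling it, with a placeholder API key and URL:

```python
import asyncio

from hyperbrowser.client.async_client import AsyncHyperbrowser
from hyperbrowser.models.crawl import StartCrawlJobParams


async def crawl_site() -> None:
    async with AsyncHyperbrowser(api_key="YOUR_API_KEY") as client:  # placeholder key
        job = await client.crawl.start_and_wait(
            StartCrawlJobParams(url="https://example.com", max_pages=5),
            return_all_pages=True,  # merge every page batch into job.data
        )
        print(job.status, job.total_crawled_pages, len(job.data))


asyncio.run(crawl_site())
```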
hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/scrape.py

```diff
@@ -0,0 +1,36 @@
+import asyncio
+from typing import Optional
+from ....models.scrape import (
+    ScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    async def get(self, job_id: str) -> ScrapeJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = await self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+        while True:
+            job_response = await self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                return job_response
+            await asyncio.sleep(2)
```
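`ScrapeManager.start_and_wait` uses the same two-second polling loop and returns as soon as the job finishes, with the scraped content under `data`. A sketch with placeholder values:

```python
import asyncio

from hyperbrowser.client.async_client import AsyncHyperbrowser
from hyperbrowser.models.scrape import StartScrapeJobParams


async def scrape_page() -> None:
    async with AsyncHyperbrowser(api_key="YOUR_API_KEY") as client:  # placeholder key
        job = await client.scrape.start_and_wait(
            StartScrapeJobParams(url="https://example.com")
        )
        if job.status == "completed" and job.data:
            print(job.data.markdown)
        else:
            print("scrape failed:", job.error)


asyncio.run(scrape_page())
```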
hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/session.py

```diff
@@ -0,0 +1,47 @@
+from typing import List
+from ....models.session import (
+    BasicResponse,
+    CreateSessionParams,
+    SessionDetail,
+    SessionListParams,
+    SessionListResponse,
+    SessionRecording,
+)
+
+
+class SessionManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def create(self, params: CreateSessionParams) -> SessionDetail:
+        response = await self._client.transport.post(
+            self._client._build_url("/session"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return SessionDetail(**response.data)
+
+    async def get(self, id: str) -> SessionDetail:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/session/{id}")
+        )
+        return SessionDetail(**response.data)
+
+    async def stop(self, id: str) -> BasicResponse:
+        response = await self._client.transport.put(
+            self._client._build_url(f"/session/{id}/stop")
+        )
+        return BasicResponse(**response.data)
+
+    async def list(
+        self, params: SessionListParams = SessionListParams()
+    ) -> SessionListResponse:
+        response = await self._client.transport.get(
+            self._client._build_url("/sessions"), params=params.__dict__
+        )
+        return SessionListResponse(**response.data)
+
+    async def get_recording(self, id: str) -> List[SessionRecording]:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/session/{id}/recording")
+        )
+        return [SessionRecording(**recording) for recording in response.data]
```
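A sketch of the session lifecycle exposed by the new manager (create, stop, list, fetch recordings); all values are placeholders, and `enable_web_recording` comes from the session-model change later in this diff:

```python
import asyncio

from hyperbrowser.client.async_client import AsyncHyperbrowser
from hyperbrowser.models.session import CreateSessionParams, SessionListParams


async def session_lifecycle() -> None:
    async with AsyncHyperbrowser(api_key="YOUR_API_KEY") as client:  # placeholder key
        session = await client.sessions.create(
            CreateSessionParams(enable_web_recording=True)
        )
        # ... drive a browser over session.ws_endpoint here ...
        await client.sessions.stop(session.id)

        recordings = await client.sessions.get_recording(session.id)
        print(f"{len(recordings)} recording events")

        listing = await client.sessions.list(SessionListParams())
        print(listing)


asyncio.run(session_lifecycle())
```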
hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/crawl.py

```diff
@@ -0,0 +1,60 @@
+import time
+from typing import Optional
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        while True:
+            job_response = self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                break
+            time.sleep(2)
+
+        if not return_all_pages:
+            return job_response
+
+        while job_response.current_page_batch < job_response.total_page_batches:
+            tmp_job_response = self.get(
+                job_start_resp.job_id,
+                GetCrawlJobParams(page=job_response.current_page_batch + 1),
+            )
+            if tmp_job_response.data:
+                job_response.data.extend(tmp_job_response.data)
+            job_response.current_page_batch = tmp_job_response.current_page_batch
+            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+            job_response.total_page_batches = tmp_job_response.total_page_batches
+            job_response.batch_size = tmp_job_response.batch_size
+            time.sleep(0.5)
+        return job_response
```
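The synchronous manager mirrors the async one using `time.sleep`. The lower-level `start`/`get` pair can also be polled by hand instead of `start_and_wait`, for example to report progress; a sketch with placeholder values:

```python
import time

from hyperbrowser.client.sync import Hyperbrowser
from hyperbrowser.models.crawl import GetCrawlJobParams, StartCrawlJobParams

client = Hyperbrowser(api_key="YOUR_API_KEY")  # placeholder key
started = client.crawl.start(
    StartCrawlJobParams(url="https://example.com", max_pages=5)
)

# Poll manually instead of start_and_wait(), printing progress as batches arrive.
while True:
    job = client.crawl.get(started.job_id, GetCrawlJobParams(batch_size=10))
    print(job.status, job.total_crawled_pages)
    if job.status in ("completed", "failed"):
        break
    time.sleep(2)

client.close()
```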
hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/scrape.py

```diff
@@ -0,0 +1,36 @@
+import time
+from typing import Optional
+from ....models.scrape import (
+    ScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    def get(self, job_id: str) -> ScrapeJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+        while True:
+            job_response = self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                return job_response
+            time.sleep(2)
```
hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/session.py

```diff
@@ -0,0 +1,45 @@
+from typing import List
+from ....models.session import (
+    BasicResponse,
+    CreateSessionParams,
+    SessionDetail,
+    SessionListParams,
+    SessionListResponse,
+    SessionRecording,
+)
+
+
+class SessionManager:
+    def __init__(self, client):
+        self._client = client
+
+    def create(self, params: CreateSessionParams) -> SessionDetail:
+        response = self._client.transport.post(
+            self._client._build_url("/session"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return SessionDetail(**response.data)
+
+    def get(self, id: str) -> SessionDetail:
+        response = self._client.transport.get(self._client._build_url(f"/session/{id}"))
+        return SessionDetail(**response.data)
+
+    def stop(self, id: str) -> BasicResponse:
+        response = self._client.transport.put(
+            self._client._build_url(f"/session/{id}/stop")
+        )
+        return BasicResponse(**response.data)
+
+    def list(
+        self, params: SessionListParams = SessionListParams()
+    ) -> SessionListResponse:
+        response = self._client.transport.get(
+            self._client._build_url("/sessions"), params=params.__dict__
+        )
+        return SessionListResponse(**response.data)
+
+    def get_recording(self, id: str) -> List[SessionRecording]:
+        response = self._client.transport.get(
+            self._client._build_url(f"/session/{id}/recording")
+        )
+        return [SessionRecording(**recording) for recording in response.data]
```
hyperbrowser-0.8.0/hyperbrowser/client/sync.py

```diff
@@ -0,0 +1,25 @@
+from typing import Optional
+from .managers.sync_manager.session import SessionManager
+from .managers.sync_manager.scrape import ScrapeManager
+from .managers.sync_manager.crawl import CrawlManager
+from .base import HyperbrowserBase
+from ..transport.sync import SyncTransport
+from ..config import ClientConfig
+
+
+class Hyperbrowser(HyperbrowserBase):
+    """Synchronous Hyperbrowser client"""
+
+    def __init__(
+        self,
+        config: Optional[ClientConfig] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+    ):
+        super().__init__(SyncTransport, config, api_key, base_url)
+        self.sessions = SessionManager(self)
+        self.scrape = ScrapeManager(self)
+        self.crawl = CrawlManager(self)
+
+    def close(self) -> None:
+        self.transport.close()
```
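Unlike `AsyncHyperbrowser`, the synchronous client added here defines no `__enter__`/`__exit__`, so the transport should be released explicitly. A sketch with placeholder values:

```python
from hyperbrowser.client.sync import Hyperbrowser
from hyperbrowser.models.session import CreateSessionParams

client = Hyperbrowser(api_key="YOUR_API_KEY")  # placeholder key
try:
    session = client.sessions.create(CreateSessionParams())
    print(session.id, session.ws_endpoint)
    client.sessions.stop(session.id)
finally:
    client.close()  # no context-manager support on the sync client
```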
{hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/models/crawl.py

```diff
@@ -1,7 +1,11 @@
-from typing import List, Literal, Optional
+from typing import List, Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Field
 
+from hyperbrowser.models.scrape import ScrapeOptions
+from hyperbrowser.models.session import CreateSessionParams
+
 CrawlJobStatus = Literal["pending", "running", "completed", "failed"]
+CrawlPageStatus = Literal["completed", "failed"]
 
 
 class StartCrawlJobParams(BaseModel):
@@ -14,16 +18,21 @@ class StartCrawlJobParams(BaseModel):
     )
 
     url: str
-    max_pages: int = Field(default=10, ge=1,
+    max_pages: int = Field(default=10, ge=1, serialization_alias="maxPages")
     follow_links: bool = Field(default=True, serialization_alias="followLinks")
+    ignore_sitemap: bool = Field(default=False, serialization_alias="ignoreSitemap")
     exclude_patterns: List[str] = Field(
         default=[], serialization_alias="excludePatterns"
     )
     include_patterns: List[str] = Field(
         default=[], serialization_alias="includePatterns"
     )
-
-
+    session_options: Optional[CreateSessionParams] = Field(
+        default=None, serialization_alias="sessionOptions"
+    )
+    scrape_options: Optional[ScrapeOptions] = Field(
+        default=None, serialization_alias="scrapeOptions"
+    )
 
 
 class StartCrawlJobResponse(BaseModel):
@@ -38,35 +47,18 @@ class StartCrawlJobResponse(BaseModel):
     job_id: str = Field(alias="jobId")
 
 
-class CrawledPageMetadata(BaseModel):
-    """
-    Metadata for the crawled page.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    title: str
-    description: str
-    robots: str
-    og_title: str = Field(alias="ogTitle")
-    og_description: str = Field(alias="ogDescription")
-    og_url: str = Field(alias="ogUrl")
-    og_image: str = Field(alias="ogImage")
-    og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-    og_site_name: str = Field(alias="ogSiteName")
-    source_url: str = Field(alias="sourceURL")
-
-
 class CrawledPage(BaseModel):
     """
     Data from a crawled page.
     """
 
-    metadata:
-
+    metadata: Optional[dict[str, Union[str, list[str]]]] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    links: Optional[List[str]] = None
     url: str
+    status: CrawlPageStatus
+    error: Optional[str] = None
 
 
 class GetCrawlJobParams(BaseModel):
@@ -76,7 +68,7 @@ class GetCrawlJobParams(BaseModel):
 
     page: Optional[int] = Field(default=None, serialization_alias="page")
     batch_size: Optional[int] = Field(
-        default=
+        default=20, ge=1, le=30, serialization_alias="batchSize"
     )
 
 
@@ -89,6 +81,7 @@ class CrawlJobResponse(BaseModel):
         populate_by_alias=True,
    )
 
+    job_id: str = Field(alias="jobId")
     status: CrawlJobStatus
     error: Optional[str] = None
     data: List[CrawledPage] = Field(alias="data")
```
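A sketch of building the extended `StartCrawlJobParams` and reading the reshaped `CrawledPage` (per-page `status`/`error`, metadata as a plain dict); values are illustrative only:

```python
from hyperbrowser.models.crawl import CrawledPage, StartCrawlJobParams
from hyperbrowser.models.session import CreateSessionParams

params = StartCrawlJobParams(
    url="https://example.com",
    max_pages=5,
    ignore_sitemap=True,
    session_options=CreateSessionParams(adblock=True),
)
# The managers serialize with by_alias=True, so the request body carries the
# camelCase aliases (maxPages, ignoreSitemap, sessionOptions, ...).
print(params.model_dump(exclude_none=True, by_alias=True))

# Each crawled page now carries its own status/error, and metadata is a plain
# dict instead of the removed CrawledPageMetadata model.
page = CrawledPage(
    url="https://example.com", status="completed", metadata={"title": "Example"}
)
print(page.status, (page.metadata or {}).get("title"))
```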
hyperbrowser-0.8.0/hyperbrowser/models/scrape.py

```diff
@@ -0,0 +1,82 @@
+from typing import List, Literal, Optional, Union
+from pydantic import BaseModel, ConfigDict, Field
+
+from hyperbrowser.models.consts import ScrapeFormat
+from hyperbrowser.models.session import CreateSessionParams
+
+ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
+
+
+class ScrapeOptions(BaseModel):
+    """
+    Options for scraping a page.
+    """
+
+    formats: Optional[List[ScrapeFormat]] = None
+    include_tags: Optional[List[str]] = Field(
+        default=None, serialization_alias="includeTags"
+    )
+    exclude_tags: Optional[List[str]] = Field(
+        default=None, serialization_alias="excludeTags"
+    )
+    only_main_content: Optional[bool] = Field(
+        default=None, serialization_alias="onlyMainContent"
+    )
+    wait_for: Optional[int] = Field(default=None, serialization_alias="waitFor")
+    timeout: Optional[int] = Field(default=None, serialization_alias="timeout")
+
+
+class StartScrapeJobParams(BaseModel):
+    """
+    Parameters for creating a new scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    url: str
+    session_options: Optional[CreateSessionParams] = Field(
+        default=None, serialization_alias="sessionOptions"
+    )
+    scrape_options: Optional[ScrapeOptions] = Field(
+        default=None, serialization_alias="scrapeOptions"
+    )
+
+
+class StartScrapeJobResponse(BaseModel):
+    """
+    Response from creating a scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    job_id: str = Field(alias="jobId")
+
+
+class ScrapeJobData(BaseModel):
+    """
+    Data from a scraped site.
+    """
+
+    metadata: Optional[dict[str, Union[str, list[str]]]] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    links: Optional[List[str]] = None
+
+
+class ScrapeJobResponse(BaseModel):
+    """
+    Response from getting a scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    job_id: str = Field(alias="jobId")
+    status: ScrapeJobStatus
+    error: Optional[str] = None
+    data: Optional[ScrapeJobData] = None
```
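A sketch of the new scrape models; `formats` is omitted because its `ScrapeFormat` values live in `hyperbrowser/models/consts.py`, whose two-line change is not shown in this section. Values are illustrative:

```python
from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams
from hyperbrowser.models.session import CreateSessionParams

options = ScrapeOptions(
    only_main_content=True,
    exclude_tags=["nav", "footer"],
    wait_for=1000,    # the diff does not document the unit; milliseconds is assumed
    timeout=30000,
)
params = StartScrapeJobParams(
    url="https://example.com",
    session_options=CreateSessionParams(adblock=True),
    scrape_options=options,
)
# Serialized the way the managers send it, using the camelCase aliases
# (onlyMainContent, excludeTags, waitFor, sessionOptions, scrapeOptions).
print(params.model_dump(exclude_none=True, by_alias=True))
```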
{hyperbrowser-0.6.0 → hyperbrowser-0.8.0}/hyperbrowser/models/session.py

```diff
@@ -1,4 +1,4 @@
-from typing import List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional, Union
 from datetime import datetime
 from pydantic import BaseModel, Field, ConfigDict, field_validator
 
@@ -50,7 +50,7 @@ class SessionDetail(Session):
     Detailed session information including websocket endpoint.
     """
 
-
+    ws_endpoint: Optional[str] = Field(alias="wsEndpoint", default=None)
 
 
 class SessionListParams(BaseModel):
@@ -96,8 +96,8 @@ class ScreenConfig(BaseModel):
     Screen configuration parameters for browser session.
     """
 
-    width: int = Field(default=1280,
-    height: int = Field(default=720,
+    width: int = Field(default=1280, serialization_alias="width")
+    height: int = Field(default=720, serialization_alias="height")
 
 
 class CreateSessionParams(BaseModel):
@@ -132,3 +132,21 @@ class CreateSessionParams(BaseModel):
     adblock: bool = Field(default=False, serialization_alias="adblock")
     trackers: bool = Field(default=False, serialization_alias="trackers")
     annoyances: bool = Field(default=False, serialization_alias="annoyances")
+    enable_web_recording: Optional[bool] = Field(
+        default=False, serialization_alias="enableWebRecording"
+    )
+
+
+class SessionRecording(BaseModel):
+    """
+    Model for session recording data.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    type: int
+    data: Any
+    timestamp: int
+    delay: Optional[int] = None
```
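A sketch of the new session fields; the structure of a recording event's `data` payload is not documented in this diff, so the value below is purely illustrative:

```python
from hyperbrowser.models.session import CreateSessionParams, SessionRecording

# New in 0.8.0: opt into recording when creating a session.
params = CreateSessionParams(enable_web_recording=True)
print(params.model_dump(exclude_none=True, by_alias=True))  # includes 'enableWebRecording': True

# sessions.get_recording(id) returns a list of these events.
event = SessionRecording(
    type=2, data={"href": "https://example.com"}, timestamp=1700000000000
)
print(event.type, event.timestamp, event.delay)
```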
hyperbrowser-0.6.0/hyperbrowser/client/async_client.py

```diff
@@ -1,97 +0,0 @@
-from typing import Optional
-
-from hyperbrowser.models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from hyperbrowser.models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ..transport.async_transport import AsyncTransport
-from .base import HyperbrowserBase
-from ..models.session import (
-    BasicResponse,
-    CreateSessionParams,
-    SessionDetail,
-    SessionListParams,
-    SessionListResponse,
-)
-from ..config import ClientConfig
-
-
-class AsyncHyperbrowser(HyperbrowserBase):
-    """Asynchronous Hyperbrowser client"""
-
-    def __init__(
-        self,
-        config: Optional[ClientConfig] = None,
-        api_key: Optional[str] = None,
-        base_url: Optional[str] = None,
-    ):
-        super().__init__(AsyncTransport, config, api_key, base_url)
-
-    async def create_session(self, params: CreateSessionParams) -> SessionDetail:
-        response = await self.transport.post(
-            self._build_url("/session"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return SessionDetail(**response.data)
-
-    async def get_session(self, id: str) -> SessionDetail:
-        response = await self.transport.get(self._build_url(f"/session/{id}"))
-        return SessionDetail(**response.data)
-
-    async def stop_session(self, id: str) -> BasicResponse:
-        response = await self.transport.put(self._build_url(f"/session/{id}/stop"))
-        return BasicResponse(**response.data)
-
-    async def get_session_list(
-        self, params: SessionListParams = SessionListParams()
-    ) -> SessionListResponse:
-        response = await self.transport.get(
-            self._build_url("/sessions"), params=params.__dict__
-        )
-        return SessionListResponse(**response.data)
-
-    async def start_scrape_job(
-        self, params: StartScrapeJobParams
-    ) -> StartScrapeJobResponse:
-        response = await self.transport.post(
-            self._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    async def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-        response = await self.transport.get(self._build_url(f"/scrape/{job_id}"))
-        return ScrapeJobResponse(**response.data)
-
-    async def start_crawl_job(
-        self, params: StartCrawlJobParams
-    ) -> StartCrawlJobResponse:
-        response = await self.transport.post(
-            self._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    async def get_crawl_job(
-        self, job_id: str, params: GetCrawlJobParams
-    ) -> CrawlJobResponse:
-        response = await self.transport.get(
-            self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
-
-    async def close(self) -> None:
-        await self.transport.close()
-
-    async def __aenter__(self):
-        return self
-
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
-        await self.close()
```
hyperbrowser-0.6.0/hyperbrowser/client/sync.py

```diff
@@ -1,83 +0,0 @@
-from typing import Optional
-
-from hyperbrowser.models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from hyperbrowser.models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ..transport.sync import SyncTransport
-from .base import HyperbrowserBase
-from ..models.session import (
-    BasicResponse,
-    CreateSessionParams,
-    SessionDetail,
-    SessionListParams,
-    SessionListResponse,
-)
-from ..config import ClientConfig
-
-
-class Hyperbrowser(HyperbrowserBase):
-    """Synchronous Hyperbrowser client"""
-
-    def __init__(
-        self,
-        config: Optional[ClientConfig] = None,
-        api_key: Optional[str] = None,
-        base_url: Optional[str] = None,
-    ):
-        super().__init__(SyncTransport, config, api_key, base_url)
-
-    def create_session(self, params: CreateSessionParams) -> SessionDetail:
-        response = self.transport.post(
-            self._build_url("/session"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return SessionDetail(**response.data)
-
-    def get_session(self, id: str) -> SessionDetail:
-        response = self.transport.get(self._build_url(f"/session/{id}"))
-        return SessionDetail(**response.data)
-
-    def stop_session(self, id: str) -> BasicResponse:
-        response = self.transport.put(self._build_url(f"/session/{id}/stop"))
-        return BasicResponse(**response.data)
-
-    def get_session_list(self, params: SessionListParams) -> SessionListResponse:
-        response = self.transport.get(
-            self._build_url("/sessions"), params=params.__dict__
-        )
-        return SessionListResponse(**response.data)
-
-    def start_scrape_job(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
-        response = self.transport.post(
-            self._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-        response = self.transport.get(self._build_url(f"/scrape/{job_id}"))
-        return ScrapeJobResponse(**response.data)
-
-    def start_crawl_job(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
-        response = self.transport.post(
-            self._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    def get_crawl_job(self, job_id: str, params: GetCrawlJobParams) -> CrawlJobResponse:
-        response = self.transport.get(
-            self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
-
-    def close(self) -> None:
-        self.transport.close()
```
hyperbrowser-0.6.0/hyperbrowser/models/scrape.py

```diff
@@ -1,74 +0,0 @@
-from typing import List, Literal, Optional
-from pydantic import BaseModel, ConfigDict, Field
-
-ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
-
-
-class StartScrapeJobParams(BaseModel):
-    """
-    Parameters for creating a new scrape job.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    url: str
-    use_proxy: bool = Field(default=False, serialization_alias="useProxy")
-    solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
-
-
-class StartScrapeJobResponse(BaseModel):
-    """
-    Response from creating a scrape job.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    job_id: str = Field(alias="jobId")
-
-
-class ScrapeJobMetadata(BaseModel):
-    """
-    Metadata for the scraped site.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    title: str
-    description: str
-    robots: str
-    og_title: str = Field(alias="ogTitle")
-    og_description: str = Field(alias="ogDescription")
-    og_url: str = Field(alias="ogUrl")
-    og_image: str = Field(alias="ogImage")
-    og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-    og_site_name: str = Field(alias="ogSiteName")
-    source_url: str = Field(alias="sourceURL")
-
-
-class ScrapeJobData(BaseModel):
-    """
-    Data from a scraped site.
-    """
-
-    metadata: ScrapeJobMetadata
-    markdown: str
-
-
-class ScrapeJobResponse(BaseModel):
-    """
-    Response from getting a scrape job.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    status: ScrapeJobStatus
-    error: Optional[str] = None
-    data: Optional[ScrapeJobData] = None
```
The remaining eight files are unchanged between the two versions: LICENSE, hyperbrowser/__init__.py, hyperbrowser/client/base.py, hyperbrowser/config.py, hyperbrowser/exceptions.py, hyperbrowser/transport/async_transport.py, hyperbrowser/transport/base.py, and hyperbrowser/transport/sync.py.