hyperbrowser 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)
  1. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/PKG-INFO +7 -7
  2. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/README.md +6 -6
  3. hyperbrowser-0.8.0/hyperbrowser/client/async_client.py +31 -0
  4. hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/crawl.py +60 -0
  5. hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/scrape.py +36 -0
  6. hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/session.py +47 -0
  7. hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/crawl.py +60 -0
  8. hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/scrape.py +36 -0
  9. hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/session.py +45 -0
  10. hyperbrowser-0.8.0/hyperbrowser/client/sync.py +25 -0
  11. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/consts.py +2 -0
  12. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/crawl.py +21 -28
  13. hyperbrowser-0.8.0/hyperbrowser/models/scrape.py +82 -0
  14. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/session.py +22 -4
  15. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/pyproject.toml +1 -1
  16. hyperbrowser-0.7.0/hyperbrowser/client/async_client.py +0 -97
  17. hyperbrowser-0.7.0/hyperbrowser/client/sync.py +0 -85
  18. hyperbrowser-0.7.0/hyperbrowser/models/scrape.py +0 -74
  19. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/LICENSE +0 -0
  20. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/__init__.py +0 -0
  21. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/client/base.py +0 -0
  22. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/config.py +0 -0
  23. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/exceptions.py +0 -0
  24. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/async_transport.py +0 -0
  25. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/base.py +0 -0
  26. {hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/transport/sync.py +0 -0
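
The README changes below show the main breaking change in 0.8.0: the flat client.create_session() / client.stop_session() methods are replaced by resource managers (client.sessions, client.scrape, client.crawl), and SessionDetail.websocket_url is renamed to ws_endpoint. A minimal sketch of the new synchronous session lifecycle, assuming the import paths from the file list above (the package root may also re-export these names) and a placeholder API key:

    # Sketch only: session lifecycle against the 0.8.0 manager-based API.
    from hyperbrowser.client.sync import Hyperbrowser
    from hyperbrowser.models.session import CreateSessionParams

    client = Hyperbrowser(api_key="your-api-key")  # placeholder key
    session = client.sessions.create(CreateSessionParams())
    print(session.ws_endpoint)  # hand this endpoint to Playwright/Puppeteer, as in the README examples
    client.sessions.stop(session.id)
    client.close()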

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: hyperbrowser
- Version: 0.7.0
+ Version: 0.8.0
  Summary: Python SDK for hyperbrowser
  Home-page: https://github.com/hyperbrowserai/python-sdk
  License: MIT
@@ -52,9 +52,9 @@ HYPERBROWSER_API_KEY = "test-key"

  async def main():
      async with AsyncHyperbrowser(api_key=HYPERBROWSER_API_KEY) as client:
-         session = await client.create_session()
+         session = await client.sessions.create()

-         ws_endpoint = session.websocket_url
+         ws_endpoint = session.ws_endpoint
          browser = await connect(browserWSEndpoint=ws_endpoint, defaultViewport=None)

          # Get pages
@@ -72,7 +72,7 @@ async def main():

          await page.close()
          await browser.disconnect()
-         await client.stop_session(session.id)
+         await client.sessions.stop(session.id)
          print("Session completed!")

  # Run the asyncio event loop
@@ -88,9 +88,9 @@ HYPERBROWSER_API_KEY = "test-key"

  def main():
      client = Hyperbrowser(api_key=HYPERBROWSER_API_KEY)
-     session = client.create_session()
+     session = client.sessions.create()

-     ws_endpoint = session.websocket_url
+     ws_endpoint = session.ws_endpoint

      # Launch Playwright and connect to the remote browser
      with sync_playwright() as p:
@@ -112,7 +112,7 @@ def main():
          page.close()
          browser.close()
          print("Session completed!")
-         client.stop_session(session.id)
+         client.sessions.stop(session.id)

  # Run the asyncio event loop
  main()

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/README.md

@@ -31,9 +31,9 @@ HYPERBROWSER_API_KEY = "test-key"

  async def main():
      async with AsyncHyperbrowser(api_key=HYPERBROWSER_API_KEY) as client:
-         session = await client.create_session()
+         session = await client.sessions.create()

-         ws_endpoint = session.websocket_url
+         ws_endpoint = session.ws_endpoint
          browser = await connect(browserWSEndpoint=ws_endpoint, defaultViewport=None)

          # Get pages
@@ -51,7 +51,7 @@ async def main():

          await page.close()
          await browser.disconnect()
-         await client.stop_session(session.id)
+         await client.sessions.stop(session.id)
          print("Session completed!")

  # Run the asyncio event loop
@@ -67,9 +67,9 @@ HYPERBROWSER_API_KEY = "test-key"

  def main():
      client = Hyperbrowser(api_key=HYPERBROWSER_API_KEY)
-     session = client.create_session()
+     session = client.sessions.create()

-     ws_endpoint = session.websocket_url
+     ws_endpoint = session.ws_endpoint

      # Launch Playwright and connect to the remote browser
      with sync_playwright() as p:
@@ -91,7 +91,7 @@ def main():
          page.close()
          browser.close()
          print("Session completed!")
-         client.sessions.stop(session.id)
+         client.sessions.stop(session.id)

  # Run the asyncio event loop
  main()

hyperbrowser-0.8.0/hyperbrowser/client/async_client.py

@@ -0,0 +1,31 @@
+ from typing import Optional
+ from .managers.async_manager.session import SessionManager
+ from .managers.async_manager.scrape import ScrapeManager
+ from .managers.async_manager.crawl import CrawlManager
+ from .base import HyperbrowserBase
+ from ..transport.async_transport import AsyncTransport
+ from ..config import ClientConfig
+
+
+ class AsyncHyperbrowser(HyperbrowserBase):
+     """Asynchronous Hyperbrowser client"""
+
+     def __init__(
+         self,
+         config: Optional[ClientConfig] = None,
+         api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+     ):
+         super().__init__(AsyncTransport, config, api_key, base_url)
+         self.sessions = SessionManager(self)
+         self.scrape = ScrapeManager(self)
+         self.crawl = CrawlManager(self)
+
+     async def close(self) -> None:
+         await self.transport.close()
+
+     async def __aenter__(self):
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         await self.close()
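
Because the new AsyncHyperbrowser wires up the managers in __init__ and closes its transport in __aexit__, it can be driven as an async context manager. A usage sketch, with an assumed placeholder API key:

    import asyncio
    from hyperbrowser.client.async_client import AsyncHyperbrowser
    from hyperbrowser.models.session import CreateSessionParams

    async def main():
        # Exiting the block calls close(), which shuts down the async transport.
        async with AsyncHyperbrowser(api_key="your-api-key") as client:  # placeholder key
            session = await client.sessions.create(CreateSessionParams())
            await client.sessions.stop(session.id)

    asyncio.run(main())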

hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/crawl.py

@@ -0,0 +1,60 @@
+ import asyncio
+ from typing import Optional
+ from ....models.crawl import (
+     CrawlJobResponse,
+     GetCrawlJobParams,
+     StartCrawlJobParams,
+     StartCrawlJobResponse,
+ )
+ from ....exceptions import HyperbrowserError
+
+
+ class CrawlManager:
+     def __init__(self, client):
+         self._client = client
+
+     async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+         response = await self._client.transport.post(
+             self._client._build_url("/crawl"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartCrawlJobResponse(**response.data)
+
+     async def get(
+         self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+     ) -> CrawlJobResponse:
+         response = await self._client.transport.get(
+             self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+         )
+         return CrawlJobResponse(**response.data)
+
+     async def start_and_wait(
+         self, params: StartCrawlJobParams, return_all_pages: bool = True
+     ) -> CrawlJobResponse:
+         job_start_resp = await self.start(params)
+         if not job_start_resp.job_id:
+             raise HyperbrowserError("Failed to start crawl job")
+
+         job_response: CrawlJobResponse
+         while True:
+             job_response = await self.get(job_start_resp.job_id)
+             if job_response.status == "completed" or job_response.status == "failed":
+                 break
+             await asyncio.sleep(2)
+
+         if not return_all_pages:
+             return job_response
+
+         while job_response.current_page_batch < job_response.total_page_batches:
+             tmp_job_response = await self.get(
+                 job_start_resp.job_id,
+                 GetCrawlJobParams(page=job_response.current_page_batch + 1),
+             )
+             if tmp_job_response.data:
+                 job_response.data.extend(tmp_job_response.data)
+             job_response.current_page_batch = tmp_job_response.current_page_batch
+             job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+             job_response.total_page_batches = tmp_job_response.total_page_batches
+             job_response.batch_size = tmp_job_response.batch_size
+             await asyncio.sleep(0.5)
+         return job_response
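
start_and_wait polls the job every 2 seconds and, when return_all_pages is true, keeps requesting page batches until current_page_batch reaches total_page_batches. A usage sketch; the URL and page count are illustrative, and client is assumed to be an AsyncHyperbrowser instance:

    from hyperbrowser.models.crawl import StartCrawlJobParams

    async def crawl_site(client):
        # Waits (asynchronously) for the crawl to finish, then returns every batch of pages.
        job = await client.crawl.start_and_wait(
            StartCrawlJobParams(url="https://example.com", max_pages=25)  # illustrative values
        )
        for page in job.data:
            print(page.url, page.status)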

hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/scrape.py

@@ -0,0 +1,36 @@
+ import asyncio
+ from typing import Optional
+ from ....models.scrape import (
+     ScrapeJobResponse,
+     StartScrapeJobParams,
+     StartScrapeJobResponse,
+ )
+ from ....exceptions import HyperbrowserError
+
+
+ class ScrapeManager:
+     def __init__(self, client):
+         self._client = client
+
+     async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+         response = await self._client.transport.post(
+             self._client._build_url("/scrape"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartScrapeJobResponse(**response.data)
+
+     async def get(self, job_id: str) -> ScrapeJobResponse:
+         response = await self._client.transport.get(
+             self._client._build_url(f"/scrape/{job_id}")
+         )
+         return ScrapeJobResponse(**response.data)
+
+     async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+         job_start_resp = await self.start(params)
+         if not job_start_resp.job_id:
+             raise HyperbrowserError("Failed to start scrape job")
+         while True:
+             job_response = await self.get(job_start_resp.job_id)
+             if job_response.status == "completed" or job_response.status == "failed":
+                 return job_response
+             await asyncio.sleep(2)

hyperbrowser-0.8.0/hyperbrowser/client/managers/async_manager/session.py

@@ -0,0 +1,47 @@
+ from typing import List
+ from ....models.session import (
+     BasicResponse,
+     CreateSessionParams,
+     SessionDetail,
+     SessionListParams,
+     SessionListResponse,
+     SessionRecording,
+ )
+
+
+ class SessionManager:
+     def __init__(self, client):
+         self._client = client
+
+     async def create(self, params: CreateSessionParams) -> SessionDetail:
+         response = await self._client.transport.post(
+             self._client._build_url("/session"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return SessionDetail(**response.data)
+
+     async def get(self, id: str) -> SessionDetail:
+         response = await self._client.transport.get(
+             self._client._build_url(f"/session/{id}")
+         )
+         return SessionDetail(**response.data)
+
+     async def stop(self, id: str) -> BasicResponse:
+         response = await self._client.transport.put(
+             self._client._build_url(f"/session/{id}/stop")
+         )
+         return BasicResponse(**response.data)
+
+     async def list(
+         self, params: SessionListParams = SessionListParams()
+     ) -> SessionListResponse:
+         response = await self._client.transport.get(
+             self._client._build_url("/sessions"), params=params.__dict__
+         )
+         return SessionListResponse(**response.data)
+
+     async def get_recording(self, id: str) -> List[SessionRecording]:
+         response = await self._client.transport.get(
+             self._client._build_url(f"/session/{id}/recording")
+         )
+         return [SessionRecording(**recording) for recording in response.data]
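
The manager keeps the old session endpoints (create, get, stop, list) and adds a recording lookup. A sketch of listing sessions and fetching a recording, given an AsyncHyperbrowser instance; the session id is illustrative:

    from hyperbrowser.models.session import SessionListParams

    async def inspect_sessions(client, session_id):
        sessions = await client.sessions.list(SessionListParams())
        detail = await client.sessions.get(session_id)
        recording = await client.sessions.get_recording(session_id)  # list of SessionRecording events
        return sessions, detail, recording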

hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/crawl.py

@@ -0,0 +1,60 @@
+ import time
+ from typing import Optional
+ from ....models.crawl import (
+     CrawlJobResponse,
+     GetCrawlJobParams,
+     StartCrawlJobParams,
+     StartCrawlJobResponse,
+ )
+ from ....exceptions import HyperbrowserError
+
+
+ class CrawlManager:
+     def __init__(self, client):
+         self._client = client
+
+     def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+         response = self._client.transport.post(
+             self._client._build_url("/crawl"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartCrawlJobResponse(**response.data)
+
+     def get(
+         self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+     ) -> CrawlJobResponse:
+         response = self._client.transport.get(
+             self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+         )
+         return CrawlJobResponse(**response.data)
+
+     def start_and_wait(
+         self, params: StartCrawlJobParams, return_all_pages: bool = True
+     ) -> CrawlJobResponse:
+         job_start_resp = self.start(params)
+         if not job_start_resp.job_id:
+             raise HyperbrowserError("Failed to start crawl job")
+
+         job_response: CrawlJobResponse
+         while True:
+             job_response = self.get(job_start_resp.job_id)
+             if job_response.status == "completed" or job_response.status == "failed":
+                 break
+             time.sleep(2)
+
+         if not return_all_pages:
+             return job_response
+
+         while job_response.current_page_batch < job_response.total_page_batches:
+             tmp_job_response = self.get(
+                 job_start_resp.job_id,
+                 GetCrawlJobParams(page=job_response.current_page_batch + 1),
+             )
+             if tmp_job_response.data:
+                 job_response.data.extend(tmp_job_response.data)
+             job_response.current_page_batch = tmp_job_response.current_page_batch
+             job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+             job_response.total_page_batches = tmp_job_response.total_page_batches
+             job_response.batch_size = tmp_job_response.batch_size
+             time.sleep(0.5)
+         return job_response

hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/scrape.py

@@ -0,0 +1,36 @@
+ import time
+ from typing import Optional
+ from ....models.scrape import (
+     ScrapeJobResponse,
+     StartScrapeJobParams,
+     StartScrapeJobResponse,
+ )
+ from ....exceptions import HyperbrowserError
+
+
+ class ScrapeManager:
+     def __init__(self, client):
+         self._client = client
+
+     def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+         response = self._client.transport.post(
+             self._client._build_url("/scrape"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartScrapeJobResponse(**response.data)
+
+     def get(self, job_id: str) -> ScrapeJobResponse:
+         response = self._client.transport.get(
+             self._client._build_url(f"/scrape/{job_id}")
+         )
+         return ScrapeJobResponse(**response.data)
+
+     def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+         job_start_resp = self.start(params)
+         if not job_start_resp.job_id:
+             raise HyperbrowserError("Failed to start scrape job")
+         while True:
+             job_response = self.get(job_start_resp.job_id)
+             if job_response.status == "completed" or job_response.status == "failed":
+                 return job_response
+             time.sleep(2)

hyperbrowser-0.8.0/hyperbrowser/client/managers/sync_manager/session.py

@@ -0,0 +1,45 @@
+ from typing import List
+ from ....models.session import (
+     BasicResponse,
+     CreateSessionParams,
+     SessionDetail,
+     SessionListParams,
+     SessionListResponse,
+     SessionRecording,
+ )
+
+
+ class SessionManager:
+     def __init__(self, client):
+         self._client = client
+
+     def create(self, params: CreateSessionParams) -> SessionDetail:
+         response = self._client.transport.post(
+             self._client._build_url("/session"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return SessionDetail(**response.data)
+
+     def get(self, id: str) -> SessionDetail:
+         response = self._client.transport.get(self._client._build_url(f"/session/{id}"))
+         return SessionDetail(**response.data)
+
+     def stop(self, id: str) -> BasicResponse:
+         response = self._client.transport.put(
+             self._client._build_url(f"/session/{id}/stop")
+         )
+         return BasicResponse(**response.data)
+
+     def list(
+         self, params: SessionListParams = SessionListParams()
+     ) -> SessionListResponse:
+         response = self._client.transport.get(
+             self._client._build_url("/sessions"), params=params.__dict__
+         )
+         return SessionListResponse(**response.data)
+
+     def get_recording(self, id: str) -> List[SessionRecording]:
+         response = self._client.transport.get(
+             self._client._build_url(f"/session/{id}/recording")
+         )
+         return [SessionRecording(**recording) for recording in response.data]

hyperbrowser-0.8.0/hyperbrowser/client/sync.py

@@ -0,0 +1,25 @@
+ from typing import Optional
+ from .managers.sync_manager.session import SessionManager
+ from .managers.sync_manager.scrape import ScrapeManager
+ from .managers.sync_manager.crawl import CrawlManager
+ from .base import HyperbrowserBase
+ from ..transport.sync import SyncTransport
+ from ..config import ClientConfig
+
+
+ class Hyperbrowser(HyperbrowserBase):
+     """Synchronous Hyperbrowser client"""
+
+     def __init__(
+         self,
+         config: Optional[ClientConfig] = None,
+         api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+     ):
+         super().__init__(SyncTransport, config, api_key, base_url)
+         self.sessions = SessionManager(self)
+         self.scrape = ScrapeManager(self)
+         self.crawl = CrawlManager(self)
+
+     def close(self) -> None:
+         self.transport.close()
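
A sketch of the synchronous scrape flow through the new managers; the URL is illustrative and the API key is a placeholder:

    from hyperbrowser.client.sync import Hyperbrowser
    from hyperbrowser.models.scrape import StartScrapeJobParams

    client = Hyperbrowser(api_key="your-api-key")  # placeholder key
    # start_and_wait polls every 2 seconds until the job completes or fails.
    job = client.scrape.start_and_wait(StartScrapeJobParams(url="https://example.com"))
    if job.status == "completed" and job.data:
        print(job.data.markdown)
    client.close()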

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/consts.py

@@ -1,5 +1,7 @@
  from typing import Literal

+ ScrapeFormat = Literal["markdown", "html", "links"]
+
  Country = Literal[
      "AD",
      "AE",

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/crawl.py

@@ -1,7 +1,11 @@
- from typing import List, Literal, Optional
+ from typing import List, Literal, Optional, Union
  from pydantic import BaseModel, ConfigDict, Field

+ from hyperbrowser.models.scrape import ScrapeOptions
+ from hyperbrowser.models.session import CreateSessionParams
+
  CrawlJobStatus = Literal["pending", "running", "completed", "failed"]
+ CrawlPageStatus = Literal["completed", "failed"]


  class StartCrawlJobParams(BaseModel):
@@ -14,16 +18,21 @@ class StartCrawlJobParams(BaseModel):
      )

      url: str
-     max_pages: int = Field(default=10, ge=1, le=50, serialization_alias="maxPages")
+     max_pages: int = Field(default=10, ge=1, serialization_alias="maxPages")
      follow_links: bool = Field(default=True, serialization_alias="followLinks")
+     ignore_sitemap: bool = Field(default=False, serialization_alias="ignoreSitemap")
      exclude_patterns: List[str] = Field(
          default=[], serialization_alias="excludePatterns"
      )
      include_patterns: List[str] = Field(
          default=[], serialization_alias="includePatterns"
      )
-     use_proxy: bool = Field(default=False, serialization_alias="useProxy")
-     solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
+     session_options: Optional[CreateSessionParams] = Field(
+         default=None, serialization_alias="sessionOptions"
+     )
+     scrape_options: Optional[ScrapeOptions] = Field(
+         default=None, serialization_alias="scrapeOptions"
+     )


  class StartCrawlJobResponse(BaseModel):
@@ -38,35 +47,18 @@ class StartCrawlJobResponse(BaseModel):
      job_id: str = Field(alias="jobId")


- class CrawledPageMetadata(BaseModel):
-     """
-     Metadata for the crawled page.
-     """
-
-     model_config = ConfigDict(
-         populate_by_alias=True,
-     )
-
-     title: str
-     description: str
-     robots: str
-     og_title: str = Field(alias="ogTitle")
-     og_description: str = Field(alias="ogDescription")
-     og_url: str = Field(alias="ogUrl")
-     og_image: str = Field(alias="ogImage")
-     og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-     og_site_name: str = Field(alias="ogSiteName")
-     source_url: str = Field(alias="sourceURL")
-
-
  class CrawledPage(BaseModel):
      """
      Data from a crawled page.
      """

-     metadata: CrawledPageMetadata
-     markdown: str
+     metadata: Optional[dict[str, Union[str, list[str]]]] = None
+     html: Optional[str] = None
+     markdown: Optional[str] = None
+     links: Optional[List[str]] = None
      url: str
+     status: CrawlPageStatus
+     error: Optional[str] = None


  class GetCrawlJobParams(BaseModel):
@@ -76,7 +68,7 @@ class GetCrawlJobParams(BaseModel):

      page: Optional[int] = Field(default=None, serialization_alias="page")
      batch_size: Optional[int] = Field(
-         default=20, ge=1, le=50, serialization_alias="batchSize"
+         default=20, ge=1, le=30, serialization_alias="batchSize"
      )


@@ -89,6 +81,7 @@ class CrawlJobResponse(BaseModel):
          populate_by_alias=True,
      )

+     job_id: str = Field(alias="jobId")
      status: CrawlJobStatus
      error: Optional[str] = None
      data: List[CrawledPage] = Field(alias="data")
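
StartCrawlJobParams drops the top-level use_proxy / solve_captchas flags in favor of a nested session_options, and gains scrape_options and ignore_sitemap; the le=50 cap on max_pages is also removed. A sketch of the new parameter shape; all values are illustrative:

    from hyperbrowser.models.crawl import StartCrawlJobParams
    from hyperbrowser.models.scrape import ScrapeOptions
    from hyperbrowser.models.session import CreateSessionParams

    params = StartCrawlJobParams(
        url="https://example.com",               # illustrative target
        max_pages=100,                           # no longer capped at 50 in 0.8.0
        ignore_sitemap=True,
        session_options=CreateSessionParams(),   # per-crawl browser/session settings
        scrape_options=ScrapeOptions(formats=["markdown", "links"]),
    )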

hyperbrowser-0.8.0/hyperbrowser/models/scrape.py

@@ -0,0 +1,82 @@
+ from typing import List, Literal, Optional, Union
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from hyperbrowser.models.consts import ScrapeFormat
+ from hyperbrowser.models.session import CreateSessionParams
+
+ ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
+
+
+ class ScrapeOptions(BaseModel):
+     """
+     Options for scraping a page.
+     """
+
+     formats: Optional[List[ScrapeFormat]] = None
+     include_tags: Optional[List[str]] = Field(
+         default=None, serialization_alias="includeTags"
+     )
+     exclude_tags: Optional[List[str]] = Field(
+         default=None, serialization_alias="excludeTags"
+     )
+     only_main_content: Optional[bool] = Field(
+         default=None, serialization_alias="onlyMainContent"
+     )
+     wait_for: Optional[int] = Field(default=None, serialization_alias="waitFor")
+     timeout: Optional[int] = Field(default=None, serialization_alias="timeout")
+
+
+ class StartScrapeJobParams(BaseModel):
+     """
+     Parameters for creating a new scrape job.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     url: str
+     session_options: Optional[CreateSessionParams] = Field(
+         default=None, serialization_alias="sessionOptions"
+     )
+     scrape_options: Optional[ScrapeOptions] = Field(
+         default=None, serialization_alias="scrapeOptions"
+     )
+
+
+ class StartScrapeJobResponse(BaseModel):
+     """
+     Response from creating a scrape job.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     job_id: str = Field(alias="jobId")
+
+
+ class ScrapeJobData(BaseModel):
+     """
+     Data from a scraped site.
+     """
+
+     metadata: Optional[dict[str, Union[str, list[str]]]] = None
+     html: Optional[str] = None
+     markdown: Optional[str] = None
+     links: Optional[List[str]] = None
+
+
+ class ScrapeJobResponse(BaseModel):
+     """
+     Response from getting a scrape job.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     job_id: str = Field(alias="jobId")
+     status: ScrapeJobStatus
+     error: Optional[str] = None
+     data: Optional[ScrapeJobData] = None
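
Scrape output is no longer a fixed metadata-plus-markdown pair: ScrapeOptions.formats selects which of markdown, html, and links come back, and ScrapeJobData exposes each as an optional field. A sketch of building the new params; the values are illustrative and the wait_for / timeout units are assumed to be milliseconds:

    from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams

    params = StartScrapeJobParams(
        url="https://example.com",
        scrape_options=ScrapeOptions(
            formats=["markdown", "html", "links"],
            only_main_content=True,
            exclude_tags=["nav", "footer"],   # illustrative tag filters
            wait_for=1000,                    # assumed milliseconds
            timeout=15000,                    # assumed milliseconds
        ),
    )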

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/hyperbrowser/models/session.py

@@ -1,4 +1,4 @@
- from typing import List, Literal, Optional, Union
+ from typing import Any, List, Literal, Optional, Union
  from datetime import datetime
  from pydantic import BaseModel, Field, ConfigDict, field_validator

@@ -50,7 +50,7 @@ class SessionDetail(Session):
      Detailed session information including websocket endpoint.
      """

-     websocket_url: Optional[str] = Field(alias="wsEndpoint", default=None)
+     ws_endpoint: Optional[str] = Field(alias="wsEndpoint", default=None)


  class SessionListParams(BaseModel):
@@ -96,8 +96,8 @@ class ScreenConfig(BaseModel):
      Screen configuration parameters for browser session.
      """

-     width: int = Field(default=1280, le=3840, ge=640, serialization_alias="width")
-     height: int = Field(default=720, le=2160, ge=360, serialization_alias="height")
+     width: int = Field(default=1280, serialization_alias="width")
+     height: int = Field(default=720, serialization_alias="height")


  class CreateSessionParams(BaseModel):
@@ -132,3 +132,21 @@ class CreateSessionParams(BaseModel):
      adblock: bool = Field(default=False, serialization_alias="adblock")
      trackers: bool = Field(default=False, serialization_alias="trackers")
      annoyances: bool = Field(default=False, serialization_alias="annoyances")
+     enable_web_recording: Optional[bool] = Field(
+         default=False, serialization_alias="enableWebRecording"
+     )
+
+
+ class SessionRecording(BaseModel):
+     """
+     Model for session recording data.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     type: int
+     data: Any
+     timestamp: int
+     delay: Optional[int] = None
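
Recording is opt-in through the new enable_web_recording flag, and sessions.get_recording(id) returns a list of SessionRecording events (type, data, timestamp, delay). A sketch of the synchronous flow; the API key is a placeholder and the browser-driving step is elided:

    from hyperbrowser.client.sync import Hyperbrowser
    from hyperbrowser.models.session import CreateSessionParams

    client = Hyperbrowser(api_key="your-api-key")  # placeholder key
    session = client.sessions.create(CreateSessionParams(enable_web_recording=True))
    # ... drive the browser over session.ws_endpoint ...
    client.sessions.stop(session.id)
    for event in client.sessions.get_recording(session.id):
        print(event.type, event.timestamp)
    client.close()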

{hyperbrowser-0.7.0 → hyperbrowser-0.8.0}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "hyperbrowser"
- version = "0.7.0"
+ version = "0.8.0"
  description = "Python SDK for hyperbrowser"
  authors = ["Nikhil Shahi <nshahi1998@gmail.com>"]
  license = "MIT"

hyperbrowser-0.7.0/hyperbrowser/client/async_client.py (removed)

@@ -1,97 +0,0 @@
- from typing import Optional
-
- from hyperbrowser.models.crawl import (
-     CrawlJobResponse,
-     GetCrawlJobParams,
-     StartCrawlJobParams,
-     StartCrawlJobResponse,
- )
- from hyperbrowser.models.scrape import (
-     ScrapeJobResponse,
-     StartScrapeJobParams,
-     StartScrapeJobResponse,
- )
- from ..transport.async_transport import AsyncTransport
- from .base import HyperbrowserBase
- from ..models.session import (
-     BasicResponse,
-     CreateSessionParams,
-     SessionDetail,
-     SessionListParams,
-     SessionListResponse,
- )
- from ..config import ClientConfig
-
-
- class AsyncHyperbrowser(HyperbrowserBase):
-     """Asynchronous Hyperbrowser client"""
-
-     def __init__(
-         self,
-         config: Optional[ClientConfig] = None,
-         api_key: Optional[str] = None,
-         base_url: Optional[str] = None,
-     ):
-         super().__init__(AsyncTransport, config, api_key, base_url)
-
-     async def create_session(self, params: CreateSessionParams) -> SessionDetail:
-         response = await self.transport.post(
-             self._build_url("/session"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return SessionDetail(**response.data)
-
-     async def get_session(self, id: str) -> SessionDetail:
-         response = await self.transport.get(self._build_url(f"/session/{id}"))
-         return SessionDetail(**response.data)
-
-     async def stop_session(self, id: str) -> BasicResponse:
-         response = await self.transport.put(self._build_url(f"/session/{id}/stop"))
-         return BasicResponse(**response.data)
-
-     async def get_session_list(
-         self, params: SessionListParams = SessionListParams()
-     ) -> SessionListResponse:
-         response = await self.transport.get(
-             self._build_url("/sessions"), params=params.__dict__
-         )
-         return SessionListResponse(**response.data)
-
-     async def start_scrape_job(
-         self, params: StartScrapeJobParams
-     ) -> StartScrapeJobResponse:
-         response = await self.transport.post(
-             self._build_url("/scrape"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return StartScrapeJobResponse(**response.data)
-
-     async def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-         response = await self.transport.get(self._build_url(f"/scrape/{job_id}"))
-         return ScrapeJobResponse(**response.data)
-
-     async def start_crawl_job(
-         self, params: StartCrawlJobParams
-     ) -> StartCrawlJobResponse:
-         response = await self.transport.post(
-             self._build_url("/crawl"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return StartCrawlJobResponse(**response.data)
-
-     async def get_crawl_job(
-         self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-     ) -> CrawlJobResponse:
-         response = await self.transport.get(
-             self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-         )
-         return CrawlJobResponse(**response.data)
-
-     async def close(self) -> None:
-         await self.transport.close()
-
-     async def __aenter__(self):
-         return self
-
-     async def __aexit__(self, exc_type, exc_val, exc_tb):
-         await self.close()

hyperbrowser-0.7.0/hyperbrowser/client/sync.py (removed)

@@ -1,85 +0,0 @@
- from typing import Optional
-
- from hyperbrowser.models.crawl import (
-     CrawlJobResponse,
-     GetCrawlJobParams,
-     StartCrawlJobParams,
-     StartCrawlJobResponse,
- )
- from hyperbrowser.models.scrape import (
-     ScrapeJobResponse,
-     StartScrapeJobParams,
-     StartScrapeJobResponse,
- )
- from ..transport.sync import SyncTransport
- from .base import HyperbrowserBase
- from ..models.session import (
-     BasicResponse,
-     CreateSessionParams,
-     SessionDetail,
-     SessionListParams,
-     SessionListResponse,
- )
- from ..config import ClientConfig
-
-
- class Hyperbrowser(HyperbrowserBase):
-     """Synchronous Hyperbrowser client"""
-
-     def __init__(
-         self,
-         config: Optional[ClientConfig] = None,
-         api_key: Optional[str] = None,
-         base_url: Optional[str] = None,
-     ):
-         super().__init__(SyncTransport, config, api_key, base_url)
-
-     def create_session(self, params: CreateSessionParams) -> SessionDetail:
-         response = self.transport.post(
-             self._build_url("/session"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return SessionDetail(**response.data)
-
-     def get_session(self, id: str) -> SessionDetail:
-         response = self.transport.get(self._build_url(f"/session/{id}"))
-         return SessionDetail(**response.data)
-
-     def stop_session(self, id: str) -> BasicResponse:
-         response = self.transport.put(self._build_url(f"/session/{id}/stop"))
-         return BasicResponse(**response.data)
-
-     def get_session_list(self, params: SessionListParams) -> SessionListResponse:
-         response = self.transport.get(
-             self._build_url("/sessions"), params=params.__dict__
-         )
-         return SessionListResponse(**response.data)
-
-     def start_scrape_job(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
-         response = self.transport.post(
-             self._build_url("/scrape"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return StartScrapeJobResponse(**response.data)
-
-     def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-         response = self.transport.get(self._build_url(f"/scrape/{job_id}"))
-         return ScrapeJobResponse(**response.data)
-
-     def start_crawl_job(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
-         response = self.transport.post(
-             self._build_url("/crawl"),
-             data=params.model_dump(exclude_none=True, by_alias=True),
-         )
-         return StartCrawlJobResponse(**response.data)
-
-     def get_crawl_job(
-         self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-     ) -> CrawlJobResponse:
-         response = self.transport.get(
-             self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-         )
-         return CrawlJobResponse(**response.data)
-
-     def close(self) -> None:
-         self.transport.close()

hyperbrowser-0.7.0/hyperbrowser/models/scrape.py (removed)

@@ -1,74 +0,0 @@
- from typing import List, Literal, Optional
- from pydantic import BaseModel, ConfigDict, Field
-
- ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
-
-
- class StartScrapeJobParams(BaseModel):
-     """
-     Parameters for creating a new scrape job.
-     """
-
-     model_config = ConfigDict(
-         populate_by_alias=True,
-     )
-
-     url: str
-     use_proxy: bool = Field(default=False, serialization_alias="useProxy")
-     solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
-
-
- class StartScrapeJobResponse(BaseModel):
-     """
-     Response from creating a scrape job.
-     """
-
-     model_config = ConfigDict(
-         populate_by_alias=True,
-     )
-
-     job_id: str = Field(alias="jobId")
-
-
- class ScrapeJobMetadata(BaseModel):
-     """
-     Metadata for the scraped site.
-     """
-
-     model_config = ConfigDict(
-         populate_by_alias=True,
-     )
-
-     title: str
-     description: str
-     robots: str
-     og_title: str = Field(alias="ogTitle")
-     og_description: str = Field(alias="ogDescription")
-     og_url: str = Field(alias="ogUrl")
-     og_image: str = Field(alias="ogImage")
-     og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-     og_site_name: str = Field(alias="ogSiteName")
-     source_url: str = Field(alias="sourceURL")
-
-
- class ScrapeJobData(BaseModel):
-     """
-     Data from a scraped site.
-     """
-
-     metadata: ScrapeJobMetadata
-     markdown: str
-
-
- class ScrapeJobResponse(BaseModel):
-     """
-     Response from getting a scrape job.
-     """
-
-     model_config = ConfigDict(
-         populate_by_alias=True,
-     )
-
-     status: ScrapeJobStatus
-     error: Optional[str] = None
-     data: Optional[ScrapeJobData] = None