hyperbrowser 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hyperbrowser might be problematic.

--- a/hyperbrowser/client/async_client.py
+++ b/hyperbrowser/client/async_client.py
@@ -1,25 +1,9 @@
 from typing import Optional
-
-from hyperbrowser.models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from hyperbrowser.models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ..transport.async_transport import AsyncTransport
+from .managers.async_manager.session import SessionManager
+from .managers.async_manager.scrape import ScrapeManager
+from .managers.async_manager.crawl import CrawlManager
 from .base import HyperbrowserBase
-from ..models.session import (
-    BasicResponse,
-    CreateSessionParams,
-    SessionDetail,
-    SessionListParams,
-    SessionListResponse,
-)
+from ..transport.async_transport import AsyncTransport
 from ..config import ClientConfig


@@ -33,59 +17,9 @@ class AsyncHyperbrowser(HyperbrowserBase):
         base_url: Optional[str] = None,
     ):
         super().__init__(AsyncTransport, config, api_key, base_url)
-
-    async def create_session(self, params: CreateSessionParams) -> SessionDetail:
-        response = await self.transport.post(
-            self._build_url("/session"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return SessionDetail(**response.data)
-
-    async def get_session(self, id: str) -> SessionDetail:
-        response = await self.transport.get(self._build_url(f"/session/{id}"))
-        return SessionDetail(**response.data)
-
-    async def stop_session(self, id: str) -> BasicResponse:
-        response = await self.transport.put(self._build_url(f"/session/{id}/stop"))
-        return BasicResponse(**response.data)
-
-    async def get_session_list(
-        self, params: SessionListParams = SessionListParams()
-    ) -> SessionListResponse:
-        response = await self.transport.get(
-            self._build_url("/sessions"), params=params.__dict__
-        )
-        return SessionListResponse(**response.data)
-
-    async def start_scrape_job(
-        self, params: StartScrapeJobParams
-    ) -> StartScrapeJobResponse:
-        response = await self.transport.post(
-            self._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    async def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-        response = await self.transport.get(self._build_url(f"/scrape/{job_id}"))
-        return ScrapeJobResponse(**response.data)
-
-    async def start_crawl_job(
-        self, params: StartCrawlJobParams
-    ) -> StartCrawlJobResponse:
-        response = await self.transport.post(
-            self._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    async def get_crawl_job(
-        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-    ) -> CrawlJobResponse:
-        response = await self.transport.get(
-            self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
+        self.sessions = SessionManager(self)
+        self.scrape = ScrapeManager(self)
+        self.crawl = CrawlManager(self)

     async def close(self) -> None:
         await self.transport.close()
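
The flat `create_session`/`start_*_job` methods removed above now live behind manager objects (`client.sessions`, `client.scrape`, `client.crawl`). A minimal sketch of the new async surface, assuming the client class is importable from the package root as the README examples imply, and with a placeholder API key:

```python
import asyncio

from hyperbrowser import AsyncHyperbrowser  # root export assumed, per the README examples
from hyperbrowser.models.session import CreateSessionParams


async def main() -> None:
    async with AsyncHyperbrowser(api_key="your-api-key") as client:
        # 0.9.0: manager methods replace the old flat client methods.
        session = await client.sessions.create(CreateSessionParams())  # was client.create_session(...)
        print(session.ws_endpoint)                                     # was session.websocket_url
        await client.sessions.stop(session.id)                         # was client.stop_session(...)


asyncio.run(main())
```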
--- /dev/null
+++ b/hyperbrowser/client/managers/async_manager/crawl.py
@@ -0,0 +1,60 @@
+import asyncio
+from typing import Optional
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    async def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    async def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = await self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        while True:
+            job_response = await self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                break
+            await asyncio.sleep(2)
+
+        if not return_all_pages:
+            return job_response
+
+        while job_response.current_page_batch < job_response.total_page_batches:
+            tmp_job_response = await self.get(
+                job_start_resp.job_id,
+                GetCrawlJobParams(page=job_response.current_page_batch + 1),
+            )
+            if tmp_job_response.data:
+                job_response.data.extend(tmp_job_response.data)
+            job_response.current_page_batch = tmp_job_response.current_page_batch
+            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+            job_response.total_page_batches = tmp_job_response.total_page_batches
+            job_response.batch_size = tmp_job_response.batch_size
+            await asyncio.sleep(0.5)
+        return job_response
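
`start_and_wait` polls `get` every two seconds until the job reports `completed` or `failed`, then walks `current_page_batch` up to `total_page_batches` to collect the remaining page batches. A sketch of calling it, with the target URL as a placeholder and the root export assumed:

```python
import asyncio

from hyperbrowser import AsyncHyperbrowser  # root export assumed
from hyperbrowser.models.crawl import StartCrawlJobParams


async def main() -> None:
    async with AsyncHyperbrowser(api_key="your-api-key") as client:
        # Waits (cooperatively) for the crawl to finish, then gathers all page batches.
        job = await client.crawl.start_and_wait(
            StartCrawlJobParams(url="https://example.com", max_pages=5),
            return_all_pages=True,
        )
        print(job.status, len(job.data))


asyncio.run(main())
```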
--- /dev/null
+++ b/hyperbrowser/client/managers/async_manager/scrape.py
@@ -0,0 +1,36 @@
+import asyncio
+from typing import Optional
+from ....models.scrape import (
+    ScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    async def get(self, job_id: str) -> ScrapeJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = await self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+        while True:
+            job_response = await self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                return job_response
+            await asyncio.sleep(2)
--- /dev/null
+++ b/hyperbrowser/client/managers/async_manager/session.py
@@ -0,0 +1,47 @@
+from typing import List
+from ....models.session import (
+    BasicResponse,
+    CreateSessionParams,
+    SessionDetail,
+    SessionListParams,
+    SessionListResponse,
+    SessionRecording,
+)
+
+
+class SessionManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def create(self, params: CreateSessionParams) -> SessionDetail:
+        response = await self._client.transport.post(
+            self._client._build_url("/session"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return SessionDetail(**response.data)
+
+    async def get(self, id: str) -> SessionDetail:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/session/{id}")
+        )
+        return SessionDetail(**response.data)
+
+    async def stop(self, id: str) -> BasicResponse:
+        response = await self._client.transport.put(
+            self._client._build_url(f"/session/{id}/stop")
+        )
+        return BasicResponse(**response.data)
+
+    async def list(
+        self, params: SessionListParams = SessionListParams()
+    ) -> SessionListResponse:
+        response = await self._client.transport.get(
+            self._client._build_url("/sessions"), params=params.__dict__
+        )
+        return SessionListResponse(**response.data)
+
+    async def get_recording(self, id: str) -> List[SessionRecording]:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/session/{id}/recording")
+        )
+        return [SessionRecording(**recording) for recording in response.data]
--- /dev/null
+++ b/hyperbrowser/client/managers/sync_manager/crawl.py
@@ -0,0 +1,60 @@
+import time
+from typing import Optional
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        while True:
+            job_response = self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                break
+            time.sleep(2)
+
+        if not return_all_pages:
+            return job_response
+
+        while job_response.current_page_batch < job_response.total_page_batches:
+            tmp_job_response = self.get(
+                job_start_resp.job_id,
+                GetCrawlJobParams(page=job_response.current_page_batch + 1),
+            )
+            if tmp_job_response.data:
+                job_response.data.extend(tmp_job_response.data)
+            job_response.current_page_batch = tmp_job_response.current_page_batch
+            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+            job_response.total_page_batches = tmp_job_response.total_page_batches
+            job_response.batch_size = tmp_job_response.batch_size
+            time.sleep(0.5)
+        return job_response
--- /dev/null
+++ b/hyperbrowser/client/managers/sync_manager/scrape.py
@@ -0,0 +1,36 @@
+import time
+from typing import Optional
+from ....models.scrape import (
+    ScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    def get(self, job_id: str) -> ScrapeJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = self.start(params)
+        if not job_start_resp.job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+        while True:
+            job_response = self.get(job_start_resp.job_id)
+            if job_response.status == "completed" or job_response.status == "failed":
+                return job_response
+            time.sleep(2)
--- /dev/null
+++ b/hyperbrowser/client/managers/sync_manager/session.py
@@ -0,0 +1,45 @@
+from typing import List
+from ....models.session import (
+    BasicResponse,
+    CreateSessionParams,
+    SessionDetail,
+    SessionListParams,
+    SessionListResponse,
+    SessionRecording,
+)
+
+
+class SessionManager:
+    def __init__(self, client):
+        self._client = client
+
+    def create(self, params: CreateSessionParams) -> SessionDetail:
+        response = self._client.transport.post(
+            self._client._build_url("/session"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return SessionDetail(**response.data)
+
+    def get(self, id: str) -> SessionDetail:
+        response = self._client.transport.get(self._client._build_url(f"/session/{id}"))
+        return SessionDetail(**response.data)
+
+    def stop(self, id: str) -> BasicResponse:
+        response = self._client.transport.put(
+            self._client._build_url(f"/session/{id}/stop")
+        )
+        return BasicResponse(**response.data)
+
+    def list(
+        self, params: SessionListParams = SessionListParams()
+    ) -> SessionListResponse:
+        response = self._client.transport.get(
+            self._client._build_url("/sessions"), params=params.__dict__
+        )
+        return SessionListResponse(**response.data)
+
+    def get_recording(self, id: str) -> List[SessionRecording]:
+        response = self._client.transport.get(
+            self._client._build_url(f"/session/{id}/recording")
+        )
+        return [SessionRecording(**recording) for recording in response.data]
--- a/hyperbrowser/client/sync.py
+++ b/hyperbrowser/client/sync.py
@@ -1,25 +1,9 @@
 from typing import Optional
-
-from hyperbrowser.models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from hyperbrowser.models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ..transport.sync import SyncTransport
+from .managers.sync_manager.session import SessionManager
+from .managers.sync_manager.scrape import ScrapeManager
+from .managers.sync_manager.crawl import CrawlManager
 from .base import HyperbrowserBase
-from ..models.session import (
-    BasicResponse,
-    CreateSessionParams,
-    SessionDetail,
-    SessionListParams,
-    SessionListResponse,
-)
+from ..transport.sync import SyncTransport
 from ..config import ClientConfig


@@ -33,53 +17,9 @@ class Hyperbrowser(HyperbrowserBase):
         base_url: Optional[str] = None,
     ):
         super().__init__(SyncTransport, config, api_key, base_url)
-
-    def create_session(self, params: CreateSessionParams) -> SessionDetail:
-        response = self.transport.post(
-            self._build_url("/session"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return SessionDetail(**response.data)
-
-    def get_session(self, id: str) -> SessionDetail:
-        response = self.transport.get(self._build_url(f"/session/{id}"))
-        return SessionDetail(**response.data)
-
-    def stop_session(self, id: str) -> BasicResponse:
-        response = self.transport.put(self._build_url(f"/session/{id}/stop"))
-        return BasicResponse(**response.data)
-
-    def get_session_list(self, params: SessionListParams) -> SessionListResponse:
-        response = self.transport.get(
-            self._build_url("/sessions"), params=params.__dict__
-        )
-        return SessionListResponse(**response.data)
-
-    def start_scrape_job(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
-        response = self.transport.post(
-            self._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
-        response = self.transport.get(self._build_url(f"/scrape/{job_id}"))
-        return ScrapeJobResponse(**response.data)
-
-    def start_crawl_job(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
-        response = self.transport.post(
-            self._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    def get_crawl_job(
-        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-    ) -> CrawlJobResponse:
-        response = self.transport.get(
-            self._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
+        self.sessions = SessionManager(self)
+        self.scrape = ScrapeManager(self)
+        self.crawl = CrawlManager(self)

     def close(self) -> None:
         self.transport.close()
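
The synchronous client gets the same manager layout, backed by `time.sleep` polling instead of `asyncio.sleep`. A rough equivalent of a one-shot scrape, with the URL as a placeholder and the root export assumed:

```python
from hyperbrowser import Hyperbrowser  # root export assumed
from hyperbrowser.models.scrape import StartScrapeJobParams

client = Hyperbrowser(api_key="your-api-key")
try:
    # Polls every two seconds until the scrape job completes or fails.
    job = client.scrape.start_and_wait(StartScrapeJobParams(url="https://example.com"))
    if job.data and job.data.markdown:
        print(job.data.markdown[:200])
finally:
    client.close()
```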
--- a/hyperbrowser/models/consts.py
+++ b/hyperbrowser/models/consts.py
@@ -1,5 +1,7 @@
 from typing import Literal

+ScrapeFormat = Literal["markdown", "html", "links"]
+
 Country = Literal[
     "AD",
     "AE",
--- a/hyperbrowser/models/crawl.py
+++ b/hyperbrowser/models/crawl.py
@@ -1,7 +1,11 @@
-from typing import List, Literal, Optional
+from typing import List, Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Field

+from hyperbrowser.models.scrape import ScrapeOptions
+from hyperbrowser.models.session import CreateSessionParams
+
 CrawlJobStatus = Literal["pending", "running", "completed", "failed"]
+CrawlPageStatus = Literal["completed", "failed"]


 class StartCrawlJobParams(BaseModel):
@@ -14,16 +18,21 @@ class StartCrawlJobParams(BaseModel):
     )

     url: str
-    max_pages: int = Field(default=10, ge=1, le=50, serialization_alias="maxPages")
+    max_pages: int = Field(default=10, ge=1, serialization_alias="maxPages")
     follow_links: bool = Field(default=True, serialization_alias="followLinks")
+    ignore_sitemap: bool = Field(default=False, serialization_alias="ignoreSitemap")
     exclude_patterns: List[str] = Field(
         default=[], serialization_alias="excludePatterns"
     )
     include_patterns: List[str] = Field(
         default=[], serialization_alias="includePatterns"
     )
-    use_proxy: bool = Field(default=False, serialization_alias="useProxy")
-    solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
+    session_options: Optional[CreateSessionParams] = Field(
+        default=None, serialization_alias="sessionOptions"
+    )
+    scrape_options: Optional[ScrapeOptions] = Field(
+        default=None, serialization_alias="scrapeOptions"
+    )


 class StartCrawlJobResponse(BaseModel):
@@ -38,35 +47,18 @@ class StartCrawlJobResponse(BaseModel):
     job_id: str = Field(alias="jobId")


-class CrawledPageMetadata(BaseModel):
-    """
-    Metadata for the crawled page.
-    """
-
-    model_config = ConfigDict(
-        populate_by_alias=True,
-    )
-
-    title: str
-    description: str
-    robots: str
-    og_title: str = Field(alias="ogTitle")
-    og_description: str = Field(alias="ogDescription")
-    og_url: str = Field(alias="ogUrl")
-    og_image: str = Field(alias="ogImage")
-    og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-    og_site_name: str = Field(alias="ogSiteName")
-    source_url: str = Field(alias="sourceURL")
-
-
 class CrawledPage(BaseModel):
     """
     Data from a crawled page.
     """

-    metadata: CrawledPageMetadata
-    markdown: str
+    metadata: Optional[dict[str, Union[str, list[str]]]] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    links: Optional[List[str]] = None
     url: str
+    status: CrawlPageStatus
+    error: Optional[str] = None


 class GetCrawlJobParams(BaseModel):
@@ -76,7 +68,7 @@ class GetCrawlJobParams(BaseModel):

     page: Optional[int] = Field(default=None, serialization_alias="page")
     batch_size: Optional[int] = Field(
-        default=20, ge=1, le=50, serialization_alias="batchSize"
+        default=20, ge=1, le=30, serialization_alias="batchSize"
     )


@@ -89,6 +81,7 @@ class CrawlJobResponse(BaseModel):
         populate_by_alias=True,
     )

+    job_id: str = Field(alias="jobId")
     status: CrawlJobStatus
     error: Optional[str] = None
     data: List[CrawledPage] = Field(alias="data")
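
`use_proxy` and `solve_captchas` disappear from the crawl parameters; browser behaviour now rides on `session_options` and output shaping on `scrape_options`, and the `le=50` cap on `max_pages` is dropped. A sketch composing the new models, using only fields visible in this diff and a placeholder URL:

```python
from hyperbrowser.models.crawl import StartCrawlJobParams
from hyperbrowser.models.scrape import ScrapeOptions
from hyperbrowser.models.session import CreateSessionParams

params = StartCrawlJobParams(
    url="https://example.com",          # placeholder target
    max_pages=100,                      # no upper bound in 0.9.0 (was le=50)
    ignore_sitemap=True,
    session_options=CreateSessionParams(adblock=True, trackers=True),
    scrape_options=ScrapeOptions(formats=["markdown", "links"]),
)
# serialization_alias fields become camelCase on the wire, mirroring what the managers POST
print(params.model_dump(exclude_none=True, by_alias=True))
```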
--- a/hyperbrowser/models/scrape.py
+++ b/hyperbrowser/models/scrape.py
@@ -1,54 +1,59 @@
-from typing import List, Literal, Optional
+from typing import List, Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Field

+from hyperbrowser.models.consts import ScrapeFormat
+from hyperbrowser.models.session import CreateSessionParams
+
 ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]


-class StartScrapeJobParams(BaseModel):
+class ScrapeOptions(BaseModel):
     """
-    Parameters for creating a new scrape job.
+    Options for scraping a page.
     """

-    model_config = ConfigDict(
-        populate_by_alias=True,
+    formats: Optional[List[ScrapeFormat]] = None
+    include_tags: Optional[List[str]] = Field(
+        default=None, serialization_alias="includeTags"
     )
-
-    url: str
-    use_proxy: bool = Field(default=False, serialization_alias="useProxy")
-    solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
+    exclude_tags: Optional[List[str]] = Field(
+        default=None, serialization_alias="excludeTags"
+    )
+    only_main_content: Optional[bool] = Field(
+        default=None, serialization_alias="onlyMainContent"
+    )
+    wait_for: Optional[int] = Field(default=None, serialization_alias="waitFor")
+    timeout: Optional[int] = Field(default=None, serialization_alias="timeout")


-class StartScrapeJobResponse(BaseModel):
+class StartScrapeJobParams(BaseModel):
     """
-    Response from creating a scrape job.
+    Parameters for creating a new scrape job.
     """

     model_config = ConfigDict(
         populate_by_alias=True,
     )

-    job_id: str = Field(alias="jobId")
+    url: str
+    session_options: Optional[CreateSessionParams] = Field(
+        default=None, serialization_alias="sessionOptions"
+    )
+    scrape_options: Optional[ScrapeOptions] = Field(
+        default=None, serialization_alias="scrapeOptions"
+    )


-class ScrapeJobMetadata(BaseModel):
+class StartScrapeJobResponse(BaseModel):
     """
-    Metadata for the scraped site.
+    Response from creating a scrape job.
     """

     model_config = ConfigDict(
         populate_by_alias=True,
     )

-    title: str
-    description: str
-    robots: str
-    og_title: str = Field(alias="ogTitle")
-    og_description: str = Field(alias="ogDescription")
-    og_url: str = Field(alias="ogUrl")
-    og_image: str = Field(alias="ogImage")
-    og_locale_alternate: List[str] = Field(alias="ogLocaleAlternate")
-    og_site_name: str = Field(alias="ogSiteName")
-    source_url: str = Field(alias="sourceURL")
+    job_id: str = Field(alias="jobId")


 class ScrapeJobData(BaseModel):
@@ -56,8 +61,10 @@ class ScrapeJobData(BaseModel):
     Data from a scraped site.
     """

-    metadata: ScrapeJobMetadata
-    markdown: str
+    metadata: Optional[dict[str, Union[str, list[str]]]] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    links: Optional[List[str]] = None


 class ScrapeJobResponse(BaseModel):
@@ -69,6 +76,7 @@ class ScrapeJobResponse(BaseModel):
         populate_by_alias=True,
     )

+    job_id: str = Field(alias="jobId")
     status: ScrapeJobStatus
     error: Optional[str] = None
     data: Optional[ScrapeJobData] = None
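
Scrape requests follow the same split: job-level parameters carry only the URL plus optional `session_options`/`scrape_options`, with the page-shaping knobs gathered on `ScrapeOptions`. A small sketch using only the fields defined above (`wait_for` and `timeout` are presumably milliseconds):

```python
from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams

params = StartScrapeJobParams(
    url="https://example.com",  # placeholder
    scrape_options=ScrapeOptions(
        formats=["markdown", "html"],   # ScrapeFormat literals from consts.py
        only_main_content=True,
        exclude_tags=["nav", "footer"],
        wait_for=1000,
        timeout=15000,
    ),
)
```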
--- a/hyperbrowser/models/session.py
+++ b/hyperbrowser/models/session.py
@@ -1,4 +1,4 @@
-from typing import List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional, Union
 from datetime import datetime
 from pydantic import BaseModel, Field, ConfigDict, field_validator

@@ -33,6 +33,7 @@ class Session(BaseModel):
     end_time: Optional[int] = Field(default=None, alias="endTime")
     duration: Optional[int] = None
     session_url: str = Field(alias="sessionUrl")
+    token: str = Field(alias="token")

     @field_validator("start_time", "end_time", mode="before")
     @classmethod
@@ -50,7 +51,7 @@ class SessionDetail(Session):
     Detailed session information including websocket endpoint.
     """

-    websocket_url: Optional[str] = Field(alias="wsEndpoint", default=None)
+    ws_endpoint: Optional[str] = Field(alias="wsEndpoint", default=None)


 class SessionListParams(BaseModel):
@@ -96,8 +97,8 @@ class ScreenConfig(BaseModel):
     Screen configuration parameters for browser session.
     """

-    width: int = Field(default=1280, le=3840, ge=640, serialization_alias="width")
-    height: int = Field(default=720, le=2160, ge=360, serialization_alias="height")
+    width: int = Field(default=1280, serialization_alias="width")
+    height: int = Field(default=720, serialization_alias="height")


 class CreateSessionParams(BaseModel):
@@ -132,3 +133,21 @@ class CreateSessionParams(BaseModel):
     adblock: bool = Field(default=False, serialization_alias="adblock")
     trackers: bool = Field(default=False, serialization_alias="trackers")
     annoyances: bool = Field(default=False, serialization_alias="annoyances")
+    enable_web_recording: Optional[bool] = Field(
+        default=False, serialization_alias="enableWebRecording"
+    )
+
+
+class SessionRecording(BaseModel):
+    """
+    Model for session recording data.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    type: int
+    data: Any
+    timestamp: int
+    delay: Optional[int] = None
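
The new `enable_web_recording` flag pairs with the `SessionRecording` events returned by the session managers' `get_recording`. A sketch tying the two together with the synchronous client (API key and the actual browser work are placeholders, root export assumed):

```python
from hyperbrowser import Hyperbrowser  # root export assumed
from hyperbrowser.models.session import CreateSessionParams

client = Hyperbrowser(api_key="your-api-key")
try:
    session = client.sessions.create(CreateSessionParams(enable_web_recording=True))
    # ... drive the browser over session.ws_endpoint ...
    client.sessions.stop(session.id)

    # Each SessionRecording exposes type, data, timestamp and an optional delay.
    recordings = client.sessions.get_recording(session.id)
    print(len(recordings))
finally:
    client.close()
```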
--- a/hyperbrowser-0.7.0.dist-info/METADATA
+++ b/hyperbrowser-0.9.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hyperbrowser
-Version: 0.7.0
+Version: 0.9.0
 Summary: Python SDK for hyperbrowser
 Home-page: https://github.com/hyperbrowserai/python-sdk
 License: MIT
@@ -52,9 +52,9 @@ HYPERBROWSER_API_KEY = "test-key"

 async def main():
     async with AsyncHyperbrowser(api_key=HYPERBROWSER_API_KEY) as client:
-        session = await client.create_session()
+        session = await client.sessions.create()

-        ws_endpoint = session.websocket_url
+        ws_endpoint = session.ws_endpoint
         browser = await connect(browserWSEndpoint=ws_endpoint, defaultViewport=None)

         # Get pages
@@ -72,7 +72,7 @@ async def main():

         await page.close()
         await browser.disconnect()
-        await client.stop_session(session.id)
+        await client.sessions.stop(session.id)
         print("Session completed!")

 # Run the asyncio event loop
@@ -88,9 +88,9 @@ HYPERBROWSER_API_KEY = "test-key"

 def main():
     client = Hyperbrowser(api_key=HYPERBROWSER_API_KEY)
-    session = client.create_session()
+    session = client.sessions.create()

-    ws_endpoint = session.websocket_url
+    ws_endpoint = session.ws_endpoint

     # Launch Playwright and connect to the remote browser
     with sync_playwright() as p:
@@ -112,7 +112,7 @@ def main():
         page.close()
         browser.close()
         print("Session completed!")
-    client.stop_session(session.id)
+    client.sessions.stop(session.id)

 # Run the asyncio event loop
 main()
--- /dev/null
+++ b/hyperbrowser-0.9.0.dist-info/RECORD
@@ -0,0 +1,24 @@
+LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
+hyperbrowser/__init__.py,sha256=zWGcLhqhvWy6BTwuNpzWK1-0LpIn311ks-4U9nrsb7Y,187
+hyperbrowser/client/async_client.py,sha256=ppJI8O7SQi89mwMhIHVgTgFeRu2aZbLl2zbFaI3sXNU,984
+hyperbrowser/client/base.py,sha256=9gFma7RdvJBUlDCqr8tZd315UPrjn4ldU4B0-Y-L4O4,1268
+hyperbrowser/client/managers/async_manager/crawl.py,sha256=hBS2WwfE0-ZopCW9PjP30meU5iTDdRViFl1C1OF1hVU,2291
+hyperbrowser/client/managers/async_manager/scrape.py,sha256=7FdYS_NNEpvB9z3ShGZaZxNryKHm02MQR-g9diadGhA,1319
+hyperbrowser/client/managers/async_manager/session.py,sha256=uFHWQISthvQHLFaTf8p4orj95-CKTLmfE9yF-GvRtqE,1595
+hyperbrowser/client/managers/sync_manager/crawl.py,sha256=lnMtBmOPcamjtvzH4BAnWbBTGbKBmHGUQiMnnZlj2tg,2222
+hyperbrowser/client/managers/sync_manager/scrape.py,sha256=DxSvdHa-z2P_rvNUwmRfU4iQz19wiEi_M2YmBQZfLyk,1265
+hyperbrowser/client/managers/sync_manager/session.py,sha256=J-OVwgDmKScUYVhytM-w-svUitHe43i1v58c5WgQLiU,1513
+hyperbrowser/client/sync.py,sha256=CzXlPksK4D7eazQDzbra-pM64Sy0bLrg0zjv5xBKZdk,811
+hyperbrowser/config.py,sha256=2J6GYNR_83vzJZ6jEV-LXO1U-q6DHIrfyAU0WrUPhw8,625
+hyperbrowser/exceptions.py,sha256=SUUkptK2OL36xDORYmSicaTYR7pMbxeWAjAgz35xnM8,1171
+hyperbrowser/models/consts.py,sha256=xsMBPivE4M6wGJ5Q0x3oRTgt0Koi1occtAeHthes9ZY,4970
+hyperbrowser/models/crawl.py,sha256=DWeJRwuZ0EXOEpEx0OyUZp_HOdGfpptg_mNo5J0u6po,2566
+hyperbrowser/models/scrape.py,sha256=e3Z5HgCkLD1FxOjXtPmI6SAJ9wsrAKXj7WElXFXy8yE,2103
+hyperbrowser/models/session.py,sha256=QVcPc4rkXqTfSE9roEImRgsJ4xxHruTaKubQSHy__xI,4541
+hyperbrowser/transport/async_transport.py,sha256=P-nX9iczGVYJyvqtqlGAOFQ3PghRC2_bE6Lruiiecn0,3511
+hyperbrowser/transport/base.py,sha256=9l7k-qTX4Q2KaZIR_fwsNlxDgOzsmc8zgucZ9tfHgkw,1622
+hyperbrowser/transport/sync.py,sha256=DFDPYqF-_WQSZkRbWDRFTPowQMzz-B3N869r2vvocPc,2829
+hyperbrowser-0.9.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
+hyperbrowser-0.9.0.dist-info/METADATA,sha256=Si_LKjvhplK4Wbi8CxjFBA9O_dWd-0gp-k-HKfmvUOY,3289
+hyperbrowser-0.9.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+hyperbrowser-0.9.0.dist-info/RECORD,,
--- a/hyperbrowser-0.7.0.dist-info/RECORD
+++ /dev/null
@@ -1,18 +0,0 @@
-LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
-hyperbrowser/__init__.py,sha256=zWGcLhqhvWy6BTwuNpzWK1-0LpIn311ks-4U9nrsb7Y,187
-hyperbrowser/client/async_client.py,sha256=nmbbqaIEOWk01-mqBCYkg8hU01_sQLrAJNiPaWfINEI,3225
-hyperbrowser/client/base.py,sha256=9gFma7RdvJBUlDCqr8tZd315UPrjn4ldU4B0-Y-L4O4,1268
-hyperbrowser/client/sync.py,sha256=2ZembYQu7h0ph57jYFUH6ytXA0ebohceu39PevwSWaM,2895
-hyperbrowser/config.py,sha256=2J6GYNR_83vzJZ6jEV-LXO1U-q6DHIrfyAU0WrUPhw8,625
-hyperbrowser/exceptions.py,sha256=SUUkptK2OL36xDORYmSicaTYR7pMbxeWAjAgz35xnM8,1171
-hyperbrowser/models/consts.py,sha256=VmtqbXqK6WTvlD5XExL3e2JE3WaFTi_iniEAQlRSQgs,4917
-hyperbrowser/models/crawl.py,sha256=-u0pJ28sNjyycfbuLHjuA5bftDtkV60ZFvek7Z510ao,2582
-hyperbrowser/models/scrape.py,sha256=JIS6zbHlpv-U1hc9qVYeCazXYHBiRzjQX6y_TXsl4js,1678
-hyperbrowser/models/session.py,sha256=N05NLI0NFul7uQPkLihOv82-JCjXkWW8hlMbQsPZMvo,4173
-hyperbrowser/transport/async_transport.py,sha256=P-nX9iczGVYJyvqtqlGAOFQ3PghRC2_bE6Lruiiecn0,3511
-hyperbrowser/transport/base.py,sha256=9l7k-qTX4Q2KaZIR_fwsNlxDgOzsmc8zgucZ9tfHgkw,1622
-hyperbrowser/transport/sync.py,sha256=DFDPYqF-_WQSZkRbWDRFTPowQMzz-B3N869r2vvocPc,2829
-hyperbrowser-0.7.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
-hyperbrowser-0.7.0.dist-info/METADATA,sha256=tgrKlcUojlBC7D1cClvMIMiA7_krPD7NGpxyuHLGLA0,3289
-hyperbrowser-0.7.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-hyperbrowser-0.7.0.dist-info/RECORD,,