hyperbrowser 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hyperbrowser might be problematic. Click here for more details.
- hyperbrowser/client/async_client.py +19 -0
- hyperbrowser/client/sync.py +17 -0
- hyperbrowser/models/scrape.py +72 -0
- hyperbrowser/models/session.py +8 -4
- {hyperbrowser-0.4.0.dist-info → hyperbrowser-0.5.0.dist-info}/METADATA +1 -1
- {hyperbrowser-0.4.0.dist-info → hyperbrowser-0.5.0.dist-info}/RECORD +8 -7
- {hyperbrowser-0.4.0.dist-info → hyperbrowser-0.5.0.dist-info}/LICENSE +0 -0
- {hyperbrowser-0.4.0.dist-info → hyperbrowser-0.5.0.dist-info}/WHEEL +0 -0
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from hyperbrowser.models.scrape import (
|
|
4
|
+
ScrapeJobResponse,
|
|
5
|
+
StartScrapeJobParams,
|
|
6
|
+
StartScrapeJobResponse,
|
|
7
|
+
)
|
|
2
8
|
from ..transport.async_transport import AsyncTransport
|
|
3
9
|
from .base import HyperbrowserBase
|
|
4
10
|
from ..models.session import (
|
|
@@ -45,6 +51,19 @@ class AsyncHyperbrowser(HyperbrowserBase):
|
|
|
45
51
|
)
|
|
46
52
|
return SessionListResponse(**response.data)
|
|
47
53
|
|
|
54
|
+
async def start_scrape_job(
    self, params: StartScrapeJobParams
) -> StartScrapeJobResponse:
    """
    Submit a new scrape job.

    Args:
        params: Job parameters; serialized with aliases, omitting fields
            whose value is None.

    Returns:
        StartScrapeJobResponse built from the response data.
    """
    endpoint = self._build_url("/scrape")
    payload = params.model_dump(exclude_none=True, by_alias=True)
    result = await self.transport.post(endpoint, data=payload)
    return StartScrapeJobResponse(**result.data)
|
|
62
|
+
|
|
63
|
+
async def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
    """
    Fetch a scrape job by its identifier.

    Args:
        job_id: Identifier of the scrape job to look up.

    Returns:
        ScrapeJobResponse built from the response data.
    """
    # Use the same path root as start_scrape_job ("/scrape"); the earlier
    # "/api/scrape/..." form did not match it.
    # NOTE(review): assumes _build_url adds any "/api" prefix itself - confirm.
    response = await self.transport.get(self._build_url(f"/scrape/{job_id}"))
    return ScrapeJobResponse(**response.data)
|
|
66
|
+
|
|
48
67
|
async def close(self) -> None:
|
|
49
68
|
await self.transport.close()
|
|
50
69
|
|
hyperbrowser/client/sync.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from hyperbrowser.models.scrape import (
|
|
4
|
+
ScrapeJobResponse,
|
|
5
|
+
StartScrapeJobParams,
|
|
6
|
+
StartScrapeJobResponse,
|
|
7
|
+
)
|
|
2
8
|
from ..transport.sync import SyncTransport
|
|
3
9
|
from .base import HyperbrowserBase
|
|
4
10
|
from ..models.session import (
|
|
@@ -43,5 +49,16 @@ class Hyperbrowser(HyperbrowserBase):
|
|
|
43
49
|
)
|
|
44
50
|
return SessionListResponse(**response.data)
|
|
45
51
|
|
|
52
|
+
def start_scrape_job(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
    """
    Submit a new scrape job.

    Args:
        params: Job parameters; serialized with aliases, omitting fields
            whose value is None.

    Returns:
        StartScrapeJobResponse built from the response data.
    """
    endpoint = self._build_url("/scrape")
    payload = params.model_dump(exclude_none=True, by_alias=True)
    result = self.transport.post(endpoint, data=payload)
    return StartScrapeJobResponse(**result.data)
|
|
58
|
+
|
|
59
|
+
def get_scrape_job(self, job_id: str) -> ScrapeJobResponse:
    """
    Fetch a scrape job by its identifier.

    Args:
        job_id: Identifier of the scrape job to look up.

    Returns:
        ScrapeJobResponse built from the response data.
    """
    # Use the same path root as start_scrape_job ("/scrape"); the earlier
    # "/api/scrape/..." form did not match it.
    # NOTE(review): assumes _build_url adds any "/api" prefix itself - confirm.
    response = self.transport.get(self._build_url(f"/scrape/{job_id}"))
    return ScrapeJobResponse(**response.data)
|
|
62
|
+
|
|
46
63
|
def close(self) -> None:
|
|
47
64
|
self.transport.close()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import Literal, Optional
|
|
2
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
3
|
+
|
|
4
|
+
# Allowed values for the `status` field of a scrape job response.
ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class StartScrapeJobParams(BaseModel):
    """
    Parameters for creating a new scrape job.
    """

    # `populate_by_name` is the pydantic v2 setting that lets a model accept
    # field names as well as aliases; `populate_by_alias` is not a ConfigDict
    # option and had no effect.
    model_config = ConfigDict(
        populate_by_name=True,
    )

    # Required; the only parameter this model declares.
    url: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StartScrapeJobResponse(BaseModel):
    """
    Response from creating a scrape job.
    """

    # `populate_by_name` lets the model be built from either `jobId` (the
    # alias) or `job_id` (the field name); the earlier `populate_by_alias`
    # key is not a ConfigDict option and had no effect.
    model_config = ConfigDict(
        populate_by_name=True,
    )

    # Read from the `jobId` key of the response data.
    job_id: str = Field(alias="jobId")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ScrapeJobMetadata(BaseModel):
    """
    Metadata for the scraped site.

    Snake_case fields with a camelCase alias are read from the aliased key
    of the incoming data.
    """

    # `populate_by_name` lets the model accept field names as well as the
    # aliases below; the earlier `populate_by_alias` key is not a ConfigDict
    # option and had no effect.
    model_config = ConfigDict(
        populate_by_name=True,
    )

    # NOTE(review): every field is required (no defaults), so data missing
    # any of these keys fails validation - confirm the API always sends all.
    title: str
    description: str
    robots: str
    og_title: str = Field(alias="ogTitle")
    og_description: str = Field(alias="ogDescription")
    og_url: str = Field(alias="ogUrl")
    og_image: str = Field(alias="ogImage")
    og_locale_alternate: list[str] = Field(alias="ogLocaleAlternate")
    og_site_name: str = Field(alias="ogSiteName")
    source_url: str = Field(alias="sourceURL")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ScrapeJobData(BaseModel):
    """
    Content returned for a scraped site.
    """

    # Structured metadata, validated as a ScrapeJobMetadata model.
    metadata: ScrapeJobMetadata
    # Markdown string returned for the site.
    markdown: str
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ScrapeJobResponse(BaseModel):
    """
    Response from getting a scrape job.
    """

    # `populate_by_name` is the pydantic v2 setting for accepting field names
    # alongside aliases; the earlier `populate_by_alias` key is not a
    # ConfigDict option and had no effect.
    model_config = ConfigDict(
        populate_by_name=True,
    )

    # One of the ScrapeJobStatus literals; always required.
    status: ScrapeJobStatus
    # Optional error text; defaults to None when absent.
    error: Optional[str] = None
    # Optional scraped content; defaults to None when absent.
    data: Optional[ScrapeJobData] = None
|
hyperbrowser/models/session.py
CHANGED
|
@@ -96,10 +96,8 @@ class ScreenConfig(BaseModel):
|
|
|
96
96
|
Screen configuration parameters for browser session.
|
|
97
97
|
"""
|
|
98
98
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
min_width: int = Field(default=800, ge=360, serialization_alias="minWidth")
|
|
102
|
-
min_height: int = Field(default=480, ge=360, serialization_alias="minHeight")
|
|
99
|
+
width: int = Field(default=1280, le=3840, ge=640, serialization_alias="width")
|
|
100
|
+
height: int = Field(default=720, le=2160, ge=360, serialization_alias="height")
|
|
103
101
|
|
|
104
102
|
|
|
105
103
|
class CreateSessionParams(BaseModel):
|
|
@@ -111,6 +109,8 @@ class CreateSessionParams(BaseModel):
|
|
|
111
109
|
populate_by_alias=True,
|
|
112
110
|
)
|
|
113
111
|
|
|
112
|
+
use_stealth: bool = Field(default=False, serialization_alias="useStealth")
|
|
113
|
+
use_proxy: bool = Field(default=False, serialization_alias="useProxy")
|
|
114
114
|
proxy_server: Optional[str] = Field(default=None, serialization_alias="proxyServer")
|
|
115
115
|
proxy_server_password: Optional[str] = Field(
|
|
116
116
|
default=None, serialization_alias="proxyServerPassword"
|
|
@@ -128,3 +128,7 @@ class CreateSessionParams(BaseModel):
|
|
|
128
128
|
platform: Optional[List[Platform]] = Field(default=None)
|
|
129
129
|
locales: List[ISO639_1] = Field(default=["en"])
|
|
130
130
|
screen: Optional[ScreenConfig] = Field(default=None)
|
|
131
|
+
solve_captchas: bool = Field(default=False, serialization_alias="solveCaptchas")
|
|
132
|
+
adblock: bool = Field(default=False, serialization_alias="adblock")
|
|
133
|
+
trackers: bool = Field(default=False, serialization_alias="trackers")
|
|
134
|
+
annoyances: bool = Field(default=False, serialization_alias="annoyances")
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
|
|
2
2
|
hyperbrowser/__init__.py,sha256=zWGcLhqhvWy6BTwuNpzWK1-0LpIn311ks-4U9nrsb7Y,187
|
|
3
|
-
hyperbrowser/client/async_client.py,sha256=
|
|
3
|
+
hyperbrowser/client/async_client.py,sha256=Elja054YwFyAgc43aQ1R21Y8c_adsIJ4ueg6gWYYIME,2457
|
|
4
4
|
hyperbrowser/client/base.py,sha256=9gFma7RdvJBUlDCqr8tZd315UPrjn4ldU4B0-Y-L4O4,1268
|
|
5
|
-
hyperbrowser/client/sync.py,sha256=
|
|
5
|
+
hyperbrowser/client/sync.py,sha256=47WzDbNAHV6WxaA7Ph7FVoI-kbmolFKv30Dha1C4fF8,2165
|
|
6
6
|
hyperbrowser/config.py,sha256=2J6GYNR_83vzJZ6jEV-LXO1U-q6DHIrfyAU0WrUPhw8,625
|
|
7
7
|
hyperbrowser/exceptions.py,sha256=SUUkptK2OL36xDORYmSicaTYR7pMbxeWAjAgz35xnM8,1171
|
|
8
8
|
hyperbrowser/models/consts.py,sha256=VmtqbXqK6WTvlD5XExL3e2JE3WaFTi_iniEAQlRSQgs,4917
|
|
9
|
-
hyperbrowser/models/
|
|
9
|
+
hyperbrowser/models/scrape.py,sha256=PoqdmF2EIBSJuyNcrt3NtZ0jSnjVB_EqLBXYC2RQ2_Q,1512
|
|
10
|
+
hyperbrowser/models/session.py,sha256=N05NLI0NFul7uQPkLihOv82-JCjXkWW8hlMbQsPZMvo,4173
|
|
10
11
|
hyperbrowser/transport/async_transport.py,sha256=P-nX9iczGVYJyvqtqlGAOFQ3PghRC2_bE6Lruiiecn0,3511
|
|
11
12
|
hyperbrowser/transport/base.py,sha256=9l7k-qTX4Q2KaZIR_fwsNlxDgOzsmc8zgucZ9tfHgkw,1622
|
|
12
13
|
hyperbrowser/transport/sync.py,sha256=DFDPYqF-_WQSZkRbWDRFTPowQMzz-B3N869r2vvocPc,2829
|
|
13
|
-
hyperbrowser-0.
|
|
14
|
-
hyperbrowser-0.
|
|
15
|
-
hyperbrowser-0.
|
|
16
|
-
hyperbrowser-0.
|
|
14
|
+
hyperbrowser-0.5.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
|
|
15
|
+
hyperbrowser-0.5.0.dist-info/METADATA,sha256=X8aMEPn5yli9jf8a3Gv2rI7zL4VsYdGNSsnvyT0_Jpg,3289
|
|
16
|
+
hyperbrowser-0.5.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
17
|
+
hyperbrowser-0.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|