hyperbrowser 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

hyperbrowser/client/managers/async_manager/crawl.py

@@ -1,5 +1,6 @@
  import asyncio
- from typing import Optional
+
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from ....models.crawl import (
      CrawlJobResponse,
      GetCrawlJobParams,
@@ -32,29 +33,69 @@ class CrawlManager:
          self, params: StartCrawlJobParams, return_all_pages: bool = True
      ) -> CrawlJobResponse:
          job_start_resp = await self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start crawl job")

          job_response: CrawlJobResponse
+         failures = 0
          while True:
-             job_response = await self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 break
+             try:
+                 job_response = await self.get(
+                     job_id,
+                     params=GetCrawlJobParams(batch_size=1),
+                 )
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     break
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              await asyncio.sleep(2)

+         failures = 0
          if not return_all_pages:
-             return job_response
+             while True:
+                 try:
+                     job_response = await self.get(job_id)
+                     return job_response
+                 except Exception as e:
+                     failures += 1
+                     if failures >= POLLING_ATTEMPTS:
+                         raise HyperbrowserError(
+                             f"Failed to get crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                         )
+                 await asyncio.sleep(0.5)

+         failures = 0
+         job_response.current_page_batch = 0
+         job_response.data = []
          while job_response.current_page_batch < job_response.total_page_batches:
-             tmp_job_response = await self.get(
-                 job_start_resp.job_id,
-                 GetCrawlJobParams(page=job_response.current_page_batch + 1),
-             )
-             if tmp_job_response.data:
-                 job_response.data.extend(tmp_job_response.data)
-             job_response.current_page_batch = tmp_job_response.current_page_batch
-             job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
-             job_response.total_page_batches = tmp_job_response.total_page_batches
-             job_response.batch_size = tmp_job_response.batch_size
+             try:
+                 tmp_job_response = await self.get(
+                     job_start_resp.job_id,
+                     GetCrawlJobParams(
+                         page=job_response.current_page_batch + 1, batch_size=100
+                     ),
+                 )
+                 if tmp_job_response.data:
+                     job_response.data.extend(tmp_job_response.data)
+                 job_response.current_page_batch = tmp_job_response.current_page_batch
+                 job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+                 job_response.total_page_batches = tmp_job_response.total_page_batches
+                 job_response.batch_size = tmp_job_response.batch_size
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to get crawl batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              await asyncio.sleep(0.5)
+
          return job_response

hyperbrowser/client/managers/async_manager/extract.py

@@ -1,5 +1,6 @@
  import asyncio
  from hyperbrowser.exceptions import HyperbrowserError
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from hyperbrowser.models.extract import (
      ExtractJobResponse,
      StartExtractJobParams,
@@ -32,10 +33,24 @@ class ExtractManager:

      async def start_and_wait(self, params: StartExtractJobParams) -> ExtractJobResponse:
          job_start_resp = await self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start extract job")
+
+         failures = 0
          while True:
-             job_response = await self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 return job_response
+             try:
+                 job_response = await self.get(job_id)
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     return job_response
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll extract job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              await asyncio.sleep(2)
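
The managers above (and their sync counterparts below) now wrap every poll in a try/except and give up after POLLING_ATTEMPTS consecutive failures; the constant is added in hyperbrowser/models/consts.py further down with a value of 5. A condensed sketch of the pattern with illustrative names (poll_until_done and the get_job callable are not SDK API); the individual helpers differ slightly in where they reset the counter:

    import asyncio

    from hyperbrowser.exceptions import HyperbrowserError
    from hyperbrowser.models.consts import POLLING_ATTEMPTS


    async def poll_until_done(get_job, job_id: str):
        failures = 0
        while True:
            try:
                job = await get_job(job_id)
                if job.status in ("completed", "failed"):
                    return job
                failures = 0  # reset after a successful poll
            except Exception as e:
                failures += 1
                if failures >= POLLING_ATTEMPTS:
                    raise HyperbrowserError(
                        f"Failed to poll job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
                    )
            await asyncio.sleep(2)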

hyperbrowser/client/managers/async_manager/scrape.py

@@ -1,16 +1,116 @@
  import asyncio
  from typing import Optional
+
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from ....models.scrape import (
+     BatchScrapeJobResponse,
+     GetBatchScrapeJobParams,
      ScrapeJobResponse,
+     StartBatchScrapeJobParams,
+     StartBatchScrapeJobResponse,
      StartScrapeJobParams,
      StartScrapeJobResponse,
  )
  from ....exceptions import HyperbrowserError


+ class BatchScrapeManager:
+     def __init__(self, client):
+         self._client = client
+
+     async def start(
+         self, params: StartBatchScrapeJobParams
+     ) -> StartBatchScrapeJobResponse:
+         response = await self._client.transport.post(
+             self._client._build_url("/scrape/batch"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartBatchScrapeJobResponse(**response.data)
+
+     async def get(
+         self, job_id: str, params: GetBatchScrapeJobParams = GetBatchScrapeJobParams()
+     ) -> BatchScrapeJobResponse:
+         response = await self._client.transport.get(
+             self._client._build_url(f"/scrape/batch/{job_id}"), params=params.__dict__
+         )
+         return BatchScrapeJobResponse(**response.data)
+
+     async def start_and_wait(
+         self, params: StartBatchScrapeJobParams, return_all_pages: bool = True
+     ) -> BatchScrapeJobResponse:
+         job_start_resp = await self.start(params)
+         job_id = job_start_resp.job_id
+         if not job_id:
+             raise HyperbrowserError("Failed to start batch scrape job")
+
+         job_response: BatchScrapeJobResponse
+         failures = 0
+         while True:
+             try:
+                 job_response = await self.get(
+                     job_id, params=GetBatchScrapeJobParams(batch_size=1)
+                 )
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     break
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
+             await asyncio.sleep(2)
+
+         failures = 0
+         if not return_all_pages:
+             while True:
+                 try:
+                     job_response = await self.get(job_id)
+                     return job_response
+                 except Exception as e:
+                     failures += 1
+                     if failures >= POLLING_ATTEMPTS:
+                         raise HyperbrowserError(
+                             f"Failed to get batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                         )
+                 await asyncio.sleep(0.5)
+
+         failures = 0
+         job_response.current_page_batch = 0
+         job_response.data = []
+         while job_response.current_page_batch < job_response.total_page_batches:
+             try:
+                 tmp_job_response = await self.get(
+                     job_id,
+                     params=GetBatchScrapeJobParams(
+                         page=job_response.current_page_batch + 1, batch_size=100
+                     ),
+                 )
+                 if tmp_job_response.data:
+                     job_response.data.extend(tmp_job_response.data)
+                 job_response.current_page_batch = tmp_job_response.current_page_batch
+                 job_response.total_scraped_pages = tmp_job_response.total_scraped_pages
+                 job_response.total_page_batches = tmp_job_response.total_page_batches
+                 job_response.batch_size = tmp_job_response.batch_size
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to get batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
+             await asyncio.sleep(0.5)
+
+         return job_response
+
+
  class ScrapeManager:
      def __init__(self, client):
          self._client = client
+         self.batch = BatchScrapeManager(client)

      async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
          response = await self._client.transport.post(
@@ -27,10 +127,24 @@ class ScrapeManager:

      async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
          job_start_resp = await self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start scrape job")
+
+         failures = 0
          while True:
-             job_response = await self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 return job_response
+             try:
+                 job_response = await self.get(job_id)
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     return job_response
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              await asyncio.sleep(2)
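
The async ScrapeManager now exposes the new batch API as scrape.batch. A minimal usage sketch follows; the manager path and model names come from the diff above, while the AsyncHyperbrowser entry point and its api_key argument are assumptions about the rest of the SDK:

    import asyncio

    from hyperbrowser import AsyncHyperbrowser  # assumed client entry point
    from hyperbrowser.models.scrape import StartBatchScrapeJobParams


    async def main() -> None:
        client = AsyncHyperbrowser(api_key="YOUR_API_KEY")  # placeholder key
        # One batch job scrapes several URLs; start_and_wait polls with the new
        # retry logic and then pages through the results 100 at a time.
        job = await client.scrape.batch.start_and_wait(
            StartBatchScrapeJobParams(urls=["https://example.com", "https://example.org"])
        )
        print(job.status, job.total_scraped_pages)


    asyncio.run(main())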

hyperbrowser/client/managers/sync_manager/crawl.py

@@ -1,5 +1,7 @@
  import time
  from typing import Optional
+
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from ....models.crawl import (
      CrawlJobResponse,
      GetCrawlJobParams,
@@ -32,29 +34,69 @@ class CrawlManager:
          self, params: StartCrawlJobParams, return_all_pages: bool = True
      ) -> CrawlJobResponse:
          job_start_resp = self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start crawl job")

          job_response: CrawlJobResponse
+         failures = 0
          while True:
-             job_response = self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 break
+             try:
+                 job_response = self.get(
+                     job_id,
+                     params=GetCrawlJobParams(batch_size=1),
+                 )
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     break
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              time.sleep(2)

+         failures = 0
          if not return_all_pages:
-             return job_response
+             while True:
+                 try:
+                     job_response = self.get(job_id)
+                     return job_response
+                 except Exception as e:
+                     failures += 1
+                     if failures >= POLLING_ATTEMPTS:
+                         raise HyperbrowserError(
+                             f"Failed to get crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                         )
+                 time.sleep(0.5)

+         failures = 0
+         job_response.current_page_batch = 0
+         job_response.data = []
          while job_response.current_page_batch < job_response.total_page_batches:
-             tmp_job_response = self.get(
-                 job_start_resp.job_id,
-                 GetCrawlJobParams(page=job_response.current_page_batch + 1),
-             )
-             if tmp_job_response.data:
-                 job_response.data.extend(tmp_job_response.data)
-             job_response.current_page_batch = tmp_job_response.current_page_batch
-             job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
-             job_response.total_page_batches = tmp_job_response.total_page_batches
-             job_response.batch_size = tmp_job_response.batch_size
+             try:
+                 tmp_job_response = self.get(
+                     job_id,
+                     GetCrawlJobParams(
+                         page=job_response.current_page_batch + 1, batch_size=100
+                     ),
+                 )
+                 if tmp_job_response.data:
+                     job_response.data.extend(tmp_job_response.data)
+                 job_response.current_page_batch = tmp_job_response.current_page_batch
+                 job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+                 job_response.total_page_batches = tmp_job_response.total_page_batches
+                 job_response.batch_size = tmp_job_response.batch_size
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to get crawl batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              time.sleep(0.5)
+
          return job_response

hyperbrowser/client/managers/sync_manager/extract.py

@@ -1,5 +1,6 @@
  import time
  from hyperbrowser.exceptions import HyperbrowserError
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from hyperbrowser.models.extract import (
      ExtractJobResponse,
      StartExtractJobParams,
@@ -32,10 +33,23 @@ class ExtractManager:

      def start_and_wait(self, params: StartExtractJobParams) -> ExtractJobResponse:
          job_start_resp = self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start extract job")
+
+         failures = 0
          while True:
-             job_response = self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 return job_response
+             try:
+                 job_response = self.get(job_start_resp.job_id)
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     return job_response
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll extract job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              time.sleep(2)

hyperbrowser/client/managers/sync_manager/scrape.py

@@ -1,16 +1,114 @@
  import time
  from typing import Optional
+
+ from hyperbrowser.models.consts import POLLING_ATTEMPTS
  from ....models.scrape import (
+     BatchScrapeJobResponse,
+     GetBatchScrapeJobParams,
      ScrapeJobResponse,
+     StartBatchScrapeJobParams,
+     StartBatchScrapeJobResponse,
      StartScrapeJobParams,
      StartScrapeJobResponse,
  )
  from ....exceptions import HyperbrowserError


+ class BatchScrapeManager:
+     def __init__(self, client):
+         self._client = client
+
+     def start(self, params: StartBatchScrapeJobParams) -> StartBatchScrapeJobResponse:
+         response = self._client.transport.post(
+             self._client._build_url("/scrape/batch"),
+             data=params.model_dump(exclude_none=True, by_alias=True),
+         )
+         return StartBatchScrapeJobResponse(**response.data)
+
+     def get(
+         self, job_id: str, params: GetBatchScrapeJobParams = GetBatchScrapeJobParams()
+     ) -> BatchScrapeJobResponse:
+         response = self._client.transport.get(
+             self._client._build_url(f"/scrape/batch/{job_id}"), params=params.__dict__
+         )
+         return BatchScrapeJobResponse(**response.data)
+
+     def start_and_wait(
+         self, params: StartBatchScrapeJobParams, return_all_pages: bool = True
+     ) -> BatchScrapeJobResponse:
+         job_start_resp = self.start(params)
+         job_id = job_start_resp.job_id
+         if not job_id:
+             raise HyperbrowserError("Failed to start batch scrape job")
+
+         job_response: BatchScrapeJobResponse
+         failures = 0
+         while True:
+             try:
+                 job_response = self.get(
+                     job_id, params=GetBatchScrapeJobParams(batch_size=1)
+                 )
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     break
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
+             time.sleep(2)
+
+         failures = 0
+         if not return_all_pages:
+             while True:
+                 try:
+                     job_response = self.get(job_id)
+                     return job_response
+                 except Exception as e:
+                     failures += 1
+                     if failures >= POLLING_ATTEMPTS:
+                         raise HyperbrowserError(
+                             f"Failed to get batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                         )
+                 time.sleep(0.5)
+
+         failures = 0
+         job_response.current_page_batch = 0
+         job_response.data = []
+         while job_response.current_page_batch < job_response.total_page_batches:
+             try:
+                 tmp_job_response = self.get(
+                     job_start_resp.job_id,
+                     GetBatchScrapeJobParams(
+                         page=job_response.current_page_batch + 1, batch_size=100
+                     ),
+                 )
+                 if tmp_job_response.data:
+                     job_response.data.extend(tmp_job_response.data)
+                 job_response.current_page_batch = tmp_job_response.current_page_batch
+                 job_response.total_scraped_pages = tmp_job_response.total_scraped_pages
+                 job_response.total_page_batches = tmp_job_response.total_page_batches
+                 job_response.batch_size = tmp_job_response.batch_size
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to get batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
+             time.sleep(0.5)
+
+         return job_response
+
+
  class ScrapeManager:
      def __init__(self, client):
          self._client = client
+         self.batch = BatchScrapeManager(client)

      def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
          response = self._client.transport.post(
@@ -27,10 +125,24 @@ class ScrapeManager:

      def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
          job_start_resp = self.start(params)
-         if not job_start_resp.job_id:
+         job_id = job_start_resp.job_id
+         if not job_id:
              raise HyperbrowserError("Failed to start scrape job")
+
+         failures = 0
          while True:
-             job_response = self.get(job_start_resp.job_id)
-             if job_response.status == "completed" or job_response.status == "failed":
-                 return job_response
+             try:
+                 job_response = self.get(job_id)
+                 if (
+                     job_response.status == "completed"
+                     or job_response.status == "failed"
+                 ):
+                     return job_response
+                 failures = 0
+             except Exception as e:
+                 failures += 1
+                 if failures >= POLLING_ATTEMPTS:
+                     raise HyperbrowserError(
+                         f"Failed to poll scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                     )
              time.sleep(2)

hyperbrowser/models/consts.py

@@ -2,6 +2,8 @@ from typing import Literal

  ScrapeFormat = Literal["markdown", "html", "links", "screenshot"]
  ScrapeWaitUntil = Literal["load", "domcontentloaded", "networkidle"]
+ ScrapePageStatus = Literal["completed", "failed", "pending", "running"]
+ POLLING_ATTEMPTS = 5

  Country = Literal[
      "AD",

hyperbrowser/models/crawl.py

@@ -18,7 +18,7 @@ class StartCrawlJobParams(BaseModel):
      )

      url: str
-     max_pages: int = Field(default=10, ge=1, serialization_alias="maxPages")
+     max_pages: Optional[int] = Field(default=None, ge=1, serialization_alias="maxPages")
      follow_links: bool = Field(default=True, serialization_alias="followLinks")
      ignore_sitemap: bool = Field(default=False, serialization_alias="ignoreSitemap")
      exclude_patterns: List[str] = Field(
@@ -69,7 +69,7 @@ class GetCrawlJobParams(BaseModel):

      page: Optional[int] = Field(default=None, serialization_alias="page")
      batch_size: Optional[int] = Field(
-         default=20, ge=1, le=30, serialization_alias="batchSize"
+         default=None, ge=1, serialization_alias="batchSize"
      )
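
max_pages and batch_size now default to None, and the start helpers serialize params with model_dump(exclude_none=True, by_alias=True) (as the scrape manager above shows), so omitted values are simply not sent and the service presumably applies its own defaults. Illustrative construction:

    from hyperbrowser.models.crawl import StartCrawlJobParams

    params = StartCrawlJobParams(url="https://example.com")
    # No "maxPages" key appears in the payload when max_pages is left unset.
    print(params.model_dump(exclude_none=True, by_alias=True))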
 

hyperbrowser/models/extract.py

@@ -17,13 +17,17 @@ class StartExtractJobParams(BaseModel):
      )

      urls: List[str]
-     prompt: Optional[str] = None
+     system_prompt: Optional[str] = Field(
+         default=None, serialization_alias="systemPrompt"
+     )
+     prompt: Optional[str] = Field(default=None, serialization_alias="prompt")
      schema_: Optional[Any] = pydantic.Field(
          None, alias="schema", serialization_alias="schema"
      )
      session_options: Optional[CreateSessionParams] = Field(
          default=None, serialization_alias="sessionOptions"
      )
+     max_links: Optional[int] = Field(default=None, serialization_alias="maxLinks")


  class StartExtractJobResponse(BaseModel):
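
StartExtractJobParams gains system_prompt and max_links. A small construction sketch using only fields shown in this diff; the values are placeholders:

    from hyperbrowser.models.extract import StartExtractJobParams

    params = StartExtractJobParams(
        urls=["https://example.com/pricing"],
        system_prompt="You are a careful data-extraction assistant.",
        prompt="Extract the product name and the monthly price.",
        max_links=10,
    )
    # serialization_alias maps the new fields to systemPrompt / maxLinks.
    print(params.model_dump(exclude_none=True, by_alias=True))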

hyperbrowser/models/scrape.py

@@ -1,7 +1,7 @@
  from typing import List, Literal, Optional, Union
  from pydantic import BaseModel, ConfigDict, Field

- from hyperbrowser.models.consts import ScrapeFormat, ScrapeWaitUntil
+ from hyperbrowser.models.consts import ScrapeFormat, ScrapePageStatus, ScrapeWaitUntil
  from hyperbrowser.models.session import CreateSessionParams

  ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
@@ -84,3 +84,78 @@ class ScrapeJobResponse(BaseModel):
      status: ScrapeJobStatus
      error: Optional[str] = None
      data: Optional[ScrapeJobData] = None
+
+
+ class StartBatchScrapeJobParams(BaseModel):
+     """
+     Parameters for creating a new batch scrape job.
+     """
+
+     urls: List[str]
+     session_options: Optional[CreateSessionParams] = Field(
+         default=None, serialization_alias="sessionOptions"
+     )
+     scrape_options: Optional[ScrapeOptions] = Field(
+         default=None, serialization_alias="scrapeOptions"
+     )
+
+
+ class ScrapedPage(BaseModel):
+     """
+     A scraped page.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     url: str
+     status: ScrapePageStatus
+     error: Optional[str] = None
+     metadata: Optional[dict[str, Union[str, list[str]]]] = None
+     html: Optional[str] = None
+     markdown: Optional[str] = None
+     links: Optional[List[str]] = None
+     screenshot: Optional[str] = None
+
+
+ class GetBatchScrapeJobParams(BaseModel):
+     """
+     Parameters for getting a batch scrape job.
+     """
+
+     page: Optional[int] = Field(default=None, serialization_alias="page")
+     batch_size: Optional[int] = Field(
+         default=None, ge=1, serialization_alias="batchSize"
+     )
+
+
+ class StartBatchScrapeJobResponse(BaseModel):
+     """
+     Response from starting a batch scrape job.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     job_id: str = Field(alias="jobId")
+
+
+ class BatchScrapeJobResponse(BaseModel):
+     """
+     Response from getting a batch scrape job.
+     """
+
+     model_config = ConfigDict(
+         populate_by_alias=True,
+     )
+
+     job_id: str = Field(alias="jobId")
+     status: ScrapeJobStatus
+     error: Optional[str] = None
+     data: Optional[List[ScrapedPage]] = Field(alias="data")
+     total_scraped_pages: int = Field(alias="totalScrapedPages")
+     total_page_batches: int = Field(alias="totalPageBatches")
+     current_page_batch: int = Field(alias="currentPageBatch")
+     batch_size: int = Field(alias="batchSize")
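
Batch scrape results come back in pages of up to batch_size items, selected with GetBatchScrapeJobParams. A sketch of fetching one page with the sync manager; the client.scrape.batch path is taken from the diff, while the Hyperbrowser client name and api_key argument are assumptions:

    from hyperbrowser import Hyperbrowser  # assumed sync client entry point
    from hyperbrowser.models.scrape import (
        GetBatchScrapeJobParams,
        StartBatchScrapeJobParams,
    )

    client = Hyperbrowser(api_key="YOUR_API_KEY")  # placeholder key
    started = client.scrape.batch.start(
        StartBatchScrapeJobParams(urls=["https://example.com"])
    )

    # Once the job has finished, fetch a specific page of results.
    first_page = client.scrape.batch.get(
        started.job_id, params=GetBatchScrapeJobParams(page=1, batch_size=100)
    )
    print(first_page.current_page_batch, first_page.total_page_batches)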

hyperbrowser-0.23.0.dist-info/METADATA → hyperbrowser-0.25.0.dist-info/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: hyperbrowser
- Version: 0.23.0
+ Version: 0.25.0
  Summary: Python SDK for hyperbrowser
  License: MIT
  Author: Nikhil Shahi
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: httpx (>=0.23.0,<1)
- Requires-Dist: pydantic (>=1.9.0,<3)
+ Requires-Dist: pydantic (>=2.0,<3)
  Project-URL: Homepage, https://github.com/hyperbrowserai/python-sdk
  Project-URL: Repository, https://github.com/hyperbrowserai/python-sdk
  Description-Content-Type: text/markdown

hyperbrowser-0.23.0.dist-info/RECORD → hyperbrowser-0.25.0.dist-info/RECORD

@@ -1,27 +1,27 @@
  hyperbrowser/__init__.py,sha256=zWGcLhqhvWy6BTwuNpzWK1-0LpIn311ks-4U9nrsb7Y,187
  hyperbrowser/client/async_client.py,sha256=L7mbzg_wOVMneOm6-bA5XaBoVWUmybuRogE4YEMR5Bg,1389
  hyperbrowser/client/base.py,sha256=9gFma7RdvJBUlDCqr8tZd315UPrjn4ldU4B0-Y-L4O4,1268
- hyperbrowser/client/managers/async_manager/crawl.py,sha256=hBS2WwfE0-ZopCW9PjP30meU5iTDdRViFl1C1OF1hVU,2291
+ hyperbrowser/client/managers/async_manager/crawl.py,sha256=n0KhHarVpPFwjjlc9UnhSfD5vvdC2kpcXHVr8vPiKxE,3870
  hyperbrowser/client/managers/async_manager/extension.py,sha256=a-xYtXXdCspukYtsguRgjEoQ8E_kzzA2tQAJtIyCtAs,1439
- hyperbrowser/client/managers/async_manager/extract.py,sha256=gjrAixnbBHjGlJNr6DGQIa5MbVy3MevT-9liSX00Zg4,1614
+ hyperbrowser/client/managers/async_manager/extract.py,sha256=9p8dGLYmoow7smnQ0BTRN6diDmIayjW-EPRGzzvheZk,2102
  hyperbrowser/client/managers/async_manager/profile.py,sha256=f2uX2GGYdgL0fyzB0jnI-L-nWleqG6cwZ0pc1K1zdQY,1244
- hyperbrowser/client/managers/async_manager/scrape.py,sha256=7FdYS_NNEpvB9z3ShGZaZxNryKHm02MQR-g9diadGhA,1319
+ hyperbrowser/client/managers/async_manager/scrape.py,sha256=Lr6oicTX3U1xooczUVQlsq2mzIhAKEW-A9909ZdSzKc,5720
  hyperbrowser/client/managers/async_manager/session.py,sha256=ObJhz1IkCCIQLwmztQ-M7lCKzKsVDr-eWCFnan2d9rQ,1692
- hyperbrowser/client/managers/sync_manager/crawl.py,sha256=lnMtBmOPcamjtvzH4BAnWbBTGbKBmHGUQiMnnZlj2tg,2222
+ hyperbrowser/client/managers/sync_manager/crawl.py,sha256=uAVmjhUbamVnzAAyfswq1bdBR5c7JrfGVvPdVmmw4R8,3799
  hyperbrowser/client/managers/sync_manager/extension.py,sha256=1YoyTZtMo43trl9jAsXv95aor0nBHiJEmLva39jFW-k,1415
- hyperbrowser/client/managers/sync_manager/extract.py,sha256=rxDFjDG3tw3llaG5AAkUA3y17krevqeiTHlcNsLvaZ0,1560
+ hyperbrowser/client/managers/sync_manager/extract.py,sha256=XocMKC0qAarRpE12KU4m_mi1KhUOHp3-TK4dLeiIn6E,2034
  hyperbrowser/client/managers/sync_manager/profile.py,sha256=va6mlhQ5SKZa-viEGFNzV6UBZEP5SqwVp32_oxC_NzM,1196
- hyperbrowser/client/managers/sync_manager/scrape.py,sha256=DxSvdHa-z2P_rvNUwmRfU4iQz19wiEi_M2YmBQZfLyk,1265
+ hyperbrowser/client/managers/sync_manager/scrape.py,sha256=y4YB-NusXRi3brE7jBGRBHGANY-_-aHMBirKuuU6mdg,5579
  hyperbrowser/client/managers/sync_manager/session.py,sha256=74cekrDaGKW5WlP_0Qrqlk-xW2p1u4s63E-D08a4A2s,1610
  hyperbrowser/client/sync.py,sha256=HgglJY9pNdW987OzNO_5dSZgj1AfAqovCmY99WYQD2E,1213
  hyperbrowser/config.py,sha256=2J6GYNR_83vzJZ6jEV-LXO1U-q6DHIrfyAU0WrUPhw8,625
  hyperbrowser/exceptions.py,sha256=SUUkptK2OL36xDORYmSicaTYR7pMbxeWAjAgz35xnM8,1171
- hyperbrowser/models/consts.py,sha256=WO-T7X1PjLedDig7GQuV31ooSwA7ytA2q3cLB8yEsgI,5053
- hyperbrowser/models/crawl.py,sha256=cP8XmRfTqIJLAnyIHn8MXnHrYokYlYj-JagehGcY0Xc,2603
+ hyperbrowser/models/consts.py,sha256=L_6A8JhqLVuR-7p1gMGU6X-eF6KKRM0QIofc_J77Vgw,5146
+ hyperbrowser/models/crawl.py,sha256=22hP_DPZMfa2MAfOeJ90qj5CH4rr7VtQT1gCQqO8jO8,2610
  hyperbrowser/models/extension.py,sha256=nXjKXKt9R7RxyZ4hd3EvfqZsEGy_ufh1r5j2mqCLykQ,804
- hyperbrowser/models/extract.py,sha256=LZM4bZ96C_-8Mhf4f4LLMQYmBpvxvtOHs0AkFbswncc,1214
+ hyperbrowser/models/extract.py,sha256=24rNo0BzczRPTb3JOcz_WHo5Lz3rBc1Z3-l4EhhroI0,1447
  hyperbrowser/models/profile.py,sha256=KRb_LNxxW00AsD_thzzthFS51vInJawt1RcoNz4Q9i8,1322
- hyperbrowser/models/scrape.py,sha256=fOzT5jp0nW9x_qaNVE6zEGKMUvGuuwr35ZuHoJd0Ed4,2268
+ hyperbrowser/models/scrape.py,sha256=HAQJk8KOTcTb9NuD_106tlWlUj55SOhZ6j6vzoQbsZ4,4159
  hyperbrowser/models/session.py,sha256=pBSXnwhiibx8iW7fuxm25p5YFrq1-i_wCQA7mWgFgM0,5294
  hyperbrowser/tools/__init__.py,sha256=OUaTUM-kiigYmzfbpx3XQhzMK1xT1wd8cqXgR4znsAY,2021
  hyperbrowser/tools/anthropic.py,sha256=5pEkJm1H-26GToTwXsDjo4GGqVy1hATws4Pg59mumow,1667
@@ -30,7 +30,7 @@ hyperbrowser/tools/schema.py,sha256=cR2MUX8TvUyN8TnCyeX0pccp4AmPjrdaKzuAXRThOJo,
  hyperbrowser/transport/async_transport.py,sha256=MIPJvilvZWBPXLZ96c9OohuN6TN9DaaU0EnyleG3q6g,4017
  hyperbrowser/transport/base.py,sha256=ildpMrDiM8nvrSGrH2LTOafmB17T7PQB_NQ1ODA378U,1703
  hyperbrowser/transport/sync.py,sha256=ER844H_OCPCrnmbc58cuqphWTVvCZJQn7-D7ZenCr3Y,3311
- hyperbrowser-0.23.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
- hyperbrowser-0.23.0.dist-info/METADATA,sha256=s87FmgXprS0mCVGtlyGsj-6vwWrY5dCH9fKUlVyT2wc,3440
- hyperbrowser-0.23.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
- hyperbrowser-0.23.0.dist-info/RECORD,,
+ hyperbrowser-0.25.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
+ hyperbrowser-0.25.0.dist-info/METADATA,sha256=SBCSETGrHAvM4kFQAYFeI5GFHl1nc9nd8Fz84DC10jo,3438
+ hyperbrowser-0.25.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+ hyperbrowser-0.25.0.dist-info/RECORD,,