hyperbrowser 0.23.0__tar.gz → 0.24.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hyperbrowser might be problematic.
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/PKG-INFO +1 -1
- hyperbrowser-0.24.0/hyperbrowser/client/managers/async_manager/crawl.py +101 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/async_manager/extract.py +19 -4
- hyperbrowser-0.24.0/hyperbrowser/client/managers/async_manager/scrape.py +150 -0
- hyperbrowser-0.24.0/hyperbrowser/client/managers/sync_manager/crawl.py +102 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/sync_manager/extract.py +18 -4
- hyperbrowser-0.24.0/hyperbrowser/client/managers/sync_manager/scrape.py +148 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/consts.py +2 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/crawl.py +2 -2
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/extract.py +5 -1
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/scrape.py +76 -1
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/pyproject.toml +1 -1
- hyperbrowser-0.23.0/hyperbrowser/client/managers/async_manager/crawl.py +0 -60
- hyperbrowser-0.23.0/hyperbrowser/client/managers/async_manager/scrape.py +0 -36
- hyperbrowser-0.23.0/hyperbrowser/client/managers/sync_manager/crawl.py +0 -60
- hyperbrowser-0.23.0/hyperbrowser/client/managers/sync_manager/scrape.py +0 -36
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/LICENSE +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/README.md +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/__init__.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/async_client.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/base.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/async_manager/extension.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/async_manager/profile.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/async_manager/session.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/sync_manager/extension.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/sync_manager/profile.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/sync_manager/session.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/sync.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/config.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/exceptions.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/extension.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/profile.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/session.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/tools/__init__.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/tools/anthropic.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/tools/openai.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/tools/schema.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/transport/async_transport.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/transport/base.py +0 -0
- {hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/transport/sync.py +0 -0
hyperbrowser-0.24.0/hyperbrowser/client/managers/async_manager/crawl.py
@@ -0,0 +1,101 @@
+import asyncio
+
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    async def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    async def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = await self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        failures = 0
+        while True:
+            try:
+                job_response = await self.get(
+                    job_id,
+                    params=GetCrawlJobParams(batch_size=1),
+                )
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    break
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            await asyncio.sleep(2)
+
+        failures = 0
+        if not return_all_pages:
+            while True:
+                try:
+                    job_response = await self.get(job_id)
+                    return job_response
+                except Exception as e:
+                    failures += 1
+                    if failures >= POLLING_ATTEMPTS:
+                        raise HyperbrowserError(
+                            f"Failed to get crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                        )
+                await asyncio.sleep(0.5)
+
+        failures = 0
+        job_response.current_page_batch = 0
+        job_response.data = []
+        while job_response.current_page_batch < job_response.total_page_batches:
+            try:
+                tmp_job_response = await self.get(
+                    job_start_resp.job_id,
+                    GetCrawlJobParams(
+                        page=job_response.current_page_batch + 1, batch_size=100
+                    ),
+                )
+                if tmp_job_response.data:
+                    job_response.data.extend(tmp_job_response.data)
+                job_response.current_page_batch = tmp_job_response.current_page_batch
+                job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+                job_response.total_page_batches = tmp_job_response.total_page_batches
+                job_response.batch_size = tmp_job_response.batch_size
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to get crawl batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            await asyncio.sleep(0.5)
+
+        return job_response
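
The new CrawlManager polls with batch_size=1 until the job finishes, then re-fetches the results in batches of 100 and stitches them into one CrawlJobResponse. A minimal usage sketch, assuming the SDK exposes this manager as client.crawl on an AsyncHyperbrowser client constructed with an api_key (those names are assumptions; only the manager itself appears in this diff):

import asyncio

from hyperbrowser import AsyncHyperbrowser  # assumed client entry point, not part of this diff
from hyperbrowser.models.crawl import StartCrawlJobParams

async def main() -> None:
    client = AsyncHyperbrowser(api_key="YOUR_API_KEY")  # hypothetical credentials
    # Waits for completion, then collects every page batch into job.data.
    job = await client.crawl.start_and_wait(
        StartCrawlJobParams(url="https://example.com", max_pages=5),
        return_all_pages=True,
    )
    print(job.status, job.total_crawled_pages)

asyncio.run(main())
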
{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/async_manager/extract.py
RENAMED
@@ -1,5 +1,6 @@
 import asyncio
 from hyperbrowser.exceptions import HyperbrowserError
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
 from hyperbrowser.models.extract import (
     ExtractJobResponse,
     StartExtractJobParams,
@@ -32,10 +33,24 @@ class ExtractManager:
 
     async def start_and_wait(self, params: StartExtractJobParams) -> ExtractJobResponse:
         job_start_resp = await self.start(params)
-        if not job_start_resp.job_id:
+        job_id = job_start_resp.job_id
+        if not job_id:
             raise HyperbrowserError("Failed to start extract job")
+
+        failures = 0
         while True:
-            job_response = await self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                return job_response
+            try:
+                job_response = await self.get(job_id)
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    return job_response
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll extract job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
             await asyncio.sleep(2)

hyperbrowser-0.24.0/hyperbrowser/client/managers/async_manager/scrape.py
@@ -0,0 +1,150 @@
+import asyncio
+from typing import Optional
+
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
+from ....models.scrape import (
+    BatchScrapeJobResponse,
+    GetBatchScrapeJobParams,
+    ScrapeJobResponse,
+    StartBatchScrapeJobParams,
+    StartBatchScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class BatchScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    async def start(
+        self, params: StartBatchScrapeJobParams
+    ) -> StartBatchScrapeJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/scrape/batch"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartBatchScrapeJobResponse(**response.data)
+
+    async def get(
+        self, job_id: str, params: GetBatchScrapeJobParams = GetBatchScrapeJobParams()
+    ) -> BatchScrapeJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/scrape/batch/{job_id}"), params=params.__dict__
+        )
+        return BatchScrapeJobResponse(**response.data)
+
+    async def start_and_wait(
+        self, params: StartBatchScrapeJobParams, return_all_pages: bool = True
+    ) -> BatchScrapeJobResponse:
+        job_start_resp = await self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start batch scrape job")
+
+        job_response: BatchScrapeJobResponse
+        failures = 0
+        while True:
+            try:
+                job_response = await self.get(
+                    job_id, params=GetBatchScrapeJobParams(batch_size=1)
+                )
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    break
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            await asyncio.sleep(2)
+
+        failures = 0
+        if not return_all_pages:
+            while True:
+                try:
+                    job_response = await self.get(job_id)
+                    return job_response
+                except Exception as e:
+                    failures += 1
+                    if failures >= POLLING_ATTEMPTS:
+                        raise HyperbrowserError(
+                            f"Failed to get batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                        )
+                await asyncio.sleep(0.5)
+
+        failures = 0
+        job_response.current_page_batch = 0
+        job_response.data = []
+        while job_response.current_page_batch < job_response.total_page_batches:
+            try:
+                tmp_job_response = await self.get(
+                    job_id,
+                    params=GetBatchScrapeJobParams(
+                        page=job_response.current_page_batch + 1, batch_size=100
+                    ),
+                )
+                if tmp_job_response.data:
+                    job_response.data.extend(tmp_job_response.data)
+                job_response.current_page_batch = tmp_job_response.current_page_batch
+                job_response.total_scraped_pages = tmp_job_response.total_scraped_pages
+                job_response.total_page_batches = tmp_job_response.total_page_batches
+                job_response.batch_size = tmp_job_response.batch_size
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to get batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            await asyncio.sleep(0.5)
+
+        return job_response
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+        self.batch = BatchScrapeManager(client)
+
+    async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = await self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    async def get(self, job_id: str) -> ScrapeJobResponse:
+        response = await self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = await self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+
+        failures = 0
+        while True:
+            try:
+                job_response = await self.get(job_id)
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    return job_response
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            await asyncio.sleep(2)
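
BatchScrapeManager applies the same polling-and-pagination pattern to batch scrape jobs and is wired onto ScrapeManager as the batch attribute. A hedged sketch of calling it, again assuming an AsyncHyperbrowser client that exposes the manager as client.scrape (the client construction is an assumption; the .batch wiring is taken from the code above):

import asyncio

from hyperbrowser import AsyncHyperbrowser  # assumed client entry point, not part of this diff
from hyperbrowser.models.scrape import StartBatchScrapeJobParams

async def main() -> None:
    client = AsyncHyperbrowser(api_key="YOUR_API_KEY")  # hypothetical credentials
    # One job scrapes several URLs; results come back paged, 100 at a time.
    job = await client.scrape.batch.start_and_wait(
        StartBatchScrapeJobParams(urls=["https://example.com", "https://example.org"])
    )
    for page in job.data or []:
        print(page.url, page.status)

asyncio.run(main())
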

hyperbrowser-0.24.0/hyperbrowser/client/managers/sync_manager/crawl.py
@@ -0,0 +1,102 @@
+import time
+from typing import Optional
+
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
+from ....models.crawl import (
+    CrawlJobResponse,
+    GetCrawlJobParams,
+    StartCrawlJobParams,
+    StartCrawlJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class CrawlManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/crawl"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartCrawlJobResponse(**response.data)
+
+    def get(
+        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
+    ) -> CrawlJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
+        )
+        return CrawlJobResponse(**response.data)
+
+    def start_and_wait(
+        self, params: StartCrawlJobParams, return_all_pages: bool = True
+    ) -> CrawlJobResponse:
+        job_start_resp = self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start crawl job")
+
+        job_response: CrawlJobResponse
+        failures = 0
+        while True:
+            try:
+                job_response = self.get(
+                    job_id,
+                    params=GetCrawlJobParams(batch_size=1),
+                )
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    break
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            time.sleep(2)
+
+        failures = 0
+        if not return_all_pages:
+            while True:
+                try:
+                    job_response = self.get(job_id)
+                    return job_response
+                except Exception as e:
+                    failures += 1
+                    if failures >= POLLING_ATTEMPTS:
+                        raise HyperbrowserError(
+                            f"Failed to get crawl job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                        )
+                time.sleep(0.5)
+
+        failures = 0
+        job_response.current_page_batch = 0
+        job_response.data = []
+        while job_response.current_page_batch < job_response.total_page_batches:
+            try:
+                tmp_job_response = self.get(
+                    job_id,
+                    GetCrawlJobParams(
+                        page=job_response.current_page_batch + 1, batch_size=100
+                    ),
+                )
+                if tmp_job_response.data:
+                    job_response.data.extend(tmp_job_response.data)
+                job_response.current_page_batch = tmp_job_response.current_page_batch
+                job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
+                job_response.total_page_batches = tmp_job_response.total_page_batches
+                job_response.batch_size = tmp_job_response.batch_size
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to get crawl batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            time.sleep(0.5)
+
+        return job_response
{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/client/managers/sync_manager/extract.py
RENAMED
@@ -1,5 +1,6 @@
 import time
 from hyperbrowser.exceptions import HyperbrowserError
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
 from hyperbrowser.models.extract import (
     ExtractJobResponse,
     StartExtractJobParams,
@@ -32,10 +33,23 @@ class ExtractManager:
 
     def start_and_wait(self, params: StartExtractJobParams) -> ExtractJobResponse:
         job_start_resp = self.start(params)
-        if not job_start_resp.job_id:
+        job_id = job_start_resp.job_id
+        if not job_id:
             raise HyperbrowserError("Failed to start extract job")
+
+        failures = 0
         while True:
-            job_response = self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                return job_response
+            try:
+                job_response = self.get(job_start_resp.job_id)
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    return job_response
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll extract job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
             time.sleep(2)

hyperbrowser-0.24.0/hyperbrowser/client/managers/sync_manager/scrape.py
@@ -0,0 +1,148 @@
+import time
+from typing import Optional
+
+from hyperbrowser.models.consts import POLLING_ATTEMPTS
+from ....models.scrape import (
+    BatchScrapeJobResponse,
+    GetBatchScrapeJobParams,
+    ScrapeJobResponse,
+    StartBatchScrapeJobParams,
+    StartBatchScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
+)
+from ....exceptions import HyperbrowserError
+
+
+class BatchScrapeManager:
+    def __init__(self, client):
+        self._client = client
+
+    def start(self, params: StartBatchScrapeJobParams) -> StartBatchScrapeJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/scrape/batch"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartBatchScrapeJobResponse(**response.data)
+
+    def get(
+        self, job_id: str, params: GetBatchScrapeJobParams = GetBatchScrapeJobParams()
+    ) -> BatchScrapeJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/scrape/batch/{job_id}"), params=params.__dict__
+        )
+        return BatchScrapeJobResponse(**response.data)
+
+    def start_and_wait(
+        self, params: StartBatchScrapeJobParams, return_all_pages: bool = True
+    ) -> BatchScrapeJobResponse:
+        job_start_resp = self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start batch scrape job")
+
+        job_response: BatchScrapeJobResponse
+        failures = 0
+        while True:
+            try:
+                job_response = self.get(
+                    job_id, params=GetBatchScrapeJobParams(batch_size=1)
+                )
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    break
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            time.sleep(2)
+
+        failures = 0
+        if not return_all_pages:
+            while True:
+                try:
+                    job_response = self.get(job_id)
+                    return job_response
+                except Exception as e:
+                    failures += 1
+                    if failures >= POLLING_ATTEMPTS:
+                        raise HyperbrowserError(
+                            f"Failed to get batch scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                        )
+                time.sleep(0.5)
+
+        failures = 0
+        job_response.current_page_batch = 0
+        job_response.data = []
+        while job_response.current_page_batch < job_response.total_page_batches:
+            try:
+                tmp_job_response = self.get(
+                    job_start_resp.job_id,
+                    GetBatchScrapeJobParams(
+                        page=job_response.current_page_batch + 1, batch_size=100
+                    ),
+                )
+                if tmp_job_response.data:
+                    job_response.data.extend(tmp_job_response.data)
+                job_response.current_page_batch = tmp_job_response.current_page_batch
+                job_response.total_scraped_pages = tmp_job_response.total_scraped_pages
+                job_response.total_page_batches = tmp_job_response.total_page_batches
+                job_response.batch_size = tmp_job_response.batch_size
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to get batch page {job_response.current_page_batch} for job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            time.sleep(0.5)
+
+        return job_response
+
+
+class ScrapeManager:
+    def __init__(self, client):
+        self._client = client
+        self.batch = BatchScrapeManager(client)
+
+    def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
+        response = self._client.transport.post(
+            self._client._build_url("/scrape"),
+            data=params.model_dump(exclude_none=True, by_alias=True),
+        )
+        return StartScrapeJobResponse(**response.data)
+
+    def get(self, job_id: str) -> ScrapeJobResponse:
+        response = self._client.transport.get(
+            self._client._build_url(f"/scrape/{job_id}")
+        )
+        return ScrapeJobResponse(**response.data)
+
+    def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
+        job_start_resp = self.start(params)
+        job_id = job_start_resp.job_id
+        if not job_id:
+            raise HyperbrowserError("Failed to start scrape job")
+
+        failures = 0
+        while True:
+            try:
+                job_response = self.get(job_id)
+                if (
+                    job_response.status == "completed"
+                    or job_response.status == "failed"
+                ):
+                    return job_response
+                failures = 0
+            except Exception as e:
+                failures += 1
+                if failures >= POLLING_ATTEMPTS:
+                    raise HyperbrowserError(
+                        f"Failed to poll scrape job {job_id} after {POLLING_ATTEMPTS} attempts: {e}"
+                    )
+            time.sleep(2)

{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/consts.py
RENAMED
@@ -2,6 +2,8 @@ from typing import Literal
 
 ScrapeFormat = Literal["markdown", "html", "links", "screenshot"]
 ScrapeWaitUntil = Literal["load", "domcontentloaded", "networkidle"]
+ScrapePageStatus = Literal["completed", "failed", "pending", "running"]
+POLLING_ATTEMPTS = 5
 
 Country = Literal[
     "AD",

{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/crawl.py
RENAMED
@@ -18,7 +18,7 @@ class StartCrawlJobParams(BaseModel):
     )
 
     url: str
-    max_pages: int = Field(default=
+    max_pages: Optional[int] = Field(default=None, ge=1, serialization_alias="maxPages")
     follow_links: bool = Field(default=True, serialization_alias="followLinks")
     ignore_sitemap: bool = Field(default=False, serialization_alias="ignoreSitemap")
     exclude_patterns: List[str] = Field(
@@ -69,7 +69,7 @@ class GetCrawlJobParams(BaseModel):
 
     page: Optional[int] = Field(default=None, serialization_alias="page")
     batch_size: Optional[int] = Field(
-        default=
+        default=None, ge=1, serialization_alias="batchSize"
     )
 
 
{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/extract.py
RENAMED
@@ -17,13 +17,17 @@ class StartExtractJobParams(BaseModel):
     )
 
     urls: List[str]
-
+    system_prompt: Optional[str] = Field(
+        default=None, serialization_alias="systemPrompt"
+    )
+    prompt: Optional[str] = Field(default=None, serialization_alias="prompt")
     schema_: Optional[Any] = pydantic.Field(
         None, alias="schema", serialization_alias="schema"
     )
     session_options: Optional[CreateSessionParams] = Field(
         default=None, serialization_alias="sessionOptions"
     )
+    max_links: Optional[int] = Field(default=None, serialization_alias="maxLinks")
 
 
 class StartExtractJobResponse(BaseModel):
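
StartExtractJobParams now takes optional system_prompt, prompt and max_links fields alongside the existing schema_ and session_options. A small sketch of building the updated params; the prompt text and limit are illustrative values only:

from hyperbrowser.models.extract import StartExtractJobParams

# Only urls is required; the new fields serialize to systemPrompt, prompt and maxLinks.
params = StartExtractJobParams(
    urls=["https://example.com/pricing"],
    system_prompt="You extract structured pricing data.",
    prompt="Return the name and monthly price of each plan.",
    max_links=10,
)
print(params.model_dump(exclude_none=True, by_alias=True))
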
{hyperbrowser-0.23.0 → hyperbrowser-0.24.0}/hyperbrowser/models/scrape.py
RENAMED
@@ -1,7 +1,7 @@
 from typing import List, Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Field
 
-from hyperbrowser.models.consts import ScrapeFormat, ScrapeWaitUntil
+from hyperbrowser.models.consts import ScrapeFormat, ScrapePageStatus, ScrapeWaitUntil
 from hyperbrowser.models.session import CreateSessionParams
 
 ScrapeJobStatus = Literal["pending", "running", "completed", "failed"]
@@ -84,3 +84,78 @@ class ScrapeJobResponse(BaseModel):
     status: ScrapeJobStatus
     error: Optional[str] = None
     data: Optional[ScrapeJobData] = None
+
+
+class StartBatchScrapeJobParams(BaseModel):
+    """
+    Parameters for creating a new batch scrape job.
+    """
+
+    urls: List[str]
+    session_options: Optional[CreateSessionParams] = Field(
+        default=None, serialization_alias="sessionOptions"
+    )
+    scrape_options: Optional[ScrapeOptions] = Field(
+        default=None, serialization_alias="scrapeOptions"
+    )
+
+
+class ScrapedPage(BaseModel):
+    """
+    A scraped page.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    url: str
+    status: ScrapePageStatus
+    error: Optional[str] = None
+    metadata: Optional[dict[str, Union[str, list[str]]]] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    links: Optional[List[str]] = None
+    screenshot: Optional[str] = None
+
+
+class GetBatchScrapeJobParams(BaseModel):
+    """
+    Parameters for getting a batch scrape job.
+    """
+
+    page: Optional[int] = Field(default=None, serialization_alias="page")
+    batch_size: Optional[int] = Field(
+        default=None, ge=1, serialization_alias="batchSize"
+    )
+
+
+class StartBatchScrapeJobResponse(BaseModel):
+    """
+    Response from starting a batch scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    job_id: str = Field(alias="jobId")
+
+
+class BatchScrapeJobResponse(BaseModel):
+    """
+    Response from getting a batch scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    job_id: str = Field(alias="jobId")
+    status: ScrapeJobStatus
+    error: Optional[str] = None
+    data: Optional[List[ScrapedPage]] = Field(alias="data")
+    total_scraped_pages: int = Field(alias="totalScrapedPages")
+    total_page_batches: int = Field(alias="totalPageBatches")
+    current_page_batch: int = Field(alias="currentPageBatch")
+    batch_size: int = Field(alias="batchSize")
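
The batch scrape models mirror the paginated crawl response: StartBatchScrapeJobParams is the body posted by BatchScrapeManager.start, and BatchScrapeJobResponse carries one batch of ScrapedPage entries plus the pagination counters. A sketch of round-tripping them; the payload below is fabricated for illustration, not a real API response:

from hyperbrowser.models.scrape import (
    BatchScrapeJobResponse,
    StartBatchScrapeJobParams,
)

# Request model: camelCase aliases are used on the wire.
params = StartBatchScrapeJobParams(urls=["https://example.com"])
print(params.model_dump(exclude_none=True, by_alias=True))

# Response model parsed from a made-up payload shaped like the fields above.
payload = {
    "jobId": "job_123",
    "status": "completed",
    "data": [{"url": "https://example.com", "status": "completed", "markdown": "# Example"}],
    "totalScrapedPages": 1,
    "totalPageBatches": 1,
    "currentPageBatch": 1,
    "batchSize": 100,
}
job = BatchScrapeJobResponse(**payload)
print(job.total_scraped_pages, job.data[0].url if job.data else None)
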

hyperbrowser-0.23.0/hyperbrowser/client/managers/async_manager/crawl.py
@@ -1,60 +0,0 @@
-import asyncio
-from typing import Optional
-from ....models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from ....exceptions import HyperbrowserError
-
-
-class CrawlManager:
-    def __init__(self, client):
-        self._client = client
-
-    async def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
-        response = await self._client.transport.post(
-            self._client._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    async def get(
-        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-    ) -> CrawlJobResponse:
-        response = await self._client.transport.get(
-            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
-
-    async def start_and_wait(
-        self, params: StartCrawlJobParams, return_all_pages: bool = True
-    ) -> CrawlJobResponse:
-        job_start_resp = await self.start(params)
-        if not job_start_resp.job_id:
-            raise HyperbrowserError("Failed to start crawl job")
-
-        job_response: CrawlJobResponse
-        while True:
-            job_response = await self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                break
-            await asyncio.sleep(2)
-
-        if not return_all_pages:
-            return job_response
-
-        while job_response.current_page_batch < job_response.total_page_batches:
-            tmp_job_response = await self.get(
-                job_start_resp.job_id,
-                GetCrawlJobParams(page=job_response.current_page_batch + 1),
-            )
-            if tmp_job_response.data:
-                job_response.data.extend(tmp_job_response.data)
-            job_response.current_page_batch = tmp_job_response.current_page_batch
-            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
-            job_response.total_page_batches = tmp_job_response.total_page_batches
-            job_response.batch_size = tmp_job_response.batch_size
-            await asyncio.sleep(0.5)
-        return job_response

hyperbrowser-0.23.0/hyperbrowser/client/managers/async_manager/scrape.py
@@ -1,36 +0,0 @@
-import asyncio
-from typing import Optional
-from ....models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ....exceptions import HyperbrowserError
-
-
-class ScrapeManager:
-    def __init__(self, client):
-        self._client = client
-
-    async def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
-        response = await self._client.transport.post(
-            self._client._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    async def get(self, job_id: str) -> ScrapeJobResponse:
-        response = await self._client.transport.get(
-            self._client._build_url(f"/scrape/{job_id}")
-        )
-        return ScrapeJobResponse(**response.data)
-
-    async def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
-        job_start_resp = await self.start(params)
-        if not job_start_resp.job_id:
-            raise HyperbrowserError("Failed to start scrape job")
-        while True:
-            job_response = await self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                return job_response
-            await asyncio.sleep(2)

hyperbrowser-0.23.0/hyperbrowser/client/managers/sync_manager/crawl.py
@@ -1,60 +0,0 @@
-import time
-from typing import Optional
-from ....models.crawl import (
-    CrawlJobResponse,
-    GetCrawlJobParams,
-    StartCrawlJobParams,
-    StartCrawlJobResponse,
-)
-from ....exceptions import HyperbrowserError
-
-
-class CrawlManager:
-    def __init__(self, client):
-        self._client = client
-
-    def start(self, params: StartCrawlJobParams) -> StartCrawlJobResponse:
-        response = self._client.transport.post(
-            self._client._build_url("/crawl"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartCrawlJobResponse(**response.data)
-
-    def get(
-        self, job_id: str, params: GetCrawlJobParams = GetCrawlJobParams()
-    ) -> CrawlJobResponse:
-        response = self._client.transport.get(
-            self._client._build_url(f"/crawl/{job_id}"), params=params.__dict__
-        )
-        return CrawlJobResponse(**response.data)
-
-    def start_and_wait(
-        self, params: StartCrawlJobParams, return_all_pages: bool = True
-    ) -> CrawlJobResponse:
-        job_start_resp = self.start(params)
-        if not job_start_resp.job_id:
-            raise HyperbrowserError("Failed to start crawl job")
-
-        job_response: CrawlJobResponse
-        while True:
-            job_response = self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                break
-            time.sleep(2)
-
-        if not return_all_pages:
-            return job_response
-
-        while job_response.current_page_batch < job_response.total_page_batches:
-            tmp_job_response = self.get(
-                job_start_resp.job_id,
-                GetCrawlJobParams(page=job_response.current_page_batch + 1),
-            )
-            if tmp_job_response.data:
-                job_response.data.extend(tmp_job_response.data)
-            job_response.current_page_batch = tmp_job_response.current_page_batch
-            job_response.total_crawled_pages = tmp_job_response.total_crawled_pages
-            job_response.total_page_batches = tmp_job_response.total_page_batches
-            job_response.batch_size = tmp_job_response.batch_size
-            time.sleep(0.5)
-        return job_response

hyperbrowser-0.23.0/hyperbrowser/client/managers/sync_manager/scrape.py
@@ -1,36 +0,0 @@
-import time
-from typing import Optional
-from ....models.scrape import (
-    ScrapeJobResponse,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
-)
-from ....exceptions import HyperbrowserError
-
-
-class ScrapeManager:
-    def __init__(self, client):
-        self._client = client
-
-    def start(self, params: StartScrapeJobParams) -> StartScrapeJobResponse:
-        response = self._client.transport.post(
-            self._client._build_url("/scrape"),
-            data=params.model_dump(exclude_none=True, by_alias=True),
-        )
-        return StartScrapeJobResponse(**response.data)
-
-    def get(self, job_id: str) -> ScrapeJobResponse:
-        response = self._client.transport.get(
-            self._client._build_url(f"/scrape/{job_id}")
-        )
-        return ScrapeJobResponse(**response.data)
-
-    def start_and_wait(self, params: StartScrapeJobParams) -> ScrapeJobResponse:
-        job_start_resp = self.start(params)
-        if not job_start_resp.job_id:
-            raise HyperbrowserError("Failed to start scrape job")
-        while True:
-            job_response = self.get(job_start_resp.job_id)
-            if job_response.status == "completed" or job_response.status == "failed":
-                return job_response
-            time.sleep(2)