hyperbrowser 0.33.0__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hyperbrowser might be problematic.

Files changed (32)
  1. hyperbrowser/client/async_client.py +8 -8
  2. hyperbrowser/client/managers/async_manager/{beta/agents → agents}/__init__.py +3 -2
  3. hyperbrowser/client/managers/async_manager/{beta/agents → agents}/browser_use.py +5 -3
  4. hyperbrowser/client/managers/async_manager/crawl.py +30 -15
  5. hyperbrowser/client/managers/async_manager/extract.py +15 -7
  6. hyperbrowser/client/managers/async_manager/profile.py +2 -1
  7. hyperbrowser/client/managers/async_manager/scrape.py +42 -21
  8. hyperbrowser/client/managers/async_manager/session.py +2 -1
  9. hyperbrowser/client/managers/sync_manager/{beta/agents → agents}/__init__.py +3 -2
  10. hyperbrowser/client/managers/sync_manager/{beta/agents → agents}/browser_use.py +5 -3
  11. hyperbrowser/client/managers/sync_manager/crawl.py +31 -16
  12. hyperbrowser/client/managers/sync_manager/extract.py +15 -7
  13. hyperbrowser/client/managers/sync_manager/profile.py +2 -1
  14. hyperbrowser/client/managers/sync_manager/scrape.py +44 -23
  15. hyperbrowser/client/managers/sync_manager/session.py +2 -1
  16. hyperbrowser/client/sync.py +8 -8
  17. hyperbrowser/models/__init__.py +76 -67
  18. hyperbrowser/models/{beta/agents → agents}/browser_use.py +4 -2
  19. hyperbrowser/models/crawl.py +12 -0
  20. hyperbrowser/models/extract.py +12 -0
  21. hyperbrowser/models/scrape.py +24 -0
  22. hyperbrowser/tools/__init__.py +40 -0
  23. hyperbrowser/tools/anthropic.py +19 -1
  24. hyperbrowser/tools/openai.py +27 -1
  25. hyperbrowser/tools/schema.py +93 -21
  26. {hyperbrowser-0.33.0.dist-info → hyperbrowser-0.35.0.dist-info}/METADATA +2 -1
  27. hyperbrowser-0.35.0.dist-info/RECORD +42 -0
  28. hyperbrowser/client/managers/async_manager/beta/__init__.py +0 -6
  29. hyperbrowser/client/managers/sync_manager/beta/__init__.py +0 -6
  30. hyperbrowser-0.33.0.dist-info/RECORD +0 -44
  31. {hyperbrowser-0.33.0.dist-info → hyperbrowser-0.35.0.dist-info}/LICENSE +0 -0
  32. {hyperbrowser-0.33.0.dist-info → hyperbrowser-0.35.0.dist-info}/WHEEL +0 -0
@@ -40,7 +40,8 @@ class SessionManager:
         self, params: SessionListParams = SessionListParams()
     ) -> SessionListResponse:
         response = self._client.transport.get(
-            self._client._build_url("/sessions"), params=params.__dict__
+            self._client._build_url("/sessions"),
+            params=params.model_dump(exclude_none=True, by_alias=True),
         )
         return SessionListResponse(**response.data)
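The list endpoint now serializes query params with pydantic's model_dump(exclude_none=True, by_alias=True) instead of the raw __dict__, so unset fields are dropped and the API's camelCase aliases are sent. A minimal sketch of the difference, using an illustrative params model (the field names below are assumptions, not the SDK's actual fields):

    from typing import Optional
    from pydantic import BaseModel, Field

    class ExampleListParams(BaseModel):
        # Hypothetical params model mirroring the SDK's alias pattern.
        page_size: Optional[int] = Field(default=None, alias="pageSize")
        status: Optional[str] = None

    p = ExampleListParams(pageSize=10)
    print(p.__dict__)                                      # {'page_size': 10, 'status': None}
    print(p.model_dump(exclude_none=True, by_alias=True))  # {'pageSize': 10}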
@@ -1,15 +1,15 @@
 from typing import Optional
 
+from ..config import ClientConfig
+from ..transport.sync import SyncTransport
+from .base import HyperbrowserBase
+from .managers.sync_manager.agents import Agents
+from .managers.sync_manager.crawl import CrawlManager
+from .managers.sync_manager.extension import ExtensionManager
 from .managers.sync_manager.extract import ExtractManager
 from .managers.sync_manager.profile import ProfileManager
-from .managers.sync_manager.session import SessionManager
 from .managers.sync_manager.scrape import ScrapeManager
-from .managers.sync_manager.crawl import CrawlManager
-from .managers.sync_manager.beta import Beta
-from .managers.sync_manager.extension import ExtensionManager
-from .base import HyperbrowserBase
-from ..transport.sync import SyncTransport
-from ..config import ClientConfig
+from .managers.sync_manager.session import SessionManager
 
 
 class Hyperbrowser(HyperbrowserBase):
@@ -30,7 +30,7 @@ class Hyperbrowser(HyperbrowserBase):
         self.extract = ExtractManager(self)
         self.profiles = ProfileManager(self)
         self.extensions = ExtensionManager(self)
-        self.beta = Beta(self)
+        self.agents = Agents(self)
 
     def close(self) -> None:
         self.transport.close()
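The beta namespace is gone: browser-use now hangs off client.agents (the async client gets the same rename in async_client.py). A migration sketch; start_and_wait and final_result are confirmed by the tools module further down in this diff, while the api_key keyword and the task field are assumed from typical usage:

    from hyperbrowser import Hyperbrowser
    from hyperbrowser.models import StartBrowserUseTaskParams

    client = Hyperbrowser(api_key="hb_...")  # constructor keyword assumed

    # 0.33.0: resp = client.beta.browser_use.start_and_wait(...)
    # 0.35.0: the same manager now lives under client.agents
    resp = client.agents.browser_use.start_and_wait(
        params=StartBrowserUseTaskParams(task="Find the pricing page of example.com")
    )
    print(resp.data.final_result if resp.data else None)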
@@ -1,142 +1,151 @@
+from .agents.browser_use import (
+    BrowserUseTaskData,
+    BrowserUseTaskResponse,
+    BrowserUseTaskStatusResponse,
+    StartBrowserUseTaskParams,
+    StartBrowserUseTaskResponse,
+)
 from .consts import (
-    ScrapeFormat,
-    ScrapeWaitUntil,
-    ScrapePageStatus,
-    ScrapeScreenshotFormat,
-    RecordingStatus,
-    DownloadsStatus,
+    ISO639_1,
     POLLING_ATTEMPTS,
+    BrowserUseLlm,
     Country,
+    DownloadsStatus,
     OperatingSystem,
     Platform,
-    ISO639_1,
+    RecordingStatus,
+    ScrapeFormat,
+    ScrapePageStatus,
+    ScrapeScreenshotFormat,
+    ScrapeWaitUntil,
     State,
-    BrowserUseLlm,
 )
 from .crawl import (
+    CrawledPage,
+    CrawlJobResponse,
     CrawlJobStatus,
+    CrawlJobStatusResponse,
     CrawlPageStatus,
+    GetCrawlJobParams,
     StartCrawlJobParams,
     StartCrawlJobResponse,
-    CrawledPage,
-    GetCrawlJobParams,
-    CrawlJobResponse,
 )
 from .extension import CreateExtensionParams, ExtensionResponse
 from .extract import (
+    ExtractJobResponse,
     ExtractJobStatus,
+    ExtractJobStatusResponse,
     StartExtractJobParams,
     StartExtractJobResponse,
-    ExtractJobResponse,
 )
 from .profile import (
     CreateProfileResponse,
-    ProfileResponse,
     ProfileListParams,
     ProfileListResponse,
+    ProfileResponse,
 )
 from .scrape import (
-    ScrapeJobStatus,
-    ScreenshotOptions,
-    ScrapeOptions,
-    StartScrapeJobParams,
-    StartScrapeJobResponse,
+    BatchScrapeJobResponse,
+    BatchScrapeJobStatusResponse,
+    GetBatchScrapeJobParams,
+    ScrapedPage,
     ScrapeJobData,
     ScrapeJobResponse,
+    ScrapeJobStatus,
+    ScrapeJobStatusResponse,
+    ScrapeOptions,
+    ScreenshotOptions,
     StartBatchScrapeJobParams,
-    ScrapedPage,
-    GetBatchScrapeJobParams,
     StartBatchScrapeJobResponse,
-    BatchScrapeJobResponse,
+    StartScrapeJobParams,
+    StartScrapeJobResponse,
 )
 from .session import (
-    SessionStatus,
     BasicResponse,
+    CreateSessionParams,
+    CreateSessionProfile,
+    GetSessionDownloadsUrlResponse,
+    GetSessionRecordingUrlResponse,
+    ScreenConfig,
     Session,
     SessionDetail,
     SessionListParams,
     SessionListResponse,
-    ScreenConfig,
-    CreateSessionProfile,
-    CreateSessionParams,
     SessionRecording,
-    GetSessionRecordingUrlResponse,
-    GetSessionDownloadsUrlResponse,
-)
-from .beta.agents.browser_use import (
-    StartBrowserUseTaskParams,
-    StartBrowserUseTaskResponse,
-    BrowserUseTaskStatusResponse,
-    BrowserUseTaskData,
-    BrowserUseTaskResponse,
+    SessionStatus,
 )
 
 __all__ = [
     # consts
-    "BrowserUseLlm",
-    "ScrapeFormat",
-    "ScrapeWaitUntil",
-    "ScrapePageStatus",
-    "ScrapeScreenshotFormat",
-    "RecordingStatus",
-    "DownloadsStatus",
+    "ISO639_1",
     "POLLING_ATTEMPTS",
+    "BrowserUseLlm",
     "Country",
+    "DownloadsStatus",
     "OperatingSystem",
     "Platform",
-    "ISO639_1",
+    "RecordingStatus",
+    "ScrapeFormat",
+    "ScrapePageStatus",
+    "ScrapeScreenshotFormat",
+    "ScrapeWaitUntil",
     "State",
+    # agents
+    "BrowserUseTaskStatus",
+    "BrowserUseTaskData",
+    "BrowserUseTaskResponse",
+    "BrowserUseTaskStatusResponse",
+    "StartBrowserUseTaskParams",
+    "StartBrowserUseTaskResponse",
     # crawl
+    "CrawledPage",
+    "CrawlJobResponse",
     "CrawlJobStatus",
+    "CrawlJobStatusResponse",
     "CrawlPageStatus",
+    "GetCrawlJobParams",
     "StartCrawlJobParams",
     "StartCrawlJobResponse",
-    "CrawledPage",
-    "GetCrawlJobParams",
-    "CrawlJobResponse",
     # extension
     "CreateExtensionParams",
     "ExtensionResponse",
     # extract
+    "ExtractJobResponse",
     "ExtractJobStatus",
+    "ExtractJobStatusResponse",
     "StartExtractJobParams",
     "StartExtractJobResponse",
-    "ExtractJobResponse",
     # profile
     "CreateProfileResponse",
-    "ProfileResponse",
     "ProfileListParams",
     "ProfileListResponse",
+    "ProfileResponse",
     # scrape
-    "ScrapeJobStatus",
-    "ScreenshotOptions",
-    "ScrapeOptions",
-    "StartScrapeJobParams",
-    "StartScrapeJobResponse",
+    "BatchScrapeJobResponse",
+    "BatchScrapeJobStatusResponse",
+    "GetBatchScrapeJobParams",
+    "ScrapedPage",
     "ScrapeJobData",
     "ScrapeJobResponse",
+    "ScrapeJobStatus",
+    "ScrapeJobStatusResponse",
+    "ScrapeOptions",
+    "ScreenshotOptions",
     "StartBatchScrapeJobParams",
-    "ScrapedPage",
-    "GetBatchScrapeJobParams",
     "StartBatchScrapeJobResponse",
-    "BatchScrapeJobResponse",
+    "StartScrapeJobParams",
+    "StartScrapeJobResponse",
     # session
-    "SessionStatus",
     "BasicResponse",
+    "CreateSessionParams",
+    "CreateSessionProfile",
+    "GetSessionDownloadsUrlResponse",
+    "GetSessionRecordingUrlResponse",
+    "ScreenConfig",
     "Session",
     "SessionDetail",
     "SessionListParams",
     "SessionListResponse",
-    "ScreenConfig",
-    "CreateSessionProfile",
-    "CreateSessionParams",
     "SessionRecording",
-    "GetSessionRecordingUrlResponse",
-    "GetSessionDownloadsUrlResponse",
-    # agents
-    "StartBrowserUseTaskParams",
-    "StartBrowserUseTaskResponse",
-    "BrowserUseTaskStatusResponse",
-    "BrowserUseTaskData",
-    "BrowserUseTaskResponse",
+    "SessionStatus",
 ]
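After this reorganization, the agent types and the new *JobStatusResponse models are importable directly from hyperbrowser.models. A quick sanity-check sketch (each new status model defines a single status field, per the model definitions further down in this diff):

    from hyperbrowser.models import (
        BatchScrapeJobStatusResponse,
        CrawlJobStatusResponse,
        ExtractJobStatusResponse,
        ScrapeJobStatusResponse,
    )

    # Print the field names each new status model exposes.
    for model in (CrawlJobStatusResponse, ExtractJobStatusResponse,
                  ScrapeJobStatusResponse, BatchScrapeJobStatusResponse):
        print(model.__name__, list(model.model_fields))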
@@ -1,8 +1,9 @@
 from typing import Literal, Optional, Union
+
 from pydantic import BaseModel, ConfigDict, Field
 
-from ...consts import BrowserUseLlm
-from ...session import CreateSessionParams
+from ..consts import BrowserUseLlm
+from ..session import CreateSessionParams
 
 BrowserUseTaskStatus = Literal["pending", "running", "completed", "failed", "stopped"]
@@ -182,3 +183,4 @@ class BrowserUseTaskResponse(BaseModel):
     data: Optional[BrowserUseTaskData] = Field(default=None, alias="data")
     error: Optional[str] = Field(default=None, alias="error")
     live_url: Optional[str] = Field(default=None, alias="liveUrl")
+    live_url: Optional[str] = Field(default=None, alias="liveUrl")
@@ -47,6 +47,18 @@ class StartCrawlJobResponse(BaseModel):
     job_id: str = Field(alias="jobId")
 
 
+class CrawlJobStatusResponse(BaseModel):
+    """
+    Response from getting the status of a crawl job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    status: CrawlJobStatus
+
+
 class CrawledPage(BaseModel):
     """
     Data from a crawled page.
@@ -43,6 +43,18 @@ class StartExtractJobResponse(BaseModel):
     job_id: str = Field(alias="jobId")
 
 
+class ExtractJobStatusResponse(BaseModel):
+    """
+    Response from getting the status of a extract job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    status: ExtractJobStatus
+
+
 class ExtractJobResponse(BaseModel):
     """
     Response from a extract job.
@@ -78,6 +78,18 @@ class StartScrapeJobResponse(BaseModel):
     job_id: str = Field(alias="jobId")
 
 
+class ScrapeJobStatusResponse(BaseModel):
+    """
+    Response from getting the status of a scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    status: ScrapeJobStatus
+
+
 class ScrapeJobData(BaseModel):
     """
     Data from a scraped site.
@@ -119,6 +131,18 @@ class StartBatchScrapeJobParams(BaseModel):
     )
 
 
+class BatchScrapeJobStatusResponse(BaseModel):
+    """
+    Response from getting the status of a batch scrape job.
+    """
+
+    model_config = ConfigDict(
+        populate_by_alias=True,
+    )
+
+    status: ScrapeJobStatus
+
+
 class ScrapedPage(BaseModel):
     """
     A scraped page.
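Each of the four new models wraps only the job status, so a status payload can be validated without pulling the full job data. A minimal sketch; the payload and the literal value are illustrative, not taken from a recorded API response:

    from hyperbrowser.models import ScrapeJobStatusResponse

    payload = {"status": "completed"}  # assumed to be a valid ScrapeJobStatus literal
    status = ScrapeJobStatusResponse(**payload)
    print(status.status)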
@@ -1,17 +1,22 @@
 import json
+from hyperbrowser.models.agents.browser_use import StartBrowserUseTaskParams
 from hyperbrowser.models.crawl import StartCrawlJobParams
 from hyperbrowser.models.extract import StartExtractJobParams
 from hyperbrowser.models.scrape import StartScrapeJobParams
 from hyperbrowser import Hyperbrowser, AsyncHyperbrowser
 
 from .openai import (
+    BROWSER_USE_TOOL_OPENAI,
     EXTRACT_TOOL_OPENAI,
     SCRAPE_TOOL_OPENAI,
+    SCREENSHOT_TOOL_OPENAI,
     CRAWL_TOOL_OPENAI,
 )
 from .anthropic import (
+    BROWSER_USE_TOOL_ANTHROPIC,
     EXTRACT_TOOL_ANTHROPIC,
     SCRAPE_TOOL_ANTHROPIC,
+    SCREENSHOT_TOOL_ANTHROPIC,
     CRAWL_TOOL_ANTHROPIC,
 )
@@ -31,6 +36,21 @@ class WebsiteScrapeTool:
         return resp.data.markdown if resp.data and resp.data.markdown else ""
 
 
+class WebsiteScreenshotTool:
+    openai_tool_definition = SCREENSHOT_TOOL_OPENAI
+    anthropic_tool_definition = SCREENSHOT_TOOL_ANTHROPIC
+
+    @staticmethod
+    def runnable(hb: Hyperbrowser, params: dict) -> str:
+        resp = hb.scrape.start_and_wait(params=StartScrapeJobParams(**params))
+        return resp.data.screenshot if resp.data and resp.data.screenshot else ""
+
+    @staticmethod
+    async def async_runnable(hb: AsyncHyperbrowser, params: dict) -> str:
+        resp = await hb.scrape.start_and_wait(params=StartScrapeJobParams(**params))
+        return resp.data.screenshot if resp.data and resp.data.screenshot else ""
+
+
 class WebsiteCrawlTool:
     openai_tool_definition = CRAWL_TOOL_OPENAI
     anthropic_tool_definition = CRAWL_TOOL_ANTHROPIC
@@ -79,8 +99,28 @@ class WebsiteExtractTool:
         return json.dumps(resp.data) if resp.data else ""
 
 
+class BrowserUseTool:
+    openai_tool_definition = BROWSER_USE_TOOL_OPENAI
+    anthropic_tool_definition = BROWSER_USE_TOOL_ANTHROPIC
+
+    @staticmethod
+    def runnable(hb: Hyperbrowser, params: dict) -> str:
+        resp = hb.agents.browser_use.start_and_wait(
+            params=StartBrowserUseTaskParams(**params)
+        )
+        return resp.data.final_result if resp.data and resp.data.final_result else ""
+
+    @staticmethod
+    async def async_runnable(hb: AsyncHyperbrowser, params: dict) -> str:
+        resp = await hb.agents.browser_use.start_and_wait(
+            params=StartBrowserUseTaskParams(**params)
+        )
+        return resp.data.final_result if resp.data and resp.data.final_result else ""
+
+
 __all__ = [
     "WebsiteScrapeTool",
     "WebsiteCrawlTool",
     "WebsiteExtractTool",
+    "BrowserUseTool",
 ]
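BrowserUseTool.runnable simply forwards a schema-shaped dict into StartBrowserUseTaskParams, so it can be driven by hand as well as by an LLM tool call; WebsiteScreenshotTool works the same way with SCREENSHOT_SCHEMA-shaped arguments. A sketch; the api_key keyword is assumed, and the argument dict mirrors the required fields of BROWSER_USE_SCHEMA shown later in this diff:

    from hyperbrowser import Hyperbrowser
    from hyperbrowser.tools import BrowserUseTool

    hb = Hyperbrowser(api_key="hb_...")  # constructor keyword assumed

    result = BrowserUseTool.runnable(
        hb,
        {
            "task": "Open https://example.com and report the page heading",
            "llm": "gemini-2.0-flash",
            "planner_llm": "gemini-2.0-flash",
            "page_extraction_llm": "gemini-2.0-flash",
            "keep_browser_open": False,
        },
    )
    print(result)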
@@ -1,7 +1,13 @@
 from typing import Dict, Union, Optional
 from typing_extensions import Literal, Required, TypeAlias, TypedDict
 
-from hyperbrowser.tools.schema import CRAWL_SCHEMA, EXTRACT_SCHEMA, SCRAPE_SCHEMA
+from hyperbrowser.tools.schema import (
+    BROWSER_USE_SCHEMA,
+    CRAWL_SCHEMA,
+    EXTRACT_SCHEMA,
+    SCRAPE_SCHEMA,
+    SCREENSHOT_SCHEMA,
+)
 
 
 class CacheControlEphemeralParam(TypedDict, total=False):
@@ -49,6 +55,12 @@ SCRAPE_TOOL_ANTHROPIC: ToolParam = {
     "description": "Scrape content from a webpage and return the content in markdown format",
 }
 
+SCREENSHOT_TOOL_ANTHROPIC: ToolParam = {
+    "input_schema": SCREENSHOT_SCHEMA,
+    "name": "screenshot_webpage",
+    "description": "Scrape content from a webpage and return the content in screenshot format",
+}
+
 CRAWL_TOOL_ANTHROPIC: ToolParam = {
     "input_schema": CRAWL_SCHEMA,
     "name": "crawl_website",
@@ -60,3 +72,9 @@ EXTRACT_TOOL_ANTHROPIC: ToolParam = {
     "name": "extract_data",
     "description": "Extract data in a structured format from multiple URLs in a single function call. IMPORTANT: When information must be gathered from multiple sources (such as comparing items, researching topics across sites, or answering questions that span multiple webpages), ALWAYS include all relevant URLs in ONE function call. This enables comprehensive answers with cross-referenced information. Returns data as a json string.",
 }
+
+BROWSER_USE_TOOL_ANTHROPIC: ToolParam = {
+    "input_schema": BROWSER_USE_SCHEMA,
+    "name": "browser_use",
+    "description": "Have an AI agent use a browser to perform a task on the web.",
+}
@@ -1,7 +1,13 @@
 from typing import Dict, Optional
 from typing_extensions import Literal, Required, TypedDict, TypeAlias
 
-from hyperbrowser.tools.schema import CRAWL_SCHEMA, EXTRACT_SCHEMA, SCRAPE_SCHEMA
+from hyperbrowser.tools.schema import (
+    BROWSER_USE_SCHEMA,
+    CRAWL_SCHEMA,
+    EXTRACT_SCHEMA,
+    SCRAPE_SCHEMA,
+    SCREENSHOT_SCHEMA,
+)
 
 FunctionParameters: TypeAlias = Dict[str, object]
@@ -58,6 +64,16 @@ SCRAPE_TOOL_OPENAI: ChatCompletionToolParam = {
     },
 }
 
+SCREENSHOT_TOOL_OPENAI: ChatCompletionToolParam = {
+    "type": "function",
+    "function": {
+        "name": "screenshot_webpage",
+        "description": "Scrape content from a webpage and return the content in screenshot format",
+        "parameters": SCREENSHOT_SCHEMA,
+        "strict": True,
+    },
+}
+
 CRAWL_TOOL_OPENAI: ChatCompletionToolParam = {
     "type": "function",
     "function": {
@@ -77,3 +93,13 @@ EXTRACT_TOOL_OPENAI: ChatCompletionToolParam = {
         "strict": True,
     },
 }
+
+BROWSER_USE_TOOL_OPENAI: ChatCompletionToolParam = {
+    "type": "function",
+    "function": {
+        "name": "browser_use",
+        "description": "Have an AI agent use a browser to perform a task on the web.",
+        "parameters": BROWSER_USE_SCHEMA,
+        "strict": True,
+    },
+}
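These definitions drop straight into an OpenAI tool-calling request (the Anthropic ones plug into messages.create(tools=...) the same way). A sketch of the round trip; the model name and prompt are placeholders, and the Hyperbrowser api_key keyword is assumed:

    import json
    from openai import OpenAI
    from hyperbrowser import Hyperbrowser
    from hyperbrowser.tools import BrowserUseTool, WebsiteScreenshotTool

    hb = Hyperbrowser(api_key="hb_...")  # constructor keyword assumed
    oai = OpenAI()

    chat = oai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Take a screenshot of https://example.com"}],
        tools=[
            WebsiteScreenshotTool.openai_tool_definition,
            BrowserUseTool.openai_tool_definition,
        ],
    )

    # Dispatch whichever Hyperbrowser tool the model picked.
    for call in chat.choices[0].message.tool_calls or []:
        args = json.loads(call.function.arguments)
        if call.function.name == "screenshot_webpage":
            print(WebsiteScreenshotTool.runnable(hb, args))
        elif call.function.name == "browser_use":
            print(BrowserUseTool.runnable(hb, args))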
@@ -1,38 +1,63 @@
-SCRAPE_OPTIONS = {
-    "type": "object",
-    "description": "The options for the scrape",
-    "properties": {
-        "include_tags": {
-            "type": "array",
-            "items": {
+from typing import Literal, List
+
+scrape_types = Literal["markdown", "screenshot"]
+
+
+def get_scrape_options(formats: List[scrape_types] = ["markdown"]):
+    return {
+        "type": "object",
+        "description": "The options for the scrape",
+        "properties": {
+            "format": {
                 "type": "string",
+                "description": "The format of the content to scrape",
+                "enum": formats,
             },
-            "description": "An array of HTML tags, classes, or IDs to include in the scraped content. Only elements matching these selectors will be returned.",
-        },
-        "exclude_tags": {
-            "type": "array",
-            "items": {
-                "type": "string",
+            "include_tags": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                },
+                "description": "An array of HTML tags, classes, or IDs to include in the scraped content. Only elements matching these selectors will be returned.",
+            },
+            "exclude_tags": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                },
+                "description": "An array of HTML tags, classes, or IDs to exclude from the scraped content. Elements matching these selectors will be omitted from the response.",
+            },
+            "only_main_content": {
+                "type": "boolean",
+                "description": "Whether to only return the main content of the page. If true, only the main content of the page will be returned, excluding any headers, navigation menus,footers, or other non-main content.",
             },
-            "description": "An array of HTML tags, classes, or IDs to exclude from the scraped content. Elements matching these selectors will be omitted from the response.",
         },
-        "only_main_content": {
-            "type": "boolean",
-            "description": "Whether to only return the main content of the page. If true, only the main content of the page will be returned, excluding any headers, navigation menus,footers, or other non-main content.",
+        "required": ["include_tags", "exclude_tags", "only_main_content", "format"],
+        "additionalProperties": False,
+    }
+
+
+SCRAPE_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "url": {
+            "type": "string",
+            "description": "The URL of the website to scrape",
         },
+        "scrape_options": get_scrape_options(),
     },
-    "required": ["include_tags", "exclude_tags", "only_main_content"],
+    "required": ["url", "scrape_options"],
     "additionalProperties": False,
 }
 
-SCRAPE_SCHEMA = {
+SCREENSHOT_SCHEMA = {
     "type": "object",
     "properties": {
         "url": {
             "type": "string",
             "description": "The URL of the website to scrape",
         },
-        "scrape_options": SCRAPE_OPTIONS,
+        "scrape_options": get_scrape_options(["screenshot"]),
     },
     "required": ["url", "scrape_options"],
     "additionalProperties": False,
@@ -71,7 +96,7 @@ CRAWL_SCHEMA = {
             },
             "description": "An array of regular expressions or wildcard patterns specifying which URLs should be included in the crawl. Only pages whose URLs' path match one of these path patterns will be visited. Example: ['/admin', '/careers/*']",
         },
-        "scrape_options": SCRAPE_OPTIONS,
+        "scrape_options": get_scrape_options(),
     },
     "required": [
         "url",
@@ -111,3 +136,50 @@ EXTRACT_SCHEMA = {
     "required": ["urls", "prompt", "schema", "max_links"],
     "additionalProperties": False,
 }
+
+BROWSER_USE_LLM_SCHEMA = {
+    "type": "string",
+    "enum": [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-sonnet-20241022",
+        "claude-3-5-haiku-20241022",
+        "gemini-2.0-flash",
+    ],
+    "default": "gemini-2.0-flash",
+}
+
+BROWSER_USE_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "task": {
+            "type": "string",
+            "description": "The text description of the task to be performed by the agent.",
+        },
+        "llm": {
+            **BROWSER_USE_LLM_SCHEMA,
+            "description": "The language model (LLM) instance to use for generating actions. Default to gemini-2.0-flash.",
+        },
+        "planner_llm": {
+            **BROWSER_USE_LLM_SCHEMA,
+            "description": "The language model to use specifically for planning future actions, can differ from the main LLM. Default to gemini-2.0-flash.",
+        },
+        "page_extraction_llm": {
+            **BROWSER_USE_LLM_SCHEMA,
+            "description": "The language model to use for extracting structured data from webpages. Default to gemini-2.0-flash.",
+        },
+        "keep_browser_open": {
+            "type": "boolean",
+            "description": "When enabled, keeps the browser session open after task completion.",
+        },
+    },
+    "required": [
+        "task",
+        "llm",
+        "planner_llm",
+        "page_extraction_llm",
+        "keep_browser_open",
+    ],
+    "additionalProperties": False,
+}
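Because both top-level scrape schemas are now built from the same helper, the only difference between them is the allowed format enum. A quick check sketch against the definitions above:

    from hyperbrowser.tools.schema import SCRAPE_SCHEMA, SCREENSHOT_SCHEMA

    scrape_fmt = SCRAPE_SCHEMA["properties"]["scrape_options"]["properties"]["format"]["enum"]
    shot_fmt = SCREENSHOT_SCHEMA["properties"]["scrape_options"]["properties"]["format"]["enum"]
    print(scrape_fmt)  # ["markdown"]
    print(shot_fmt)    # ["screenshot"]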
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hyperbrowser
-Version: 0.33.0
+Version: 0.35.0
 Summary: Python SDK for hyperbrowser
 License: MIT
 Author: Nikhil Shahi
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: httpx (>=0.23.0,<1)
+Requires-Dist: jsonref (>=1.1.0)
 Requires-Dist: pydantic (>=2.0,<3)
 Project-URL: Homepage, https://github.com/hyperbrowserai/python-sdk
 Project-URL: Repository, https://github.com/hyperbrowserai/python-sdk