hyperbrowser 0.32.0__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hyperbrowser might be problematic. Click here for more details.

@@ -1,12 +1,16 @@
1
+ import json
1
2
  from hyperbrowser.models.crawl import StartCrawlJobParams
3
+ from hyperbrowser.models.extract import StartExtractJobParams
2
4
  from hyperbrowser.models.scrape import StartScrapeJobParams
3
5
  from hyperbrowser import Hyperbrowser, AsyncHyperbrowser
4
6
 
5
7
  from .openai import (
8
+ EXTRACT_TOOL_OPENAI,
6
9
  SCRAPE_TOOL_OPENAI,
7
10
  CRAWL_TOOL_OPENAI,
8
11
  )
9
12
  from .anthropic import (
13
+ EXTRACT_TOOL_ANTHROPIC,
10
14
  SCRAPE_TOOL_ANTHROPIC,
11
15
  CRAWL_TOOL_ANTHROPIC,
12
16
  )
@@ -56,7 +60,27 @@ class WebsiteCrawlTool:
56
60
  return markdown
57
61
 
58
62
 
63
class WebsiteExtractTool:
    """Tool wrapper that runs a Hyperbrowser extract job from LLM tool-call arguments.

    The class pairs the OpenAI and Anthropic tool definitions with the
    callables that execute the tool once the model has produced arguments.
    """

    # Tool definitions handed to the respective LLM provider SDKs.
    openai_tool_definition = EXTRACT_TOOL_OPENAI
    anthropic_tool_definition = EXTRACT_TOOL_ANTHROPIC

    @staticmethod
    def _prepare_params(params: dict) -> dict:
        """Return a copy of *params* with a string ``schema`` decoded from JSON.

        The tool schema declares ``schema`` as a string, so the model sends
        it JSON-encoded; it is decoded here before building the job params.
        A shallow copy is used so the caller's dict is never modified.

        Raises:
            json.JSONDecodeError: if ``schema`` is a non-empty string that is
                not valid JSON.
        """
        prepared = dict(params)
        schema = prepared.get("schema")
        if schema and isinstance(schema, str):
            prepared["schema"] = json.loads(schema)
        return prepared

    @staticmethod
    def runnable(hb: Hyperbrowser, params: dict) -> str:
        """Start an extract job with the sync client and wait for its result.

        Args:
            hb: Synchronous Hyperbrowser client.
            params: Tool-call arguments; passed as keyword arguments to
                ``StartExtractJobParams`` after ``schema`` is decoded.

        Returns:
            The job's ``data`` serialized with ``json.dumps``, or an empty
            string when ``data`` is falsy.
        """
        job_params = StartExtractJobParams(
            **WebsiteExtractTool._prepare_params(params)
        )
        resp = hb.extract.start_and_wait(params=job_params)
        return json.dumps(resp.data) if resp.data else ""

    @staticmethod
    async def async_runnable(hb: AsyncHyperbrowser, params: dict) -> str:
        """Start an extract job with the async client and await its result.

        Args:
            hb: Asynchronous Hyperbrowser client.
            params: Tool-call arguments; passed as keyword arguments to
                ``StartExtractJobParams`` after ``schema`` is decoded.

        Returns:
            The job's ``data`` serialized with ``json.dumps``, or an empty
            string when ``data`` is falsy.
        """
        job_params = StartExtractJobParams(
            **WebsiteExtractTool._prepare_params(params)
        )
        resp = await hb.extract.start_and_wait(params=job_params)
        return json.dumps(resp.data) if resp.data else ""
80
+
81
+
59
82
  __all__ = [
60
83
  "WebsiteScrapeTool",
61
84
  "WebsiteCrawlTool",
85
+ "WebsiteExtractTool",
62
86
  ]
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, Union, Optional
2
2
  from typing_extensions import Literal, Required, TypeAlias, TypedDict
3
3
 
4
- from hyperbrowser.tools.schema import CRAWL_SCHEMA, SCRAPE_SCHEMA
4
+ from hyperbrowser.tools.schema import CRAWL_SCHEMA, EXTRACT_SCHEMA, SCRAPE_SCHEMA
5
5
 
6
6
 
7
7
  class CacheControlEphemeralParam(TypedDict, total=False):
@@ -54,3 +54,9 @@ CRAWL_TOOL_ANTHROPIC: ToolParam = {
54
54
  "name": "crawl_website",
55
55
  "description": "Crawl a website and return the content in markdown format",
56
56
  }
57
+
58
# Anthropic tool definition for the extract tool. The description text is
# sent to the model, so it is kept verbatim and only split across adjacent
# literals for line length.
EXTRACT_TOOL_ANTHROPIC: ToolParam = {
    "input_schema": EXTRACT_SCHEMA,
    "name": "extract_data",
    "description": (
        "Extract data in a structured format from multiple URLs in a single function call. "
        "IMPORTANT: When information must be gathered from multiple sources "
        "(such as comparing items, researching topics across sites, or answering questions that span multiple webpages), "
        "ALWAYS include all relevant URLs in ONE function call. "
        "This enables comprehensive answers with cross-referenced information. "
        "Returns data as a json string."
    ),
}
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, Optional
2
2
  from typing_extensions import Literal, Required, TypedDict, TypeAlias
3
3
 
4
- from hyperbrowser.tools.schema import CRAWL_SCHEMA, SCRAPE_SCHEMA
4
+ from hyperbrowser.tools.schema import CRAWL_SCHEMA, EXTRACT_SCHEMA, SCRAPE_SCHEMA
5
5
 
6
6
  FunctionParameters: TypeAlias = Dict[str, object]
7
7
 
@@ -67,3 +67,13 @@ CRAWL_TOOL_OPENAI: ChatCompletionToolParam = {
67
67
  "strict": True,
68
68
  },
69
69
  }
70
+
71
# OpenAI function-tool definition for the extract tool. "strict" is enabled,
# matching the other tool definitions in this module. The description text is
# sent to the model, so it is kept verbatim and only split across adjacent
# literals for line length.
EXTRACT_TOOL_OPENAI: ChatCompletionToolParam = {
    "type": "function",
    "function": {
        "name": "extract_data",
        "description": (
            "Extract data in a structured format from multiple URLs in a single function call. "
            "IMPORTANT: When information must be gathered from multiple sources "
            "(such as comparing items, researching topics across sites, or answering questions that span multiple webpages), "
            "ALWAYS include all relevant URLs in ONE function call. "
            "This enables comprehensive answers with cross-referenced information. "
            "Returns data as a json string."
        ),
        "parameters": EXTRACT_SCHEMA,
        "strict": True,
    },
}
@@ -84,3 +84,30 @@ CRAWL_SCHEMA = {
84
84
  ],
85
85
  "additionalProperties": False,
86
86
  }
87
+
88
# JSON schema describing the arguments of the extract tool. Every property is
# listed under "required" and additional properties are disallowed. The
# description strings are sent to the model, so they are kept verbatim and
# only split across adjacent literals for line length.
EXTRACT_SCHEMA = {
    "type": "object",
    "properties": {
        "urls": {
            "type": "array",
            "items": {
                "type": "string",
            },
            "description": (
                "A required list of up to 10 urls you want to process IN A SINGLE EXTRACTION. "
                "When answering questions that involve multiple sources or topics, "
                "ALWAYS include ALL relevant URLs in this single array rather than making separate function calls. "
                "This enables cross-referencing information across multiple sources to provide comprehensive answers. "
                "To allow crawling for any of the urls provided in the list, "
                "simply add /* to the end of the url (https://hyperbrowser.ai/*). "
                "This will crawl other pages on the site with the same origin "
                "and find relevant pages to use for the extraction context."
            ),
        },
        "prompt": {
            "type": "string",
            "description": (
                "A prompt describing how you want the data structured, "
                "or what you want to extract from the urls provided. "
                "Can also be used to guide the extraction process. "
                "For multi-source queries, structure this prompt to request unified, "
                "comparative, or aggregated information across all provided URLs."
            ),
        },
        # Declared as a string: the caller is expected to pass the JSON
        # schema in serialized form.
        "schema": {
            "type": "string",
            "description": (
                "A strict json schema you want the returned data to be structured as. "
                "For multi-source extraction, design this schema to accommodate "
                "information from all URLs in a single structure. "
                "Ensure that this is a proper json schema, "
                "and the root level should be of type 'object'."
            ),
        },
        "max_links": {
            "type": "number",
            "description": (
                "The maximum number of links to look for "
                "if performing a crawl for any given url in the urls list."
            ),
        },
    },
    "required": ["urls", "prompt", "schema", "max_links"],
    "additionalProperties": False,
}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hyperbrowser
3
- Version: 0.32.0
3
+ Version: 0.33.0
4
4
  Summary: Python SDK for hyperbrowser
5
5
  License: MIT
6
6
  Author: Nikhil Shahi
@@ -31,14 +31,14 @@ hyperbrowser/models/extract.py,sha256=kkcM7DXkwnFJ7d_JZMDAb4VAlQdh-snJktFpJs92z8
31
31
  hyperbrowser/models/profile.py,sha256=KRb_LNxxW00AsD_thzzthFS51vInJawt1RcoNz4Q9i8,1322
32
32
  hyperbrowser/models/scrape.py,sha256=ZdVNN4rh7EKLh-O1Ou5aU_OgTNxC9pFQlkG-Q2fR-Z4,4611
33
33
  hyperbrowser/models/session.py,sha256=i1NkrQWNlKziDd98ySdrUUH7XSv6qOa2cmiK5vV7VeI,6730
34
- hyperbrowser/tools/__init__.py,sha256=OUaTUM-kiigYmzfbpx3XQhzMK1xT1wd8cqXgR4znsAY,2021
35
- hyperbrowser/tools/anthropic.py,sha256=5pEkJm1H-26GToTwXsDjo4GGqVy1hATws4Pg59mumow,1667
36
- hyperbrowser/tools/openai.py,sha256=4-71IIWSxc_ByhywcfWj9-QI9iYNEe0xO6B2spE8WG0,2200
37
- hyperbrowser/tools/schema.py,sha256=cR2MUX8TvUyN8TnCyeX0pccp4AmPjrdaKzuAXRThOJo,3075
34
+ hyperbrowser/tools/__init__.py,sha256=Wkxk3PKjXs8A2-oN1fDlv5C5nggVznxSITaDIuZbWK4,3029
35
+ hyperbrowser/tools/anthropic.py,sha256=Ij-DcyGwmV9fYhKyZcuEZijr3A5Pr6Tetxh6TMJEZls,2227
36
+ hyperbrowser/tools/openai.py,sha256=yb1BtTNUnkN3_hcsWQDiHwHg8vA4126klEo2R1M1mYc,2854
37
+ hyperbrowser/tools/schema.py,sha256=UsirWHaRpDIlc1yusI_l26yizzEtQugmrOlovsCeC6E,4920
38
38
  hyperbrowser/transport/async_transport.py,sha256=6HKoeM5TutIqraEscEWobvSPWF3iVKh2hPflGNKwykw,4128
39
39
  hyperbrowser/transport/base.py,sha256=ildpMrDiM8nvrSGrH2LTOafmB17T7PQB_NQ1ODA378U,1703
40
40
  hyperbrowser/transport/sync.py,sha256=aUVpxWF8sqSycLNKxVNEZvlsZSoqc1eHgPK1Y1QA1u8,3422
41
- hyperbrowser-0.32.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
42
- hyperbrowser-0.32.0.dist-info/METADATA,sha256=rxEInDkCZjDhAMsDvmHUe-VwXSEBimHmfhxAF6XrHQQ,3438
43
- hyperbrowser-0.32.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
44
- hyperbrowser-0.32.0.dist-info/RECORD,,
41
+ hyperbrowser-0.33.0.dist-info/LICENSE,sha256=6rUGKlyKb_1ZAH7h7YITYAAUNFN3MNGGKCyfrw49NLE,1071
42
+ hyperbrowser-0.33.0.dist-info/METADATA,sha256=twHJeCFloGz_L7USASXaAuHXNXLLjVVPZSXs6Zkrq7o,3438
43
+ hyperbrowser-0.33.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
44
+ hyperbrowser-0.33.0.dist-info/RECORD,,