@hyperbrowser/sdk 0.33.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tools/anthropic.d.ts +1 -0
- package/dist/tools/anthropic.js +6 -1
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.js +13 -1
- package/dist/tools/openai.d.ts +1 -0
- package/dist/tools/openai.js +10 -1
- package/dist/tools/schema.d.ts +42 -16
- package/dist/tools/schema.js +45 -19
- package/dist/types/index.d.ts +3 -1
- package/package.json +1 -1
package/dist/tools/anthropic.js
CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_TOOL_ANTHROPIC = exports.SCRAPE_TOOL_ANTHROPIC = void 0;
+exports.EXTRACT_TOOL_ANTHROPIC = exports.CRAWL_TOOL_ANTHROPIC = exports.SCRAPE_TOOL_ANTHROPIC = void 0;
 const schema_1 = require("./schema");
 exports.SCRAPE_TOOL_ANTHROPIC = {
     input_schema: schema_1.SCRAPE_SCHEMA,
@@ -12,3 +12,8 @@ exports.CRAWL_TOOL_ANTHROPIC = {
     name: "crawl_website",
     description: "Crawl a website and return the content in markdown format",
 };
+exports.EXTRACT_TOOL_ANTHROPIC = {
+    input_schema: schema_1.EXTRACT_SCHEMA,
+    name: "extract_data",
+    description: "Extract data in a structured format from multiple URLs in a single function call. IMPORTANT: When information must be gathered from multiple sources (such as comparing items, researching topics across sites, or answering questions that span multiple webpages), ALWAYS include all relevant URLs in ONE function call. This enables comprehensive answers with cross-referenced information. Returns data as a json string.",
+};
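The new EXTRACT_TOOL_ANTHROPIC export follows Anthropic's tool shape (name, description, input_schema), so it can be handed directly to the Anthropic SDK. A minimal sketch; the "@hyperbrowser/sdk/tools" import path and the model name are assumptions, not something this diff confirms:

import Anthropic from "@anthropic-ai/sdk";
import { EXTRACT_TOOL_ANTHROPIC } from "@hyperbrowser/sdk/tools"; // assumed subpath export

const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

// Offer the extract_data tool for a question that spans several pages.
const message = await anthropic.messages.create({
    model: "claude-3-5-sonnet-latest", // assumed model name
    max_tokens: 1024,
    tools: [EXTRACT_TOOL_ANTHROPIC],
    messages: [
        {
            role: "user",
            content: "Compare the pricing pages of https://example.com and https://example.org",
        },
    ],
});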
package/dist/tools/index.d.ts
CHANGED
@@ -1,5 +1,6 @@
 import { HyperbrowserClient } from "../client";
 import { StartScrapeJobParams, StartCrawlJobParams } from "../types";
+import { StartExtractJobParams } from "../types/extract";
 export declare class WebsiteScrapeTool {
     static openaiToolDefinition: import("./openai").ChatCompletionTool;
     static anthropicToolDefinition: import("./anthropic").Tool;
@@ -10,3 +11,8 @@ export declare class WebsiteCrawlTool {
     static anthropicToolDefinition: import("./anthropic").Tool;
     static runnable(hb: HyperbrowserClient, params: StartCrawlJobParams): Promise<string>;
 }
+export declare class WebsiteExtractTool {
+    static openaiToolDefinition: import("./openai").ChatCompletionTool;
+    static anthropicToolDefinition: import("./anthropic").Tool;
+    static runnable(hb: HyperbrowserClient, params: StartExtractJobParams): Promise<string>;
+}
package/dist/tools/index.js
CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.WebsiteCrawlTool = exports.WebsiteScrapeTool = void 0;
+exports.WebsiteExtractTool = exports.WebsiteCrawlTool = exports.WebsiteScrapeTool = void 0;
 const openai_1 = require("./openai");
 const anthropic_1 = require("./anthropic");
 class WebsiteScrapeTool {
@@ -29,3 +29,15 @@ class WebsiteCrawlTool {
 exports.WebsiteCrawlTool = WebsiteCrawlTool;
 WebsiteCrawlTool.openaiToolDefinition = openai_1.CRAWL_TOOL_OPENAI;
 WebsiteCrawlTool.anthropicToolDefinition = anthropic_1.CRAWL_TOOL_ANTHROPIC;
+class WebsiteExtractTool {
+    static async runnable(hb, params) {
+        if (params.schema && typeof params.schema === "string") {
+            params.schema = JSON.parse(params.schema);
+        }
+        const resp = await hb.extract.startAndWait(params);
+        return resp.data ? JSON.stringify(resp.data) : "";
+    }
+}
+exports.WebsiteExtractTool = WebsiteExtractTool;
+WebsiteExtractTool.openaiToolDefinition = openai_1.EXTRACT_TOOL_OPENAI;
+WebsiteExtractTool.anthropicToolDefinition = anthropic_1.EXTRACT_TOOL_ANTHROPIC;
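WebsiteExtractTool.runnable takes the parsed tool-call arguments, JSON.parses a stringified schema if the model sent one, runs hb.extract.startAndWait, and returns the extracted data as a JSON string. A sketch of wiring a model's tool call into it; the Hyperbrowser client constructor and the "@hyperbrowser/sdk/tools" subpath are assumptions, not shown in this diff:

import { Hyperbrowser } from "@hyperbrowser/sdk"; // assumed client export
import { WebsiteExtractTool } from "@hyperbrowser/sdk/tools"; // assumed subpath export

const hb = new Hyperbrowser({ apiKey: process.env.HYPERBROWSER_API_KEY });

// `rawArgs` is the JSON arguments string a model produced for the extract_data tool.
async function handleExtractToolCall(rawArgs: string): Promise<string> {
    const params = JSON.parse(rawArgs);
    // runnable parses params.schema if it is a string, calls hb.extract.startAndWait,
    // and stringifies resp.data (or returns "" when no data came back).
    return WebsiteExtractTool.runnable(hb, params);
}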
package/dist/tools/openai.d.ts
CHANGED
package/dist/tools/openai.js
CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_TOOL_OPENAI = exports.SCRAPE_TOOL_OPENAI = void 0;
+exports.EXTRACT_TOOL_OPENAI = exports.CRAWL_TOOL_OPENAI = exports.SCRAPE_TOOL_OPENAI = void 0;
 const schema_1 = require("./schema");
 exports.SCRAPE_TOOL_OPENAI = {
     type: "function",
@@ -20,3 +20,12 @@ exports.CRAWL_TOOL_OPENAI = {
         strict: true,
     },
 };
+exports.EXTRACT_TOOL_OPENAI = {
+    type: "function",
+    function: {
+        name: "extract_data",
+        description: "Extract data in a structured format from multiple URLs in a single function call. IMPORTANT: When information must be gathered from multiple sources (such as comparing items, researching topics across sites, or answering questions that span multiple webpages), ALWAYS include all relevant URLs in ONE function call. This enables comprehensive answers with cross-referenced information. Returns data as a json string.",
+        parameters: schema_1.EXTRACT_SCHEMA,
+        strict: true,
+    },
+};
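EXTRACT_TOOL_OPENAI is a standard strict function tool whose parameters are EXTRACT_SCHEMA, so it drops into the tools array of an OpenAI chat-completions call. A sketch under the same assumed import path as above; the model name and URL are illustrative:

import OpenAI from "openai";
import { EXTRACT_TOOL_OPENAI } from "@hyperbrowser/sdk/tools"; // assumed subpath export

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const completion = await openai.chat.completions.create({
    model: "gpt-4o", // illustrative model name
    messages: [{ role: "user", content: "Summarize the docs at https://example.com/docs/*" }],
    tools: [EXTRACT_TOOL_OPENAI],
});

// If the model chose the tool, its arguments string conforms to EXTRACT_SCHEMA.
const toolCall = completion.choices[0].message.tool_calls?.[0];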
package/dist/tools/schema.d.ts
CHANGED
@@ -2,21 +2,21 @@ export declare const SCRAPE_OPTIONS: {
     type: string;
     description: string;
     properties: {
-
+        includeTags: {
             type: string;
             items: {
                 type: string;
             };
             description: string;
         };
-
+        excludeTags: {
             type: string;
             items: {
                 type: string;
             };
             description: string;
         };
-
+        onlyMainContent: {
             type: string;
             description: string;
         };
@@ -31,25 +31,25 @@ export declare const SCRAPE_SCHEMA: {
             type: string;
             description: string;
         };
-
+        scrapeOptions: {
             type: string;
             description: string;
             properties: {
-
+                includeTags: {
                     type: string;
                     items: {
                         type: string;
                     };
                     description: string;
                 };
-
+                excludeTags: {
                     type: string;
                     items: {
                         type: string;
                     };
                     description: string;
                 };
-
+                onlyMainContent: {
                     type: string;
                     description: string;
                 };
@@ -68,51 +68,51 @@ export declare const CRAWL_SCHEMA: {
             type: string;
             description: string;
         };
-
+        maxPages: {
             type: string;
             description: string;
         };
-
+        followLinks: {
             type: string;
             description: string;
         };
-
+        ignoreSitemap: {
             type: string;
             description: string;
         };
-
+        excludePatterns: {
             type: string;
             items: {
                 type: string;
             };
             description: string;
         };
-
+        includePatterns: {
             type: string;
             items: {
                 type: string;
             };
             description: string;
         };
-
+        scrapeOptions: {
             type: string;
             description: string;
             properties: {
-
+                includeTags: {
                     type: string;
                     items: {
                         type: string;
                     };
                     description: string;
                 };
-
+                excludeTags: {
                     type: string;
                     items: {
                         type: string;
                     };
                     description: string;
                 };
-
+                onlyMainContent: {
                     type: string;
                     description: string;
                 };
@@ -124,3 +124,29 @@ export declare const CRAWL_SCHEMA: {
     required: string[];
     additionalProperties: boolean;
 };
+export declare const EXTRACT_SCHEMA: {
+    type: "object";
+    properties: {
+        urls: {
+            type: string;
+            items: {
+                type: string;
+            };
+            description: string;
+        };
+        prompt: {
+            type: string;
+            description: string;
+        };
+        schema: {
+            type: string;
+            description: string;
+        };
+        maxLinks: {
+            type: string;
+            description: string;
+        };
+    };
+    required: string[];
+    additionalProperties: boolean;
+};
package/dist/tools/schema.js
CHANGED
@@ -1,30 +1,30 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_SCHEMA = exports.SCRAPE_SCHEMA = exports.SCRAPE_OPTIONS = void 0;
+exports.EXTRACT_SCHEMA = exports.CRAWL_SCHEMA = exports.SCRAPE_SCHEMA = exports.SCRAPE_OPTIONS = void 0;
 exports.SCRAPE_OPTIONS = {
     type: "object",
     description: "The options for the scrape",
     properties: {
-
+        includeTags: {
             type: "array",
             items: {
                 type: "string",
             },
             description: "An array of HTML tags, classes, or IDs to include in the scraped content. Only elements matching these selectors will be returned.",
         },
-
+        excludeTags: {
             type: "array",
             items: {
                 type: "string",
             },
             description: "An array of HTML tags, classes, or IDs to exclude from the scraped content. Elements matching these selectors will be omitted from the response.",
         },
-
+        onlyMainContent: {
             type: "boolean",
             description: "Whether to only return the main content of the page. If true, only the main content of the page will be returned, excluding any headers, navigation menus,footers, or other non-main content.",
         },
     },
-    required: ["
+    required: ["includeTags", "excludeTags", "onlyMainContent"],
     additionalProperties: false,
 };
 exports.SCRAPE_SCHEMA = {
@@ -34,9 +34,9 @@ exports.SCRAPE_SCHEMA = {
             type: "string",
             description: "The URL of the website to scrape",
         },
-
+        scrapeOptions: exports.SCRAPE_OPTIONS,
     },
-    required: ["url", "
+    required: ["url", "scrapeOptions"],
     additionalProperties: false,
 };
 exports.CRAWL_SCHEMA = {
@@ -46,42 +46,68 @@ exports.CRAWL_SCHEMA = {
             type: "string",
             description: "The URL of the website to crawl",
         },
-
+        maxPages: {
             type: "number",
             description: "The maximum number of pages to crawl",
         },
-
+        followLinks: {
             type: "boolean",
             description: "Whether to follow links on the page",
         },
-
+        ignoreSitemap: {
             type: "boolean",
             description: "Whether to ignore the sitemap",
         },
-
+        excludePatterns: {
             type: "array",
             items: {
                 type: "string",
             },
             description: "An array of regular expressions or wildcard patterns specifying which URLs should be excluded from the crawl. Any pages whose URLs' path match one of these patterns will be skipped. Example: ['/admin', '/careers/*']",
         },
-
+        includePatterns: {
             type: "array",
             items: {
                 type: "string",
             },
             description: "An array of regular expressions or wildcard patterns specifying which URLs should be included in the crawl. Only pages whose URLs' path match one of these path patterns will be visited. Example: ['/admin', '/careers/*']",
         },
-
+        scrapeOptions: exports.SCRAPE_OPTIONS,
     },
     required: [
         "url",
-        "
-        "
-        "
-        "
-        "
-        "
+        "maxPages",
+        "followLinks",
+        "ignoreSitemap",
+        "excludePatterns",
+        "includePatterns",
+        "scrapeOptions",
     ],
     additionalProperties: false,
 };
+exports.EXTRACT_SCHEMA = {
+    type: "object",
+    properties: {
+        urls: {
+            type: "array",
+            items: {
+                type: "string",
+            },
+            description: "A required list of up to 10 urls you want to process IN A SINGLE EXTRACTION. When answering questions that involve multiple sources or topics, ALWAYS include ALL relevant URLs in this single array rather than making separate function calls. This enables cross-referencing information across multiple sources to provide comprehensive answers. To allow crawling for any of the urls provided in the list, simply add /* to the end of the url (https://hyperbrowser.ai/*). This will crawl other pages on the site with the same origin and find relevant pages to use for the extraction context.",
+        },
+        prompt: {
+            type: "string",
+            description: "A prompt describing how you want the data structured, or what you want to extract from the urls provided. Can also be used to guide the extraction process. For multi-source queries, structure this prompt to request unified, comparative, or aggregated information across all provided URLs.",
+        },
+        schema: {
+            type: "string",
+            description: "A strict json schema you want the returned data to be structured as. For multi-source extraction, design this schema to accommodate information from all URLs in a single structure. Ensure that this is a proper json schema, and the root level should be of type 'object'.",
+        },
+        maxLinks: {
+            type: "number",
+            description: "The maximum number of links to look for if performing a crawl for any given url in the urls list.",
+        },
+    },
+    required: ["urls", "prompt", "schema", "maxLinks"],
+    additionalProperties: false,
+};
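For reference, a tool-call arguments payload that satisfies the new EXTRACT_SCHEMA would look like the following; the URLs and the nested JSON schema are purely illustrative:

// Example arguments an LLM might emit for the extract_data tool.
const extractArgs = {
    urls: ["https://hyperbrowser.ai/*", "https://example.com/pricing"],
    prompt: "Compare the pricing tiers described across both sites.",
    // Per the schema description, `schema` is a JSON-schema string with an object root;
    // WebsiteExtractTool.runnable parses it back into an object before calling the API.
    schema: JSON.stringify({
        type: "object",
        properties: {
            plans: { type: "array", items: { type: "string" } },
        },
        required: ["plans"],
    }),
    maxLinks: 10,
};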
package/dist/types/index.d.ts
CHANGED
@@ -1,7 +1,9 @@
 export { HyperbrowserConfig } from "./config";
 export { StartCrawlJobParams, StartCrawlJobResponse, CrawledPage, CrawlJobResponse, GetCrawlJobParams, } from "./crawl";
 export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, ScrapeOptions, } from "./scrape";
+export { StartExtractJobParams, StartExtractJobResponse, ExtractJobResponse } from "./extract";
+export { StartBrowserUseTaskParams, StartBrowserUseTaskResponse, BrowserUseTaskStatusResponse, BrowserUseTaskResponse, BrowserUseTaskData, } from "./beta/agents/browser-use";
 export { BasicResponse, SessionStatus, Session, SessionDetail, SessionListParams, SessionListResponse, ScreenConfig, CreateSessionParams, } from "./session";
 export { ProfileResponse, CreateProfileResponse, ProfileListParams, ProfileListResponse, } from "./profile";
 export { CreateExtensionParams, CreateExtensionResponse, ListExtensionsResponse, } from "./extension";
-export { ScrapeJobStatus, CrawlJobStatus, Country, State, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, ScrapePageStatus, CrawlPageStatus, } from "./constants";
+export { ExtractJobStatus, BrowserUseTaskStatus, BrowserUseLlm, ScrapeScreenshotFormat, ScrapeJobStatus, CrawlJobStatus, Country, State, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, ScrapePageStatus, CrawlPageStatus, } from "./constants";