@hyperbrowser/sdk 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/dist/tools/anthropic.js +0 -14
- package/dist/tools/index.d.ts +0 -12
- package/dist/tools/index.js +0 -31
- package/dist/tools/openai.js +0 -22
- package/dist/tools/schema.d.ts +0 -126
- package/dist/tools/schema.js +0 -87
package/package.json
CHANGED
package/dist/tools/anthropic.js
DELETED
@@ -1,14 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_TOOL_ANTHROPIC = exports.SCRAPE_TOOL_ANTHROPIC = void 0;
-const schema_1 = require("./schema");
-exports.SCRAPE_TOOL_ANTHROPIC = {
-    input_schema: schema_1.SCRAPE_SCHEMA,
-    name: "scrape_webpage",
-    description: "Scrape content from a webpage and return the content in markdown format",
-};
-exports.CRAWL_TOOL_ANTHROPIC = {
-    input_schema: schema_1.CRAWL_SCHEMA,
-    name: "crawl_website",
-    description: "Crawl a website and return the content in markdown format",
-};
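For reference, the removed Anthropic definitions follow the { name, description, input_schema } tool shape, so in 0.17.0 they could be handed directly to the Anthropic Messages API. A minimal sketch, assuming the constants are imported from the old dist/tools/anthropic build output and an ANTHROPIC_API_KEY is configured; the import path and model id are illustrative, not confirmed by this diff:

import Anthropic from "@anthropic-ai/sdk";
// Illustrative import path; in 0.17.0 these constants lived in dist/tools/anthropic.js.
import { SCRAPE_TOOL_ANTHROPIC, CRAWL_TOOL_ANTHROPIC } from "@hyperbrowser/sdk/dist/tools/anthropic";

const anthropic = new Anthropic(); // reads ANTHROPIC_API_KEY from the environment

async function ask(prompt: string) {
  // The removed constants plug in as-is, since they already carry name/description/input_schema.
  return anthropic.messages.create({
    model: "claude-3-5-sonnet-latest", // illustrative model id
    max_tokens: 1024,
    tools: [SCRAPE_TOOL_ANTHROPIC, CRAWL_TOOL_ANTHROPIC],
    messages: [{ role: "user", content: prompt }],
  });
}
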
package/dist/tools/index.d.ts
DELETED
@@ -1,12 +0,0 @@
-import { HyperbrowserClient } from "../client";
-import { StartScrapeJobParams, StartCrawlJobParams } from "../types";
-export declare class WebsiteScrapeTool {
-    static openaiToolDefinition: import("./openai").ChatCompletionTool;
-    static anthropicToolDefinition: import("./anthropic").Tool;
-    static runnable(hb: HyperbrowserClient, params: StartScrapeJobParams): Promise<string>;
-}
-export declare class WebsiteCrawlTool {
-    static openaiToolDefinition: import("./openai").ChatCompletionTool;
-    static anthropicToolDefinition: import("./anthropic").Tool;
-    static runnable(hb: HyperbrowserClient, params: StartCrawlJobParams): Promise<string>;
-}
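The deleted declarations show the whole public surface of these helpers: two static tool definitions plus a static runnable() that resolves to markdown. A minimal sketch of calling runnable() directly, assuming the 0.17.0 package re-exported these classes from its root and that the client accepts an apiKey option; both are assumptions, only the runnable() signature comes from the deleted .d.ts:

import { HyperbrowserClient, WebsiteScrapeTool } from "@hyperbrowser/sdk"; // export path assumed

async function main() {
  // Constructor options are assumed, not shown in this diff.
  const hb = new HyperbrowserClient({ apiKey: process.env.HYPERBROWSER_API_KEY });
  // runnable() starts a scrape job, waits for it, and resolves with the page markdown.
  const markdown = await WebsiteScrapeTool.runnable(hb, { url: "https://example.com" });
  console.log(markdown);
}

main();
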
package/dist/tools/index.js
DELETED
@@ -1,31 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.WebsiteCrawlTool = exports.WebsiteScrapeTool = void 0;
-const openai_1 = require("./openai");
-const anthropic_1 = require("./anthropic");
-class WebsiteScrapeTool {
-    static async runnable(hb, params) {
-        const resp = await hb.scrape.startAndWait(params);
-        return resp.data?.markdown || "";
-    }
-}
-exports.WebsiteScrapeTool = WebsiteScrapeTool;
-WebsiteScrapeTool.openaiToolDefinition = openai_1.SCRAPE_TOOL_OPENAI;
-WebsiteScrapeTool.anthropicToolDefinition = anthropic_1.SCRAPE_TOOL_ANTHROPIC;
-class WebsiteCrawlTool {
-    static async runnable(hb, params) {
-        const resp = await hb.crawl.startAndWait(params);
-        let markdown = "";
-        if (resp.data) {
-            for (const page of resp.data) {
-                if (page.markdown) {
-                    markdown += `\n${"-".repeat(50)}\nUrl: ${page.url}\nMarkdown:\n${page.markdown}\n`;
-                }
-            }
-        }
-        return markdown;
-    }
-}
-exports.WebsiteCrawlTool = WebsiteCrawlTool;
-WebsiteCrawlTool.openaiToolDefinition = openai_1.CRAWL_TOOL_OPENAI;
-WebsiteCrawlTool.anthropicToolDefinition = anthropic_1.CRAWL_TOOL_ANTHROPIC;
package/dist/tools/openai.js
DELETED
@@ -1,22 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_TOOL_OPENAI = exports.SCRAPE_TOOL_OPENAI = void 0;
-const schema_1 = require("./schema");
-exports.SCRAPE_TOOL_OPENAI = {
-    type: "function",
-    function: {
-        name: "scrape_webpage",
-        description: "Scrape content from a webpage and return the content in markdown format",
-        parameters: schema_1.SCRAPE_SCHEMA,
-        strict: true,
-    },
-};
-exports.CRAWL_TOOL_OPENAI = {
-    type: "function",
-    function: {
-        name: "crawl_website",
-        description: "Crawl a website and return the content in markdown format",
-        parameters: schema_1.CRAWL_SCHEMA,
-        strict: true,
-    },
-};
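The removed OpenAI definitions are plain Chat Completions function tools, so in 0.17.0 they combined naturally with the runnable() helpers from the deleted index.js: pass the definitions in tools, then dispatch the returned tool call by name. A hedged sketch; the import paths and model id are assumptions:

import OpenAI from "openai";
import { HyperbrowserClient } from "@hyperbrowser/sdk"; // export path assumed
// Illustrative import path; in 0.17.0 these classes lived in dist/tools/index.js.
import { WebsiteScrapeTool, WebsiteCrawlTool } from "@hyperbrowser/sdk/dist/tools";

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

async function runWithTools(hb: HyperbrowserClient, prompt: string) {
  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini", // illustrative model id
    messages: [{ role: "user", content: prompt }],
    tools: [WebsiteScrapeTool.openaiToolDefinition, WebsiteCrawlTool.openaiToolDefinition],
  });

  const toolCall = completion.choices[0].message.tool_calls?.[0];
  if (!toolCall) return completion.choices[0].message.content;

  // Tool names match the removed definitions: "scrape_webpage" and "crawl_website".
  const args = JSON.parse(toolCall.function.arguments);
  return toolCall.function.name === "scrape_webpage"
    ? WebsiteScrapeTool.runnable(hb, args)
    : WebsiteCrawlTool.runnable(hb, args);
}
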
package/dist/tools/schema.d.ts
DELETED
@@ -1,126 +0,0 @@
-export declare const SCRAPE_OPTIONS: {
-    type: string;
-    description: string;
-    properties: {
-        include_tags: {
-            type: string;
-            items: {
-                type: string;
-            };
-            description: string;
-        };
-        exclude_tags: {
-            type: string;
-            items: {
-                type: string;
-            };
-            description: string;
-        };
-        only_main_content: {
-            type: string;
-            description: string;
-        };
-    };
-    required: string[];
-    additionalProperties: boolean;
-};
-export declare const SCRAPE_SCHEMA: {
-    type: "object";
-    properties: {
-        url: {
-            type: string;
-            description: string;
-        };
-        scrape_options: {
-            type: string;
-            description: string;
-            properties: {
-                include_tags: {
-                    type: string;
-                    items: {
-                        type: string;
-                    };
-                    description: string;
-                };
-                exclude_tags: {
-                    type: string;
-                    items: {
-                        type: string;
-                    };
-                    description: string;
-                };
-                only_main_content: {
-                    type: string;
-                    description: string;
-                };
-            };
-            required: string[];
-            additionalProperties: boolean;
-        };
-    };
-    required: string[];
-    additionalProperties: boolean;
-};
-export declare const CRAWL_SCHEMA: {
-    type: "object";
-    properties: {
-        url: {
-            type: string;
-            description: string;
-        };
-        max_pages: {
-            type: string;
-            description: string;
-        };
-        follow_links: {
-            type: string;
-            description: string;
-        };
-        ignore_sitemap: {
-            type: string;
-            description: string;
-        };
-        exclude_patterns: {
-            type: string;
-            items: {
-                type: string;
-            };
-            description: string;
-        };
-        include_patterns: {
-            type: string;
-            items: {
-                type: string;
-            };
-            description: string;
-        };
-        scrape_options: {
-            type: string;
-            description: string;
-            properties: {
-                include_tags: {
-                    type: string;
-                    items: {
-                        type: string;
-                    };
-                    description: string;
-                };
-                exclude_tags: {
-                    type: string;
-                    items: {
-                        type: string;
-                    };
-                    description: string;
-                };
-                only_main_content: {
-                    type: string;
-                    description: string;
-                };
-            };
-            required: string[];
-            additionalProperties: boolean;
-        };
-    };
-    required: string[];
-    additionalProperties: boolean;
-};
package/dist/tools/schema.js
DELETED
@@ -1,87 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.CRAWL_SCHEMA = exports.SCRAPE_SCHEMA = exports.SCRAPE_OPTIONS = void 0;
-exports.SCRAPE_OPTIONS = {
-    type: "object",
-    description: "The options for the scrape",
-    properties: {
-        include_tags: {
-            type: "array",
-            items: {
-                type: "string",
-            },
-            description: "An array of HTML tags, classes, or IDs to include in the scraped content. Only elements matching these selectors will be returned.",
-        },
-        exclude_tags: {
-            type: "array",
-            items: {
-                type: "string",
-            },
-            description: "An array of HTML tags, classes, or IDs to exclude from the scraped content. Elements matching these selectors will be omitted from the response.",
-        },
-        only_main_content: {
-            type: "boolean",
-            description: "Whether to only return the main content of the page. If true, only the main content of the page will be returned, excluding any headers, navigation menus,footers, or other non-main content.",
-        },
-    },
-    required: ["include_tags", "exclude_tags", "only_main_content"],
-    additionalProperties: false,
-};
-exports.SCRAPE_SCHEMA = {
-    type: "object",
-    properties: {
-        url: {
-            type: "string",
-            description: "The URL of the website to scrape",
-        },
-        scrape_options: exports.SCRAPE_OPTIONS,
-    },
-    required: ["url", "scrape_options"],
-    additionalProperties: false,
-};
-exports.CRAWL_SCHEMA = {
-    type: "object",
-    properties: {
-        url: {
-            type: "string",
-            description: "The URL of the website to crawl",
-        },
-        max_pages: {
-            type: "number",
-            description: "The maximum number of pages to crawl",
-        },
-        follow_links: {
-            type: "boolean",
-            description: "Whether to follow links on the page",
-        },
-        ignore_sitemap: {
-            type: "boolean",
-            description: "Whether to ignore the sitemap",
-        },
-        exclude_patterns: {
-            type: "array",
-            items: {
-                type: "string",
-            },
-            description: "An array of regular expressions or wildcard patterns specifying which URLs should be excluded from the crawl. Any pages whose URLs' path match one of these patterns will be skipped. Example: ['/admin', '/careers/*']",
-        },
-        include_patterns: {
-            type: "array",
-            items: {
-                type: "string",
-            },
-            description: "An array of regular expressions or wildcard patterns specifying which URLs should be included in the crawl. Only pages whose URLs' path match one of these path patterns will be visited. Example: ['/admin', '/careers/*']",
-        },
-        scrape_options: exports.SCRAPE_OPTIONS,
-    },
-    required: [
-        "url",
-        "max_pages",
-        "follow_links",
-        "ignore_sitemap",
-        "exclude_patterns",
-        "include_patterns",
-        "scrape_options",
-    ],
-    additionalProperties: false,
-};
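Both removed schemas set additionalProperties: false and mark every property as required, so a conforming crawl_website call had to supply all fields, including the nested scrape_options. An illustrative arguments payload that would satisfy the removed CRAWL_SCHEMA (all values are made up for the example):

// Example tool-call arguments for the removed "crawl_website" tool under CRAWL_SCHEMA.
const crawlArgs = {
  url: "https://example.com",
  max_pages: 10,
  follow_links: true,
  ignore_sitemap: false,
  exclude_patterns: ["/admin"],
  include_patterns: ["/blog/*"],
  scrape_options: {
    include_tags: ["article"],
    exclude_tags: ["nav", "footer"],
    only_main_content: true,
  },
};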