@hyperbrowser/sdk 0.18.0 → 0.20.0
This diff shows the changes between the two published package versions as they appear in the public registry, and is provided for informational purposes only.
- package/dist/client.d.ts +20 -0
- package/dist/client.js +34 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +16 -0
- package/dist/services/base.d.ts +8 -0
- package/dist/services/base.js +68 -0
- package/dist/services/crawl.d.ts +21 -0
- package/dist/services/crawl.js +82 -0
- package/dist/services/extensions.d.ts +13 -0
- package/dist/services/extensions.js +74 -0
- package/dist/services/extract.d.ts +20 -0
- package/dist/services/extract.js +78 -0
- package/dist/services/profiles.d.ts +19 -0
- package/dist/services/profiles.js +56 -0
- package/dist/services/scrape.d.ts +19 -0
- package/dist/services/scrape.js +62 -0
- package/dist/services/sessions.d.ts +29 -0
- package/dist/services/sessions.js +91 -0
- package/dist/tools/anthropic.d.ts +35 -0
- package/dist/tools/anthropic.js +14 -0
- package/dist/tools/index.d.ts +12 -0
- package/dist/tools/index.js +31 -0
- package/dist/tools/openai.d.ts +40 -0
- package/dist/tools/openai.js +22 -0
- package/dist/tools/schema.d.ts +126 -0
- package/dist/tools/schema.js +87 -0
- package/dist/types/config.d.ts +5 -0
- package/dist/types/config.js +2 -0
- package/dist/types/constants.d.ts +9 -0
- package/dist/types/constants.js +2 -0
- package/dist/types/crawl.d.ts +40 -0
- package/dist/types/crawl.js +2 -0
- package/dist/types/extension.d.ts +13 -0
- package/dist/types/extension.js +2 -0
- package/dist/types/extract.d.ts +18 -0
- package/dist/types/extract.js +2 -0
- package/dist/types/index.d.ts +7 -0
- package/dist/types/index.js +2 -0
- package/dist/types/profile.d.ts +9 -0
- package/dist/types/profile.js +2 -0
- package/dist/types/scrape.d.ts +31 -0
- package/dist/types/scrape.js +2 -0
- package/dist/types/session.d.ts +64 -0
- package/dist/types/session.js +2 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.js +5 -0
- package/package.json +10 -4

package/dist/services/sessions.js
```diff
@@ -0,0 +1,91 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SessionsService = void 0;
+const base_1 = require("./base");
+const client_1 = require("../client");
+class SessionsService extends base_1.BaseService {
+    /**
+     * Create a new browser session
+     * @param params Configuration parameters for the new session
+     */
+    async create(params) {
+        try {
+            return await this.request("/session", {
+                method: "POST",
+                body: params ? JSON.stringify(params) : undefined,
+            });
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError("Failed to create session", undefined);
+        }
+    }
+    /**
+     * Get details of an existing session
+     * @param id The ID of the session to get
+     */
+    async get(id) {
+        try {
+            return await this.request(`/session/${id}`);
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError(`Failed to get session ${id}`, undefined);
+        }
+    }
+    /**
+     * Stop a running session
+     * @param id The ID of the session to stop
+     */
+    async stop(id) {
+        try {
+            return await this.request(`/session/${id}/stop`, {
+                method: "PUT",
+            });
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError(`Failed to stop session ${id}`, undefined);
+        }
+    }
+    /**
+     * List all sessions with optional filtering
+     * @param params Optional parameters to filter the sessions
+     */
+    async list(params = {}) {
+        try {
+            return await this.request("/sessions", undefined, {
+                status: params.status,
+                page: params.page,
+            });
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError("Failed to list sessions", undefined);
+        }
+    }
+    /**
+     * Get the recording of a session
+     * @param id The ID of the session to get the recording from
+     */
+    async getRecording(id) {
+        try {
+            return await this.request(`/session/${id}/recording`);
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError(`Failed to get recording for session ${id}`, undefined);
+        }
+    }
+}
+exports.SessionsService = SessionsService;
```
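
For context, a minimal sketch of how these new session methods might be called through the SDK client. The import path, the `apiKey` config shape, the `hb.sessions` property name, and the `id` field on the create response are assumptions based on the type and service names in this diff, not something the diff itself confirms.

```ts
import { HyperbrowserClient } from "@hyperbrowser/sdk"; // assumed entry point

// Assumption: the client takes a HyperbrowserConfig-style object with an apiKey.
const hb = new HyperbrowserClient({ apiKey: process.env.HYPERBROWSER_API_KEY! });

async function demoSessions() {
  // POST /session — create a new browser session
  const session = await hb.sessions.create();

  // GET /session/{id} — fetch details (assumes the response carries an id)
  const detail = await hb.sessions.get(session.id);

  // GET /sessions — list sessions, optionally filtered by status and page
  const firstPage = await hb.sessions.list({ page: 1 });

  // PUT /session/{id}/stop — stop the running session
  await hb.sessions.stop(session.id);

  // GET /session/{id}/recording — fetch the recording for the session
  const recording = await hb.sessions.getRecording(session.id);

  console.log(detail, firstPage, recording);
}
```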

package/dist/tools/anthropic.d.ts
```diff
@@ -0,0 +1,35 @@
+export interface CacheControlEphemeral {
+    type: "ephemeral";
+}
+export interface InputSchema {
+    type: "object";
+    properties?: unknown | null;
+    [k: string]: unknown;
+}
+export interface Tool {
+    /**
+     * [JSON schema](https://json-schema.org/) for this tool's input.
+     *
+     * This defines the shape of the `input` that your tool accepts and that the model
+     * will produce.
+     */
+    input_schema: InputSchema;
+    /**
+     * Name of the tool.
+     *
+     * This is how the tool will be called by the model and in tool_use blocks.
+     */
+    name: string;
+    cache_control?: CacheControlEphemeral | null;
+    /**
+     * Description of what this tool does.
+     *
+     * Tool descriptions should be as detailed as possible. The more information that
+     * the model has about what the tool is and how to use it, the better it will
+     * perform. You can use natural language descriptions to reinforce important
+     * aspects of the tool input JSON schema.
+     */
+    description?: string;
+}
+export declare const SCRAPE_TOOL_ANTHROPIC: Tool;
+export declare const CRAWL_TOOL_ANTHROPIC: Tool;
```

package/dist/tools/anthropic.js
```diff
@@ -0,0 +1,14 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CRAWL_TOOL_ANTHROPIC = exports.SCRAPE_TOOL_ANTHROPIC = void 0;
+const schema_1 = require("./schema");
+exports.SCRAPE_TOOL_ANTHROPIC = {
+    input_schema: schema_1.SCRAPE_SCHEMA,
+    name: "scrape_webpage",
+    description: "Scrape content from a webpage and return the content in markdown format",
+};
+exports.CRAWL_TOOL_ANTHROPIC = {
+    input_schema: schema_1.CRAWL_SCHEMA,
+    name: "crawl_website",
+    description: "Crawl a website and return the content in markdown format",
+};
```
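
These constants are plain Anthropic-style tool definitions, so they can be passed directly in the `tools` array of a Messages API request. A hedged sketch follows; the `@anthropic-ai/sdk` usage, the model id, and the `@hyperbrowser/sdk/tools` import path are assumptions rather than something this diff documents.

```ts
import Anthropic from "@anthropic-ai/sdk";
import { SCRAPE_TOOL_ANTHROPIC, CRAWL_TOOL_ANTHROPIC } from "@hyperbrowser/sdk/tools"; // path assumed

const anthropic = new Anthropic(); // reads ANTHROPIC_API_KEY from the environment

const message = await anthropic.messages.create({
  model: "claude-3-5-sonnet-latest", // hypothetical model id
  max_tokens: 1024,
  tools: [SCRAPE_TOOL_ANTHROPIC, CRAWL_TOOL_ANTHROPIC],
  messages: [{ role: "user", content: "Scrape https://example.com and summarize it." }],
});

// Any tool_use blocks in message.content carry input shaped by SCRAPE_SCHEMA / CRAWL_SCHEMA.
console.log(message.content);
```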

package/dist/tools/index.d.ts
```diff
@@ -0,0 +1,12 @@
+import { HyperbrowserClient } from "../client";
+import { StartScrapeJobParams, StartCrawlJobParams } from "../types";
+export declare class WebsiteScrapeTool {
+    static openaiToolDefinition: import("./openai").ChatCompletionTool;
+    static anthropicToolDefinition: import("./anthropic").Tool;
+    static runnable(hb: HyperbrowserClient, params: StartScrapeJobParams): Promise<string>;
+}
+export declare class WebsiteCrawlTool {
+    static openaiToolDefinition: import("./openai").ChatCompletionTool;
+    static anthropicToolDefinition: import("./anthropic").Tool;
+    static runnable(hb: HyperbrowserClient, params: StartCrawlJobParams): Promise<string>;
+}
```

package/dist/tools/index.js
```diff
@@ -0,0 +1,31 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.WebsiteCrawlTool = exports.WebsiteScrapeTool = void 0;
+const openai_1 = require("./openai");
+const anthropic_1 = require("./anthropic");
+class WebsiteScrapeTool {
+    static async runnable(hb, params) {
+        const resp = await hb.scrape.startAndWait(params);
+        return resp.data?.markdown || "";
+    }
+}
+exports.WebsiteScrapeTool = WebsiteScrapeTool;
+WebsiteScrapeTool.openaiToolDefinition = openai_1.SCRAPE_TOOL_OPENAI;
+WebsiteScrapeTool.anthropicToolDefinition = anthropic_1.SCRAPE_TOOL_ANTHROPIC;
+class WebsiteCrawlTool {
+    static async runnable(hb, params) {
+        const resp = await hb.crawl.startAndWait(params);
+        let markdown = "";
+        if (resp.data) {
+            for (const page of resp.data) {
+                if (page.markdown) {
+                    markdown += `\n${"-".repeat(50)}\nUrl: ${page.url}\nMarkdown:\n${page.markdown}\n`;
+                }
+            }
+        }
+        return markdown;
+    }
+}
+exports.WebsiteCrawlTool = WebsiteCrawlTool;
+WebsiteCrawlTool.openaiToolDefinition = openai_1.CRAWL_TOOL_OPENAI;
+WebsiteCrawlTool.anthropicToolDefinition = anthropic_1.CRAWL_TOOL_ANTHROPIC;
```
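
The `runnable` helpers can also be called directly, without a model in the loop; they start the job, wait for it, and flatten the result to markdown. A minimal sketch, using the camelCase parameter shapes from `StartScrapeJobParams` / `StartCrawlJobParams` later in this diff; the client construction and import paths are assumptions.

```ts
import { HyperbrowserClient } from "@hyperbrowser/sdk"; // assumed entry point
import { WebsiteScrapeTool, WebsiteCrawlTool } from "@hyperbrowser/sdk/tools"; // path assumed

const hb = new HyperbrowserClient({ apiKey: process.env.HYPERBROWSER_API_KEY! }); // config shape assumed

// Scrape one page: resolves to that page's markdown (or "" if none was produced).
const pageMarkdown = await WebsiteScrapeTool.runnable(hb, {
  url: "https://example.com",
  scrapeOptions: { onlyMainContent: true },
});

// Crawl a site: each crawled page is appended with a "----" separator and its URL.
const siteMarkdown = await WebsiteCrawlTool.runnable(hb, {
  url: "https://example.com",
  maxPages: 5,
  followLinks: true,
});

console.log(pageMarkdown.length, siteMarkdown.length);
```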

package/dist/tools/openai.d.ts
```diff
@@ -0,0 +1,40 @@
+export type FunctionParameters = Record<string, unknown>;
+export interface FunctionDefinition {
+    /**
+     * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain
+     * underscores and dashes, with a maximum length of 64.
+     */
+    name: string;
+    /**
+     * A description of what the function does, used by the model to choose when and
+     * how to call the function.
+     */
+    description?: string;
+    /**
+     * The parameters the functions accepts, described as a JSON Schema object. See the
+     * [guide](https://platform.openai.com/docs/guides/function-calling) for examples,
+     * and the
+     * [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
+     * documentation about the format.
+     *
+     * Omitting `parameters` defines a function with an empty parameter list.
+     */
+    parameters?: FunctionParameters;
+    /**
+     * Whether to enable strict schema adherence when generating the function call. If
+     * set to true, the model will follow the exact schema defined in the `parameters`
+     * field. Only a subset of JSON Schema is supported when `strict` is `true`. Learn
+     * more about Structured Outputs in the
+     * [function calling guide](docs/guides/function-calling).
+     */
+    strict?: boolean | null;
+}
+export interface ChatCompletionTool {
+    function: FunctionDefinition;
+    /**
+     * The type of the tool. Currently, only `function` is supported.
+     */
+    type: "function";
+}
+export declare const SCRAPE_TOOL_OPENAI: ChatCompletionTool;
+export declare const CRAWL_TOOL_OPENAI: ChatCompletionTool;
```

package/dist/tools/openai.js
```diff
@@ -0,0 +1,22 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CRAWL_TOOL_OPENAI = exports.SCRAPE_TOOL_OPENAI = void 0;
+const schema_1 = require("./schema");
+exports.SCRAPE_TOOL_OPENAI = {
+    type: "function",
+    function: {
+        name: "scrape_webpage",
+        description: "Scrape content from a webpage and return the content in markdown format",
+        parameters: schema_1.SCRAPE_SCHEMA,
+        strict: true,
+    },
+};
+exports.CRAWL_TOOL_OPENAI = {
+    type: "function",
+    function: {
+        name: "crawl_website",
+        description: "Crawl a website and return the content in markdown format",
+        parameters: schema_1.CRAWL_SCHEMA,
+        strict: true,
+    },
+};
```
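
On the OpenAI side the constants are `ChatCompletionTool` objects with `strict: true`, so they drop straight into the `tools` array of a chat completion request. A hedged sketch; the `openai` SDK usage, the model id, and the import path are assumptions.

```ts
import OpenAI from "openai";
import { SCRAPE_TOOL_OPENAI } from "@hyperbrowser/sdk/tools"; // path assumed

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

const completion = await openai.chat.completions.create({
  model: "gpt-4o", // hypothetical model id
  messages: [{ role: "user", content: "Scrape https://example.com for me." }],
  tools: [SCRAPE_TOOL_OPENAI],
});

// If the model calls the tool, its arguments arrive as a JSON string shaped by
// SCRAPE_SCHEMA: snake_case keys, every property present (strict mode).
const toolCall = completion.choices[0].message.tool_calls?.[0];
if (toolCall) {
  const args = JSON.parse(toolCall.function.arguments);
  console.log(args.url, args.scrape_options);
}
```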

package/dist/tools/schema.d.ts
```diff
@@ -0,0 +1,126 @@
+export declare const SCRAPE_OPTIONS: {
+    type: string;
+    description: string;
+    properties: {
+        include_tags: {
+            type: string;
+            items: {
+                type: string;
+            };
+            description: string;
+        };
+        exclude_tags: {
+            type: string;
+            items: {
+                type: string;
+            };
+            description: string;
+        };
+        only_main_content: {
+            type: string;
+            description: string;
+        };
+    };
+    required: string[];
+    additionalProperties: boolean;
+};
+export declare const SCRAPE_SCHEMA: {
+    type: "object";
+    properties: {
+        url: {
+            type: string;
+            description: string;
+        };
+        scrape_options: {
+            type: string;
+            description: string;
+            properties: {
+                include_tags: {
+                    type: string;
+                    items: {
+                        type: string;
+                    };
+                    description: string;
+                };
+                exclude_tags: {
+                    type: string;
+                    items: {
+                        type: string;
+                    };
+                    description: string;
+                };
+                only_main_content: {
+                    type: string;
+                    description: string;
+                };
+            };
+            required: string[];
+            additionalProperties: boolean;
+        };
+    };
+    required: string[];
+    additionalProperties: boolean;
+};
+export declare const CRAWL_SCHEMA: {
+    type: "object";
+    properties: {
+        url: {
+            type: string;
+            description: string;
+        };
+        max_pages: {
+            type: string;
+            description: string;
+        };
+        follow_links: {
+            type: string;
+            description: string;
+        };
+        ignore_sitemap: {
+            type: string;
+            description: string;
+        };
+        exclude_patterns: {
+            type: string;
+            items: {
+                type: string;
+            };
+            description: string;
+        };
+        include_patterns: {
+            type: string;
+            items: {
+                type: string;
+            };
+            description: string;
+        };
+        scrape_options: {
+            type: string;
+            description: string;
+            properties: {
+                include_tags: {
+                    type: string;
+                    items: {
+                        type: string;
+                    };
+                    description: string;
+                };
+                exclude_tags: {
+                    type: string;
+                    items: {
+                        type: string;
+                    };
+                    description: string;
+                };
+                only_main_content: {
+                    type: string;
+                    description: string;
+                };
+            };
+            required: string[];
+            additionalProperties: boolean;
+        };
+    };
+    required: string[];
+    additionalProperties: boolean;
+};
```

package/dist/tools/schema.js
```diff
@@ -0,0 +1,87 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CRAWL_SCHEMA = exports.SCRAPE_SCHEMA = exports.SCRAPE_OPTIONS = void 0;
+exports.SCRAPE_OPTIONS = {
+    type: "object",
+    description: "The options for the scrape",
+    properties: {
+        include_tags: {
+            type: "array",
+            items: {
+                type: "string",
+            },
+            description: "An array of HTML tags, classes, or IDs to include in the scraped content. Only elements matching these selectors will be returned.",
+        },
+        exclude_tags: {
+            type: "array",
+            items: {
+                type: "string",
+            },
+            description: "An array of HTML tags, classes, or IDs to exclude from the scraped content. Elements matching these selectors will be omitted from the response.",
+        },
+        only_main_content: {
+            type: "boolean",
+            description: "Whether to only return the main content of the page. If true, only the main content of the page will be returned, excluding any headers, navigation menus,footers, or other non-main content.",
+        },
+    },
+    required: ["include_tags", "exclude_tags", "only_main_content"],
+    additionalProperties: false,
+};
+exports.SCRAPE_SCHEMA = {
+    type: "object",
+    properties: {
+        url: {
+            type: "string",
+            description: "The URL of the website to scrape",
+        },
+        scrape_options: exports.SCRAPE_OPTIONS,
+    },
+    required: ["url", "scrape_options"],
+    additionalProperties: false,
+};
+exports.CRAWL_SCHEMA = {
+    type: "object",
+    properties: {
+        url: {
+            type: "string",
+            description: "The URL of the website to crawl",
+        },
+        max_pages: {
+            type: "number",
+            description: "The maximum number of pages to crawl",
+        },
+        follow_links: {
+            type: "boolean",
+            description: "Whether to follow links on the page",
+        },
+        ignore_sitemap: {
+            type: "boolean",
+            description: "Whether to ignore the sitemap",
+        },
+        exclude_patterns: {
+            type: "array",
+            items: {
+                type: "string",
+            },
+            description: "An array of regular expressions or wildcard patterns specifying which URLs should be excluded from the crawl. Any pages whose URLs' path match one of these patterns will be skipped. Example: ['/admin', '/careers/*']",
+        },
+        include_patterns: {
+            type: "array",
+            items: {
+                type: "string",
+            },
+            description: "An array of regular expressions or wildcard patterns specifying which URLs should be included in the crawl. Only pages whose URLs' path match one of these path patterns will be visited. Example: ['/admin', '/careers/*']",
+        },
+        scrape_options: exports.SCRAPE_OPTIONS,
+    },
+    required: [
+        "url",
+        "max_pages",
+        "follow_links",
+        "ignore_sitemap",
+        "exclude_patterns",
+        "include_patterns",
+        "scrape_options",
+    ],
+    additionalProperties: false,
+};
```
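
Because both schemas set `additionalProperties: false` and mark every property as required (which is what OpenAI strict mode expects), a conforming `crawl_website` call has to spell out every field. A hypothetical set of arguments a model could emit:

```ts
// Hypothetical tool-call arguments that satisfy CRAWL_SCHEMA.
const crawlArgs = {
  url: "https://example.com",
  max_pages: 10,
  follow_links: true,
  ignore_sitemap: false,
  exclude_patterns: ["/admin"],
  include_patterns: ["/blog/*"],
  scrape_options: {
    include_tags: ["article"],
    exclude_tags: ["nav"],
    only_main_content: true,
  },
};
```

Note that the schema keys are snake_case while the SDK job parameters (`StartCrawlJobParams` below) are camelCase, so a caller forwarding model output to the SDK may need to map the keys.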

package/dist/types/constants.d.ts
```diff
@@ -0,0 +1,9 @@
+export type ScrapeFormat = "markdown" | "html" | "links" | "screenshot";
+export type ScrapeJobStatus = "pending" | "running" | "completed" | "failed";
+export type ExtractJobStatus = "pending" | "running" | "completed" | "failed";
+export type CrawlJobStatus = "pending" | "running" | "completed" | "failed";
+export type CrawlPageStatus = "completed" | "failed";
+export type Country = "AD" | "AE" | "AF" | "AL" | "AM" | "AO" | "AR" | "AT" | "AU" | "AW" | "AZ" | "BA" | "BD" | "BE" | "BG" | "BH" | "BJ" | "BO" | "BR" | "BS" | "BT" | "BY" | "BZ" | "CA" | "CF" | "CH" | "CI" | "CL" | "CM" | "CN" | "CO" | "CR" | "CU" | "CY" | "CZ" | "DE" | "DJ" | "DK" | "DM" | "EC" | "EE" | "EG" | "ES" | "ET" | "EU" | "FI" | "FJ" | "FR" | "GB" | "GE" | "GH" | "GM" | "GR" | "HK" | "HN" | "HR" | "HT" | "HU" | "ID" | "IE" | "IL" | "IN" | "IQ" | "IR" | "IS" | "IT" | "JM" | "JO" | "JP" | "KE" | "KH" | "KR" | "KW" | "KZ" | "LB" | "LI" | "LR" | "LT" | "LU" | "LV" | "MA" | "MC" | "MD" | "ME" | "MG" | "MK" | "ML" | "MM" | "MN" | "MR" | "MT" | "MU" | "MV" | "MX" | "MY" | "MZ" | "NG" | "NL" | "NO" | "NZ" | "OM" | "PA" | "PE" | "PH" | "PK" | "PL" | "PR" | "PT" | "PY" | "QA" | "RANDOM_COUNTRY" | "RO" | "RS" | "RU" | "SA" | "SC" | "SD" | "SE" | "SG" | "SI" | "SK" | "SN" | "SS" | "TD" | "TG" | "TH" | "TM" | "TN" | "TR" | "TT" | "TW" | "UA" | "UG" | "US" | "UY" | "UZ" | "VE" | "VG" | "VN" | "YE" | "ZA" | "ZM" | "ZW" | "ad" | "ae" | "af" | "al" | "am" | "ao" | "ar" | "at" | "au" | "aw" | "az" | "ba" | "bd" | "be" | "bg" | "bh" | "bj" | "bo" | "br" | "bs" | "bt" | "by" | "bz" | "ca" | "cf" | "ch" | "ci" | "cl" | "cm" | "cn" | "co" | "cr" | "cu" | "cy" | "cz" | "de" | "dj" | "dk" | "dm" | "ec" | "ee" | "eg" | "es" | "et" | "eu" | "fi" | "fj" | "fr" | "gb" | "ge" | "gh" | "gm" | "gr" | "hk" | "hn" | "hr" | "ht" | "hu" | "id" | "ie" | "il" | "in" | "iq" | "ir" | "is" | "it" | "jm" | "jo" | "jp" | "ke" | "kh" | "kr" | "kw" | "kz" | "lb" | "li" | "lr" | "lt" | "lu" | "lv" | "ma" | "mc" | "md" | "me" | "mg" | "mk" | "ml" | "mm" | "mn" | "mr" | "mt" | "mu" | "mv" | "mx" | "my" | "mz" | "ng" | "nl" | "no" | "nz" | "om" | "pa" | "pe" | "ph" | "pk" | "pl" | "pr" | "pt" | "py" | "qa" | "ro" | "rs" | "ru" | "sa" | "sc" | "sd" | "se" | "sg" | "si" | "sk" | "sn" | "ss" | "td" | "tg" | "th" | "tm" | "tn" | "tr" | "tt" | "tw" | "ua" | "ug" | "us" | "uy" | "uz" | "ve" | "vg" | "vn" | "ye" | "za" | "zm" | "zw";
+export type OperatingSystem = "windows" | "android" | "macos" | "linux" | "ios";
+export type Platform = "chrome" | "firefox" | "safari" | "edge";
+export type ISO639_1 = "aa" | "ab" | "ae" | "af" | "ak" | "am" | "an" | "ar" | "as" | "av" | "ay" | "az" | "ba" | "be" | "bg" | "bh" | "bi" | "bm" | "bn" | "bo" | "br" | "bs" | "ca" | "ce" | "ch" | "co" | "cr" | "cs" | "cu" | "cv" | "cy" | "da" | "de" | "dv" | "dz" | "ee" | "el" | "en" | "eo" | "es" | "et" | "eu" | "fa" | "ff" | "fi" | "fj" | "fo" | "fr" | "fy" | "ga" | "gd" | "gl" | "gn" | "gu" | "gv" | "ha" | "he" | "hi" | "ho" | "hr" | "ht" | "hu" | "hy" | "hz" | "ia" | "id" | "ie" | "ig" | "ii" | "ik" | "io" | "is" | "it" | "iu" | "ja" | "jv" | "ka" | "kg" | "ki" | "kj" | "kk" | "kl" | "km" | "kn" | "ko" | "kr" | "ks" | "ku" | "kv" | "kw" | "ky" | "la" | "lb" | "lg" | "li" | "ln" | "lo" | "lt" | "lu" | "lv" | "mg" | "mh" | "mi" | "mk" | "ml" | "mn" | "mo" | "mr" | "ms" | "mt" | "my" | "na" | "nb" | "nd" | "ne" | "ng" | "nl" | "nn" | "no" | "nr" | "nv" | "ny" | "oc" | "oj" | "om" | "or" | "os" | "pa" | "pi" | "pl" | "ps" | "pt" | "qu" | "rm" | "rn" | "ro" | "ru" | "rw" | "sa" | "sc" | "sd" | "se" | "sg" | "si" | "sk" | "sl" | "sm" | "sn" | "so" | "sq" | "sr" | "ss" | "st" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "ti" | "tk" | "tl" | "tn" | "to" | "tr" | "ts" | "tt" | "tw" | "ty" | "ug" | "uk" | "ur" | "uz" | "ve" | "vi" | "vo" | "wa" | "wo" | "xh" | "yi" | "yo" | "za" | "zh" | "zu";
```

package/dist/types/crawl.d.ts
```diff
@@ -0,0 +1,40 @@
+import { CrawlJobStatus, CrawlPageStatus } from "./constants";
+import { ScrapeOptions } from "./scrape";
+import { CreateSessionParams } from "./session";
+export interface StartCrawlJobParams {
+    url: string;
+    maxPages?: number;
+    followLinks?: boolean;
+    ignoreSitemap?: boolean;
+    excludePatterns?: string[];
+    includePatterns?: string[];
+    sessionOptions?: CreateSessionParams;
+    scrapeOptions?: ScrapeOptions;
+}
+export interface StartCrawlJobResponse {
+    jobId: string;
+}
+export interface GetCrawlJobParams {
+    page?: number;
+    batchSize?: number;
+}
+export interface CrawledPage {
+    url: string;
+    status: CrawlPageStatus;
+    error?: string | null;
+    metadata?: Record<string, string | string[]>;
+    markdown?: string;
+    html?: string;
+    links?: string[];
+    screenshot?: string;
+}
+export interface CrawlJobResponse {
+    jobId: string;
+    status: CrawlJobStatus;
+    data?: CrawledPage[];
+    error?: string;
+    totalCrawledPages: number;
+    totalPageBatches: number;
+    currentPageBatch: number;
+    batchSize: number;
+}
```

package/dist/types/extension.d.ts
```diff
@@ -0,0 +1,13 @@
+export interface CreateExtensionParams {
+    filePath: string;
+    name?: string;
+}
+interface ExtensionResponse {
+    name: string;
+    id: string;
+    createdAt: string;
+    updatedAt: string;
+}
+export type CreateExtensionResponse = ExtensionResponse;
+export type ListExtensionsResponse = Array<ExtensionResponse>;
+export {};
```

package/dist/types/extract.d.ts
```diff
@@ -0,0 +1,18 @@
+import { z } from "zod";
+import { ExtractJobStatus } from "./constants";
+import { CreateSessionParams } from "./session";
+export interface StartExtractJobParams {
+    urls: string[];
+    prompt?: string;
+    schema?: z.ZodSchema | object;
+    sessionOptions?: CreateSessionParams;
+}
+export interface StartExtractJobResponse {
+    jobId: string;
+}
+export interface ExtractJobResponse {
+    jobId: string;
+    status: ExtractJobStatus;
+    data?: object;
+    error?: string;
+}
```
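
`StartExtractJobParams.schema` accepts either a Zod schema or a plain JSON-schema object. A minimal sketch of building the parameters (only the types shown above are used; the import path is an assumption, and the extract service call itself is listed but not expanded in this diff):

```ts
import { z } from "zod";
import type { StartExtractJobParams } from "@hyperbrowser/sdk/types"; // path assumed

// A Zod schema describing the structured data to extract from each URL.
const productSchema = z.object({
  name: z.string(),
  price: z.number(),
});

const params: StartExtractJobParams = {
  urls: ["https://example.com/products/1"],
  prompt: "Extract the product name and price",
  schema: productSchema, // a plain JSON-schema object would also satisfy the type
};
```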

package/dist/types/index.d.ts
```diff
@@ -0,0 +1,7 @@
+export { HyperbrowserConfig } from "./config";
+export { StartCrawlJobParams, StartCrawlJobResponse, CrawledPage, CrawlJobResponse, GetCrawlJobParams, } from "./crawl";
+export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, } from "./scrape";
+export { BasicResponse, SessionStatus, Session, SessionDetail, SessionListParams, SessionListResponse, ScreenConfig, CreateSessionParams, } from "./session";
+export { ProfileResponse, CreateProfileResponse } from "./profile";
+export { CreateExtensionParams, CreateExtensionResponse, ListExtensionsResponse, } from "./extension";
+export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, } from "./constants";
```

package/dist/types/scrape.d.ts
```diff
@@ -0,0 +1,31 @@
+import { ScrapeFormat, ScrapeJobStatus } from "./constants";
+import { CreateSessionParams } from "./session";
+export interface ScrapeOptions {
+    formats?: ScrapeFormat[];
+    includeTags?: string[];
+    excludeTags?: string[];
+    onlyMainContent?: boolean;
+    waitFor?: number;
+    timeout?: number;
+}
+export interface StartScrapeJobParams {
+    url: string;
+    sessionOptions?: CreateSessionParams;
+    scrapeOptions?: ScrapeOptions;
+}
+export interface StartScrapeJobResponse {
+    jobId: string;
+}
+export interface ScrapeJobData {
+    metadata?: Record<string, string | string[]>;
+    markdown?: string;
+    html?: string;
+    links?: string[];
+    screenshot?: string;
+}
+export interface ScrapeJobResponse {
+    jobId: string;
+    status: ScrapeJobStatus;
+    data?: ScrapeJobData;
+    error?: string;
+}
```