@hyperbrowser/sdk 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/services/crawl.js +50 -13
- package/dist/services/extract.js +14 -3
- package/dist/services/scrape.d.ts +22 -1
- package/dist/services/scrape.js +131 -4
- package/dist/types/constants.d.ts +2 -0
- package/dist/types/constants.js +2 -0
- package/dist/types/extract.d.ts +2 -0
- package/dist/types/index.d.ts +2 -2
- package/dist/types/scrape.d.ts +33 -1
- package/package.json +1 -1
package/dist/services/crawl.js
CHANGED

@@ -4,6 +4,7 @@ exports.CrawlService = void 0;
 const base_1 = require("./base");
 const utils_1 = require("../utils");
 const client_1 = require("../client");
+const constants_1 = require("../types/constants");
 class CrawlService extends base_1.BaseService {
     /**
      * Start a new crawl job
@@ -53,27 +54,63 @@ class CrawlService extends base_1.BaseService {
             throw new client_1.HyperbrowserError("Failed to start crawl job, could not get job ID");
         }
         let jobResponse;
+        let failures = 0;
         while (true) {
-
-
-
+            try {
+                jobResponse = await this.get(jobId, { batchSize: 1 });
+                if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+                    break;
+                }
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to poll crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
             }
             await (0, utils_1.sleep)(2000);
         }
+        failures = 0;
         if (!returnAllPages) {
-
+            while (true) {
+                try {
+                    jobResponse = await this.get(jobId);
+                    return jobResponse;
+                }
+                catch (error) {
+                    failures++;
+                    if (failures >= constants_1.POLLING_ATTEMPTS) {
+                        throw new client_1.HyperbrowserError(`Failed to get crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                    }
+                }
+                await (0, utils_1.sleep)(500);
+            }
         }
+        jobResponse.currentPageBatch = 0;
+        jobResponse.data = [];
+        failures = 0;
         while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
-
-
-
-
-
+            try {
+                const tmpJobResponse = await this.get(jobId, {
+                    page: jobResponse.currentPageBatch + 1,
+                    batchSize: 100,
+                });
+                if (tmpJobResponse.data) {
+                    jobResponse.data?.push(...tmpJobResponse.data);
+                }
+                jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
+                jobResponse.totalCrawledPages = tmpJobResponse.totalCrawledPages;
+                jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
+                jobResponse.batchSize = tmpJobResponse.batchSize;
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to get crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
             }
-            jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
-            jobResponse.totalCrawledPages = tmpJobResponse.totalCrawledPages;
-            jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
-            jobResponse.batchSize = tmpJobResponse.batchSize;
             await (0, utils_1.sleep)(500);
         }
         return jobResponse;

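This change, along with the matching changes to extract.js and scrape.js below, replaces each bare polling loop with a bounded-retry loop: a transient error increments a failure counter, any successful poll resets it, and the job is abandoned once constants_1.POLLING_ATTEMPTS consecutive failures accumulate. A minimal standalone sketch of that pattern; the pollUntilDone helper and its getStatus callback are illustrative only, not SDK APIs:

```ts
// Illustrative bounded-retry polling, following the pattern added above.
// POLLING_ATTEMPTS mirrors the constant introduced in types/constants;
// getStatus stands in for this.get(jobId) and is not part of the SDK.
const POLLING_ATTEMPTS = 5;

const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function pollUntilDone(
  getStatus: () => Promise<{ status: string }>,
  intervalMs = 2000
): Promise<{ status: string }> {
  let failures = 0;
  while (true) {
    try {
      const job = await getStatus();
      if (job.status === "completed" || job.status === "failed") {
        return job;
      }
      failures = 0; // any successful poll resets the counter
    } catch (error) {
      failures++;
      if (failures >= POLLING_ATTEMPTS) {
        throw new Error(`Polling failed after ${POLLING_ATTEMPTS} attempts: ${error}`);
      }
    }
    await sleep(intervalMs);
  }
}
```
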
package/dist/services/extract.js
CHANGED

@@ -5,6 +5,7 @@ const zod_to_json_schema_1 = require("zod-to-json-schema");
 const base_1 = require("./base");
 const utils_1 = require("../utils");
 const client_1 = require("../client");
+const constants_1 = require("../types/constants");
 const isZodSchema = (schema) => {
     return (schema &&
         typeof schema === "object" &&
@@ -65,10 +66,20 @@ class ExtractService extends base_1.BaseService {
             throw new client_1.HyperbrowserError("Failed to start extract job, could not get job ID");
         }
         let jobResponse;
+        let failures = 0;
         while (true) {
-
-
-
+            try {
+                jobResponse = await this.get(jobId);
+                if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+                    break;
+                }
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to poll extract job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
             }
             await (0, utils_1.sleep)(2000);
         }

package/dist/services/scrape.d.ts
CHANGED

@@ -1,6 +1,27 @@
-import { ScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
+import { BatchScrapeJobResponse, GetBatchScrapeJobParams, ScrapeJobResponse, StartBatchScrapeJobParams, StartBatchScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
 import { BaseService } from "./base";
+export declare class BatchScrapeService extends BaseService {
+    /**
+     * Start a new batch scrape job
+     * @param params The parameters for the batch scrape job
+     */
+    start(params: StartBatchScrapeJobParams): Promise<StartBatchScrapeJobResponse>;
+    /**
+     * Get the status of a batch scrape job
+     * @param id The ID of the batch scrape job to get
+     * @param params Optional parameters to filter the batch scrape job
+     */
+    get(id: string, params?: GetBatchScrapeJobParams): Promise<BatchScrapeJobResponse>;
+    /**
+     * Start a batch scrape job and wait for it to complete
+     * @param params The parameters for the batch scrape job
+     * @param returnAllPages Whether to return all pages in the batch scrape job response
+     */
+    startAndWait(params: StartBatchScrapeJobParams, returnAllPages?: boolean): Promise<BatchScrapeJobResponse>;
+}
 export declare class ScrapeService extends BaseService {
+    readonly batch: BatchScrapeService;
+    constructor(apiKey: string, baseUrl: string, timeout: number);
     /**
      * Start a new scrape job
      * @param params The parameters for the scrape job

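The declarations above add a BatchScrapeService and surface it as the batch property of ScrapeService. A rough usage sketch follows; the Hyperbrowser client import, its constructor options, and the client.scrape property are assumptions based on the rest of the SDK and are not shown in this diff:

```ts
import { Hyperbrowser } from "@hyperbrowser/sdk"; // client entry point assumed, not part of this diff

const client = new Hyperbrowser({ apiKey: "your-api-key" });

async function main() {
  // Start a batch scrape across several URLs and wait for completion.
  // With returnAllPages left at its default (true), every page batch is
  // folded into job.data before the response is returned.
  const job = await client.scrape.batch.startAndWait({
    urls: ["https://example.com", "https://example.org"],
    scrapeOptions: { formats: ["markdown", "links"] },
  });

  if (job.status === "failed") {
    console.error(job.error);
    return;
  }
  for (const page of job.data ?? []) {
    console.log(page.url, page.status);
  }
}

main().catch(console.error);
```
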
package/dist/services/scrape.js
CHANGED

@@ -1,10 +1,127 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.ScrapeService = void 0;
+exports.ScrapeService = exports.BatchScrapeService = void 0;
 const base_1 = require("./base");
 const utils_1 = require("../utils");
 const client_1 = require("../client");
+const constants_1 = require("../types/constants");
+class BatchScrapeService extends base_1.BaseService {
+    /**
+     * Start a new batch scrape job
+     * @param params The parameters for the batch scrape job
+     */
+    async start(params) {
+        try {
+            return await this.request("/scrape/batch", {
+                method: "POST",
+                body: JSON.stringify(params),
+            });
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError("Failed to start batch scrape job", undefined);
+        }
+    }
+    /**
+     * Get the status of a batch scrape job
+     * @param id The ID of the batch scrape job to get
+     * @param params Optional parameters to filter the batch scrape job
+     */
+    async get(id, params) {
+        try {
+            return await this.request(`/scrape/batch/${id}`, undefined, {
+                page: params?.page,
+            });
+        }
+        catch (error) {
+            if (error instanceof client_1.HyperbrowserError) {
+                throw error;
+            }
+            throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id}`, undefined);
+        }
+    }
+    /**
+     * Start a batch scrape job and wait for it to complete
+     * @param params The parameters for the batch scrape job
+     * @param returnAllPages Whether to return all pages in the batch scrape job response
+     */
+    async startAndWait(params, returnAllPages = true) {
+        const job = await this.start(params);
+        const jobId = job.jobId;
+        if (!jobId) {
+            throw new client_1.HyperbrowserError("Failed to start batch scrape job, could not get job ID");
+        }
+        let jobResponse;
+        let failures = 0;
+        while (true) {
+            try {
+                jobResponse = await this.get(jobId, { batchSize: 1 });
+                if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+                    break;
+                }
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
+            }
+            await (0, utils_1.sleep)(2000);
+        }
+        failures = 0;
+        if (!returnAllPages) {
+            while (true) {
+                try {
+                    jobResponse = await this.get(jobId);
+                    return jobResponse;
+                }
+                catch (error) {
+                    failures++;
+                    if (failures >= constants_1.POLLING_ATTEMPTS) {
+                        throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                    }
+                }
+                await (0, utils_1.sleep)(500);
+            }
+        }
+        jobResponse.currentPageBatch = 0;
+        jobResponse.data = [];
+        failures = 0;
+        while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
+            try {
+                const tmpJobResponse = await this.get(jobId, {
+                    page: jobResponse.currentPageBatch + 1,
+                    batchSize: 100,
+                });
+                if (tmpJobResponse.data) {
+                    jobResponse.data?.push(...tmpJobResponse.data);
+                }
+                jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
+                jobResponse.totalScrapedPages = tmpJobResponse.totalScrapedPages;
+                jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
+                jobResponse.batchSize = tmpJobResponse.batchSize;
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
+            }
+            await (0, utils_1.sleep)(500);
+        }
+        return jobResponse;
+    }
+}
+exports.BatchScrapeService = BatchScrapeService;
 class ScrapeService extends base_1.BaseService {
+    constructor(apiKey, baseUrl, timeout) {
+        super(apiKey, baseUrl, timeout);
+        this.batch = new BatchScrapeService(apiKey, baseUrl, timeout);
+    }
     /**
      * Start a new scrape job
      * @param params The parameters for the scrape job
@@ -49,10 +166,20 @@ class ScrapeService extends base_1.BaseService {
             throw new client_1.HyperbrowserError("Failed to start scrape job, could not get job ID");
         }
         let jobResponse;
+        let failures = 0;
         while (true) {
-
-
-
+            try {
+                jobResponse = await this.get(jobId);
+                if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+                    break;
+                }
+                failures = 0;
+            }
+            catch (error) {
+                failures++;
+                if (failures >= constants_1.POLLING_ATTEMPTS) {
+                    throw new client_1.HyperbrowserError(`Failed to poll scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+                }
             }
             await (0, utils_1.sleep)(2000);
         }

package/dist/types/constants.d.ts
CHANGED

@@ -2,8 +2,10 @@ export type ScrapeFormat = "markdown" | "html" | "links" | "screenshot";
 export type ScrapeJobStatus = "pending" | "running" | "completed" | "failed";
 export type ExtractJobStatus = "pending" | "running" | "completed" | "failed";
 export type CrawlJobStatus = "pending" | "running" | "completed" | "failed";
+export type ScrapePageStatus = "completed" | "failed" | "pending" | "running";
 export type CrawlPageStatus = "completed" | "failed";
 export type ScrapeWaitUntil = "load" | "domcontentloaded" | "networkidle";
+export declare const POLLING_ATTEMPTS = 5;
 export type Country = "AD" | "AE" | "AF" | "AL" | "AM" | "AO" | "AR" | "AT" | "AU" | "AW" | "AZ" | "BA" | "BD" | "BE" | "BG" | "BH" | "BJ" | "BO" | "BR" | "BS" | "BT" | "BY" | "BZ" | "CA" | "CF" | "CH" | "CI" | "CL" | "CM" | "CN" | "CO" | "CR" | "CU" | "CY" | "CZ" | "DE" | "DJ" | "DK" | "DM" | "EC" | "EE" | "EG" | "ES" | "ET" | "EU" | "FI" | "FJ" | "FR" | "GB" | "GE" | "GH" | "GM" | "GR" | "HK" | "HN" | "HR" | "HT" | "HU" | "ID" | "IE" | "IL" | "IN" | "IQ" | "IR" | "IS" | "IT" | "JM" | "JO" | "JP" | "KE" | "KH" | "KR" | "KW" | "KZ" | "LB" | "LI" | "LR" | "LT" | "LU" | "LV" | "MA" | "MC" | "MD" | "ME" | "MG" | "MK" | "ML" | "MM" | "MN" | "MR" | "MT" | "MU" | "MV" | "MX" | "MY" | "MZ" | "NG" | "NL" | "NO" | "NZ" | "OM" | "PA" | "PE" | "PH" | "PK" | "PL" | "PR" | "PT" | "PY" | "QA" | "RANDOM_COUNTRY" | "RO" | "RS" | "RU" | "SA" | "SC" | "SD" | "SE" | "SG" | "SI" | "SK" | "SN" | "SS" | "TD" | "TG" | "TH" | "TM" | "TN" | "TR" | "TT" | "TW" | "UA" | "UG" | "US" | "UY" | "UZ" | "VE" | "VG" | "VN" | "YE" | "ZA" | "ZM" | "ZW" | "ad" | "ae" | "af" | "al" | "am" | "ao" | "ar" | "at" | "au" | "aw" | "az" | "ba" | "bd" | "be" | "bg" | "bh" | "bj" | "bo" | "br" | "bs" | "bt" | "by" | "bz" | "ca" | "cf" | "ch" | "ci" | "cl" | "cm" | "cn" | "co" | "cr" | "cu" | "cy" | "cz" | "de" | "dj" | "dk" | "dm" | "ec" | "ee" | "eg" | "es" | "et" | "eu" | "fi" | "fj" | "fr" | "gb" | "ge" | "gh" | "gm" | "gr" | "hk" | "hn" | "hr" | "ht" | "hu" | "id" | "ie" | "il" | "in" | "iq" | "ir" | "is" | "it" | "jm" | "jo" | "jp" | "ke" | "kh" | "kr" | "kw" | "kz" | "lb" | "li" | "lr" | "lt" | "lu" | "lv" | "ma" | "mc" | "md" | "me" | "mg" | "mk" | "ml" | "mm" | "mn" | "mr" | "mt" | "mu" | "mv" | "mx" | "my" | "mz" | "ng" | "nl" | "no" | "nz" | "om" | "pa" | "pe" | "ph" | "pk" | "pl" | "pr" | "pt" | "py" | "qa" | "ro" | "rs" | "ru" | "sa" | "sc" | "sd" | "se" | "sg" | "si" | "sk" | "sn" | "ss" | "td" | "tg" | "th" | "tm" | "tn" | "tr" | "tt" | "tw" | "ua" | "ug" | "us" | "uy" | "uz" | "ve" | "vg" | "vn" | "ye" | "za" | "zm" | "zw";
 export type OperatingSystem = "windows" | "android" | "macos" | "linux" | "ios";
 export type Platform = "chrome" | "firefox" | "safari" | "edge";

package/dist/types/constants.js
CHANGED
package/dist/types/extract.d.ts
CHANGED

@@ -3,9 +3,11 @@ import { ExtractJobStatus } from "./constants";
 import { CreateSessionParams } from "./session";
 export interface StartExtractJobParams {
     urls: string[];
+    systemPrompt?: string;
     prompt?: string;
     schema?: z.ZodSchema | object;
     sessionOptions?: CreateSessionParams;
+    maxLinks?: number;
 }
 export interface StartExtractJobResponse {
     jobId: string;

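StartExtractJobParams gains an optional systemPrompt alongside the existing prompt, plus an optional maxLinks number. An illustrative parameter object matching the updated shape; the concrete values and the zod schema are examples only, and the runtime behavior of the new fields is not described by this diff:

```ts
import { z } from "zod"; // StartExtractJobParams already accepts a zod schema or a plain object

// Example values only; shaped to match the StartExtractJobParams interface above.
const extractParams = {
  urls: ["https://example.com/pricing"],
  systemPrompt: "You are extracting structured pricing information.",
  prompt: "List every plan name and its monthly price.",
  schema: z.object({
    plans: z.array(z.object({ name: z.string(), monthlyPrice: z.string() })),
  }),
  maxLinks: 10,
};
```
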
package/dist/types/index.d.ts
CHANGED

@@ -1,7 +1,7 @@
 export { HyperbrowserConfig } from "./config";
 export { StartCrawlJobParams, StartCrawlJobResponse, CrawledPage, CrawlJobResponse, GetCrawlJobParams, } from "./crawl";
-export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, } from "./scrape";
+export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, ScrapeOptions, } from "./scrape";
 export { BasicResponse, SessionStatus, Session, SessionDetail, SessionListParams, SessionListResponse, ScreenConfig, CreateSessionParams, } from "./session";
 export { ProfileResponse, CreateProfileResponse, ProfileListParams, ProfileListResponse, } from "./profile";
 export { CreateExtensionParams, CreateExtensionResponse, ListExtensionsResponse, } from "./extension";
-export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, } from "./constants";
+export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, ScrapePageStatus, CrawlPageStatus, } from "./constants";

package/dist/types/scrape.d.ts
CHANGED

@@ -1,4 +1,4 @@
-import { ScrapeFormat, ScrapeJobStatus, ScrapeWaitUntil } from "./constants";
+import { ScrapeFormat, ScrapeJobStatus, ScrapePageStatus, ScrapeWaitUntil } from "./constants";
 import { CreateSessionParams } from "./session";
 export interface ScrapeOptions {
     formats?: ScrapeFormat[];
@@ -30,3 +30,35 @@ export interface ScrapeJobResponse {
     data?: ScrapeJobData;
     error?: string;
 }
+export interface StartBatchScrapeJobParams {
+    urls: string[];
+    sessionOptions?: CreateSessionParams;
+    scrapeOptions?: ScrapeOptions;
+}
+export interface ScrapedPage {
+    url: string;
+    status: ScrapePageStatus;
+    error?: string | null;
+    metadata?: Record<string, string | string[]>;
+    markdown?: string;
+    html?: string;
+    links?: string[];
+    screenshot?: string;
+}
+export interface GetBatchScrapeJobParams {
+    page?: number;
+    batchSize?: number;
+}
+export interface StartBatchScrapeJobResponse {
+    jobId: string;
+}
+export interface BatchScrapeJobResponse {
+    jobId: string;
+    status: ScrapeJobStatus;
+    data?: ScrapedPage[];
+    error?: string;
+    totalScrapedPages: number;
+    totalPageBatches: number;
+    currentPageBatch: number;
+    batchSize: number;
+}
