@hyperbrowser/sdk 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ exports.CrawlService = void 0;
  const base_1 = require("./base");
  const utils_1 = require("../utils");
  const client_1 = require("../client");
+ const constants_1 = require("../types/constants");
  class CrawlService extends base_1.BaseService {
  /**
  * Start a new crawl job
@@ -53,27 +54,63 @@ class CrawlService extends base_1.BaseService {
  throw new client_1.HyperbrowserError("Failed to start crawl job, could not get job ID");
  }
  let jobResponse;
+ let failures = 0;
  while (true) {
- jobResponse = await this.get(jobId);
- if (jobResponse.status === "completed" || jobResponse.status === "failed") {
- break;
+ try {
+ jobResponse = await this.get(jobId, { batchSize: 1 });
+ if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+ break;
+ }
+ failures = 0;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to poll crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
  }
  await (0, utils_1.sleep)(2000);
  }
+ failures = 0;
  if (!returnAllPages) {
- return jobResponse;
+ while (true) {
+ try {
+ jobResponse = await this.get(jobId);
+ return jobResponse;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to get crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
+ }
+ await (0, utils_1.sleep)(500);
+ }
  }
+ jobResponse.currentPageBatch = 0;
+ jobResponse.data = [];
+ failures = 0;
  while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
- const tmpJobResponse = await this.get(jobId, {
- page: jobResponse.currentPageBatch + 1,
- });
- if (tmpJobResponse.data) {
- jobResponse.data?.push(...tmpJobResponse.data);
+ try {
+ const tmpJobResponse = await this.get(jobId, {
+ page: jobResponse.currentPageBatch + 1,
+ batchSize: 100,
+ });
+ if (tmpJobResponse.data) {
+ jobResponse.data?.push(...tmpJobResponse.data);
+ }
+ jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
+ jobResponse.totalCrawledPages = tmpJobResponse.totalCrawledPages;
+ jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
+ jobResponse.batchSize = tmpJobResponse.batchSize;
+ failures = 0;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to get crawl job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
  }
- jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
- jobResponse.totalCrawledPages = tmpJobResponse.totalCrawledPages;
- jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
- jobResponse.batchSize = tmpJobResponse.batchSize;
  await (0, utils_1.sleep)(500);
  }
  return jobResponse;
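The change above swaps the bare status loop for a bounded-retry poll: the status check is made with batchSize: 1 to keep the request small, a successful poll resets the failure counter, and after POLLING_ATTEMPTS (5) consecutive errors the wait aborts with a HyperbrowserError. A minimal TypeScript sketch of that pattern follows; getJob stands in for the service's internal get(jobId, params) call and is not part of the SDK's public surface.

    // Illustrative sketch of the bounded-retry polling introduced in 0.27.0.
    // getJob is a hypothetical stand-in for the service's internal get call.
    const POLLING_ATTEMPTS = 5;

    async function waitForCompletion<T extends { status: string }>(
      getJob: (params?: { batchSize?: number }) => Promise<T>,
    ): Promise<T> {
      let failures = 0;
      while (true) {
        try {
          // batchSize: 1 keeps the status-only poll cheap.
          const job = await getJob({ batchSize: 1 });
          if (job.status === "completed" || job.status === "failed") {
            return job;
          }
          failures = 0; // any successful poll resets the counter
        } catch (error) {
          failures++;
          if (failures >= POLLING_ATTEMPTS) {
            throw new Error(`Polling failed after ${POLLING_ATTEMPTS} attempts: ${error}`);
          }
        }
        await new Promise((resolve) => setTimeout(resolve, 2000));
      }
    }

Once the job finishes, pages are then fetched batch by batch (batchSize: 100) with the same retry budget, so a single transient error no longer aborts the whole wait.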
@@ -5,6 +5,7 @@ const zod_to_json_schema_1 = require("zod-to-json-schema");
  const base_1 = require("./base");
  const utils_1 = require("../utils");
  const client_1 = require("../client");
+ const constants_1 = require("../types/constants");
  const isZodSchema = (schema) => {
  return (schema &&
  typeof schema === "object" &&
@@ -65,10 +66,20 @@ class ExtractService extends base_1.BaseService {
  throw new client_1.HyperbrowserError("Failed to start extract job, could not get job ID");
  }
  let jobResponse;
+ let failures = 0;
  while (true) {
- jobResponse = await this.get(jobId);
- if (jobResponse.status === "completed" || jobResponse.status === "failed") {
- break;
+ try {
+ jobResponse = await this.get(jobId);
+ if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+ break;
+ }
+ failures = 0;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to poll extract job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
  }
  await (0, utils_1.sleep)(2000);
  }
@@ -4,6 +4,7 @@ exports.ScrapeService = exports.BatchScrapeService = void 0;
  const base_1 = require("./base");
  const utils_1 = require("../utils");
  const client_1 = require("../client");
+ const constants_1 = require("../types/constants");
  class BatchScrapeService extends base_1.BaseService {
  /**
  * Start a new batch scrape job
@@ -56,7 +57,7 @@ class BatchScrapeService extends base_1.BaseService {
  let failures = 0;
  while (true) {
  try {
- jobResponse = await this.get(jobId);
+ jobResponse = await this.get(jobId, { batchSize: 1 });
  if (jobResponse.status === "completed" || jobResponse.status === "failed") {
  break;
  }
@@ -64,15 +65,30 @@ class BatchScrapeService extends base_1.BaseService {
  }
  catch (error) {
  failures++;
- if (failures >= 5) {
- throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after 5 attempts: ${error}`);
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
  }
  }
  await (0, utils_1.sleep)(2000);
  }
+ failures = 0;
  if (!returnAllPages) {
- return jobResponse;
+ while (true) {
+ try {
+ jobResponse = await this.get(jobId);
+ return jobResponse;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
+ }
+ await (0, utils_1.sleep)(500);
+ }
  }
+ jobResponse.currentPageBatch = 0;
+ jobResponse.data = [];
  failures = 0;
  while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
  try {
@@ -91,8 +107,8 @@ class BatchScrapeService extends base_1.BaseService {
  }
  catch (error) {
  failures++;
- if (failures >= 5) {
- throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after 5 attempts: ${error}`);
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
  }
  }
  await (0, utils_1.sleep)(500);
@@ -150,10 +166,20 @@ class ScrapeService extends base_1.BaseService {
  throw new client_1.HyperbrowserError("Failed to start scrape job, could not get job ID");
  }
  let jobResponse;
+ let failures = 0;
  while (true) {
- jobResponse = await this.get(jobId);
- if (jobResponse.status === "completed" || jobResponse.status === "failed") {
- break;
+ try {
+ jobResponse = await this.get(jobId);
+ if (jobResponse.status === "completed" || jobResponse.status === "failed") {
+ break;
+ }
+ failures = 0;
+ }
+ catch (error) {
+ failures++;
+ if (failures >= constants_1.POLLING_ATTEMPTS) {
+ throw new client_1.HyperbrowserError(`Failed to poll scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
+ }
  }
  await (0, utils_1.sleep)(2000);
  }
@@ -2,9 +2,11 @@ export type ScrapeFormat = "markdown" | "html" | "links" | "screenshot";
  export type ScrapeJobStatus = "pending" | "running" | "completed" | "failed";
  export type ExtractJobStatus = "pending" | "running" | "completed" | "failed";
  export type CrawlJobStatus = "pending" | "running" | "completed" | "failed";
- export type ScrapePageStatus = "completed" | "failed";
+ export type ScrapePageStatus = "completed" | "failed" | "pending" | "running";
  export type CrawlPageStatus = "completed" | "failed";
  export type ScrapeWaitUntil = "load" | "domcontentloaded" | "networkidle";
+ export type ScrapeScreenshotFormat = "jpeg" | "png" | "webp";
+ export declare const POLLING_ATTEMPTS = 5;
  export type Country = "AD" | "AE" | "AF" | "AL" | "AM" | "AO" | "AR" | "AT" | "AU" | "AW" | "AZ" | "BA" | "BD" | "BE" | "BG" | "BH" | "BJ" | "BO" | "BR" | "BS" | "BT" | "BY" | "BZ" | "CA" | "CF" | "CH" | "CI" | "CL" | "CM" | "CN" | "CO" | "CR" | "CU" | "CY" | "CZ" | "DE" | "DJ" | "DK" | "DM" | "EC" | "EE" | "EG" | "ES" | "ET" | "EU" | "FI" | "FJ" | "FR" | "GB" | "GE" | "GH" | "GM" | "GR" | "HK" | "HN" | "HR" | "HT" | "HU" | "ID" | "IE" | "IL" | "IN" | "IQ" | "IR" | "IS" | "IT" | "JM" | "JO" | "JP" | "KE" | "KH" | "KR" | "KW" | "KZ" | "LB" | "LI" | "LR" | "LT" | "LU" | "LV" | "MA" | "MC" | "MD" | "ME" | "MG" | "MK" | "ML" | "MM" | "MN" | "MR" | "MT" | "MU" | "MV" | "MX" | "MY" | "MZ" | "NG" | "NL" | "NO" | "NZ" | "OM" | "PA" | "PE" | "PH" | "PK" | "PL" | "PR" | "PT" | "PY" | "QA" | "RANDOM_COUNTRY" | "RO" | "RS" | "RU" | "SA" | "SC" | "SD" | "SE" | "SG" | "SI" | "SK" | "SN" | "SS" | "TD" | "TG" | "TH" | "TM" | "TN" | "TR" | "TT" | "TW" | "UA" | "UG" | "US" | "UY" | "UZ" | "VE" | "VG" | "VN" | "YE" | "ZA" | "ZM" | "ZW" | "ad" | "ae" | "af" | "al" | "am" | "ao" | "ar" | "at" | "au" | "aw" | "az" | "ba" | "bd" | "be" | "bg" | "bh" | "bj" | "bo" | "br" | "bs" | "bt" | "by" | "bz" | "ca" | "cf" | "ch" | "ci" | "cl" | "cm" | "cn" | "co" | "cr" | "cu" | "cy" | "cz" | "de" | "dj" | "dk" | "dm" | "ec" | "ee" | "eg" | "es" | "et" | "eu" | "fi" | "fj" | "fr" | "gb" | "ge" | "gh" | "gm" | "gr" | "hk" | "hn" | "hr" | "ht" | "hu" | "id" | "ie" | "il" | "in" | "iq" | "ir" | "is" | "it" | "jm" | "jo" | "jp" | "ke" | "kh" | "kr" | "kw" | "kz" | "lb" | "li" | "lr" | "lt" | "lu" | "lv" | "ma" | "mc" | "md" | "me" | "mg" | "mk" | "ml" | "mm" | "mn" | "mr" | "mt" | "mu" | "mv" | "mx" | "my" | "mz" | "ng" | "nl" | "no" | "nz" | "om" | "pa" | "pe" | "ph" | "pk" | "pl" | "pr" | "pt" | "py" | "qa" | "ro" | "rs" | "ru" | "sa" | "sc" | "sd" | "se" | "sg" | "si" | "sk" | "sn" | "ss" | "td" | "tg" | "th" | "tm" | "tn" | "tr" | "tt" | "tw" | "ua" | "ug" | "us" | "uy" | "uz" | "ve" | "vg" | "vn" | "ye" | "za" | "zm" | "zw";
  export type OperatingSystem = "windows" | "android" | "macos" | "linux" | "ios";
  export type Platform = "chrome" | "firefox" | "safari" | "edge";
@@ -1,2 +1,4 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
+ exports.POLLING_ATTEMPTS = void 0;
+ exports.POLLING_ATTEMPTS = 5;
@@ -3,9 +3,11 @@ import { ExtractJobStatus } from "./constants";
  import { CreateSessionParams } from "./session";
  export interface StartExtractJobParams {
  urls: string[];
+ systemPrompt?: string;
  prompt?: string;
  schema?: z.ZodSchema | object;
  sessionOptions?: CreateSessionParams;
+ maxLinks?: number;
  }
  export interface StartExtractJobResponse {
  jobId: string;
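The two added fields give StartExtractJobParams an optional system prompt and a cap on how many discovered links the extraction may follow. A hedged usage sketch in TypeScript: only the field names come from the diff above; the URL, schema, and values are made up for illustration.

    import { z } from "zod";

    // Params object matching the StartExtractJobParams shape shown in the diff.
    const extractParams = {
      urls: ["https://example.com/pricing"],
      systemPrompt: "Extract structured pricing data only; ignore marketing copy.", // new in 0.27.0
      prompt: "List each plan with its name and monthly price.",
      schema: z.object({
        plans: z.array(z.object({ name: z.string(), price: z.string() })),
      }),
      maxLinks: 10, // new in 0.27.0: presumably caps how many linked pages are visited
    };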
@@ -1,5 +1,9 @@
- import { ScrapeFormat, ScrapeJobStatus, ScrapePageStatus, ScrapeWaitUntil } from "./constants";
+ import { ScrapeFormat, ScrapeJobStatus, ScrapePageStatus, ScrapeScreenshotFormat, ScrapeWaitUntil } from "./constants";
  import { CreateSessionParams } from "./session";
+ export interface ScreenshotOptions {
+ fullPage?: boolean;
+ format?: ScrapeScreenshotFormat;
+ }
  export interface ScrapeOptions {
  formats?: ScrapeFormat[];
  includeTags?: string[];
@@ -8,6 +12,7 @@ export interface ScrapeOptions {
  waitFor?: number;
  timeout?: number;
  waitUntil?: ScrapeWaitUntil;
+ screenshotOptions?: ScreenshotOptions;
  }
  export interface StartScrapeJobParams {
  url: string;
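Together with the ScreenshotOptions interface and ScrapeScreenshotFormat type added above, scrape options can now describe the screenshot output. A small TypeScript sketch, assuming only what the .d.ts changes show; the behavioral comments are interpretations, since only the type shape is visible in this release.

    // ScrapeOptions per the diff above; values are illustrative.
    const scrapeOptions = {
      formats: ["markdown", "screenshot"],
      waitUntil: "networkidle",
      screenshotOptions: {
        fullPage: true, // presumably the full page rather than just the viewport
        format: "webp", // ScrapeScreenshotFormat: "jpeg" | "png" | "webp"
      },
    };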
@@ -12,11 +12,11 @@ export interface Session {
  createdAt: string;
  updatedAt: string;
  sessionUrl: string;
- liveUrl?: string;
- token: string;
  }
  export interface SessionDetail extends Session {
  wsEndpoint?: string;
+ liveUrl?: string;
+ token: string;
  }
  export interface SessionListParams {
  status?: SessionStatus;
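Note that this is a relocation rather than a removal: liveUrl and token move off the base Session shape and onto SessionDetail, so code that read them from session-list items must now use the single-session detail object. A type-level TypeScript sketch, trimmed to the fields visible in the diff; the real interfaces carry more fields than shown here.

    // Trimmed mirror of the shapes in the diff above.
    type SessionLike = {
      createdAt: string;
      updatedAt: string;
      sessionUrl: string;
    };

    type SessionDetailLike = SessionLike & {
      wsEndpoint?: string;
      liveUrl?: string; // now only on the detail shape
      token: string;    // now only on the detail shape
    };

    // Compiles only against the detail shape; a bare SessionLike no longer has token.
    function hasLiveView(session: SessionDetailLike): boolean {
      return typeof session.liveUrl === "string" && session.token.length > 0;
    }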
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@hyperbrowser/sdk",
- "version": "0.25.0",
+ "version": "0.27.0",
  "description": "Node SDK for Hyperbrowser API",
  "author": "",
  "main": "dist/index.js",