@graphext/cuery 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/mod.d.ts +1 -0
- package/esm/mod.d.ts.map +1 -1
- package/esm/mod.js +1 -0
- package/esm/src/apis/brightdata/index.d.ts +2 -0
- package/esm/src/apis/brightdata/index.d.ts.map +1 -0
- package/esm/src/apis/brightdata/index.js +1 -0
- package/esm/src/apis/brightdata/scrape.d.ts +30 -0
- package/esm/src/apis/brightdata/scrape.d.ts.map +1 -0
- package/esm/src/apis/brightdata/scrape.js +93 -0
- package/package.json +1 -1
- package/script/mod.d.ts +1 -0
- package/script/mod.d.ts.map +1 -1
- package/script/mod.js +1 -0
- package/script/src/apis/brightdata/index.d.ts +2 -0
- package/script/src/apis/brightdata/index.d.ts.map +1 -0
- package/script/src/apis/brightdata/index.js +17 -0
- package/script/src/apis/brightdata/scrape.d.ts +30 -0
- package/script/src/apis/brightdata/scrape.d.ts.map +1 -0
- package/script/src/apis/brightdata/scrape.js +130 -0
package/esm/mod.d.ts
CHANGED
|
@@ -26,6 +26,7 @@ export * from './src/tools/scorer.js';
|
|
|
26
26
|
export * from './src/helpers/seedKeywords.js';
|
|
27
27
|
export * from './src/tools/generic.js';
|
|
28
28
|
export * from './src/apis/hasdata/index.js';
|
|
29
|
+
export * from './src/apis/brightdata/index.js';
|
|
29
30
|
export * from './src/apis/chatgptScraper/index.js';
|
|
30
31
|
export * from './src/apis/googleAds/keywordPlanner.js';
|
|
31
32
|
export * from './src/schemas/index.js';
|
package/esm/mod.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
|
|
1
|
+
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
|
package/esm/mod.js
CHANGED
|
@@ -28,6 +28,7 @@ export * from './src/tools/scorer.js';
|
|
|
28
28
|
export * from './src/helpers/seedKeywords.js';
|
|
29
29
|
export * from './src/tools/generic.js';
|
|
30
30
|
export * from './src/apis/hasdata/index.js';
|
|
31
|
+
export * from './src/apis/brightdata/index.js';
|
|
31
32
|
export * from './src/apis/chatgptScraper/index.js';
|
|
32
33
|
export * from './src/apis/googleAds/keywordPlanner.js';
|
|
33
34
|
export * from './src/schemas/index.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './scrape.js';
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { type RetryConfig } from '../../helpers/async.js';
|
|
2
|
+
export interface BrightDataScrapeOptions {
|
|
3
|
+
/** BrightData zone name. Defaults to "web_unlocker". */
|
|
4
|
+
zone?: string;
|
|
5
|
+
/** HTTP method for the target request. Defaults to "GET". */
|
|
6
|
+
method?: 'GET' | 'POST';
|
|
7
|
+
/** Country code for geo-targeting (e.g. "us", "gb", "de"). */
|
|
8
|
+
country?: string;
|
|
9
|
+
/** Request body for POST requests to the target URL. */
|
|
10
|
+
body?: string;
|
|
11
|
+
/** Additional headers to send to the target URL. */
|
|
12
|
+
headers?: Record<string, string>;
|
|
13
|
+
/** Custom retry configuration. */
|
|
14
|
+
retryConfig?: RetryConfig;
|
|
15
|
+
}
|
|
16
|
+
export interface BrightDataScrapeResponse {
|
|
17
|
+
url: string;
|
|
18
|
+
html?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Scrape a single URL using BrightData Web Unlocker API.
|
|
22
|
+
* Returns raw HTML content.
|
|
23
|
+
*/
|
|
24
|
+
export declare function scrapeBrightData(url: string, options?: BrightDataScrapeOptions): Promise<BrightDataScrapeResponse>;
|
|
25
|
+
/**
|
|
26
|
+
* Scrape multiple URLs in parallel using BrightData Web Unlocker API.
|
|
27
|
+
* Uses mapParallel with configurable concurrency.
|
|
28
|
+
*/
|
|
29
|
+
export declare function scrapeBrightDataBatch(urls: Array<string>, options?: BrightDataScrapeOptions, maxConcurrency?: number): Promise<Array<BrightDataScrapeResponse>>;
|
|
30
|
+
//# sourceMappingURL=scrape.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/scrape.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAapF,MAAM,WAAW,uBAAuB;IACpC,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,8DAA8D;IAC9D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kCAAkC;IAClC,WAAW,CAAC,EAAE,WAAW,CAAC;CAC7B;AAED,MAAM,WAAW,wBAAwB;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AA4ED;;;GAGG;AACH,wBAAsB,gBAAgB,CAClC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,uBAA4B,GACtC,OAAO,CAAC,wBAAwB,CAAC,CAYnC;AAED;;;GAGG;AACH,wBAAsB,qBAAqB,CACvC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,GAAE,uBAA4B,EACrC,cAAc,GAAE,MAA+B,GAChD,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAQ1C"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import * as dntShim from "../../../_dnt.shims.js";
|
|
2
|
+
import { mapParallel, withRetries } from '../../helpers/async.js';
|
|
3
|
+
const BRIGHTDATA_CONCURRENCY = 10;
|
|
4
|
+
const BRIGHTDATA_RETRY_CONFIG = {
|
|
5
|
+
maxRetries: 3,
|
|
6
|
+
initialDelay: 1000,
|
|
7
|
+
maxDelay: 8000,
|
|
8
|
+
backoffMultiplier: 2,
|
|
9
|
+
statusCodes: [429, 500, 502, 503]
|
|
10
|
+
};
|
|
11
|
+
function getApiKey() {
|
|
12
|
+
const apiKey = dntShim.Deno.env.get('BRIGHTDATA_API_KEY');
|
|
13
|
+
if (apiKey == null) {
|
|
14
|
+
throw new Error('BRIGHTDATA_API_KEY environment variable is required');
|
|
15
|
+
}
|
|
16
|
+
return apiKey;
|
|
17
|
+
}
|
|
18
|
+
async function fetchBrightData(url, apiKey, options, retryConfig) {
|
|
19
|
+
const response = await withRetries(async () => {
|
|
20
|
+
const body = {
|
|
21
|
+
zone: options.zone ?? 'web_unlocker',
|
|
22
|
+
url,
|
|
23
|
+
format: 'raw',
|
|
24
|
+
method: options.method ?? 'GET'
|
|
25
|
+
};
|
|
26
|
+
if (options.country != null) {
|
|
27
|
+
body.country = options.country;
|
|
28
|
+
}
|
|
29
|
+
if (options.body != null) {
|
|
30
|
+
body.body = options.body;
|
|
31
|
+
}
|
|
32
|
+
const headers = {
|
|
33
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
34
|
+
'Content-Type': 'application/json'
|
|
35
|
+
};
|
|
36
|
+
if (options.headers != null) {
|
|
37
|
+
// Forward custom headers as part of the BrightData request
|
|
38
|
+
body.headers = options.headers;
|
|
39
|
+
}
|
|
40
|
+
return fetch('https://api.brightdata.com/request', {
|
|
41
|
+
method: 'POST',
|
|
42
|
+
headers,
|
|
43
|
+
body: JSON.stringify(body),
|
|
44
|
+
signal: dntShim.dntGlobalThis.abortSignal
|
|
45
|
+
});
|
|
46
|
+
}, retryConfig);
|
|
47
|
+
if (!response.ok) {
|
|
48
|
+
const status = response.status;
|
|
49
|
+
let details = '';
|
|
50
|
+
try {
|
|
51
|
+
details = ` - ${await response.text()}`;
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
// ignore
|
|
55
|
+
}
|
|
56
|
+
const errorMessage = status === 401
|
|
57
|
+
? 'BrightData API error (401): Invalid API key'
|
|
58
|
+
: status === 403
|
|
59
|
+
? 'BrightData API error (403): Forbidden or credits exhausted'
|
|
60
|
+
: status === 429
|
|
61
|
+
? 'BrightData API error (429): Rate limit exceeded'
|
|
62
|
+
: `BrightData API error: ${status} ${response.statusText}${details}`;
|
|
63
|
+
console.error(errorMessage);
|
|
64
|
+
throw new Error(errorMessage);
|
|
65
|
+
}
|
|
66
|
+
return response;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Scrape a single URL using BrightData Web Unlocker API.
|
|
70
|
+
* Returns raw HTML content.
|
|
71
|
+
*/
|
|
72
|
+
export async function scrapeBrightData(url, options = {}) {
|
|
73
|
+
const apiKey = getApiKey();
|
|
74
|
+
const retryConfig = options.retryConfig ?? BRIGHTDATA_RETRY_CONFIG;
|
|
75
|
+
try {
|
|
76
|
+
const response = await fetchBrightData(url, apiKey, options, retryConfig);
|
|
77
|
+
const html = await response.text();
|
|
78
|
+
return { url, html };
|
|
79
|
+
}
|
|
80
|
+
catch (error) {
|
|
81
|
+
console.error(`BrightData scrape error for ${url}:`, error);
|
|
82
|
+
return { url };
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Scrape multiple URLs in parallel using BrightData Web Unlocker API.
|
|
87
|
+
* Uses mapParallel with configurable concurrency.
|
|
88
|
+
*/
|
|
89
|
+
export async function scrapeBrightDataBatch(urls, options = {}, maxConcurrency = BRIGHTDATA_CONCURRENCY) {
|
|
90
|
+
return mapParallel(urls, maxConcurrency, async (url) => {
|
|
91
|
+
return await scrapeBrightData(url, options);
|
|
92
|
+
});
|
|
93
|
+
}
|
package/package.json
CHANGED
package/script/mod.d.ts
CHANGED
|
@@ -26,6 +26,7 @@ export * from './src/tools/scorer.js';
|
|
|
26
26
|
export * from './src/helpers/seedKeywords.js';
|
|
27
27
|
export * from './src/tools/generic.js';
|
|
28
28
|
export * from './src/apis/hasdata/index.js';
|
|
29
|
+
export * from './src/apis/brightdata/index.js';
|
|
29
30
|
export * from './src/apis/chatgptScraper/index.js';
|
|
30
31
|
export * from './src/apis/googleAds/keywordPlanner.js';
|
|
31
32
|
export * from './src/schemas/index.js';
|
package/script/mod.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
|
|
1
|
+
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
|
package/script/mod.js
CHANGED
|
@@ -51,6 +51,7 @@ __exportStar(require("./src/tools/scorer.js"), exports);
|
|
|
51
51
|
__exportStar(require("./src/helpers/seedKeywords.js"), exports);
|
|
52
52
|
__exportStar(require("./src/tools/generic.js"), exports);
|
|
53
53
|
__exportStar(require("./src/apis/hasdata/index.js"), exports);
|
|
54
|
+
__exportStar(require("./src/apis/brightdata/index.js"), exports);
|
|
54
55
|
__exportStar(require("./src/apis/chatgptScraper/index.js"), exports);
|
|
55
56
|
__exportStar(require("./src/apis/googleAds/keywordPlanner.js"), exports);
|
|
56
57
|
__exportStar(require("./src/schemas/index.js"), exports);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./scrape.js"), exports);
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { type RetryConfig } from '../../helpers/async.js';
|
|
2
|
+
export interface BrightDataScrapeOptions {
|
|
3
|
+
/** BrightData zone name. Defaults to "web_unlocker". */
|
|
4
|
+
zone?: string;
|
|
5
|
+
/** HTTP method for the target request. Defaults to "GET". */
|
|
6
|
+
method?: 'GET' | 'POST';
|
|
7
|
+
/** Country code for geo-targeting (e.g. "us", "gb", "de"). */
|
|
8
|
+
country?: string;
|
|
9
|
+
/** Request body for POST requests to the target URL. */
|
|
10
|
+
body?: string;
|
|
11
|
+
/** Additional headers to send to the target URL. */
|
|
12
|
+
headers?: Record<string, string>;
|
|
13
|
+
/** Custom retry configuration. */
|
|
14
|
+
retryConfig?: RetryConfig;
|
|
15
|
+
}
|
|
16
|
+
export interface BrightDataScrapeResponse {
|
|
17
|
+
url: string;
|
|
18
|
+
html?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Scrape a single URL using BrightData Web Unlocker API.
|
|
22
|
+
* Returns raw HTML content.
|
|
23
|
+
*/
|
|
24
|
+
export declare function scrapeBrightData(url: string, options?: BrightDataScrapeOptions): Promise<BrightDataScrapeResponse>;
|
|
25
|
+
/**
|
|
26
|
+
* Scrape multiple URLs in parallel using BrightData Web Unlocker API.
|
|
27
|
+
* Uses mapParallel with configurable concurrency.
|
|
28
|
+
*/
|
|
29
|
+
export declare function scrapeBrightDataBatch(urls: Array<string>, options?: BrightDataScrapeOptions, maxConcurrency?: number): Promise<Array<BrightDataScrapeResponse>>;
|
|
30
|
+
//# sourceMappingURL=scrape.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/scrape.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAapF,MAAM,WAAW,uBAAuB;IACpC,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,8DAA8D;IAC9D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kCAAkC;IAClC,WAAW,CAAC,EAAE,WAAW,CAAC;CAC7B;AAED,MAAM,WAAW,wBAAwB;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AA4ED;;;GAGG;AACH,wBAAsB,gBAAgB,CAClC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,uBAA4B,GACtC,OAAO,CAAC,wBAAwB,CAAC,CAYnC;AAED;;;GAGG;AACH,wBAAsB,qBAAqB,CACvC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,GAAE,uBAA4B,EACrC,cAAc,GAAE,MAA+B,GAChD,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAQ1C"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.scrapeBrightData = scrapeBrightData;
|
|
37
|
+
exports.scrapeBrightDataBatch = scrapeBrightDataBatch;
|
|
38
|
+
const dntShim = __importStar(require("../../../_dnt.shims.js"));
|
|
39
|
+
const async_js_1 = require("../../helpers/async.js");
|
|
40
|
+
const BRIGHTDATA_CONCURRENCY = 10;
|
|
41
|
+
const BRIGHTDATA_RETRY_CONFIG = {
|
|
42
|
+
maxRetries: 3,
|
|
43
|
+
initialDelay: 1000,
|
|
44
|
+
maxDelay: 8000,
|
|
45
|
+
backoffMultiplier: 2,
|
|
46
|
+
statusCodes: [429, 500, 502, 503]
|
|
47
|
+
};
|
|
48
|
+
function getApiKey() {
|
|
49
|
+
const apiKey = dntShim.Deno.env.get('BRIGHTDATA_API_KEY');
|
|
50
|
+
if (apiKey == null) {
|
|
51
|
+
throw new Error('BRIGHTDATA_API_KEY environment variable is required');
|
|
52
|
+
}
|
|
53
|
+
return apiKey;
|
|
54
|
+
}
|
|
55
|
+
async function fetchBrightData(url, apiKey, options, retryConfig) {
|
|
56
|
+
const response = await (0, async_js_1.withRetries)(async () => {
|
|
57
|
+
const body = {
|
|
58
|
+
zone: options.zone ?? 'web_unlocker',
|
|
59
|
+
url,
|
|
60
|
+
format: 'raw',
|
|
61
|
+
method: options.method ?? 'GET'
|
|
62
|
+
};
|
|
63
|
+
if (options.country != null) {
|
|
64
|
+
body.country = options.country;
|
|
65
|
+
}
|
|
66
|
+
if (options.body != null) {
|
|
67
|
+
body.body = options.body;
|
|
68
|
+
}
|
|
69
|
+
const headers = {
|
|
70
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
71
|
+
'Content-Type': 'application/json'
|
|
72
|
+
};
|
|
73
|
+
if (options.headers != null) {
|
|
74
|
+
// Forward custom headers as part of the BrightData request
|
|
75
|
+
body.headers = options.headers;
|
|
76
|
+
}
|
|
77
|
+
return fetch('https://api.brightdata.com/request', {
|
|
78
|
+
method: 'POST',
|
|
79
|
+
headers,
|
|
80
|
+
body: JSON.stringify(body),
|
|
81
|
+
signal: dntShim.dntGlobalThis.abortSignal
|
|
82
|
+
});
|
|
83
|
+
}, retryConfig);
|
|
84
|
+
if (!response.ok) {
|
|
85
|
+
const status = response.status;
|
|
86
|
+
let details = '';
|
|
87
|
+
try {
|
|
88
|
+
details = ` - ${await response.text()}`;
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
// ignore
|
|
92
|
+
}
|
|
93
|
+
const errorMessage = status === 401
|
|
94
|
+
? 'BrightData API error (401): Invalid API key'
|
|
95
|
+
: status === 403
|
|
96
|
+
? 'BrightData API error (403): Forbidden or credits exhausted'
|
|
97
|
+
: status === 429
|
|
98
|
+
? 'BrightData API error (429): Rate limit exceeded'
|
|
99
|
+
: `BrightData API error: ${status} ${response.statusText}${details}`;
|
|
100
|
+
console.error(errorMessage);
|
|
101
|
+
throw new Error(errorMessage);
|
|
102
|
+
}
|
|
103
|
+
return response;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Scrape a single URL using BrightData Web Unlocker API.
|
|
107
|
+
* Returns raw HTML content.
|
|
108
|
+
*/
|
|
109
|
+
async function scrapeBrightData(url, options = {}) {
|
|
110
|
+
const apiKey = getApiKey();
|
|
111
|
+
const retryConfig = options.retryConfig ?? BRIGHTDATA_RETRY_CONFIG;
|
|
112
|
+
try {
|
|
113
|
+
const response = await fetchBrightData(url, apiKey, options, retryConfig);
|
|
114
|
+
const html = await response.text();
|
|
115
|
+
return { url, html };
|
|
116
|
+
}
|
|
117
|
+
catch (error) {
|
|
118
|
+
console.error(`BrightData scrape error for ${url}:`, error);
|
|
119
|
+
return { url };
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Scrape multiple URLs in parallel using BrightData Web Unlocker API.
|
|
124
|
+
* Uses mapParallel with configurable concurrency.
|
|
125
|
+
*/
|
|
126
|
+
async function scrapeBrightDataBatch(urls, options = {}, maxConcurrency = BRIGHTDATA_CONCURRENCY) {
|
|
127
|
+
return (0, async_js_1.mapParallel)(urls, maxConcurrency, async (url) => {
|
|
128
|
+
return await scrapeBrightData(url, options);
|
|
129
|
+
});
|
|
130
|
+
}
|