@brightdata/brightdata-plugin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,501 @@
1
+ import { Type, type TSchema } from "@sinclair/typebox";
2
+ import { readStringParam } from "openclaw/plugin-sdk/agent-runtime";
3
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-runtime";
4
+ import { wrapExternalContent } from "openclaw/plugin-sdk/security-runtime";
5
+ import { runBrightDataWebData } from "./brightdata-client.js";
6
+
7
/**
 * Describes one Bright Data "web data" dataset that is exposed as a tool.
 */
type BrightDataDatasetDefinition = {
  // Stable identifier; used to derive the tool name (`brightdata_<id>`) and label.
  id: string;
  // Bright Data dataset id (the `gd_...` value) sent to the web-data API.
  datasetId: string;
  // Human-readable tool description surfaced to the agent.
  description: string;
  // Names of the user-facing input parameters for this dataset.
  inputs: string[];
  // Default values for inputs; an input with a default becomes an optional parameter.
  defaults?: Record<string, string>;
  // Values always merged into the request, never exposed as tool parameters.
  fixedValues?: Record<string, string | number | boolean>;
  // Extra parameters forwarded to the dataset trigger request.
  triggerParams?: Record<string, string | number | boolean>;
};
16
+
17
// Curated parameter descriptions for the input names shared across datasets.
// Inputs not listed here get a name-derived fallback (see resolveInputDescription).
const DATASET_INPUT_DESCRIPTIONS: Record<string, string> = {
  url: "Target URL for the Bright Data dataset.",
  keyword: "Search keyword.",
  first_name: "First name for the search.",
  last_name: "Last name for the search.",
  num_of_reviews: "Number of reviews to fetch.",
  days_limit: "Limit results to the last N days.",
  num_of_comments: "Number of comments to fetch.",
  prompt: "Prompt to send to the AI insights dataset.",
  start_date: "Optional start date in YYYY-MM-DD format.",
  end_date: "Optional end date in YYYY-MM-DD format.",
};
29
+
30
/**
 * Catalog of Bright Data "web data" datasets exposed as individual tools.
 * Each entry pairs a local tool id with a Bright Data dataset id (`gd_...`),
 * the inputs it accepts, and optional defaults / fixed values / trigger
 * parameters. See createBrightDataWebDataTools for how entries become tools.
 *
 * NOTE(review): `x_posts` and `x_profile_posts` share the same datasetId;
 * the latter differs only via `triggerParams` (discover_new). Looks
 * deliberate — confirm before changing either entry.
 */
export const BRIGHTDATA_DATASET_DEFINITIONS: readonly BrightDataDatasetDefinition[] = [
  {
    id: "amazon_product",
    datasetId: "gd_l7q7dkf244hwjntr0",
    description:
      "Quickly read structured amazon product data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "amazon_product_reviews",
    datasetId: "gd_le8e811kzy4ggddlq",
    description:
      "Quickly read structured amazon product review data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "amazon_product_search",
    datasetId: "gd_lwdb4vjm1ehb499uxs",
    description:
      "Quickly read structured amazon product search data.\nRequires a valid search keyword and amazon domain URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["keyword", "url"],
    fixedValues: {
      pages_to_search: "1",
    },
  },
  {
    id: "walmart_product",
    datasetId: "gd_l95fol7l1ru6rlo116",
    description:
      "Quickly read structured walmart product data.\nRequires a valid product URL with /ip/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "walmart_seller",
    datasetId: "gd_m7ke48w81ocyu4hhz0",
    description:
      "Quickly read structured walmart seller data.\nRequires a valid walmart seller URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "ebay_product",
    datasetId: "gd_ltr9mjt81n0zzdk1fb",
    description:
      "Quickly read structured ebay product data.\nRequires a valid ebay product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "homedepot_products",
    datasetId: "gd_lmusivh019i7g97q2n",
    description:
      "Quickly read structured homedepot product data.\nRequires a valid homedepot product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "zara_products",
    datasetId: "gd_lct4vafw1tgx27d4o0",
    description:
      "Quickly read structured zara product data.\nRequires a valid zara product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "etsy_products",
    datasetId: "gd_ltppk0jdv1jqz25mz",
    description:
      "Quickly read structured etsy product data.\nRequires a valid etsy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "bestbuy_products",
    datasetId: "gd_ltre1jqe1jfr7cccf",
    description:
      "Quickly read structured bestbuy product data.\nRequires a valid bestbuy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "linkedin_person_profile",
    datasetId: "gd_l1viktl72bvl7bjuj0",
    description:
      "Quickly read structured linkedin people profile data.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "linkedin_company_profile",
    datasetId: "gd_l1vikfnt1wgvvqz95w",
    description:
      "Quickly read structured linkedin company profile data\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "linkedin_job_listings",
    datasetId: "gd_lpfll7v5hcqtkxl6l",
    description:
      "Quickly read structured linkedin job listings data\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "linkedin_posts",
    datasetId: "gd_lyy3tktm25m4avu764",
    description:
      "Quickly read structured linkedin posts data.\nRequires a real LinkedIn post URL, for example:\nlinkedin.com/pulse/... or linkedin.com/posts/...\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "linkedin_people_search",
    datasetId: "gd_m8d03he47z8nwb5xc",
    description:
      "Quickly read structured linkedin people search data\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url", "first_name", "last_name"],
  },
  {
    id: "crunchbase_company",
    datasetId: "gd_l1vijqt9jfj7olije",
    description:
      "Quickly read structured crunchbase company data\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "zoominfo_company_profile",
    datasetId: "gd_m0ci4a4ivx3j5l6nx",
    description:
      "Quickly read structured ZoomInfo company profile data.\nRequires a valid ZoomInfo company URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "instagram_profiles",
    datasetId: "gd_l1vikfch901nx3by4",
    description:
      "Quickly read structured Instagram profile data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "instagram_posts",
    datasetId: "gd_lk5ns7kz21pck8jpis",
    description:
      "Quickly read structured Instagram post data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "instagram_reels",
    datasetId: "gd_lyclm20il4r5helnj",
    description:
      "Quickly read structured Instagram reel data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "instagram_comments",
    datasetId: "gd_ltppn085pokosxh13",
    description:
      "Quickly read structured Instagram comments data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "facebook_posts",
    datasetId: "gd_lyclm1571iy3mv57zw",
    description:
      "Quickly read structured Facebook post data.\nRequires a valid Facebook post URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "facebook_marketplace_listings",
    datasetId: "gd_lvt9iwuh6fbcwmx1a",
    description:
      "Quickly read structured Facebook marketplace listing data.\nRequires a valid Facebook marketplace listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "facebook_company_reviews",
    datasetId: "gd_m0dtqpiu1mbcyc2g86",
    description:
      "Quickly read structured Facebook company reviews data.\nRequires a valid Facebook company URL and number of reviews.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url", "num_of_reviews"],
  },
  {
    id: "facebook_events",
    datasetId: "gd_m14sd0to1jz48ppm51",
    description:
      "Quickly read structured Facebook events data.\nRequires a valid Facebook event URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "tiktok_profiles",
    datasetId: "gd_l1villgoiiidt09ci",
    description:
      "Quickly read structured Tiktok profiles data.\nRequires a valid Tiktok profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "tiktok_posts",
    datasetId: "gd_lu702nij2f790tmv9h",
    description:
      "Quickly read structured Tiktok post data.\nRequires a valid Tiktok post URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "tiktok_shop",
    datasetId: "gd_m45m1u911dsa4274pi",
    description:
      "Quickly read structured Tiktok shop data.\nRequires a valid Tiktok shop product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "tiktok_comments",
    datasetId: "gd_lkf2st302ap89utw5k",
    description:
      "Quickly read structured Tiktok comments data.\nRequires a valid Tiktok video URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "google_maps_reviews",
    datasetId: "gd_luzfs1dn2oa0teb81",
    description:
      "Quickly read structured Google maps reviews data.\nRequires a valid Google maps URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url", "days_limit"],
    defaults: {
      days_limit: "3",
    },
  },
  {
    id: "google_shopping",
    datasetId: "gd_ltppk50q18kdw67omz",
    description:
      "Quickly read structured Google shopping data.\nRequires a valid Google shopping product URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "google_play_store",
    datasetId: "gd_lsk382l8xei8vzm4u",
    description:
      "Quickly read structured Google play store data.\nRequires a valid Google play store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "apple_app_store",
    datasetId: "gd_lsk9ki3u2iishmwrui",
    description:
      "Quickly read structured apple app store data.\nRequires a valid apple app store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "reuter_news",
    datasetId: "gd_lyptx9h74wtlvpnfu",
    description:
      "Quickly read structured reuter news data.\nRequires a valid reuter news report URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "github_repository_file",
    datasetId: "gd_lyrexgxc24b3d4imjt",
    description:
      "Quickly read structured github repository data.\nRequires a valid github repository file URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "yahoo_finance_business",
    datasetId: "gd_lmrpz3vxmz972ghd7",
    description:
      "Quickly read structured yahoo finance business data.\nRequires a valid yahoo finance business URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "x_posts",
    datasetId: "gd_lwxkxvnf1cynvib9co",
    description:
      "Quickly read structured X post data.\nRequires a valid X post URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "x_profile_posts",
    datasetId: "gd_lwxkxvnf1cynvib9co",
    description:
      "Quickly read structured X posts from a profile.\nRequires a valid X profile URL (e.g. https://x.com/username).\nReturns the most recent posts from the profile.\nOptionally filter by date range using start_date and end_date\n(format: YYYY-MM-DD).",
    inputs: ["url", "start_date", "end_date"],
    defaults: {
      start_date: "",
      end_date: "",
    },
    triggerParams: {
      type: "discover_new",
      discover_by: "profile_url_most_recent_posts",
      limit_per_input: 10,
    },
  },
  {
    id: "zillow_properties_listing",
    datasetId: "gd_lfqkr8wm13ixtbd8f5",
    description:
      "Quickly read structured zillow properties listing data.\nRequires a valid zillow properties listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "booking_hotel_listings",
    datasetId: "gd_m5mbdl081229ln6t4a",
    description:
      "Quickly read structured booking hotel listings data.\nRequires a valid booking hotel listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "youtube_profiles",
    datasetId: "gd_lk538t2k2p1k3oos71",
    description:
      "Quickly read structured youtube profiles data.\nRequires a valid youtube profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "youtube_comments",
    datasetId: "gd_lk9q0ew71spt1mxywf",
    description:
      "Quickly read structured youtube comments data.\nRequires a valid youtube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url", "num_of_comments"],
    defaults: {
      num_of_comments: "10",
    },
  },
  {
    id: "reddit_posts",
    datasetId: "gd_lvz8ah06191smkebj4",
    description:
      "Quickly read structured reddit posts data.\nRequires a valid reddit post URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "youtube_videos",
    datasetId: "gd_lk56epmy2i5g7lzu0k",
    description:
      "Quickly read structured YouTube videos data.\nRequires a valid YouTube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping",
    inputs: ["url"],
  },
  {
    id: "chatgpt_ai_insights",
    datasetId: "gd_m7aof0k82r803d5bjm",
    description:
      "Send a prompt to ChatGPT and get back AI-generated insights.\nReturns structured answer text, citations, recommendations, and markdown. Useful for GEO and LLM as a judge.",
    inputs: ["prompt"],
    fixedValues: {
      url: "https://chatgpt.com/",
      country: "",
      web_search: false,
      additional_prompt: "",
    },
    triggerParams: {
      custom_output_fields: "answer_text_markdown",
    },
  },
  {
    id: "grok_ai_insights",
    datasetId: "gd_m8ve0u141icu75ae74",
    description:
      "Send a prompt to Grok and get back AI-generated insights.\nReturns structured answer text in markdown format.\nUseful for GEO and LLM as a judge.",
    inputs: ["prompt"],
    fixedValues: {
      url: "https://grok.com/",
      index: "",
    },
    triggerParams: {
      custom_output_fields: "answer_text_markdown",
    },
  },
  {
    id: "perplexity_ai_insights",
    datasetId: "gd_m7dhdot1vw9a7gc1n",
    description:
      "Send a prompt to Perplexity and get back AI-generated insights.\nReturns structured answer text in markdown format.\nUseful for GEO and LLM as a judge.",
    inputs: ["prompt"],
    fixedValues: {
      url: "https://www.perplexity.ai",
      index: "",
      country: "",
    },
    triggerParams: {
      custom_output_fields: "answer_text_markdown",
    },
  },
];
403
+
404
+ function datasetIdToTitle(id: string): string {
405
+ return id
406
+ .split("_")
407
+ .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
408
+ .join(" ");
409
+ }
410
+
411
+ function resolveInputDescription(input: string): string {
412
+ return DATASET_INPUT_DESCRIPTIONS[input] ?? `${datasetIdToTitle(input)} input.`;
413
+ }
414
+
415
+ function hasDatasetDefault(definition: BrightDataDatasetDefinition, input: string): boolean {
416
+ return Object.prototype.hasOwnProperty.call(definition.defaults ?? {}, input);
417
+ }
418
+
419
+ function buildDatasetParameters(definition: BrightDataDatasetDefinition) {
420
+ const properties: Record<string, TSchema> = {};
421
+ for (const input of definition.inputs) {
422
+ const defaultValue = definition.defaults?.[input];
423
+ const schema = Type.String({
424
+ description: resolveInputDescription(input),
425
+ ...(defaultValue !== undefined ? { default: defaultValue } : {}),
426
+ });
427
+ properties[input] = defaultValue !== undefined ? Type.Optional(schema) : schema;
428
+ }
429
+ return Type.Object(properties, { additionalProperties: false });
430
+ }
431
+
432
+ function readDatasetInputs(
433
+ rawParams: Record<string, unknown>,
434
+ definition: BrightDataDatasetDefinition,
435
+ ): Record<string, string> {
436
+ const input: Record<string, string> = {};
437
+ for (const inputName of definition.inputs) {
438
+ const value = readStringParam(rawParams, inputName, {
439
+ required: !hasDatasetDefault(definition, inputName),
440
+ });
441
+ input[inputName] = value !== undefined ? value : (definition.defaults?.[inputName] ?? "");
442
+ }
443
+ return input;
444
+ }
445
+
446
+ function brightDataDatasetResult(
447
+ payload: Record<string, unknown>,
448
+ definition: BrightDataDatasetDefinition,
449
+ ) {
450
+ const wrappedText = wrapExternalContent(JSON.stringify(payload, null, 2), {
451
+ source: "api",
452
+ includeWarning: true,
453
+ });
454
+ const externalContent = {
455
+ untrusted: true,
456
+ source: "api",
457
+ provider: "brightdata",
458
+ kind: "dataset",
459
+ datasetId:
460
+ typeof payload.datasetId === "string" && payload.datasetId
461
+ ? payload.datasetId
462
+ : definition.datasetId,
463
+ wrapped: true,
464
+ };
465
+ return {
466
+ content: [{ type: "text" as const, text: wrappedText }],
467
+ details: {
468
+ ...payload,
469
+ externalContent: {
470
+ ...(payload.externalContent && typeof payload.externalContent === "object"
471
+ ? (payload.externalContent as Record<string, unknown>)
472
+ : {}),
473
+ ...externalContent,
474
+ },
475
+ },
476
+ };
477
+ }
478
+
479
+ export function createBrightDataWebDataTools(api: OpenClawPluginApi) {
480
+ return BRIGHTDATA_DATASET_DEFINITIONS.map((definition) => {
481
+ const toolName = `brightdata_${definition.id}`;
482
+ return {
483
+ name: toolName,
484
+ label: `Bright Data ${datasetIdToTitle(definition.id)}`,
485
+ description: definition.description,
486
+ parameters: buildDatasetParameters(definition),
487
+ execute: async (_toolCallId: string, rawParams: Record<string, unknown>) =>
488
+ brightDataDatasetResult(
489
+ await runBrightDataWebData({
490
+ pluginConfig: api.pluginConfig,
491
+ datasetId: definition.datasetId,
492
+ input: readDatasetInputs(rawParams, definition),
493
+ fixedValues: definition.fixedValues,
494
+ triggerParams: definition.triggerParams,
495
+ toolName,
496
+ }),
497
+ definition,
498
+ ),
499
+ };
500
+ });
501
+ }
@@ -0,0 +1,177 @@
1
+ import { wrapWebContent } from "openclaw/plugin-sdk/security-runtime";
2
+ import { DEFAULT_BRIGHTDATA_BASE_URL } from "./config.js";
3
+
4
// Module-level memo of zone-provisioning attempts, keyed by
// token/base-url/zone/kind (see buildBrightDataZoneCacheKey). Storing the
// promise lets concurrent callers share one in-flight attempt.
const ENSURED_BRIGHTDATA_ZONES = new Map<string, Promise<boolean>>();

/** The two Bright Data zone flavors this module can provision. */
export type BrightDataZoneKind = "browser" | "unlocker";

// Shape of the trusted HTTP runner supplied by the host: it performs the
// request described by `params` and hands the response to `run`, returning
// whatever `run` produces.
type TrustedWebToolsEndpointRunner = <T>(
  params: {
    url: string;
    timeoutSeconds: number;
    init?: RequestInit;
  },
  run: (result: { response: Response; finalUrl: string }) => Promise<T>,
) => Promise<T>;
16
+
17
+ export function resolveBrightDataApiEndpoint(baseUrl: string, pathname: string): string {
18
+ const trimmed = baseUrl.trim();
19
+ try {
20
+ const url = new URL(trimmed || DEFAULT_BRIGHTDATA_BASE_URL);
21
+ url.pathname = pathname;
22
+ url.search = "";
23
+ url.hash = "";
24
+ return url.toString();
25
+ } catch {
26
+ return new URL(pathname, DEFAULT_BRIGHTDATA_BASE_URL).toString();
27
+ }
28
+ }
29
+
30
+ function buildBrightDataZoneCacheKey(params: {
31
+ apiToken: string;
32
+ baseUrl: string;
33
+ zoneName: string;
34
+ kind: BrightDataZoneKind;
35
+ }): string {
36
+ return [
37
+ "brightdata-zone",
38
+ params.apiToken.trim(),
39
+ params.baseUrl.trim(),
40
+ params.zoneName.trim(),
41
+ params.kind,
42
+ ].join(":");
43
+ }
44
+
45
+ export function hasBrightDataZone(payload: unknown, zoneName: string): boolean {
46
+ const records = Array.isArray(payload)
47
+ ? payload
48
+ : payload &&
49
+ typeof payload === "object" &&
50
+ !Array.isArray(payload) &&
51
+ Array.isArray((payload as Record<string, unknown>).zones)
52
+ ? ((payload as Record<string, unknown>).zones as unknown[])
53
+ : [];
54
+ return records.some(
55
+ (entry) =>
56
+ entry &&
57
+ typeof entry === "object" &&
58
+ !Array.isArray(entry) &&
59
+ typeof (entry as Record<string, unknown>).name === "string" &&
60
+ ((entry as Record<string, unknown>).name as string).trim() === zoneName,
61
+ );
62
+ }
63
+
64
+ export function buildBrightDataZoneCreatePayload(params: {
65
+ kind: BrightDataZoneKind;
66
+ zoneName: string;
67
+ }): Record<string, unknown> {
68
+ if (params.kind === "browser") {
69
+ return {
70
+ zone: { name: params.zoneName, type: "browser_api" },
71
+ plan: { type: "browser_api" },
72
+ };
73
+ }
74
+ return {
75
+ zone: { name: params.zoneName, type: "unblocker" },
76
+ plan: { type: "unblocker", ub_premium: true },
77
+ };
78
+ }
79
+
80
+ export async function requestBrightDataZoneJson(params: {
81
+ requestEndpoint: TrustedWebToolsEndpointRunner;
82
+ apiToken: string;
83
+ baseUrl: string;
84
+ pathname: string;
85
+ timeoutSeconds: number;
86
+ errorLabel: string;
87
+ body?: unknown;
88
+ }): Promise<unknown> {
89
+ const endpoint = resolveBrightDataApiEndpoint(params.baseUrl, params.pathname);
90
+ return await params.requestEndpoint(
91
+ {
92
+ url: endpoint,
93
+ timeoutSeconds: params.timeoutSeconds,
94
+ init: {
95
+ method: params.body === undefined ? "GET" : "POST",
96
+ headers: {
97
+ Authorization: `Bearer ${params.apiToken}`,
98
+ Accept: "application/json",
99
+ ...(params.body === undefined ? {} : { "Content-Type": "application/json" }),
100
+ },
101
+ ...(params.body === undefined ? {} : { body: JSON.stringify(params.body) }),
102
+ },
103
+ },
104
+ async ({ response }) => {
105
+ const text = (await response.text()).trim();
106
+ if (!response.ok) {
107
+ throw new Error(
108
+ `${params.errorLabel} failed (${response.status}): ${wrapWebContent(text || response.statusText, "web_fetch")}`,
109
+ );
110
+ }
111
+ if (!text) {
112
+ return null;
113
+ }
114
+ try {
115
+ return JSON.parse(text) as unknown;
116
+ } catch {
117
+ throw new Error(`${params.errorLabel} returned invalid JSON.`);
118
+ }
119
+ },
120
+ );
121
+ }
122
+
123
/**
 * Ensure the Bright Data zone `zoneName` exists, creating it when absent.
 *
 * Attempts are memoized in ENSURED_BRIGHTDATA_ZONES per
 * (token, baseUrl, zone, kind), so concurrent callers share one in-flight
 * attempt. Returns true when the zone exists or was created; on failure it
 * evicts the cache entry (so a later call retries), reports via `onError`,
 * and returns false instead of throwing.
 */
export async function ensureBrightDataZoneExists(params: {
  requestEndpoint: TrustedWebToolsEndpointRunner;
  apiToken: string;
  baseUrl: string;
  zoneName: string;
  kind: BrightDataZoneKind;
  timeoutSeconds: number;
  onError?: (error: unknown) => void;
}): Promise<boolean> {
  const cacheKey = buildBrightDataZoneCacheKey(params);
  // Reuse a finished or in-flight attempt for the same zone identity.
  const existing = ENSURED_BRIGHTDATA_ZONES.get(cacheKey);
  if (existing) {
    return await existing;
  }

  const ensurePromise = (async () => {
    try {
      const activeZones = await requestBrightDataZoneJson({
        requestEndpoint: params.requestEndpoint,
        apiToken: params.apiToken,
        baseUrl: params.baseUrl,
        pathname: "/zone/get_active_zones",
        timeoutSeconds: params.timeoutSeconds,
        errorLabel: "Bright Data active zones",
      });
      if (hasBrightDataZone(activeZones, params.zoneName)) {
        return true;
      }
      // Zone not listed as active: create it with the kind-specific payload.
      await requestBrightDataZoneJson({
        requestEndpoint: params.requestEndpoint,
        apiToken: params.apiToken,
        baseUrl: params.baseUrl,
        pathname: "/zone",
        timeoutSeconds: params.timeoutSeconds,
        errorLabel: `Bright Data create ${params.kind} zone (${params.zoneName})`,
        body: buildBrightDataZoneCreatePayload({
          kind: params.kind,
          zoneName: params.zoneName,
        }),
      });
      return true;
    } catch (error) {
      // Evict the cached promise so the next caller retries, then report.
      ENSURED_BRIGHTDATA_ZONES.delete(cacheKey);
      params.onError?.(error);
      return false;
    }
  })();

  ENSURED_BRIGHTDATA_ZONES.set(cacheKey, ensurePromise);
  return await ensurePromise;
}
174
+
175
/**
 * Forget all memoized zone-provisioning results so subsequent
 * ensureBrightDataZoneExists calls re-check (and re-create) zones.
 */
export function resetEnsuredBrightDataZones(): void {
  ENSURED_BRIGHTDATA_ZONES.clear();
}