clearscrape 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
+ /**
+  * ClearScrape SDK Types
+  */
+ /**
+  * Configuration options for the ClearScrape client
+  */
+ interface ClearScrapeConfig {
+     /** Your ClearScrape API key */
+     apiKey: string;
+     /** Base URL for the API (defaults to https://api.clearscrape.io) */
+     baseUrl?: string;
+     /** Request timeout in milliseconds (defaults to 60000) */
+     timeout?: number;
+     /** Number of retries for failed requests (defaults to 3) */
+     retries?: number;
+ }
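+ /*
+  * A minimal construction sketch for ClearScrapeConfig, assuming the
+  * defaults documented above; the CLEARSCRAPE_API_KEY environment
+  * variable name is illustrative, not part of the SDK.
+  *
+  * ```typescript
+  * import { ClearScrape } from 'clearscrape';
+  *
+  * const client = new ClearScrape({
+  *   apiKey: process.env.CLEARSCRAPE_API_KEY ?? '', // illustrative env var
+  *   baseUrl: 'https://api.clearscrape.io',         // documented default
+  *   timeout: 60000,                                // documented default
+  *   retries: 3                                     // documented default
+  * });
+  * ```
+  */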
+ /**
+  * Options for scraping a URL
+  */
+ interface ScrapeOptions {
+     /** Target URL to scrape */
+     url: string;
+     /** HTTP method (defaults to GET) */
+     method?: 'GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH';
+     /** Enable JavaScript rendering (+5 credits) */
+     jsRender?: boolean;
+     /** Use premium residential proxies (+10 credits) */
+     premiumProxy?: boolean;
+     /** Enable antibot bypass (+25 credits) */
+     antibot?: boolean;
+     /** 2-letter country code for geo-targeting */
+     proxyCountry?: string;
+     /** CSS selector to wait for (requires jsRender) */
+     waitFor?: string;
+     /** Fixed wait time in milliseconds (max 30000) */
+     wait?: number;
+     /** Scroll page to load lazy content */
+     autoScroll?: boolean;
+     /** Capture full page screenshot */
+     screenshot?: boolean;
+     /** Capture screenshot of specific element */
+     screenshotSelector?: string;
+     /** Custom HTTP headers */
+     headers?: Record<string, string>;
+     /** Request body for POST/PUT requests */
+     body?: string | Record<string, unknown>;
+     /** Domain extractor (amazon, walmart, google, etc.) */
+     domain?: DomainType;
+ }
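+ /*
+  * A sketch of a fully-specified ScrapeOptions value, assuming the client
+  * constructed above; the URL, selector, and header values are illustrative.
+  * Credit costs follow the doc comments on each flag.
+  *
+  * ```typescript
+  * const options: ScrapeOptions = {
+  *   url: 'https://example.com/products',
+  *   jsRender: true,            // +5 credits
+  *   premiumProxy: true,        // +10 credits
+  *   proxyCountry: 'us',
+  *   waitFor: '.product-grid',  // requires jsRender
+  *   autoScroll: true,
+  *   headers: { 'Accept-Language': 'en-US' }
+  * };
+  * const result = await client.scrape(options);
+  * ```
+  */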
+ /**
+  * Supported domain extractors
+  */
+ type DomainType = 'amazon' | 'walmart' | 'google' | 'google_shopping' | 'ebay' | 'target' | 'etsy' | 'bestbuy' | 'homedepot' | 'zillow' | 'yelp' | 'indeed' | 'linkedin_jobs';
+ /**
+  * Response from a successful scrape request
+  */
+ interface ScrapeResponse {
+     success: true;
+     data: {
+         /** Raw HTML content */
+         html: string;
+         /** Extracted text content */
+         text?: string;
+         /** Base64 encoded screenshot (if requested) */
+         screenshot?: string;
+         /** Extracted data (if domain extractor used) */
+         extracted?: Record<string, unknown>;
+     };
+     metadata: {
+         /** Final URL after redirects */
+         url: string;
+         /** HTTP status code */
+         statusCode: number;
+         /** Credits consumed */
+         cost: number;
+         /** Request duration in milliseconds */
+         duration: number;
+         /** Response size in bytes */
+         byteSize: number;
+         /** Options used for the request */
+         options: {
+             js_render: boolean;
+             premium_proxy: boolean;
+             antibot: boolean;
+             proxy_country?: string;
+         };
+         /** Domain extractor used */
+         domain?: string;
+     };
+ }
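+ /*
+  * A sketch of consuming a ScrapeResponse, assuming `client` from the
+  * construction sketch above; the fields follow the shape declared here.
+  *
+  * ```typescript
+  * const result: ScrapeResponse = await client.scrape({ url: 'https://example.com' });
+  * console.log(result.data.html.length);    // raw HTML size
+  * console.log(result.metadata.statusCode); // e.g. 200
+  * console.log(`cost: ${result.metadata.cost} credits in ${result.metadata.duration} ms`);
+  * ```
+  */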
+ /**
+  * Response from a failed scrape request
+  */
+ interface ScrapeErrorResponse {
+     success: false;
+     error: string;
+     message: string;
+     /** Credits required (for insufficient credits error) */
+     required?: number;
+ }
+ /**
+  * Amazon product data extracted by domain API
+  */
+ interface AmazonProduct {
+     title: string;
+     price: string;
+     originalPrice?: string;
+     currency: string;
+     rating: string;
+     reviewCount: string;
+     availability: string;
+     seller: string;
+     asin: string;
+     brand?: string;
+     images: string[];
+     features: string[];
+     breadcrumbs: string[];
+     description?: string;
+     specifications?: Record<string, string>;
+ }
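+ /*
+  * A sketch of pairing AmazonProduct with the generic extract() method
+  * declared further below, so the result is typed instead of
+  * Record<string, unknown>; the ASIN in the URL is illustrative.
+  *
+  * ```typescript
+  * const product = await client.extract<AmazonProduct>(
+  *   'https://www.amazon.com/dp/B09V3KXJPB',
+  *   'amazon'
+  * );
+  * console.log(product.title, product.price, product.rating);
+  * ```
+  */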
+ /**
+  * Google SERP data extracted by domain API
+  */
+ interface GoogleSerpResult {
+     searchQuery: string;
+     totalResults: string;
+     organicResults: Array<{
+         position: number;
+         title: string;
+         url: string;
+         displayUrl: string;
+         description: string;
+     }>;
+     featuredSnippet?: {
+         title: string;
+         content: string;
+         url: string;
+     };
+     peopleAlsoAsk?: Array<{
+         question: string;
+         answer: string;
+     }>;
+     relatedSearches?: string[];
+ }
+ /**
+  * Proxy configuration for residential proxy service
+  */
+ interface ProxyConfig {
+     host: string;
+     port: number;
+     username: string;
+     password: string;
+ }
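+ /*
+  * A sketch of assembling a proxy URL from a ProxyConfig by hand, mirroring
+  * the format shown in the getProxyUrl() example further below; the field
+  * values come from getProxyConfig().
+  *
+  * ```typescript
+  * const proxy: ProxyConfig = client.getProxyConfig({ country: 'us' });
+  * const proxyUrl = `http://${proxy.username}:${proxy.password}@${proxy.host}:${proxy.port}`;
+  * ```
+  */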
+ /**
+  * Browser connection options for Scraping Browser
+  */
+ interface BrowserOptions {
+     /** 2-letter country code for geo-targeting */
+     proxyCountry?: string;
+ }
+ /**
+  * ClearScrape API error
+  */
+ declare class ClearScrapeError extends Error {
+     readonly statusCode: number;
+     readonly response?: ScrapeErrorResponse;
+     constructor(message: string, statusCode: number, response?: ScrapeErrorResponse);
+ }
+ /**
+  * Insufficient credits error
+  */
+ declare class InsufficientCreditsError extends ClearScrapeError {
+     readonly required: number;
+     constructor(message: string, required: number);
+ }
+ /**
+  * Rate limit error
+  */
+ declare class RateLimitError extends ClearScrapeError {
+     constructor(message: string);
+ }
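+ /*
+  * An error-handling sketch using the classes above; which errors a given
+  * request can raise is an assumption based on their names. Subclasses are
+  * checked before the ClearScrapeError base class.
+  *
+  * ```typescript
+  * try {
+  *   await client.scrape({ url: 'https://example.com', antibot: true });
+  * } catch (err) {
+  *   if (err instanceof InsufficientCreditsError) {
+  *     console.error(`Need ${err.required} credits`);
+  *   } else if (err instanceof RateLimitError) {
+  *     console.error('Rate limited, retry later');
+  *   } else if (err instanceof ClearScrapeError) {
+  *     console.error(`API error ${err.statusCode}: ${err.message}`);
+  *   } else {
+  *     throw err;
+  *   }
+  * }
+  * ```
+  */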
+
+ /**
+  * ClearScrape API Client
+  *
+  * @example
+  * ```typescript
+  * import { ClearScrape } from 'clearscrape';
+  *
+  * const client = new ClearScrape({ apiKey: 'your-api-key' });
+  *
+  * const result = await client.scrape({
+  *   url: 'https://example.com',
+  *   jsRender: true
+  * });
+  *
+  * console.log(result.data.html);
+  * ```
+  */
+ declare class ClearScrape {
+     private readonly apiKey;
+     private readonly baseUrl;
+     private readonly timeout;
+     private readonly retries;
+     constructor(config: ClearScrapeConfig);
+     /**
+      * Scrape a URL and return the HTML content
+      *
+      * @param options - Scraping options
+      * @returns Promise resolving to scrape response
+      *
+      * @example
+      * ```typescript
+      * // Basic scrape
+      * const result = await client.scrape({ url: 'https://example.com' });
+      *
+      * // With JavaScript rendering
+      * const result = await client.scrape({
+      *   url: 'https://example.com',
+      *   jsRender: true,
+      *   waitFor: '.content'
+      * });
+      *
+      * // With premium proxy and country targeting
+      * const result = await client.scrape({
+      *   url: 'https://example.com',
+      *   premiumProxy: true,
+      *   proxyCountry: 'us'
+      * });
+      * ```
+      */
+     scrape(options: ScrapeOptions): Promise<ScrapeResponse>;
+     /**
+      * Scrape a URL and return only the HTML content
+      *
+      * @param url - URL to scrape
+      * @param options - Additional scraping options
+      * @returns Promise resolving to HTML string
+      */
+     getHtml(url: string, options?: Omit<ScrapeOptions, 'url'>): Promise<string>;
+     /**
+      * Scrape a URL and return only the text content
+      *
+      * @param url - URL to scrape
+      * @param options - Additional scraping options
+      * @returns Promise resolving to text string
+      */
+     getText(url: string, options?: Omit<ScrapeOptions, 'url'>): Promise<string>;
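+     /*
+      * A usage sketch for the getHtml/getText convenience methods above;
+      * the URLs are illustrative.
+      *
+      * ```typescript
+      * const html = await client.getHtml('https://example.com');
+      * const text = await client.getText('https://example.com', { jsRender: true });
+      * ```
+      */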
+     /**
+      * Take a screenshot of a URL
+      *
+      * @param url - URL to screenshot
+      * @param options - Additional options
+      * @returns Promise resolving to base64 encoded screenshot
+      *
+      * @example
+      * ```typescript
+      * const screenshot = await client.screenshot('https://example.com');
+      * // Save to file
+      * fs.writeFileSync('screenshot.png', Buffer.from(screenshot, 'base64'));
+      * ```
+      */
+     screenshot(url: string, options?: Omit<ScrapeOptions, 'url' | 'screenshot'>): Promise<string>;
+     /**
+      * Scrape using a domain-specific extractor (Amazon, Walmart, Google, etc.)
+      *
+      * @param url - URL to scrape
+      * @param domain - Domain extractor to use
+      * @returns Promise resolving to extracted data
+      *
+      * @example
+      * ```typescript
+      * // Extract Amazon product data
+      * const product = await client.extract<AmazonProduct>(
+      *   'https://www.amazon.com/dp/B09V3KXJPB',
+      *   'amazon'
+      * );
+      * console.log(product.title, product.price);
+      *
+      * // Extract Google SERP data
+      * const serp = await client.extract<GoogleSerpResult>(
+      *   'https://www.google.com/search?q=best+laptops',
+      *   'google'
+      * );
+      * console.log(serp.organicResults);
+      * ```
+      */
+     extract<T = Record<string, unknown>>(url: string, domain: ScrapeOptions['domain']): Promise<T>;
+     /**
+      * Get proxy configuration for the residential proxy service
+      *
+      * @param options - Proxy options
+      * @returns Proxy configuration object
+      *
+      * @example
+      * ```typescript
+      * // Basic proxy config
+      * const proxy = client.getProxyConfig();
+      * // { host: 'proxy.clearscrape.io', port: 8000, username: '...', password: '...' }
+      *
+      * // With country targeting
+      * const proxy = client.getProxyConfig({ country: 'us' });
+      *
+      * // With a sticky session IP
+      * const proxy = client.getProxyConfig({ session: 'my-session-123' });
+      * ```
+      */
+     getProxyConfig(options?: {
+         country?: string;
+         session?: string;
+     }): ProxyConfig;
+     /**
+      * Get proxy URL string for use with HTTP clients
+      *
+      * @param options - Proxy options
+      * @returns Proxy URL string
+      *
+      * @example
+      * ```typescript
+      * const proxyUrl = client.getProxyUrl({ country: 'us' });
+      * // 'http://apikey-country-us:apikey@proxy.clearscrape.io:8000'
+      *
+      * // Use with axios
+      * const HttpsProxyAgent = require('https-proxy-agent');
+      * const agent = new HttpsProxyAgent(client.getProxyUrl());
+      * axios.get(url, { httpsAgent: agent });
+      * ```
+      */
+     getProxyUrl(options?: {
+         country?: string;
+         session?: string;
+     }): string;
+     /**
+      * Get WebSocket URL for Scraping Browser (Playwright/Puppeteer)
+      *
+      * @param options - Browser options
+      * @returns WebSocket URL string
+      *
+      * @example
+      * ```typescript
+      * // Use with Playwright
+      * const { chromium } = require('playwright');
+      * const browser = await chromium.connectOverCDP(client.getBrowserWsUrl());
+      *
+      * // Use with Puppeteer
+      * const puppeteer = require('puppeteer-core');
+      * const browser = await puppeteer.connect({
+      *   browserWSEndpoint: client.getBrowserWsUrl()
+      * });
+      *
+      * // With country targeting
+      * const wsUrl = client.getBrowserWsUrl({ proxyCountry: 'gb' });
+      * ```
+      */
+     getBrowserWsUrl(options?: BrowserOptions): string;
+     /**
+      * Build the API request payload
+      */
+     private buildPayload;
+     /**
+      * Make an API request with retries
+      */
+     private makeRequest;
+     /**
+      * Handle API errors
+      */
+     private handleError;
+     /**
+      * Sleep for a specified duration
+      */
+     private sleep;
+ }
+
+ export { type AmazonProduct, type BrowserOptions, ClearScrape, type ClearScrapeConfig, ClearScrapeError, type DomainType, type GoogleSerpResult, InsufficientCreditsError, type ProxyConfig, RateLimitError, type ScrapeErrorResponse, type ScrapeOptions, type ScrapeResponse, ClearScrape as default };