@crawlkit-sh/sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/LICENSE +21 -0
- package/README.md +386 -0
- package/dist/index.cjs +745 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +1416 -0
- package/dist/index.d.ts +1416 -0
- package/dist/index.js +734 -0
- package/dist/index.js.map +1 -0
- package/package.json +82 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,1416 @@
|
|
|
1
|
+
/**
 * Configuration for resource instances.
 *
 * Passed to the `BaseResource` constructor and kept there as the protected,
 * read-only `config` member.
 */
interface ResourceConfig {
    /** API key for authentication */
    apiKey: string;
    /** Base URL for the API */
    baseUrl: string;
    /** Default timeout in milliseconds */
    timeout: number;
    /** Fetch implementation */
    fetch: typeof globalThis.fetch;
}
|
|
14
|
+
/**
 * Base class for API resources.
 * Provides common HTTP functionality for all resource classes.
 */
declare abstract class BaseResource {
    /** Configuration supplied at construction time */
    protected readonly config: ResourceConfig;
    /**
     * @param config - Resource configuration (API key, base URL, timeout, fetch)
     */
    constructor(config: ResourceConfig);
    /**
     * Make a POST request to the API
     * @param endpoint - API endpoint path (e.g., '/v1/crawl/scrape')
     * @param body - Request body object
     * @returns Parsed response data
     * @throws {CrawlKitError} On API errors
     */
    protected post<T, B extends object = object>(endpoint: string, body: B): Promise<T>;
    /**
     * Make a GET request to the API
     * @param endpoint - API endpoint path
     * @param params - Query parameters
     * @returns Parsed response data
     * @throws {CrawlKitError} On API errors
     */
    protected get<T>(endpoint: string, params?: Record<string, string | number | boolean | undefined>): Promise<T>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Base API success response wrapper.
 *
 * @template T - Type of the payload carried in `data`
 */
interface ApiSuccessResponse<T> {
    /** Literal `true`; discriminates this shape from `ApiErrorResponse` */
    success: true;
    /** Endpoint-specific payload */
    data: T;
}
|
|
46
|
+
/**
 * API error response
 */
interface ApiErrorResponse {
    /** Literal `false`; discriminates this shape from `ApiSuccessResponse` */
    success: false;
    /** Error details */
    error: {
        code: string;
        message: string;
    };
    creditsRefunded?: number;
    creditsRemaining?: number;
}
|
|
58
|
+
/**
 * Union type for all API responses.
 * Narrow on the `success` literal to tell the two members apart.
 */
type ApiResponse<T> = ApiSuccessResponse<T> | ApiErrorResponse;
|
|
62
|
+
/**
 * Credit information included in successful responses
 */
interface CreditInfo {
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
69
|
+
/**
 * Timing information
 */
interface Timing {
    /** Total time; unit is not stated here (presumably milliseconds — confirm against the API docs) */
    total: number;
}
|
|
75
|
+
/**
 * API error codes
 */
type ErrorCode = 'VALIDATION_ERROR' | 'INSUFFICIENT_CREDITS' | 'TIMEOUT' | 'DNS_FAILED' | 'CONNECTION_REFUSED' | 'SSL_ERROR' | 'TOO_MANY_REDIRECTS' | 'INVALID_URL' | 'PROXY_ERROR' | 'PARSE_ERROR' | 'RATE_LIMITED' | 'NOT_FOUND' | 'BLOCKED' | 'INSTAGRAM_BLOCKED' | 'INSTAGRAM_ERROR' | 'UNKNOWN';
|
|
79
|
+
|
|
80
|
+
/**
 * Browser action: wait for specified milliseconds
 */
interface WaitAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'wait';
    /** Milliseconds to wait (100-30000) */
    milliseconds: number;
}
|
|
88
|
+
/**
 * Browser action: click an element
 */
interface ClickAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'click';
    /** CSS selector of element to click */
    selector: string;
}
|
|
96
|
+
/**
 * Browser action: type text into an input
 */
interface TypeAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'type';
    /** CSS selector of input element */
    selector: string;
    /** Text to type into the element */
    text: string;
}
|
|
106
|
+
/**
 * Browser action: press a key
 */
interface PressAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'press';
    /** Key to press (Enter, Tab, Escape, ArrowDown, etc.) */
    key: string;
}
|
|
114
|
+
/**
 * Browser action: scroll the page
 */
interface ScrollAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'scroll';
    /** Scroll direction */
    direction: 'up' | 'down';
}
|
|
122
|
+
/**
 * Browser action: execute JavaScript
 */
interface EvaluateAction {
    /** Discriminant for the `BrowserAction` union */
    type: 'evaluate';
    /** JavaScript code to execute in browser context */
    script: string;
}
|
|
130
|
+
/**
 * Union type for all browser actions.
 * Each member carries a distinct literal `type`, so the union can be narrowed on it.
 */
type BrowserAction = WaitAction | ClickAction | TypeAction | PressAction | ScrollAction | EvaluateAction;
|
|
134
|
+
/**
 * Options for scraping a URL
 */
interface ScrapeOptions {
    /** Request timeout in milliseconds (1000-300000, default: 30000) */
    timeout?: number;
    /** Additional HTTP headers */
    headers?: Record<string, string>;
    /** CSS selector or milliseconds to wait before extracting content. Forces browser rendering. */
    waitFor?: string | number;
    /** Browser actions to execute in order. Forces browser rendering. Max 50 actions. */
    actions?: BrowserAction[];
    /** Extract only main content - removes boilerplate, navigation, etc. (default: true) */
    onlyMainContent?: boolean;
    /** Custom CSS selector to extract specific content */
    contentSelector?: string;
}
|
|
151
|
+
/**
 * Parameters for the scrape endpoint
 */
interface ScrapeParams {
    /** URL to scrape */
    url: string;
    /** Scrape options */
    options?: ScrapeOptions;
}
|
|
160
|
+
/**
 * Page metadata extracted from the scraped page.
 * Every field except `keywords` is nullable; `keywords` is always an array.
 */
interface PageMetadata {
    title: string | null;
    description: string | null;
    language: string | null;
    ogImage: string | null;
    ogTitle: string | null;
    ogDescription: string | null;
    siteName: string | null;
    favicon: string | null;
    author: string | null;
    publishedTime: string | null;
    modifiedTime: string | null;
    keywords: string[];
    canonical: string | null;
    robots: string | null;
}
|
|
179
|
+
/**
 * Links found on the scraped page
 */
interface PageLinks {
    /** Internal links */
    internal: string[];
    /** External links */
    external: string[];
}
|
|
186
|
+
/**
 * Processing time statistics.
 * Units are not stated in this file (presumably milliseconds — confirm against the API docs).
 */
interface CrawlStats {
    fetchTime: number;
    cleaningTime: number;
    extractionTime: number;
    conversionTime: number;
    /** Nullable, unlike the other timings */
    llmTime: number | null;
    totalTime: number;
}
|
|
197
|
+
/**
 * Result of a browser action execution
 */
interface ActionResult {
    type: string;
    success: boolean;
    duration: number;
    value?: unknown;
    error?: string;
}
|
|
207
|
+
/**
 * Data returned from the scrape endpoint
 */
interface ScrapeData {
    /** Original requested URL */
    url: string;
    /** Final URL after redirects */
    finalUrl: string;
    /** Cleaned, readable markdown content */
    markdown: string;
    /** Cleaned HTML (main content only) */
    html: string;
    /** Original unprocessed HTML */
    rawHtml: string;
    /** LLM-extracted data (null for scrape, populated for extract) */
    json: Record<string, unknown> | null;
    /** Page metadata */
    metadata: PageMetadata;
    /** Internal and external links */
    links: PageLinks;
    /** Processing time statistics */
    stats: CrawlStats;
    /** Embedded data from Next.js, Nuxt.js, Notion, etc. */
    embeddedData?: Record<string, unknown> | null;
    /** Results of browser actions (if any were executed) */
    actionResults?: ActionResult[];
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
238
|
+
/**
 * Response from the scrape endpoint: the success wrapper around `ScrapeData`
 */
type ScrapeResponse = ApiSuccessResponse<ScrapeData>;
|
|
242
|
+
|
|
243
|
+
/**
 * Options for extracting structured data from a URL.
 * Inherits every `ScrapeOptions` field and adds `prompt`.
 */
interface ExtractOptions extends ScrapeOptions {
    /** Custom extraction prompt (max 2000 chars). Helps guide the LLM on what to extract. */
    prompt?: string;
}
|
|
250
|
+
/**
 * Parameters for the extract endpoint
 */
interface ExtractParams {
    /** URL to scrape and extract from */
    url: string;
    /** JSON Schema defining the structure for LLM extraction */
    schema: Record<string, unknown>;
    /** Extract options */
    options?: ExtractOptions;
}
|
|
261
|
+
/**
 * Data returned from the extract endpoint.
 * Same fields as `ScrapeData`, except that `json` is typed as `T`.
 *
 * @template T - The type of the extracted JSON data based on the provided schema
 */
interface ExtractData<T = unknown> {
    /** Original requested URL */
    url: string;
    /** Final URL after redirects */
    finalUrl: string;
    /** Cleaned, readable markdown content */
    markdown: string;
    /** Cleaned HTML (main content only) */
    html: string;
    /** Original unprocessed HTML */
    rawHtml: string;
    /** LLM-extracted structured data based on provided schema */
    json: T;
    /** Page metadata */
    metadata: PageMetadata;
    /** Internal and external links */
    links: PageLinks;
    /** Processing time statistics */
    stats: CrawlStats;
    /** Embedded data from Next.js, Nuxt.js, Notion, etc. */
    embeddedData?: Record<string, unknown> | null;
    /** Results of browser actions (if any were executed) */
    actionResults?: ActionResult[];
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
293
|
+
/**
 * Response from the extract endpoint: the success wrapper around `ExtractData<T>`
 *
 * @template T - The type of the extracted JSON data based on the provided schema
 */
type ExtractResponse<T = unknown> = ApiSuccessResponse<ExtractData<T>>;
|
|
298
|
+
|
|
299
|
+
/**
 * Time range filter for search results
 * - 'd': Past day
 * - 'w': Past week
 * - 'm': Past month
 * - 'y': Past year
 */
type TimeRange = 'd' | 'w' | 'm' | 'y';
|
|
307
|
+
/**
 * Options for web search
 */
interface SearchOptions {
    /** Language code (e.g., 'tr-TR', 'en-US') */
    language?: string;
    /** Region code (e.g., 'tr-tr', 'us-en', 'de-de') */
    region?: string;
    /** Time filter: d (day), w (week), m (month), y (year), null (any time) */
    timeRange?: TimeRange | null;
    /** Maximum number of results (1-100, default: 30) */
    maxResults?: number;
}
|
|
320
|
+
/**
 * Parameters for the search endpoint
 */
interface SearchParams {
    /** Search query (1-500 characters) */
    query: string;
    /** Search options */
    options?: SearchOptions;
}
|
|
329
|
+
/**
 * A single search result
 */
interface SearchResult {
    /** Result position (1-indexed) */
    position: number;
    /** Result title */
    title: string;
    /** Result URL */
    url: string;
    /** Result snippet/description */
    snippet: string;
}
|
|
342
|
+
/**
 * Data returned from the search endpoint
 */
interface SearchData {
    /** Original search query */
    query: string;
    /** Total number of results returned */
    totalResults: number;
    /** Search results */
    results: SearchResult[];
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
361
|
+
/**
 * Response from the search endpoint: the success wrapper around `SearchData`
 */
type SearchResponse = ApiSuccessResponse<SearchData>;
|
|
365
|
+
|
|
366
|
+
/**
 * Options for taking a screenshot
 */
interface ScreenshotOptions {
    /** Viewport width in pixels (320-3840, default: 1920) */
    width?: number;
    /** Viewport height in pixels (240-2160, default: 1080) */
    height?: number;
    /** Page load timeout in milliseconds (1000-60000, default: 30000) */
    timeout?: number;
    /** CSS selector to wait for before taking screenshot */
    waitForSelector?: string;
}
|
|
379
|
+
/**
 * Parameters for the screenshot endpoint
 */
interface ScreenshotParams {
    /** URL to screenshot */
    url: string;
    /** Screenshot options */
    options?: ScreenshotOptions;
}
|
|
388
|
+
/**
 * Data returned from the screenshot endpoint
 */
interface ScreenshotData {
    /** Public URL of the screenshot */
    url: string;
    /** Viewport width used */
    width: number;
    /** Viewport height used */
    height: number;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
407
|
+
/**
 * Response from the screenshot endpoint: the success wrapper around `ScreenshotData`
 */
type ScreenshotResponse = ApiSuccessResponse<ScreenshotData>;
|
|
411
|
+
|
|
412
|
+
/**
 * Options for scraping a LinkedIn company profile
 */
interface LinkedInCompanyOptions {
    /** Page load timeout in milliseconds (5000-60000, default: 30000) */
    timeout?: number;
    /** Whether to fetch job listings (default: true) */
    includeJobs?: boolean;
}
|
|
421
|
+
/**
 * Parameters for the LinkedIn company endpoint
 */
interface LinkedInCompanyParams {
    /** LinkedIn company URL (e.g., https://www.linkedin.com/company/openai/) */
    url: string;
    /** Options */
    options?: LinkedInCompanyOptions;
}
|
|
430
|
+
/**
 * LinkedIn employee information
 */
interface LinkedInEmployee {
    name: string;
    photoUrl: string | null;
    linkedinUrl: string;
}
|
|
438
|
+
/**
 * LinkedIn job listing
 */
interface LinkedInJob {
    title: string;
    jobUrl: string;
    jobId: string | null;
    location: string | null;
    postedTime: string | null;
}
|
|
448
|
+
/**
 * Similar company information
 */
interface LinkedInSimilarCompany {
    name: string;
    industry: string | null;
    location: string | null;
    logoUrl: string | null;
    linkedinUrl: string;
}
|
|
458
|
+
/**
 * LinkedIn company post
 */
interface LinkedInPost {
    content: string | null;
    postUrl: string;
    timeAgo: string | null;
    reactions: number;
    comments: number;
    imageUrls: string[];
}
|
|
469
|
+
/**
 * LinkedIn company profile data
 */
interface LinkedInCompany {
    name: string;
    industry: string | null;
    location: string | null;
    followers: number | null;
    slogan: string | null;
    logoUrl: string | null;
    coverImageUrl: string | null;
    description: string | null;
    website: string | null;
    companySize: string | null;
    headquarters: string | null;
    companyType: string | null;
    foundedYear: number | null;
    specialties: string[];
    employees: LinkedInEmployee[];
    locations: string[];
    similarCompanies: LinkedInSimilarCompany[];
    recentPosts: LinkedInPost[];
    jobs: LinkedInJob[];
    linkedinUrl: string;
    scrapedAt: string;
}
|
|
495
|
+
/**
 * Data returned from the LinkedIn company endpoint
 */
interface LinkedInCompanyData {
    /** Company profile */
    company: LinkedInCompany;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
506
|
+
/**
 * Response from the LinkedIn company endpoint: the success wrapper around `LinkedInCompanyData`
 */
type LinkedInCompanyResponse = ApiSuccessResponse<LinkedInCompanyData>;
|
|
510
|
+
/**
 * Parameters for the LinkedIn person endpoint
 */
interface LinkedInPersonParams {
    /** Single LinkedIn profile URL or array of URLs (max 10) */
    url: string | string[];
}
|
|
517
|
+
/**
 * LinkedIn person profile result
 */
interface LinkedInPersonResult {
    url: string;
    /** Profile payload; left as an untyped string-keyed record by this declaration */
    person: Record<string, unknown>;
}
|
|
524
|
+
/**
 * Data returned from the LinkedIn person endpoint
 */
interface LinkedInPersonData {
    persons: LinkedInPersonResult[];
    failed?: string[];
    totalUrls: number;
    successCount: number;
    failedCount: number;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
539
|
+
/**
 * Response from the LinkedIn person endpoint: the success wrapper around `LinkedInPersonData`
 */
type LinkedInPersonResponse = ApiSuccessResponse<LinkedInPersonData>;
|
|
543
|
+
|
|
544
|
+
/**
 * Options for scraping an Instagram profile
 */
interface InstagramProfileOptions {
    /** Page load timeout in milliseconds (default: 30000) */
    timeout?: number;
}
|
|
551
|
+
/**
 * Parameters for the Instagram profile endpoint
 */
interface InstagramProfileParams {
    /** Instagram username (without @) or profile URL */
    username: string;
    /** Options */
    options?: InstagramProfileOptions;
}
|
|
560
|
+
/**
 * Instagram bio link
 */
interface InstagramBioLink {
    title: string;
    url: string;
    link_type: string;
}
|
|
568
|
+
/**
 * Instagram post dimensions
 */
interface InstagramDimensions {
    height: number;
    width: number;
}
|
|
575
|
+
/**
 * Instagram post from profile
 */
interface InstagramPost {
    id: string;
    shortcode: string;
    display_url: string;
    thumbnail_src: string;
    is_video: boolean;
    video_url: string | null;
    caption: string | null;
    like_count: number;
    comment_count: number;
    taken_at_timestamp: number;
    dimensions: InstagramDimensions;
    video_view_count: number | null;
}
|
|
592
|
+
/**
 * Instagram profile data
 */
interface InstagramProfile {
    id: string;
    username: string;
    full_name: string;
    biography: string | null;
    bio_links: InstagramBioLink[];
    follower_count: number;
    following_count: number;
    media_count: number;
    profile_pic_url: string;
    profile_pic_url_hd: string | null;
    is_verified: boolean;
    is_private: boolean;
    is_business_account: boolean;
    is_professional_account: boolean;
    business_category_name: string | null;
    business_email: string | null;
    business_phone_number: string | null;
    external_url: string | null;
    highlight_reel_count: number;
    posts: InstagramPost[];
}
|
|
617
|
+
/**
 * Data returned from the Instagram profile endpoint
 */
interface InstagramProfileData {
    /** Profile data */
    profile: InstagramProfile;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
628
|
+
/**
 * Response from the Instagram profile endpoint: the success wrapper around `InstagramProfileData`
 */
type InstagramProfileResponse = ApiSuccessResponse<InstagramProfileData>;
|
|
632
|
+
/**
 * Options for scraping Instagram content
 */
interface InstagramContentOptions {
    /** Page load timeout in milliseconds (default: 30000) */
    timeout?: number;
}
|
|
639
|
+
/**
 * Parameters for the Instagram content endpoint
 */
interface InstagramContentParams {
    /** Post shortcode or full URL */
    shortcode: string;
    /** Options */
    options?: InstagramContentOptions;
}
|
|
648
|
+
/**
 * Instagram content owner information
 */
interface InstagramContentOwner {
    id: string;
    username: string;
    full_name: string;
    profile_pic_url: string;
    is_verified: boolean;
}
|
|
658
|
+
/**
 * Instagram audio information (for reels)
 */
interface InstagramAudioInfo {
    title: string | null;
    artist_username: string | null;
    is_original: boolean;
}
|
|
666
|
+
/**
 * Instagram carousel media item
 */
interface InstagramCarouselItem {
    id: string;
    media_type: string;
    display_url: string;
    video_url: string | null;
}
|
|
675
|
+
/**
 * Instagram post/reel/video content data
 */
interface InstagramContent {
    id: string;
    shortcode: string;
    taken_at: number;
    media_type: string;
    product_type: string;
    width: number;
    height: number;
    like_count: number;
    comment_count: number;
    caption: string | null;
    has_audio: boolean;
    display_url: string;
    video_url: string | null;
    thumbnail_url: string;
    owner: InstagramContentOwner;
    audio_info: InstagramAudioInfo | null;
    carousel_media: InstagramCarouselItem[] | null;
}
|
|
697
|
+
/**
 * Data returned from the Instagram content endpoint
 */
interface InstagramContentData {
    /** Post/reel/video content */
    post: InstagramContent;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
708
|
+
/**
 * Response from the Instagram content endpoint: the success wrapper around `InstagramContentData`
 */
type InstagramContentResponse = ApiSuccessResponse<InstagramContentData>;
|
|
712
|
+
|
|
713
|
+
/**
 * Options for fetching Play Store reviews
 */
interface PlayStoreReviewsOptions {
    /** Language code (e.g., 'en', 'tr') */
    lang?: string;
    /** Request timeout in milliseconds (default: 30000) */
    timeout?: number;
}
|
|
722
|
+
/**
 * Parameters for the Play Store reviews endpoint
 */
interface PlayStoreReviewsParams {
    /** App ID (e.g., 'com.example.app') */
    appId: string;
    /** Pagination cursor from previous response */
    cursor?: string | null;
    /** Options */
    options?: PlayStoreReviewsOptions;
}
|
|
733
|
+
/**
 * Developer reply to a review
 */
interface DeveloperReply {
    author: string;
    text: string;
    date: number | null;
}
|
|
741
|
+
/**
 * Play Store review
 */
interface PlayStoreReview {
    id: string;
    username: string;
    userAvatar: string | null;
    rating: number;
    text: string;
    date: number | null;
    thumbsUp: number;
    developerReply: DeveloperReply | null;
    appVersion: string | null;
}
|
|
755
|
+
/**
 * Pagination information
 */
interface Pagination {
    nextCursor: string | null;
    hasMore: boolean;
}
|
|
762
|
+
/**
 * Data returned from the Play Store reviews endpoint
 */
interface PlayStoreReviewsData {
    appId: string;
    reviews: PlayStoreReview[];
    /** Pagination information */
    pagination: Pagination;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
775
|
+
/**
 * Response from the Play Store reviews endpoint: the success wrapper around `PlayStoreReviewsData`
 */
type PlayStoreReviewsResponse = ApiSuccessResponse<PlayStoreReviewsData>;
|
|
779
|
+
/**
 * Options for fetching Play Store app details
 */
interface PlayStoreDetailOptions {
    /** Language code (e.g., 'en', 'tr') */
    lang?: string;
    /** Request timeout in milliseconds (default: 30000) */
    timeout?: number;
}
|
|
788
|
+
/**
 * Parameters for the Play Store detail endpoint
 */
interface PlayStoreDetailParams {
    /** App ID (e.g., 'com.example.app') */
    appId: string;
    /** Options */
    options?: PlayStoreDetailOptions;
}
|
|
797
|
+
/**
 * Screenshot information
 */
interface Screenshot {
    url: string;
    width: number | null;
    height: number | null;
}
|
|
805
|
+
/**
 * Rating distribution (star counts), keyed by the star values 5 down to 1
 */
interface RatingDistribution {
    5: number;
    4: number;
    3: number;
    2: number;
    1: number;
}
|
|
815
|
+
/**
 * Developer information
 */
interface Developer {
    name: string;
    id: string | null;
    website: string | null;
    email: string | null;
    phone: string | null;
    address: string | null;
}
|
|
826
|
+
/**
 * App permission
 */
interface Permission {
    name: string;
    icon: string | null;
    details: string[];
}
|
|
834
|
+
/**
 * Data safety information
 */
interface DataSafety {
    sharedData: string[] | null;
    collectedData: string[] | null;
    encrypted: boolean;
    deletable: boolean;
}
|
|
843
|
+
/**
 * Data returned from the Play Store detail endpoint
 */
interface PlayStoreDetailData {
    appId: string;
    appName: string;
    icon: string | null;
    summary: string | null;
    description: string | null;
    screenshots: Screenshot[];
    category: string | null;
    categoryId: string | null;
    rating: number | null;
    ratingCount: number | null;
    reviewCount: number | null;
    /** Rating distribution (star counts) */
    ratingDistribution: RatingDistribution | null;
    installs: string | null;
    installsExact: number | null;
    free: boolean;
    price: string | null;
    currency: string | null;
    contentRating: string | null;
    contentRatingDescription: string | null;
    /** Developer information */
    developer: Developer;
    releaseDate: number | null;
    lastUpdate: number | null;
    version: string | null;
    androidVersion: string | null;
    whatsNew: string | null;
    permissions: Permission[];
    /** Data safety information */
    dataSafety: DataSafety | null;
    privacyPolicy: string | null;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
881
|
+
/**
 * Response from the Play Store detail endpoint: the success wrapper around `PlayStoreDetailData`
 */
type PlayStoreDetailResponse = ApiSuccessResponse<PlayStoreDetailData>;
|
|
885
|
+
|
|
886
|
+
/**
 * Options for fetching App Store reviews
 */
interface AppStoreReviewsOptions {
    /** Language code (e.g., 'en', 'tr') */
    lang?: string;
    /** Request timeout in milliseconds (default: 30000) */
    timeout?: number;
}
|
|
895
|
+
/**
 * Parameters for the App Store reviews endpoint
 */
interface AppStoreReviewsParams {
    /** App ID (numeric ID from App Store URL) */
    appId: string;
    /** Pagination cursor from previous response */
    cursor?: string | null;
    /** Options */
    options?: AppStoreReviewsOptions;
}
|
|
906
|
+
/**
 * App Store review
 */
interface AppStoreReview {
    id: string;
    username: string;
    userAvatar: string | null;
    rating: number;
    title: string;
    text: string;
    date: number | null;
    isEdited: boolean;
    thumbsUp: number;
    developerReply: DeveloperReply | null;
    appVersion: string | null;
}
|
|
922
|
+
/**
 * Data returned from the App Store reviews endpoint
 */
interface AppStoreReviewsData {
    appId: string;
    reviews: AppStoreReview[];
    /** Pagination information */
    pagination: Pagination;
    /** Timing information */
    timing: {
        total: number;
    };
    /** Credits charged for this operation */
    creditsUsed: number;
    /** Remaining credits after operation */
    creditsRemaining: number;
}
|
|
935
|
+
/**
|
|
936
|
+
* Response from the App Store reviews endpoint
|
|
937
|
+
*/
|
|
938
|
+
type AppStoreReviewsResponse = ApiSuccessResponse<AppStoreReviewsData>;
|
|
939
|
+
|
|
940
|
+
/**
|
|
941
|
+
* LinkedIn scraping operations
|
|
942
|
+
* Provides company and person profile scraping
|
|
943
|
+
*/
|
|
944
|
+
declare class LinkedInResource extends BaseResource {
|
|
945
|
+
/**
|
|
946
|
+
* Scrape a LinkedIn company profile
|
|
947
|
+
*
|
|
948
|
+
* @param params - Company profile parameters
|
|
949
|
+
* @returns Company profile data including description, employees, jobs, and posts
|
|
950
|
+
* @throws {CrawlKitError} On API errors
|
|
951
|
+
*
|
|
952
|
+
* @example
|
|
953
|
+
* ```typescript
|
|
954
|
+
* const result = await crawlkit.linkedin.company({
|
|
955
|
+
* url: 'https://www.linkedin.com/company/openai',
|
|
956
|
+
* options: { includeJobs: true }
|
|
957
|
+
* });
|
|
958
|
+
* console.log(result.company.name);
|
|
959
|
+
* console.log(result.company.followers);
|
|
960
|
+
* console.log(result.company.jobs);
|
|
961
|
+
* ```
|
|
962
|
+
*
|
|
963
|
+
* @costs 1 credit
|
|
964
|
+
*/
|
|
965
|
+
company(params: LinkedInCompanyParams): Promise<LinkedInCompanyData>;
|
|
966
|
+
/**
|
|
967
|
+
* Scrape LinkedIn person profile(s)
|
|
968
|
+
*
|
|
969
|
+
* @param params - Person profile parameters (single URL or array of URLs, max 10)
|
|
970
|
+
* @returns Person profile data for each URL
|
|
971
|
+
* @throws {CrawlKitError} On API errors
|
|
972
|
+
*
|
|
973
|
+
* @example
|
|
974
|
+
* ```typescript
|
|
975
|
+
* // Single profile
|
|
976
|
+
* const result = await crawlkit.linkedin.person({
|
|
977
|
+
* url: 'https://www.linkedin.com/in/username'
|
|
978
|
+
* });
|
|
979
|
+
*
|
|
980
|
+
* // Multiple profiles (batch)
|
|
981
|
+
* const batchResult = await crawlkit.linkedin.person({
|
|
982
|
+
* url: [
|
|
983
|
+
* 'https://www.linkedin.com/in/user1',
|
|
984
|
+
* 'https://www.linkedin.com/in/user2'
|
|
985
|
+
* ]
|
|
986
|
+
* });
|
|
987
|
+
* console.log(`Success: ${batchResult.successCount}, Failed: ${batchResult.failedCount}`);
|
|
988
|
+
* ```
|
|
989
|
+
*
|
|
990
|
+
* @costs 3 credits per URL
|
|
991
|
+
*/
|
|
992
|
+
person(params: LinkedInPersonParams): Promise<LinkedInPersonData>;
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
/**
|
|
996
|
+
* Instagram scraping operations
|
|
997
|
+
* Provides profile and content (posts/reels) scraping
|
|
998
|
+
*/
|
|
999
|
+
declare class InstagramResource extends BaseResource {
|
|
1000
|
+
/**
|
|
1001
|
+
* Scrape an Instagram profile
|
|
1002
|
+
*
|
|
1003
|
+
* @param params - Profile parameters (username or URL)
|
|
1004
|
+
* @returns Profile data including bio, follower count, and recent posts
|
|
1005
|
+
* @throws {CrawlKitError} On API errors
|
|
1006
|
+
*
|
|
1007
|
+
* @example
|
|
1008
|
+
* ```typescript
|
|
1009
|
+
* const result = await crawlkit.instagram.profile({
|
|
1010
|
+
* username: 'instagram'
|
|
1011
|
+
* });
|
|
1012
|
+
* console.log(result.profile.full_name);
|
|
1013
|
+
* console.log(result.profile.follower_count);
|
|
1014
|
+
* console.log(result.profile.posts.length);
|
|
1015
|
+
* ```
|
|
1016
|
+
*
|
|
1017
|
+
* @costs 1 credit
|
|
1018
|
+
*/
|
|
1019
|
+
profile(params: InstagramProfileParams): Promise<InstagramProfileData>;
|
|
1020
|
+
/**
|
|
1021
|
+
* Scrape Instagram content (post, reel, or video)
|
|
1022
|
+
*
|
|
1023
|
+
* @param params - Content parameters (shortcode or full URL)
|
|
1024
|
+
* @returns Content data including media URLs, likes, comments, and owner info
|
|
1025
|
+
* @throws {CrawlKitError} On API errors
|
|
1026
|
+
*
|
|
1027
|
+
* @example
|
|
1028
|
+
* ```typescript
|
|
1029
|
+
* // Using shortcode
|
|
1030
|
+
* const result = await crawlkit.instagram.content({
|
|
1031
|
+
* shortcode: 'CxIIgCCq8mg'
|
|
1032
|
+
* });
|
|
1033
|
+
*
|
|
1034
|
+
* // Using full URL
|
|
1035
|
+
* const resultFromUrl = await crawlkit.instagram.content({
|
|
1036
|
+
* shortcode: 'https://www.instagram.com/p/CxIIgCCq8mg/'
|
|
1037
|
+
* });
|
|
1038
|
+
*
|
|
1039
|
+
* console.log(result.post.like_count);
|
|
1040
|
+
* console.log(result.post.video_url);
|
|
1041
|
+
* ```
|
|
1042
|
+
*
|
|
1043
|
+
* @costs 1 credit
|
|
1044
|
+
*/
|
|
1045
|
+
content(params: InstagramContentParams): Promise<InstagramContentData>;
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
/**
|
|
1049
|
+
* App store operations
|
|
1050
|
+
* Provides Google Play Store and Apple App Store data scraping
|
|
1051
|
+
*/
|
|
1052
|
+
declare class AppStoreResource extends BaseResource {
|
|
1053
|
+
/**
|
|
1054
|
+
* Fetch Google Play Store reviews for an app
|
|
1055
|
+
*
|
|
1056
|
+
* @param params - Reviews parameters including app ID and optional pagination cursor
|
|
1057
|
+
* @returns Reviews with pagination information
|
|
1058
|
+
* @throws {CrawlKitError} On API errors
|
|
1059
|
+
*
|
|
1060
|
+
* @example
|
|
1061
|
+
* ```typescript
|
|
1062
|
+
* // First page
|
|
1063
|
+
* const result = await crawlkit.appstore.playstoreReviews({
|
|
1064
|
+
* appId: 'com.example.app',
|
|
1065
|
+
* options: { lang: 'en' }
|
|
1066
|
+
* });
|
|
1067
|
+
*
|
|
1068
|
+
* // Next page
|
|
1069
|
+
* if (result.pagination.hasMore) {
|
|
1070
|
+
* const nextPage = await crawlkit.appstore.playstoreReviews({
|
|
1071
|
+
* appId: 'com.example.app',
|
|
1072
|
+
* cursor: result.pagination.nextCursor
|
|
1073
|
+
* });
|
|
1074
|
+
* }
|
|
1075
|
+
* ```
|
|
1076
|
+
*
|
|
1077
|
+
* @costs 1 credit per page
|
|
1078
|
+
*/
|
|
1079
|
+
playstoreReviews(params: PlayStoreReviewsParams): Promise<PlayStoreReviewsData>;
|
|
1080
|
+
/**
|
|
1081
|
+
* Fetch Google Play Store app details
|
|
1082
|
+
*
|
|
1083
|
+
* @param params - App detail parameters
|
|
1084
|
+
* @returns Comprehensive app information including ratings, screenshots, and permissions
|
|
1085
|
+
* @throws {CrawlKitError} On API errors
|
|
1086
|
+
*
|
|
1087
|
+
* @example
|
|
1088
|
+
* ```typescript
|
|
1089
|
+
* const result = await crawlkit.appstore.playstoreDetail({
|
|
1090
|
+
* appId: 'com.example.app',
|
|
1091
|
+
* options: { lang: 'en' }
|
|
1092
|
+
* });
|
|
1093
|
+
* console.log(result.appName);
|
|
1094
|
+
* console.log(result.rating);
|
|
1095
|
+
* console.log(result.installs);
|
|
1096
|
+
* ```
|
|
1097
|
+
*
|
|
1098
|
+
* @costs 1 credit
|
|
1099
|
+
*/
|
|
1100
|
+
playstoreDetail(params: PlayStoreDetailParams): Promise<PlayStoreDetailData>;
|
|
1101
|
+
/**
|
|
1102
|
+
* Fetch Apple App Store reviews for an app
|
|
1103
|
+
*
|
|
1104
|
+
* @param params - Reviews parameters including app ID and optional pagination cursor
|
|
1105
|
+
* @returns Reviews with pagination information
|
|
1106
|
+
* @throws {CrawlKitError} On API errors
|
|
1107
|
+
*
|
|
1108
|
+
* @example
|
|
1109
|
+
* ```typescript
|
|
1110
|
+
* // First page
|
|
1111
|
+
* const result = await crawlkit.appstore.appstoreReviews({
|
|
1112
|
+
* appId: '123456789',
|
|
1113
|
+
* options: { lang: 'en' }
|
|
1114
|
+
* });
|
|
1115
|
+
*
|
|
1116
|
+
* // Paginate through all reviews
|
|
1117
|
+
* let cursor = result.pagination.nextCursor;
|
|
1118
|
+
* while (cursor) {
|
|
1119
|
+
* const nextPage = await crawlkit.appstore.appstoreReviews({
|
|
1120
|
+
* appId: '123456789',
|
|
1121
|
+
* cursor
|
|
1122
|
+
* });
|
|
1123
|
+
* cursor = nextPage.pagination.nextCursor;
|
|
1124
|
+
* }
|
|
1125
|
+
* ```
|
|
1126
|
+
*
|
|
1127
|
+
* @costs 1 credit per page
|
|
1128
|
+
*/
|
|
1129
|
+
appstoreReviews(params: AppStoreReviewsParams): Promise<AppStoreReviewsData>;
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
/**
|
|
1133
|
+
* Configuration options for the CrawlKit client
|
|
1134
|
+
*/
|
|
1135
|
+
interface CrawlKitConfig {
|
|
1136
|
+
/**
|
|
1137
|
+
* API key for authentication
|
|
1138
|
+
* Must start with 'ck_' prefix
|
|
1139
|
+
* Get your API key at https://crawlkit.sh
|
|
1140
|
+
*/
|
|
1141
|
+
apiKey: string;
|
|
1142
|
+
/**
|
|
1143
|
+
* Base URL for the API
|
|
1144
|
+
* @default 'https://api.crawlkit.sh'
|
|
1145
|
+
*/
|
|
1146
|
+
baseUrl?: string;
|
|
1147
|
+
/**
|
|
1148
|
+
* Default timeout in milliseconds for all requests
|
|
1149
|
+
* @default 30000
|
|
1150
|
+
*/
|
|
1151
|
+
timeout?: number;
|
|
1152
|
+
/**
|
|
1153
|
+
* Custom fetch implementation
|
|
1154
|
+
* Useful for testing or environments without native fetch
|
|
1155
|
+
*/
|
|
1156
|
+
fetch?: typeof globalThis.fetch;
|
|
1157
|
+
}
|
|
1158
|
+
/**
|
|
1159
|
+
* CrawlKit SDK client for the web scraping API
|
|
1160
|
+
*
|
|
1161
|
+
* @example
|
|
1162
|
+
* ```typescript
|
|
1163
|
+
* import { CrawlKit } from '@crawlkit-sh/sdk';
|
|
1164
|
+
*
|
|
1165
|
+
* const crawlkit = new CrawlKit({ apiKey: 'ck_your_api_key' });
|
|
1166
|
+
*
|
|
1167
|
+
* // Scrape a webpage
|
|
1168
|
+
* const page = await crawlkit.scrape({ url: 'https://example.com' });
|
|
1169
|
+
* console.log(page.markdown);
|
|
1170
|
+
*
|
|
1171
|
+
* // Extract structured data with AI
|
|
1172
|
+
* const data = await crawlkit.extract({
|
|
1173
|
+
* url: 'https://example.com/product',
|
|
1174
|
+
* schema: {
|
|
1175
|
+
* type: 'object',
|
|
1176
|
+
* properties: {
|
|
1177
|
+
* name: { type: 'string' },
|
|
1178
|
+
* price: { type: 'number' }
|
|
1179
|
+
* }
|
|
1180
|
+
* }
|
|
1181
|
+
* });
|
|
1182
|
+
*
|
|
1183
|
+
* // Scrape social media
|
|
1184
|
+
* const company = await crawlkit.linkedin.company({
|
|
1185
|
+
* url: 'https://linkedin.com/company/openai'
|
|
1186
|
+
* });
|
|
1187
|
+
* ```
|
|
1188
|
+
*/
|
|
1189
|
+
declare class CrawlKit {
|
|
1190
|
+
private readonly config;
|
|
1191
|
+
private readonly crawl;
|
|
1192
|
+
/**
|
|
1193
|
+
* LinkedIn scraping operations
|
|
1194
|
+
* Provides company and person profile scraping
|
|
1195
|
+
*/
|
|
1196
|
+
readonly linkedin: LinkedInResource;
|
|
1197
|
+
/**
|
|
1198
|
+
* Instagram scraping operations
|
|
1199
|
+
* Provides profile and content scraping
|
|
1200
|
+
*/
|
|
1201
|
+
readonly instagram: InstagramResource;
|
|
1202
|
+
/**
|
|
1203
|
+
* App store operations
|
|
1204
|
+
* Provides Google Play Store and Apple App Store data
|
|
1205
|
+
*/
|
|
1206
|
+
readonly appstore: AppStoreResource;
|
|
1207
|
+
/**
|
|
1208
|
+
* Create a new CrawlKit client
|
|
1209
|
+
*
|
|
1210
|
+
* @param config - Client configuration
|
|
1211
|
+
* @throws {AuthenticationError} If API key is invalid or missing
|
|
1212
|
+
*
|
|
1213
|
+
* @example
|
|
1214
|
+
* ```typescript
|
|
1215
|
+
* const crawlkit = new CrawlKit({
|
|
1216
|
+
* apiKey: 'ck_your_api_key',
|
|
1217
|
+
* timeout: 60000 // 60 seconds
|
|
1218
|
+
* });
|
|
1219
|
+
* ```
|
|
1220
|
+
*/
|
|
1221
|
+
constructor(config: CrawlKitConfig);
|
|
1222
|
+
/**
|
|
1223
|
+
* Scrape a URL and return markdown, HTML, metadata, and links
|
|
1224
|
+
*
|
|
1225
|
+
* @param params - Scrape parameters
|
|
1226
|
+
* @returns Scraped page data including markdown, HTML, metadata, and links
|
|
1227
|
+
* @throws {CrawlKitError} On API errors
|
|
1228
|
+
*
|
|
1229
|
+
* @example
|
|
1230
|
+
* ```typescript
|
|
1231
|
+
* // Basic scraping
|
|
1232
|
+
* const result = await crawlkit.scrape({
|
|
1233
|
+
* url: 'https://example.com'
|
|
1234
|
+
* });
|
|
1235
|
+
* console.log(result.markdown);
|
|
1236
|
+
* console.log(result.metadata.title);
|
|
1237
|
+
*
|
|
1238
|
+
* // With browser automation
|
|
1239
|
+
* const spaResult = await crawlkit.scrape({
|
|
1240
|
+
* url: 'https://example.com/spa',
|
|
1241
|
+
* options: {
|
|
1242
|
+
* waitFor: '#content-loaded',
|
|
1243
|
+
* actions: [
|
|
1244
|
+
* { type: 'click', selector: '#load-more' },
|
|
1245
|
+
* { type: 'wait', milliseconds: 2000 }
|
|
1246
|
+
* ]
|
|
1247
|
+
* }
|
|
1248
|
+
* });
|
|
1249
|
+
* ```
|
|
1250
|
+
*
|
|
1251
|
+
* @costs 1 credit
|
|
1252
|
+
*/
|
|
1253
|
+
scrape(params: ScrapeParams): Promise<ScrapeData>;
|
|
1254
|
+
/**
|
|
1255
|
+
* Extract structured data from a URL using AI
|
|
1256
|
+
*
|
|
1257
|
+
* Uses an LLM to extract data according to the provided JSON schema.
|
|
1258
|
+
*
|
|
1259
|
+
* @param params - Extract parameters including JSON schema
|
|
1260
|
+
* @returns Extracted structured data along with page content
|
|
1261
|
+
* @throws {CrawlKitError} On API errors
|
|
1262
|
+
*
|
|
1263
|
+
* @example
|
|
1264
|
+
* ```typescript
|
|
1265
|
+
* interface Product {
|
|
1266
|
+
* name: string;
|
|
1267
|
+
* price: number;
|
|
1268
|
+
* description: string;
|
|
1269
|
+
* inStock: boolean;
|
|
1270
|
+
* }
|
|
1271
|
+
*
|
|
1272
|
+
* const result = await crawlkit.extract<Product>({
|
|
1273
|
+
* url: 'https://example.com/product/123',
|
|
1274
|
+
* schema: {
|
|
1275
|
+
* type: 'object',
|
|
1276
|
+
* properties: {
|
|
1277
|
+
* name: { type: 'string' },
|
|
1278
|
+
* price: { type: 'number' },
|
|
1279
|
+
* description: { type: 'string' },
|
|
1280
|
+
* inStock: { type: 'boolean' }
|
|
1281
|
+
* }
|
|
1282
|
+
* },
|
|
1283
|
+
* options: {
|
|
1284
|
+
* prompt: 'Extract product information from this page'
|
|
1285
|
+
* }
|
|
1286
|
+
* });
|
|
1287
|
+
*
|
|
1288
|
+
* // TypeScript knows result.json is Product
|
|
1289
|
+
* console.log(result.json.name);
|
|
1290
|
+
* console.log(result.json.price);
|
|
1291
|
+
* ```
|
|
1292
|
+
*
|
|
1293
|
+
* @costs 5 credits
|
|
1294
|
+
*/
|
|
1295
|
+
extract<T = unknown>(params: ExtractParams): Promise<ExtractData<T>>;
|
|
1296
|
+
/**
|
|
1297
|
+
* Perform a web search using DuckDuckGo
|
|
1298
|
+
*
|
|
1299
|
+
* @param params - Search parameters
|
|
1300
|
+
* @returns Search results with titles, URLs, and snippets
|
|
1301
|
+
* @throws {CrawlKitError} On API errors
|
|
1302
|
+
*
|
|
1303
|
+
* @example
|
|
1304
|
+
* ```typescript
|
|
1305
|
+
* const result = await crawlkit.search({
|
|
1306
|
+
* query: 'typescript best practices 2024',
|
|
1307
|
+
* options: {
|
|
1308
|
+
* maxResults: 20,
|
|
1309
|
+
* timeRange: 'm', // Past month
|
|
1310
|
+
* region: 'us-en'
|
|
1311
|
+
* }
|
|
1312
|
+
* });
|
|
1313
|
+
*
|
|
1314
|
+
* for (const item of result.results) {
|
|
1315
|
+
* console.log(`${item.position}. ${item.title}`);
|
|
1316
|
+
* console.log(` ${item.url}`);
|
|
1317
|
+
* console.log(` ${item.snippet}\n`);
|
|
1318
|
+
* }
|
|
1319
|
+
* ```
|
|
1320
|
+
*
|
|
1321
|
+
* @costs 1 credit per page (~10 results)
|
|
1322
|
+
*/
|
|
1323
|
+
search(params: SearchParams): Promise<SearchData>;
|
|
1324
|
+
/**
|
|
1325
|
+
* Take a full-page screenshot of a URL
|
|
1326
|
+
*
|
|
1327
|
+
* @param params - Screenshot parameters
|
|
1328
|
+
* @returns Public URL of the screenshot
|
|
1329
|
+
* @throws {CrawlKitError} On API errors
|
|
1330
|
+
*
|
|
1331
|
+
* @example
|
|
1332
|
+
* ```typescript
|
|
1333
|
+
* const result = await crawlkit.screenshot({
|
|
1334
|
+
* url: 'https://example.com',
|
|
1335
|
+
* options: {
|
|
1336
|
+
* width: 1920,
|
|
1337
|
+
* height: 1080,
|
|
1338
|
+
* waitForSelector: '#main-content'
|
|
1339
|
+
* }
|
|
1340
|
+
* });
|
|
1341
|
+
*
|
|
1342
|
+
* console.log('Screenshot URL:', result.url);
|
|
1343
|
+
* console.log(`Dimensions: ${result.width}x${result.height}`);
|
|
1344
|
+
* ```
|
|
1345
|
+
*
|
|
1346
|
+
* @costs 1 credit
|
|
1347
|
+
*/
|
|
1348
|
+
screenshot(params: ScreenshotParams): Promise<ScreenshotData>;
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
/**
|
|
1352
|
+
* Base error class for all CrawlKit errors
|
|
1353
|
+
*/
|
|
1354
|
+
declare class CrawlKitError extends Error {
|
|
1355
|
+
/** Error code from API */
|
|
1356
|
+
readonly code: ErrorCode;
|
|
1357
|
+
/** HTTP status code */
|
|
1358
|
+
readonly statusCode: number;
|
|
1359
|
+
/** Credits refunded if the operation failed */
|
|
1360
|
+
readonly creditsRefunded?: number;
|
|
1361
|
+
/** Remaining credits after the operation */
|
|
1362
|
+
readonly creditsRemaining?: number;
|
|
1363
|
+
constructor(code: ErrorCode, message: string, statusCode: number, creditsRefunded?: number, creditsRemaining?: number);
|
|
1364
|
+
}
|
|
1365
|
+
/**
|
|
1366
|
+
* Authentication error - invalid or missing API key
|
|
1367
|
+
*/
|
|
1368
|
+
declare class AuthenticationError extends CrawlKitError {
|
|
1369
|
+
constructor(message?: string);
|
|
1370
|
+
}
|
|
1371
|
+
/**
|
|
1372
|
+
* Insufficient credits to perform the operation
|
|
1373
|
+
*/
|
|
1374
|
+
declare class InsufficientCreditsError extends CrawlKitError {
|
|
1375
|
+
/** Credits required for the operation */
|
|
1376
|
+
readonly required?: number;
|
|
1377
|
+
/** Credits available */
|
|
1378
|
+
readonly available?: number;
|
|
1379
|
+
constructor(message: string, creditsRefunded?: number, creditsRemaining?: number);
|
|
1380
|
+
}
|
|
1381
|
+
/**
|
|
1382
|
+
* Validation error - invalid request parameters
|
|
1383
|
+
*/
|
|
1384
|
+
declare class ValidationError extends CrawlKitError {
|
|
1385
|
+
constructor(message: string);
|
|
1386
|
+
}
|
|
1387
|
+
/**
|
|
1388
|
+
* Rate limit exceeded
|
|
1389
|
+
*/
|
|
1390
|
+
declare class RateLimitError extends CrawlKitError {
|
|
1391
|
+
constructor(message?: string);
|
|
1392
|
+
}
|
|
1393
|
+
/**
|
|
1394
|
+
* Request timeout
|
|
1395
|
+
*/
|
|
1396
|
+
declare class TimeoutError extends CrawlKitError {
|
|
1397
|
+
constructor(message: string, creditsRefunded?: number, creditsRemaining?: number);
|
|
1398
|
+
}
|
|
1399
|
+
/**
|
|
1400
|
+
* Resource not found (404)
|
|
1401
|
+
*/
|
|
1402
|
+
declare class NotFoundError extends CrawlKitError {
|
|
1403
|
+
constructor(message?: string);
|
|
1404
|
+
}
|
|
1405
|
+
/**
|
|
1406
|
+
* Network or connection error
|
|
1407
|
+
*/
|
|
1408
|
+
declare class NetworkError extends CrawlKitError {
|
|
1409
|
+
constructor(code: ErrorCode, message: string, creditsRefunded?: number, creditsRemaining?: number);
|
|
1410
|
+
}
|
|
1411
|
+
/**
|
|
1412
|
+
* Create the appropriate error instance from an API response
|
|
1413
|
+
*/
|
|
1414
|
+
declare function createErrorFromResponse(code: string, message: string, statusCode: number, creditsRefunded?: number, creditsRemaining?: number): CrawlKitError;
|
|
1415
|
+
|
|
1416
|
+
export { type ActionResult, type ApiErrorResponse, type ApiResponse, type ApiSuccessResponse, type AppStoreReview, type AppStoreReviewsData, type AppStoreReviewsOptions, type AppStoreReviewsParams, type AppStoreReviewsResponse, AuthenticationError, type BrowserAction, type ClickAction, CrawlKit, type CrawlKitConfig, CrawlKitError, type CrawlStats, type CreditInfo, type DataSafety, type Developer, type DeveloperReply, type ErrorCode, type EvaluateAction, type ExtractData, type ExtractOptions, type ExtractParams, type ExtractResponse, type InstagramAudioInfo, type InstagramBioLink, type InstagramCarouselItem, type InstagramContent, type InstagramContentData, type InstagramContentOptions, type InstagramContentOwner, type InstagramContentParams, type InstagramContentResponse, type InstagramDimensions, type InstagramPost, type InstagramProfile, type InstagramProfileData, type InstagramProfileOptions, type InstagramProfileParams, type InstagramProfileResponse, InsufficientCreditsError, type LinkedInCompany, type LinkedInCompanyData, type LinkedInCompanyOptions, type LinkedInCompanyParams, type LinkedInCompanyResponse, type LinkedInEmployee, type LinkedInJob, type LinkedInPersonData, type LinkedInPersonParams, type LinkedInPersonResponse, type LinkedInPersonResult, type LinkedInPost, type LinkedInSimilarCompany, NetworkError, NotFoundError, type PageLinks, type PageMetadata, type Pagination, type Permission, type PlayStoreDetailData, type PlayStoreDetailOptions, type PlayStoreDetailParams, type PlayStoreDetailResponse, type PlayStoreReview, type PlayStoreReviewsData, type PlayStoreReviewsOptions, type PlayStoreReviewsParams, type PlayStoreReviewsResponse, type PressAction, RateLimitError, type RatingDistribution, type ScrapeData, type ScrapeOptions, type ScrapeParams, type ScrapeResponse, type Screenshot, type ScreenshotData, type ScreenshotOptions, type ScreenshotParams, type ScreenshotResponse, type ScrollAction, type SearchData, type SearchOptions, type SearchParams, 
type SearchResponse, type SearchResult, type TimeRange, TimeoutError, type Timing, type TypeAction, ValidationError, type WaitAction, createErrorFromResponse };
|