@crawlkit-sh/sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/LICENSE +21 -0
- package/README.md +386 -0
- package/dist/index.cjs +745 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +1416 -0
- package/dist/index.d.ts +1416 -0
- package/dist/index.js +734 -0
- package/dist/index.js.map +1 -0
- package/package.json +82 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// src/errors/index.ts
|
|
4
|
+
/**
 * Root of the SDK error hierarchy.
 *
 * Besides the usual `message`, each instance records a machine-readable
 * `code`, the HTTP-style `statusCode`, and the two credit counters that were
 * passed in (they are stored as-is, so they are `undefined` when omitted).
 *
 * @param code - Machine-readable error code
 * @param message - Human-readable description
 * @param statusCode - HTTP status associated with the failure
 * @param creditsRefunded - Credits refunded for the failed call, if supplied
 * @param creditsRemaining - Credits left after the call, if supplied
 */
class CrawlKitError extends Error {
  constructor(code, message, statusCode, creditsRefunded, creditsRemaining) {
    super(message);
    this.name = "CrawlKitError";
    Object.assign(this, { code, statusCode, creditsRefunded, creditsRemaining });
    // captureStackTrace is not available on every engine, so it is probed
    // before use; it is asked to hide this constructor's frame from the stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}
|
|
17
|
+
/**
 * Error for a missing or rejected API key; always carries status 401.
 *
 * NOTE(review): the error code passed to the base class is "VALIDATION_ERROR",
 * not an auth-specific code - confirm this is intentional before changing it,
 * since callers may compare against `err.code`.
 *
 * @param message - Description; defaults to "Invalid or missing API key"
 */
class AuthenticationError extends CrawlKitError {
  constructor(message = "Invalid or missing API key") {
    const statusCode = 401;
    super("VALIDATION_ERROR", message, statusCode);
    this.name = "AuthenticationError";
  }
}
|
|
23
|
+
/**
 * Error for a call that could not be paid for (code "INSUFFICIENT_CREDITS",
 * status 402).
 *
 * @param message - Human-readable description
 * @param creditsRefunded - Credits refunded for the failed call, if supplied
 * @param creditsRemaining - Credits left; also exposed as `available`
 */
class InsufficientCreditsError extends CrawlKitError {
  constructor(message, creditsRefunded, creditsRemaining) {
    const statusCode = 402;
    super("INSUFFICIENT_CREDITS", message, statusCode, creditsRefunded, creditsRemaining);
    this.name = "InsufficientCreditsError";
    // Convenience alias: same value as `creditsRemaining`.
    this.available = creditsRemaining;
  }
}
|
|
30
|
+
/**
 * Error for rejected request parameters (code "VALIDATION_ERROR", status 400).
 *
 * @param message - Human-readable description of what was invalid
 */
class ValidationError extends CrawlKitError {
  constructor(message) {
    const statusCode = 400;
    super("VALIDATION_ERROR", message, statusCode);
    this.name = "ValidationError";
  }
}
|
|
36
|
+
/**
 * Error for throttled requests (code "RATE_LIMITED", status 429).
 *
 * @param message - Description; defaults to "Rate limit exceeded"
 */
class RateLimitError extends CrawlKitError {
  constructor(message = "Rate limit exceeded") {
    const statusCode = 429;
    super("RATE_LIMITED", message, statusCode);
    this.name = "RateLimitError";
  }
}
|
|
42
|
+
/**
 * Error for operations that ran out of time (code "TIMEOUT", status 408).
 *
 * @param message - Human-readable description
 * @param creditsRefunded - Credits refunded for the failed call, if supplied
 * @param creditsRemaining - Credits left after the call, if supplied
 */
class TimeoutError extends CrawlKitError {
  constructor(message, creditsRefunded, creditsRemaining) {
    const statusCode = 408;
    super("TIMEOUT", message, statusCode, creditsRefunded, creditsRemaining);
    this.name = "TimeoutError";
  }
}
|
|
48
|
+
/**
 * Error for a missing resource (code "NOT_FOUND", status 404).
 *
 * @param message - Description; defaults to "Resource not found"
 */
class NotFoundError extends CrawlKitError {
  constructor(message = "Resource not found") {
    const statusCode = 404;
    super("NOT_FOUND", message, statusCode);
    this.name = "NotFoundError";
  }
}
|
|
54
|
+
/**
 * Error for network-level failures. The caller supplies the specific code;
 * the status is always 502.
 *
 * @param code - Machine-readable error code chosen by the caller
 * @param message - Human-readable description
 * @param creditsRefunded - Credits refunded for the failed call, if supplied
 * @param creditsRemaining - Credits left after the call, if supplied
 */
class NetworkError extends CrawlKitError {
  constructor(code, message, creditsRefunded, creditsRemaining) {
    const statusCode = 502;
    super(code, message, statusCode, creditsRefunded, creditsRemaining);
    this.name = "NetworkError";
  }
}
|
|
60
|
+
/**
 * Pick the most specific error class for a failed API response.
 *
 * The HTTP status is consulted first (401, 402, 429, 404); only when none of
 * those match is the error code inspected. Anything unrecognised becomes a
 * plain CrawlKitError carrying every argument unchanged.
 *
 * @param code - Machine-readable error code from the response
 * @param message - Human-readable error message from the response
 * @param statusCode - HTTP status of the response
 * @param creditsRefunded - Credits refunded, forwarded to classes that accept it
 * @param creditsRemaining - Credits left, forwarded to classes that accept it
 * @returns A new, unthrown error instance
 */
function createErrorFromResponse(code, message, statusCode, creditsRefunded, creditsRemaining) {
  // Status-based mapping takes precedence over the error code.
  if (statusCode === 401) {
    return new AuthenticationError(message);
  }
  if (statusCode === 402) {
    return new InsufficientCreditsError(message, creditsRefunded, creditsRemaining);
  }
  if (statusCode === 429) {
    return new RateLimitError(message);
  }
  if (statusCode === 404) {
    return new NotFoundError(message);
  }

  // Code-based mapping for every other status.
  if (code === "VALIDATION_ERROR") {
    return new ValidationError(message);
  }
  if (code === "INSUFFICIENT_CREDITS") {
    return new InsufficientCreditsError(message, creditsRefunded, creditsRemaining);
  }
  if (code === "TIMEOUT") {
    return new TimeoutError(message, creditsRefunded, creditsRemaining);
  }
  if (code === "RATE_LIMITED") {
    return new RateLimitError(message);
  }
  if (code === "NOT_FOUND") {
    return new NotFoundError(message);
  }

  const networkCodes = [
    "DNS_FAILED",
    "CONNECTION_REFUSED",
    "SSL_ERROR",
    "TOO_MANY_REDIRECTS",
    "PROXY_ERROR"
  ];
  if (networkCodes.includes(code)) {
    return new NetworkError(code, message, creditsRefunded, creditsRemaining);
  }

  return new CrawlKitError(code, message, statusCode, creditsRefunded, creditsRemaining);
}
|
|
99
|
+
|
|
100
|
+
// src/resources/base.ts
|
|
101
|
+
/**
 * Convert whatever a failed request threw into a CrawlKitError.
 *
 * CrawlKitError instances pass through untouched. An Error named "AbortError"
 * becomes a TimeoutError (code "TIMEOUT", status 408); everything else
 * becomes an "UNKNOWN"/500 error. The original value is kept on `cause`.
 *
 * @param error - The caught value
 * @param timeout - Configured timeout in milliseconds, used in the message
 * @returns The error to throw
 */
function toCrawlKitError(error, timeout) {
  if (error instanceof CrawlKitError) {
    return error;
  }
  let wrapped;
  if (error instanceof Error && error.name === "AbortError") {
    wrapped = new TimeoutError(`Request timed out after ${timeout}ms`);
  } else {
    const message = (error instanceof Error && error.message) || "An unknown error occurred";
    wrapped = new CrawlKitError("UNKNOWN", message, 500);
  }
  wrapped.cause = error;
  return wrapped;
}

/**
 * Send one request through `config.fetch` and unwrap the JSON envelope.
 *
 * @param config - Resource config; `apiKey`, `timeout` and `fetch` are read
 * @param method - "POST" or "GET"; only POST sends a JSON body and Content-Type
 * @param url - Fully built request URL
 * @param body - Value to JSON-encode as the POST body (ignored for GET)
 * @returns The `data` member of a response whose `success` is truthy
 * @throws {CrawlKitError} For every failure mode
 */
async function executeApiRequest(config, method, url, body) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), config.timeout);
  try {
    let response;
    try {
      const headers = {
        "Authorization": `ApiKey ${config.apiKey}`,
        "User-Agent": "@crawlkit/sdk"
      };
      const init = { method, headers, signal: controller.signal };
      if (method === "POST") {
        headers["Content-Type"] = "application/json";
        init.body = JSON.stringify(body);
      }
      response = await config.fetch(url, init);
    } finally {
      // The timer only guards the fetch call itself, so it is cancelled as
      // soon as fetch settles (or building the request throws).
      clearTimeout(timeoutId);
    }

    let json;
    try {
      json = await response.json();
    } catch {
      throw new CrawlKitError("PARSE_ERROR", "Failed to parse API response", response.status);
    }

    if (!json?.success) {
      // The body may be null or lack an `error` object (for example a
      // gateway error page that happens to be JSON). Fall back to generic
      // values instead of crashing so the real HTTP status is still reported.
      throw createErrorFromResponse(
        json?.error?.code ?? "UNKNOWN",
        json?.error?.message ?? `Request failed with status ${response.status}`,
        response.status,
        json?.creditsRefunded,
        json?.creditsRemaining
      );
    }
    return json.data;
  } catch (error) {
    throw toCrawlKitError(error, config.timeout);
  }
}

/**
 * Shared HTTP plumbing for the API resource classes.
 *
 * Holds the resource config and offers `post` / `get` helpers that add the
 * authorization header, enforce the configured timeout, and translate every
 * failure into a CrawlKitError (or subclass).
 */
class BaseResource {
  /**
   * @param config - Object providing `apiKey`, `baseUrl`, `timeout` and `fetch`
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Make a POST request to the API
   * @param endpoint - API endpoint path (e.g., '/v1/crawl/scrape')
   * @param body - Request body object, sent JSON-encoded
   * @returns Parsed response data
   * @throws {CrawlKitError} On API errors, timeouts, and transport failures
   */
  async post(endpoint, body) {
    const url = `${this.config.baseUrl}${endpoint}`;
    return executeApiRequest(this.config, "POST", url, body);
  }

  /**
   * Make a GET request to the API
   * @param endpoint - API endpoint path
   * @param params - Query parameters; entries whose value is `undefined` are skipped
   * @returns Parsed response data
   * @throws {CrawlKitError} On API errors, timeouts, and transport failures
   */
  async get(endpoint, params) {
    let url = `${this.config.baseUrl}${endpoint}`;
    if (params) {
      const searchParams = new URLSearchParams();
      for (const [key, value] of Object.entries(params)) {
        if (value !== undefined) {
          searchParams.append(key, String(value));
        }
      }
      const queryString = searchParams.toString();
      if (queryString) {
        url += `?${queryString}`;
      }
    }
    return executeApiRequest(this.config, "GET", url);
  }
}
|
|
248
|
+
|
|
249
|
+
// src/resources/crawl.ts
|
|
250
|
+
/**
 * General-purpose crawling operations. Every method forwards its argument
 * unchanged as the JSON body of a POST to one `/v1/crawl/*` endpoint and
 * resolves with the unwrapped response data.
 */
class CrawlResource extends BaseResource {
  /**
   * Scrape one URL; the result carries markdown, HTML, metadata, and links.
   *
   * @param params - Scrape parameters
   * @returns Scraped page data
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.scrape({
   *   url: 'https://example.com',
   *   options: { onlyMainContent: true, waitFor: '#content' }
   * });
   * console.log(result.markdown);
   * ```
   *
   * @costs 1 credit
   */
  async scrape(params) {
    const endpoint = "/v1/crawl/scrape";
    return this.post(endpoint, params);
  }

  /**
   * Use AI to pull structured data out of a URL according to a JSON schema.
   *
   * @param params - Extract parameters including JSON schema
   * @returns Extracted structured data along with page content
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * interface Product {
   *   name: string;
   *   price: number;
   * }
   *
   * const result = await crawlkit.extract<Product>({
   *   url: 'https://example.com/product',
   *   schema: {
   *     type: 'object',
   *     properties: {
   *       name: { type: 'string' },
   *       price: { type: 'number' }
   *     }
   *   }
   * });
   * console.log(result.json.name, result.json.price);
   * ```
   *
   * @costs 5 credits
   */
  async extract(params) {
    const endpoint = "/v1/crawl/extract";
    return this.post(endpoint, params);
  }

  /**
   * Run a web search (DuckDuckGo-backed).
   *
   * @param params - Search parameters
   * @returns Search results with titles, URLs, and snippets
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.search({
   *   query: 'typescript best practices',
   *   options: {
   *     maxResults: 10,
   *     timeRange: 'w' // Past week
   *   }
   * });
   * result.results.forEach(r => console.log(r.title, r.url));
   * ```
   *
   * @costs 1 credit per page (~10 results)
   */
  async search(params) {
    const endpoint = "/v1/crawl/search";
    return this.post(endpoint, params);
  }

  /**
   * Capture a full-page screenshot of a URL.
   *
   * @param params - Screenshot parameters
   * @returns Public URL of the screenshot
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.screenshot({
   *   url: 'https://example.com',
   *   options: { width: 1920, height: 1080, waitForSelector: '#content' }
   * });
   * console.log('Screenshot URL:', result.url);
   * ```
   *
   * @costs 1 credit
   */
  async screenshot(params) {
    const endpoint = "/v1/crawl/screenshot";
    return this.post(endpoint, params);
  }
}
|
|
357
|
+
|
|
358
|
+
// src/resources/linkedin.ts
|
|
359
|
+
/**
 * LinkedIn scraping operations. Each method POSTs its argument unchanged to
 * a `/v1/crawl/linkedin/*` endpoint and resolves with the response data.
 */
class LinkedInResource extends BaseResource {
  /**
   * Scrape a company profile from LinkedIn.
   *
   * @param params - Company profile parameters
   * @returns Company profile data including description, employees, jobs, posts
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.linkedin.company({
   *   url: 'https://www.linkedin.com/company/openai',
   *   options: { includeJobs: true }
   * });
   * console.log(result.company.name);
   * console.log(result.company.followers);
   * console.log(result.company.jobs);
   * ```
   *
   * @costs 1 credit
   */
  async company(params) {
    const endpoint = "/v1/crawl/linkedin/company";
    return this.post(endpoint, params);
  }

  /**
   * Scrape one or several LinkedIn person profiles.
   *
   * @param params - Person profile parameters (single URL or array of URLs, max 10)
   * @returns Person profile data for each URL
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * // Single profile
   * const result = await crawlkit.linkedin.person({
   *   url: 'https://www.linkedin.com/in/username'
   * });
   *
   * // Multiple profiles (batch)
   * const batchResult = await crawlkit.linkedin.person({
   *   url: [
   *     'https://www.linkedin.com/in/user1',
   *     'https://www.linkedin.com/in/user2'
   *   ]
   * });
   * console.log(`Success: ${batchResult.successCount}, Failed: ${batchResult.failedCount}`);
   * ```
   *
   * @costs 3 credits per URL
   */
  async person(params) {
    const endpoint = "/v1/crawl/linkedin/person";
    return this.post(endpoint, params);
  }
}
|
|
413
|
+
|
|
414
|
+
// src/resources/instagram.ts
|
|
415
|
+
/**
 * Instagram scraping operations. Each method POSTs its argument unchanged to
 * a `/v1/crawl/instagram/*` endpoint and resolves with the response data.
 */
class InstagramResource extends BaseResource {
  /**
   * Scrape an Instagram profile.
   *
   * @param params - Profile parameters (username or URL)
   * @returns Profile data including bio, follower count, and recent posts
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.instagram.profile({
   *   username: 'instagram'
   * });
   * console.log(result.profile.full_name);
   * console.log(result.profile.follower_count);
   * console.log(result.profile.posts.length);
   * ```
   *
   * @costs 1 credit
   */
  async profile(params) {
    const endpoint = "/v1/crawl/instagram/profile";
    return this.post(endpoint, params);
  }

  /**
   * Scrape a single piece of Instagram content (post, reel, or video).
   *
   * @param params - Content parameters (shortcode or full URL)
   * @returns Content data including media URLs, likes, comments, and owner info
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * // Using shortcode
   * const result = await crawlkit.instagram.content({
   *   shortcode: 'CxIIgCCq8mg'
   * });
   *
   * // Using full URL
   * const result = await crawlkit.instagram.content({
   *   shortcode: 'https://www.instagram.com/p/CxIIgCCq8mg/'
   * });
   *
   * console.log(result.post.like_count);
   * console.log(result.post.video_url);
   * ```
   *
   * @costs 1 credit
   */
  async content(params) {
    const endpoint = "/v1/crawl/instagram/content";
    return this.post(endpoint, params);
  }
}
|
|
467
|
+
|
|
468
|
+
// src/resources/appstore.ts
|
|
469
|
+
/**
 * App-store scraping operations for Google Play and the Apple App Store.
 * Each method POSTs its argument unchanged to one `/v1/crawl/*` endpoint and
 * resolves with the response data.
 */
class AppStoreResource extends BaseResource {
  /**
   * Fetch one page of Google Play Store reviews for an app.
   *
   * @param params - Reviews parameters including app ID and optional pagination cursor
   * @returns Reviews with pagination information
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * // First page
   * const result = await crawlkit.appstore.playstoreReviews({
   *   appId: 'com.example.app',
   *   options: { lang: 'en' }
   * });
   *
   * // Next page
   * if (result.pagination.hasMore) {
   *   const nextPage = await crawlkit.appstore.playstoreReviews({
   *     appId: 'com.example.app',
   *     cursor: result.pagination.nextCursor
   *   });
   * }
   * ```
   *
   * @costs 1 credit per page
   */
  async playstoreReviews(params) {
    const endpoint = "/v1/crawl/playstore/reviews";
    return this.post(endpoint, params);
  }

  /**
   * Fetch the Google Play Store detail record for an app.
   *
   * @param params - App detail parameters
   * @returns Comprehensive app information including ratings, screenshots, permissions
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.appstore.playstoreDetail({
   *   appId: 'com.example.app',
   *   options: { lang: 'en' }
   * });
   * console.log(result.appName);
   * console.log(result.rating);
   * console.log(result.installs);
   * ```
   *
   * @costs 1 credit
   */
  async playstoreDetail(params) {
    const endpoint = "/v1/crawl/playstore/detail";
    return this.post(endpoint, params);
  }

  /**
   * Fetch one page of Apple App Store reviews for an app.
   *
   * @param params - Reviews parameters including app ID and optional pagination cursor
   * @returns Reviews with pagination information
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * // First page
   * const result = await crawlkit.appstore.appstoreReviews({
   *   appId: '123456789',
   *   options: { lang: 'en' }
   * });
   *
   * // Paginate through all reviews
   * let cursor = result.pagination.nextCursor;
   * while (cursor) {
   *   const nextPage = await crawlkit.appstore.appstoreReviews({
   *     appId: '123456789',
   *     cursor
   *   });
   *   cursor = nextPage.pagination.nextCursor;
   * }
   * ```
   *
   * @costs 1 credit per page
   */
  async appstoreReviews(params) {
    const endpoint = "/v1/crawl/appstore/reviews";
    return this.post(endpoint, params);
  }
}
|
|
554
|
+
|
|
555
|
+
// src/client.ts
|
|
556
|
+
/**
 * Entry point of the SDK.
 *
 * Validates the configuration once, then exposes the API through four
 * resource objects (`crawl`, `linkedin`, `instagram`, `appstore`) plus
 * shortcut methods that delegate to `crawl`.
 */
class CrawlKit {
  /**
   * Create a new CrawlKit client
   *
   * @param config - Client configuration
   * @param config.apiKey - Required; must be a string starting with "ck_"
   * @param config.baseUrl - API origin; defaults to "https://api.crawlkit.sh".
   *   Trailing slashes are removed because endpoint paths start with "/".
   * @param config.timeout - Request timeout in milliseconds; defaults to 30000
   * @param config.fetch - fetch implementation; defaults to the global fetch
   * @throws {AuthenticationError} If API key is invalid or missing
   * @throws {TypeError} If no usable fetch implementation is available
   *
   * @example
   * ```typescript
   * const crawlkit = new CrawlKit({
   *   apiKey: 'ck_your_api_key',
   *   timeout: 60000 // 60 seconds
   * });
   * ```
   */
  constructor(config) {
    // `config?.` makes a missing config report the documented
    // AuthenticationError instead of a raw TypeError.
    const apiKey = config?.apiKey;
    if (!apiKey) {
      throw new AuthenticationError("API key is required");
    }
    if (typeof apiKey !== "string" || !apiKey.startsWith("ck_")) {
      throw new AuthenticationError(
        'Invalid API key format. API keys must start with "ck_"'
      );
    }

    const fetchImpl = config.fetch ?? globalThis.fetch?.bind(globalThis);
    if (typeof fetchImpl !== "function") {
      throw new TypeError(
        "No fetch implementation available. Pass `fetch` in the config or use a runtime that provides a global fetch."
      );
    }

    // Request URLs are built as `${baseUrl}${endpoint}` with endpoints that
    // begin with "/", so a trailing slash here would yield "//v1/...".
    let baseUrl = String(config.baseUrl ?? "https://api.crawlkit.sh");
    while (baseUrl.endsWith("/")) {
      baseUrl = baseUrl.slice(0, -1);
    }

    this.config = {
      apiKey,
      baseUrl,
      timeout: config.timeout ?? 3e4,
      fetch: fetchImpl
    };

    // Resources share one config object that is a copy of, not an alias to,
    // `this.config`.
    const resourceConfig = { ...this.config };
    this.crawl = new CrawlResource(resourceConfig);
    this.linkedin = new LinkedInResource(resourceConfig);
    this.instagram = new InstagramResource(resourceConfig);
    this.appstore = new AppStoreResource(resourceConfig);
  }

  /**
   * Scrape a URL and return markdown, HTML, metadata, and links.
   * Delegates to `this.crawl.scrape`.
   *
   * @param params - Scrape parameters
   * @returns Scraped page data including markdown, HTML, metadata, and links
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * // Basic scraping
   * const result = await crawlkit.scrape({
   *   url: 'https://example.com'
   * });
   * console.log(result.markdown);
   * console.log(result.metadata.title);
   *
   * // With browser automation
   * const spaResult = await crawlkit.scrape({
   *   url: 'https://example.com/spa',
   *   options: {
   *     waitFor: '#content-loaded',
   *     actions: [
   *       { type: 'click', selector: '#load-more' },
   *       { type: 'wait', milliseconds: 2000 }
   *     ]
   *   }
   * });
   * ```
   *
   * @costs 1 credit
   */
  async scrape(params) {
    return this.crawl.scrape(params);
  }

  /**
   * Extract structured data from a URL using AI.
   * Delegates to `this.crawl.extract`.
   *
   * Uses LLM to extract data according to the provided JSON schema.
   *
   * @param params - Extract parameters including JSON schema
   * @returns Extracted structured data along with page content
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * interface Product {
   *   name: string;
   *   price: number;
   *   description: string;
   *   inStock: boolean;
   * }
   *
   * const result = await crawlkit.extract<Product>({
   *   url: 'https://example.com/product/123',
   *   schema: {
   *     type: 'object',
   *     properties: {
   *       name: { type: 'string' },
   *       price: { type: 'number' },
   *       description: { type: 'string' },
   *       inStock: { type: 'boolean' }
   *     }
   *   },
   *   options: {
   *     prompt: 'Extract product information from this page'
   *   }
   * });
   *
   * // TypeScript knows result.json is Product
   * console.log(result.json.name);
   * console.log(result.json.price);
   * ```
   *
   * @costs 5 credits
   */
  async extract(params) {
    return this.crawl.extract(params);
  }

  /**
   * Perform a web search using DuckDuckGo.
   * Delegates to `this.crawl.search`.
   *
   * @param params - Search parameters
   * @returns Search results with titles, URLs, and snippets
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.search({
   *   query: 'typescript best practices 2024',
   *   options: {
   *     maxResults: 20,
   *     timeRange: 'm', // Past month
   *     region: 'us-en'
   *   }
   * });
   *
   * for (const item of result.results) {
   *   console.log(`${item.position}. ${item.title}`);
   *   console.log(`   ${item.url}`);
   *   console.log(`   ${item.snippet}\n`);
   * }
   * ```
   *
   * @costs 1 credit per page (~10 results)
   */
  async search(params) {
    return this.crawl.search(params);
  }

  /**
   * Take a full-page screenshot of a URL.
   * Delegates to `this.crawl.screenshot`.
   *
   * @param params - Screenshot parameters
   * @returns Public URL of the screenshot
   * @throws {CrawlKitError} On API errors
   *
   * @example
   * ```typescript
   * const result = await crawlkit.screenshot({
   *   url: 'https://example.com',
   *   options: {
   *     width: 1920,
   *     height: 1080,
   *     waitForSelector: '#main-content'
   *   }
   * });
   *
   * console.log('Screenshot URL:', result.url);
   * console.log(`Dimensions: ${result.width}x${result.height}`);
   * ```
   *
   * @costs 1 credit
   */
  async screenshot(params) {
    return this.crawl.screenshot(params);
  }
}
|
|
733
|
+
|
|
734
|
+
exports.AuthenticationError = AuthenticationError;
|
|
735
|
+
exports.CrawlKit = CrawlKit;
|
|
736
|
+
exports.CrawlKitError = CrawlKitError;
|
|
737
|
+
exports.InsufficientCreditsError = InsufficientCreditsError;
|
|
738
|
+
exports.NetworkError = NetworkError;
|
|
739
|
+
exports.NotFoundError = NotFoundError;
|
|
740
|
+
exports.RateLimitError = RateLimitError;
|
|
741
|
+
exports.TimeoutError = TimeoutError;
|
|
742
|
+
exports.ValidationError = ValidationError;
|
|
743
|
+
exports.createErrorFromResponse = createErrorFromResponse;
|
|
744
|
+
//# sourceMappingURL=index.cjs.map
|
|
745
|
+
//# sourceMappingURL=index.cjs.map
|