@upcrawl/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,373 @@
1
+ /**
2
+ * Upcrawl SDK Types
3
+ * Type definitions for all API requests and responses
4
+ */
5
+ interface UpcrawlConfig {
6
+ apiKey?: string;
7
+ baseUrl?: string;
8
+ timeout?: number;
9
+ }
10
+ interface SummaryQuery {
11
+ /** Query/instruction for content summarization */
12
+ query: string;
13
+ }
14
+ interface ScrapeOptions {
15
+ /** URL to scrape (required) */
16
+ url: string;
17
+ /** Output format: html or markdown. Defaults to "html" */
18
+ type?: 'html' | 'markdown';
19
+ /** Extract only main content (removes nav, ads, footers). Defaults to true */
20
+ onlyMainContent?: boolean;
21
+ /** Whether to extract page metadata */
22
+ extractMetadata?: boolean;
23
+ /** Summary query for LLM summarization */
24
+ summary?: SummaryQuery;
25
+ /** Custom timeout in milliseconds (1000-120000) */
26
+ timeoutMs?: number;
27
+ /** Wait strategy for page load */
28
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
29
+ }
30
+ interface ScrapeMetadata {
31
+ title?: string;
32
+ description?: string;
33
+ canonicalUrl?: string;
34
+ finalUrl?: string;
35
+ contentType?: string;
36
+ contentLength?: number;
37
+ }
38
+ interface ScrapeResponse {
39
+ /** Original URL that was scraped */
40
+ url: string;
41
+ /** Rendered HTML content (when type is html) */
42
+ html?: string | null;
43
+ /** Content converted to Markdown (when type is markdown) */
44
+ markdown?: string | null;
45
+ /** HTTP status code */
46
+ statusCode: number | null;
47
+ /** Whether scraping was successful */
48
+ success: boolean;
49
+ /** Error message if scraping failed */
50
+ error?: string;
51
+ /** ISO timestamp when scraping completed */
52
+ timestamp: string;
53
+ /** Time taken to load and render the page in milliseconds */
54
+ loadTimeMs: number;
55
+ /** Additional page metadata */
56
+ metadata?: ScrapeMetadata;
57
+ /** Number of retry attempts made */
58
+ retryCount: number;
59
+ /** Cost in USD for this scrape operation */
60
+ cost?: number;
61
+ /** Content after summarization (when summary query provided) */
62
+ content?: string | null;
63
+ }
64
+ interface BatchScrapeOptions {
65
+ /** Array of URLs to scrape (strings or detailed request objects) */
66
+ urls: (string | ScrapeOptions)[];
67
+ /** Output format: html or markdown */
68
+ type?: 'html' | 'markdown';
69
+ /** Extract only main content (removes nav, ads, footers) */
70
+ onlyMainContent?: boolean;
71
+ /** Summary query for LLM summarization */
72
+ summary?: SummaryQuery;
73
+ /** Global timeout for entire batch operation in milliseconds (10000-600000) */
74
+ batchTimeoutMs?: number;
75
+ /** Whether to stop on first error */
76
+ failFast?: boolean;
77
+ }
78
+ interface BatchScrapeResponse {
79
+ /** Array of scrape results */
80
+ results: ScrapeResponse[];
81
+ /** Total number of URLs processed */
82
+ total: number;
83
+ /** Number of successful scrapes */
84
+ successful: number;
85
+ /** Number of failed scrapes */
86
+ failed: number;
87
+ /** Total time taken for batch operation in milliseconds */
88
+ totalTimeMs: number;
89
+ /** Timestamp when batch operation completed */
90
+ timestamp: string;
91
+ /** Total cost in USD for all scrape operations */
92
+ cost?: number;
93
+ }
94
+ interface SearchOptions {
95
+ /** Array of search queries to execute (1-20) */
96
+ queries: string[];
97
+ /** Number of results per query (1-100). Defaults to 10 */
98
+ limit?: number;
99
+ /** Location for search (e.g., "IN", "US") */
100
+ location?: string;
101
+ /** Domains to include (will add site: to query) */
102
+ includeDomains?: string[];
103
+ /** Domains to exclude (will add -site: to query) */
104
+ excludeDomains?: string[];
105
+ }
106
+ interface SearchResultWeb {
107
+ /** URL of the search result */
108
+ url: string;
109
+ /** Title of the search result */
110
+ title: string;
111
+ /** Description/snippet of the search result */
112
+ description: string;
113
+ }
114
+ interface SearchResultItem {
115
+ /** The search query */
116
+ query: string;
117
+ /** Whether the search was successful */
118
+ success: boolean;
119
+ /** Parsed search result links */
120
+ results: SearchResultWeb[];
121
+ /** Error message if failed */
122
+ error?: string;
123
+ /** Time taken in milliseconds */
124
+ loadTimeMs?: number;
125
+ /** Cost in USD for this query */
126
+ cost?: number;
127
+ }
128
+ interface SearchResponse {
129
+ /** Array of search results per query */
130
+ results: SearchResultItem[];
131
+ /** Total number of queries */
132
+ total: number;
133
+ /** Number of successful searches */
134
+ successful: number;
135
+ /** Number of failed searches */
136
+ failed: number;
137
+ /** Total time in milliseconds */
138
+ totalTimeMs: number;
139
+ /** ISO timestamp */
140
+ timestamp: string;
141
+ /** Total cost in USD */
142
+ cost?: number;
143
+ }
144
+ interface UpcrawlErrorResponse {
145
+ error: {
146
+ code: string;
147
+ message: string;
148
+ };
149
+ statusCode?: number;
150
+ }
151
+ declare class UpcrawlError extends Error {
152
+ readonly status: number;
153
+ readonly code: string;
154
+ constructor(message: string, status: number, code?: string);
155
+ }
156
+
157
+ /**
158
+ * Upcrawl API Client
159
+ * Handles all HTTP communication with the Upcrawl API
160
+ */
161
+
162
+ /**
163
+ * Set the API key globally
164
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
165
+ */
166
+ declare function setApiKey(apiKey: string): void;
167
+ /**
168
+ * Set a custom base URL (useful for self-hosted or testing)
169
+ * @param baseUrl - Custom API base URL
170
+ */
171
+ declare function setBaseUrl(baseUrl: string): void;
172
+ /**
173
+ * Set request timeout in milliseconds
174
+ * @param timeout - Timeout in milliseconds
175
+ */
176
+ declare function setTimeout(timeout: number): void;
177
+ /**
178
+ * Configure multiple options at once
179
+ * @param config - Configuration object
180
+ */
181
+ declare function configure(config: UpcrawlConfig): void;
182
+ /**
183
+ * Get current configuration (for debugging)
184
+ */
185
+ declare function getConfig(): Omit<UpcrawlConfig, 'apiKey'> & {
186
+ apiKeySet: boolean;
187
+ };
188
+ /**
189
+ * Reset configuration to defaults
190
+ */
191
+ declare function resetConfig(): void;
192
+ /**
193
+ * Scrape a single URL
194
+ * @param options - Scrape options including the URL to scrape
195
+ * @returns Promise with scrape response
196
+ *
197
+ * @example
198
+ * ```typescript
199
+ * import { scrape, setApiKey } from 'upcrawl';
200
+ *
201
+ * setApiKey('uc-your-api-key');
202
+ *
203
+ * const result = await scrape({
204
+ * url: 'https://example.com',
205
+ * type: 'markdown',
206
+ * onlyMainContent: true
207
+ * });
208
+ *
209
+ * console.log(result.markdown);
210
+ * ```
211
+ */
212
+ declare function scrape(options: ScrapeOptions): Promise<ScrapeResponse>;
213
+ /**
214
+ * Scrape multiple URLs in a batch
215
+ * @param options - Batch scrape options including URLs to scrape
216
+ * @returns Promise with batch scrape response
217
+ *
218
+ * @example
219
+ * ```typescript
220
+ * import { batchScrape, setApiKey } from 'upcrawl';
221
+ *
222
+ * setApiKey('uc-your-api-key');
223
+ *
224
+ * const result = await batchScrape({
225
+ * urls: [
226
+ * 'https://example.com/page1',
227
+ * 'https://example.com/page2',
228
+ * { url: 'https://example.com/page3', type: 'html' }
229
+ * ],
230
+ * type: 'markdown'
231
+ * });
232
+ *
233
+ * console.log(`Scraped ${result.successful} of ${result.total} pages`);
234
+ * ```
235
+ */
236
+ declare function batchScrape(options: BatchScrapeOptions): Promise<BatchScrapeResponse>;
237
+ /**
238
+ * Search the web
239
+ * @param options - Search options including queries
240
+ * @returns Promise with search response
241
+ *
242
+ * @example
243
+ * ```typescript
244
+ * import { search, setApiKey } from 'upcrawl';
245
+ *
246
+ * setApiKey('uc-your-api-key');
247
+ *
248
+ * const result = await search({
249
+ * queries: ['latest AI news 2025'],
250
+ * limit: 10,
251
+ * location: 'US'
252
+ * });
253
+ *
254
+ * result.results.forEach(queryResult => {
255
+ * console.log(`Query: ${queryResult.query}`);
256
+ * queryResult.results.forEach(item => {
257
+ * console.log(`- ${item.title}: ${item.url}`);
258
+ * });
259
+ * });
260
+ * ```
261
+ */
262
+ declare function search(options: SearchOptions): Promise<SearchResponse>;
263
+
264
+ /**
265
+ * Upcrawl SDK
266
+ * Official Node.js/Browser SDK for the Upcrawl API
267
+ *
268
+ * @example
269
+ * ```typescript
270
+ * // Using the Upcrawl namespace (recommended)
271
+ * import Upcrawl from 'upcrawl';
272
+ *
273
+ * Upcrawl.setApiKey('uc-your-api-key');
274
+ *
275
+ * const result = await Upcrawl.scrape({
276
+ * url: 'https://example.com',
277
+ * type: 'markdown'
278
+ * });
279
+ * ```
280
+ *
281
+ * @example
282
+ * ```typescript
283
+ * // Using named imports
284
+ * import { setApiKey, scrape, search } from 'upcrawl';
285
+ *
286
+ * setApiKey('uc-your-api-key');
287
+ *
288
+ * const result = await scrape({ url: 'https://example.com' });
289
+ * ```
290
+ */
291
+
292
+ /**
293
+ * Upcrawl namespace object
294
+ * Provides a convenient way to access all SDK functionality
295
+ *
296
+ * @example
297
+ * ```typescript
298
+ * import Upcrawl from 'upcrawl';
299
+ *
300
+ * // Set API key globally
301
+ * Upcrawl.setApiKey('uc-your-api-key');
302
+ *
303
+ * // Scrape a single URL
304
+ * const page = await Upcrawl.scrape({
305
+ * url: 'https://example.com',
306
+ * type: 'markdown'
307
+ * });
308
+ *
309
+ * // Batch scrape multiple URLs
310
+ * const pages = await Upcrawl.batchScrape({
311
+ * urls: ['https://example.com/1', 'https://example.com/2']
312
+ * });
313
+ *
314
+ * // Search the web
315
+ * const results = await Upcrawl.search({
316
+ * queries: ['AI trends 2025']
317
+ * });
318
+ * ```
319
+ */
320
+ declare const Upcrawl: {
321
+ /**
322
+ * Set the API key globally
323
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
324
+ */
325
+ readonly setApiKey: typeof setApiKey;
326
+ /**
327
+ * Set a custom base URL (useful for self-hosted or testing)
328
+ * @param baseUrl - Custom API base URL
329
+ */
330
+ readonly setBaseUrl: typeof setBaseUrl;
331
+ /**
332
+ * Set request timeout in milliseconds
333
+ * @param timeout - Timeout in milliseconds
334
+ */
335
+ readonly setTimeout: typeof setTimeout;
336
+ /**
337
+ * Configure multiple options at once
338
+ * @param config - Configuration object
339
+ */
340
+ readonly configure: typeof configure;
341
+ /**
342
+ * Get current configuration (for debugging)
343
+ */
344
+ readonly getConfig: typeof getConfig;
345
+ /**
346
+ * Reset configuration to defaults
347
+ */
348
+ readonly resetConfig: typeof resetConfig;
349
+ /**
350
+ * Scrape a single URL
351
+ * @param options - Scrape options including the URL to scrape
352
+ * @returns Promise with scrape response
353
+ */
354
+ readonly scrape: typeof scrape;
355
+ /**
356
+ * Scrape multiple URLs in a batch
357
+ * @param options - Batch scrape options including URLs to scrape
358
+ * @returns Promise with batch scrape response
359
+ */
360
+ readonly batchScrape: typeof batchScrape;
361
+ /**
362
+ * Search the web
363
+ * @param options - Search options including queries
364
+ * @returns Promise with search response
365
+ */
366
+ readonly search: typeof search;
367
+ /**
368
+ * Error class for Upcrawl API errors
369
+ */
370
+ readonly UpcrawlError: typeof UpcrawlError;
371
+ };
372
+
373
+ export { type BatchScrapeOptions, type BatchScrapeResponse, type ScrapeMetadata, type ScrapeOptions, type ScrapeResponse, type SearchOptions, type SearchResponse, type SearchResultItem, type SearchResultWeb, type SummaryQuery, type UpcrawlConfig, UpcrawlError, type UpcrawlErrorResponse, batchScrape, configure, Upcrawl as default, getConfig, resetConfig, scrape, search, setApiKey, setBaseUrl, setTimeout };
package/dist/index.js ADDED
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
// src/index.ts
// Registry of the module's public exports; populated with lazy getters via
// __export so the CommonJS export object mirrors the original ESM exports.
var index_exports = {};
__export(index_exports, {
  UpcrawlError: () => UpcrawlError,
  batchScrape: () => batchScrape,
  configure: () => configure,
  default: () => index_default,
  getConfig: () => getConfig,
  resetConfig: () => resetConfig,
  scrape: () => scrape,
  search: () => search,
  setApiKey: () => setApiKey,
  setBaseUrl: () => setBaseUrl,
  setTimeout: () => setTimeout
});
module.exports = __toCommonJS(index_exports);
46
+
47
+ // src/types.ts
48
+ var UpcrawlError = class extends Error {
49
+ constructor(message, status, code = "UNKNOWN_ERROR") {
50
+ super(message);
51
+ this.name = "UpcrawlError";
52
+ this.status = status;
53
+ this.code = code;
54
+ }
55
+ };
56
+
57
// src/client.ts
// axios is require()d and wrapped so it can be accessed ESM-style (.default).
var import_axios = __toESM(require("axios"));
var DEFAULT_BASE_URL = "https://api.upcrawl.dev/api/v1";
var DEFAULT_TIMEOUT = 12e4;
// Module-level mutable configuration shared by every SDK call.
var globalConfig = {
  apiKey: void 0,
  baseUrl: DEFAULT_BASE_URL,
  timeout: DEFAULT_TIMEOUT
};
// Build a fresh axios instance from the current configuration.
// Throws UpcrawlError(401, "API_KEY_NOT_SET") if no API key was registered,
// so every request path fails fast with a clear message.
function createClient() {
  if (!globalConfig.apiKey) {
    throw new UpcrawlError(
      "API key not set. Call Upcrawl.setApiKey(apiKey) before making requests.",
      401,
      "API_KEY_NOT_SET"
    );
  }
  return import_axios.default.create({
    baseURL: globalConfig.baseUrl,
    timeout: globalConfig.timeout,
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${globalConfig.apiKey}`
    }
  });
}
83
// Normalize any thrown value into an UpcrawlError and rethrow it.
// This function never returns normally.
function handleError(error) {
  if (import_axios.default.isAxiosError(error)) {
    const axiosError = error;
    // No response at all (network failure, timeout) is reported as 500.
    const status = axiosError.response?.status || 500;
    const data = axiosError.response?.data;
    // Prefer the API's structured { error: { code, message } } body.
    if (data?.error) {
      throw new UpcrawlError(data.error.message, status, data.error.code);
    }
    // Otherwise map well-known HTTP statuses to stable error codes.
    switch (status) {
      case 401:
        throw new UpcrawlError("Invalid or missing API key", 401, "UNAUTHORIZED");
      case 403:
        throw new UpcrawlError("Access forbidden", 403, "FORBIDDEN");
      case 404:
        throw new UpcrawlError("Resource not found", 404, "NOT_FOUND");
      case 429:
        throw new UpcrawlError("Rate limit exceeded", 429, "RATE_LIMIT_EXCEEDED");
      case 500:
        throw new UpcrawlError("Internal server error", 500, "INTERNAL_ERROR");
      case 503:
        throw new UpcrawlError("Service unavailable", 503, "SERVICE_UNAVAILABLE");
      default:
        throw new UpcrawlError(
          axiosError.message || "An unknown error occurred",
          status,
          "UNKNOWN_ERROR"
        );
    }
  }
  // Pass through errors the SDK already wrapped (e.g. from createClient).
  if (error instanceof UpcrawlError) {
    throw error;
  }
  // Anything else (including non-Error throws) becomes a generic 500.
  throw new UpcrawlError(
    error instanceof Error ? error.message : "An unknown error occurred",
    500,
    "UNKNOWN_ERROR"
  );
}
121
// Register the API key used for all subsequent requests.
function setApiKey(apiKey) {
  // Reject non-strings and empty strings alike.
  const valid = typeof apiKey === "string" && apiKey.length > 0;
  if (!valid) {
    throw new UpcrawlError("API key must be a non-empty string", 400, "INVALID_API_KEY");
  }
  globalConfig.apiKey = apiKey;
}
// Point the SDK at a different API endpoint (self-hosted or testing).
function setBaseUrl(baseUrl) {
  const valid = typeof baseUrl === "string" && baseUrl.length > 0;
  if (!valid) {
    throw new UpcrawlError("Base URL must be a non-empty string", 400, "INVALID_BASE_URL");
  }
  // Normalize away a single trailing slash so path joins stay predictable.
  globalConfig.baseUrl = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
}
133
// Set the per-request timeout in milliseconds (minimum 1000).
// NOTE: Number.isFinite also rejects NaN and ±Infinity, which the previous
// `typeof timeout !== "number" || timeout < 1e3` check let through
// (NaN < 1000 is false), silently corrupting the axios timeout setting.
function setTimeout(timeout) {
  if (typeof timeout !== "number" || !Number.isFinite(timeout) || timeout < 1e3) {
    throw new UpcrawlError("Timeout must be a number >= 1000ms", 400, "INVALID_TIMEOUT");
  }
  globalConfig.timeout = timeout;
}
139
// Apply several configuration options in one call.
// Uses explicit `!== undefined` checks (instead of truthiness) so that
// invalid-but-falsy values — an empty apiKey or a timeout of 0 — reach the
// setters and raise a clear UpcrawlError rather than being silently ignored.
function configure(config) {
  if (config.apiKey !== void 0) setApiKey(config.apiKey);
  if (config.baseUrl !== void 0) setBaseUrl(config.baseUrl);
  if (config.timeout !== void 0) setTimeout(config.timeout);
}
144
// Report the active configuration for debugging. The raw API key is never
// exposed — only a boolean indicating whether one has been set.
function getConfig() {
  const { apiKey, baseUrl, timeout } = globalConfig;
  return {
    apiKeySet: Boolean(apiKey),
    baseUrl,
    timeout
  };
}
// Drop the API key and restore the built-in endpoint and timeout.
function resetConfig() {
  globalConfig = {
    apiKey: undefined,
    baseUrl: DEFAULT_BASE_URL,
    timeout: DEFAULT_TIMEOUT
  };
}
158
// Shared POST helper: builds an authenticated client, sends the payload,
// and funnels every failure through handleError (which always throws an
// UpcrawlError). Consolidates the try/catch previously triplicated across
// scrape, batchScrape and search.
async function postRequest(path, payload) {
  try {
    const client = createClient();
    const response = await client.post(path, payload);
    return response.data;
  } catch (error) {
    handleError(error);
  }
}
/**
 * Scrape a single URL.
 * @param options - scrape options, including the URL to scrape
 * @returns the API's scrape response payload
 */
async function scrape(options) {
  return postRequest("/scrape/single", options);
}
/**
 * Scrape multiple URLs in one batch request.
 * @param options - batch scrape options, including the URLs to scrape
 * @returns the API's batch scrape response payload
 */
async function batchScrape(options) {
  return postRequest("/scrape/batch", options);
}
/**
 * Run one or more web searches.
 * @param options - search options, including the queries to execute
 * @returns the API's search response payload
 */
async function search(options) {
  return postRequest("/search", options);
}
185
+
186
// src/index.ts
// Namespace object: a single default-export entry point to the whole SDK.
var Upcrawl = {
  setApiKey,      // register the API key (keys start with 'uc-')
  setBaseUrl,     // override the API base URL
  setTimeout,     // set the per-request timeout (ms)
  configure,      // apply several options at once
  getConfig,      // inspect current configuration (key redacted)
  resetConfig,    // restore defaults
  scrape,         // scrape a single URL
  batchScrape,    // scrape many URLs in one call
  search,         // run web searches
  UpcrawlError    // error class thrown by the SDK
};
var index_default = Upcrawl;
240
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard makes this dead code at runtime; it exists only so
// static analyzers (node's cjs-module-lexer) can discover the named
// exports when this CJS bundle is imported from an ES module.
0 && (module.exports = {
  UpcrawlError,
  batchScrape,
  configure,
  getConfig,
  resetConfig,
  scrape,
  search,
  setApiKey,
  setBaseUrl,
  setTimeout
});