@upcrawl/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,266 @@
1
+ # Upcrawl
2
+
3
+ Official Node.js/Browser SDK for the [Upcrawl](https://upcrawl.dev) API. Extract data from any website with a single API call.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install upcrawl
9
+ ```
10
+
11
+ Or with yarn:
12
+
13
+ ```bash
14
+ yarn add upcrawl
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```typescript
20
+ import Upcrawl from 'upcrawl';
21
+
22
+ // Set your API key (get one at https://upcrawl.dev)
23
+ Upcrawl.setApiKey('uc-your-api-key');
24
+
25
+ // Scrape a webpage
26
+ const result = await Upcrawl.scrape({
27
+ url: 'https://example.com',
28
+ type: 'markdown'
29
+ });
30
+
31
+ console.log(result.markdown);
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ ### Setting API Key
37
+
38
+ The API key must be set before making any requests:
39
+
40
+ ```typescript
41
+ import Upcrawl from 'upcrawl';
42
+
43
+ Upcrawl.setApiKey('uc-your-api-key');
44
+ ```
45
+
46
+ Or using named imports:
47
+
48
+ ```typescript
49
+ import { setApiKey } from 'upcrawl';
50
+
51
+ setApiKey('uc-your-api-key');
52
+ ```
53
+
54
+ ### Scraping a Single URL
55
+
56
+ ```typescript
57
+ import Upcrawl from 'upcrawl';
58
+
59
+ Upcrawl.setApiKey('uc-your-api-key');
60
+
61
+ const result = await Upcrawl.scrape({
62
+ url: 'https://example.com',
63
+ type: 'markdown', // 'markdown' or 'html'
64
+ onlyMainContent: true, // Remove nav, ads, footers
65
+ extractMetadata: true // Get title, description, etc.
66
+ });
67
+
68
+ console.log(result.markdown);
69
+ console.log(result.metadata?.title);
70
+ ```
71
+
72
+ ### Batch Scraping
73
+
74
+ Scrape multiple URLs in a single request:
75
+
76
+ ```typescript
77
+ const result = await Upcrawl.batchScrape({
78
+ urls: [
79
+ 'https://example.com/page1',
80
+ 'https://example.com/page2',
81
+ // You can also pass detailed options per URL:
82
+ { url: 'https://example.com/page3', type: 'html' }
83
+ ],
84
+ type: 'markdown'
85
+ });
86
+
87
+ console.log(`Scraped ${result.successful} of ${result.total} pages`);
88
+
89
+ result.results.forEach(page => {
90
+ if (page.success) {
91
+ console.log(`${page.url}: ${page.markdown?.length} chars`);
92
+ } else {
93
+ console.log(`${page.url}: Failed - ${page.error}`);
94
+ }
95
+ });
96
+ ```
97
+
98
+ ### Web Search
99
+
100
+ Search the web and get structured results:
101
+
102
+ ```typescript
103
+ const result = await Upcrawl.search({
104
+ queries: ['latest AI news 2025'],
105
+ limit: 10,
106
+ location: 'US'
107
+ });
108
+
109
+ result.results.forEach(queryResult => {
110
+ console.log(`Query: ${queryResult.query}`);
111
+ queryResult.results.forEach(item => {
112
+ console.log(`- ${item.title}`);
113
+ console.log(` ${item.url}`);
114
+ });
115
+ });
116
+ ```
117
+
118
+ ### Domain Filtering
119
+
120
+ Filter search results by domain:
121
+
122
+ ```typescript
123
+ // Only include specific domains
124
+ const result = await Upcrawl.search({
125
+ queries: ['machine learning tutorials'],
126
+ includeDomains: ['medium.com', 'towardsdatascience.com']
127
+ });
128
+
129
+ // Or exclude domains
130
+ const result2 = await Upcrawl.search({
131
+ queries: ['javascript frameworks'],
132
+ excludeDomains: ['pinterest.com', 'quora.com']
133
+ });
134
+ ```
135
+
136
+ ### LLM Summarization
137
+
138
+ Ask the API to summarize scraped content:
139
+
140
+ ```typescript
141
+ const result = await Upcrawl.scrape({
142
+ url: 'https://example.com/product',
143
+ type: 'markdown',
144
+ summary: {
145
+ query: 'Extract the product name, price, and key features in JSON format'
146
+ }
147
+ });
148
+
149
+ console.log(result.content); // Summarized content
150
+ ```
151
+
152
+ ## Configuration
153
+
154
+ ### Custom Base URL
155
+
156
+ For self-hosted instances or testing:
157
+
158
+ ```typescript
159
+ Upcrawl.setBaseUrl('https://your-instance.com/v1');
160
+ ```
161
+
162
+ ### Request Timeout
163
+
164
+ Set a custom timeout (in milliseconds):
165
+
166
+ ```typescript
167
+ Upcrawl.setTimeout(60000); // 60 seconds
168
+ ```
169
+
170
+ ### Configure Multiple Options
171
+
172
+ ```typescript
173
+ Upcrawl.configure({
174
+ apiKey: 'uc-your-api-key',
175
+ baseUrl: 'https://api.upcrawl.dev/v1',
176
+ timeout: 120000
177
+ });
178
+ ```
179
+
180
+ ## Error Handling
181
+
182
+ The SDK throws `UpcrawlError` for API errors:
183
+
184
+ ```typescript
185
+ import Upcrawl, { UpcrawlError } from 'upcrawl';
186
+
187
+ try {
188
+ const result = await Upcrawl.scrape({ url: 'https://example.com' });
189
+ } catch (error) {
190
+ if (error instanceof UpcrawlError) {
191
+ console.error(`Error ${error.status}: ${error.message}`);
192
+ console.error(`Code: ${error.code}`);
193
+ }
194
+ }
195
+ ```
196
+
197
+ Common error codes:
198
+
199
+ | Status | Code | Description |
200
+ |--------|------|-------------|
201
+ | 401 | `UNAUTHORIZED` | Invalid or missing API key |
202
+ | 403 | `FORBIDDEN` | Access forbidden |
203
+ | 429 | `RATE_LIMIT_EXCEEDED` | Too many requests |
204
+ | 500 | `INTERNAL_ERROR` | Server error |
205
+
206
+ ## TypeScript Support
207
+
208
+ The SDK includes full TypeScript definitions:
209
+
210
+ ```typescript
211
+ import Upcrawl, {
212
+ ScrapeOptions,
213
+ ScrapeResponse,
214
+ SearchOptions,
215
+ SearchResponse,
216
+ BatchScrapeOptions,
217
+ BatchScrapeResponse
218
+ } from 'upcrawl';
219
+
220
+ const options: ScrapeOptions = {
221
+ url: 'https://example.com',
222
+ type: 'markdown'
223
+ };
224
+
225
+ const result: ScrapeResponse = await Upcrawl.scrape(options);
226
+ ```
227
+
228
+ ## API Reference
229
+
230
+ ### Methods
231
+
232
+ | Method | Description |
233
+ |--------|-------------|
234
+ | `Upcrawl.setApiKey(key)` | Set the API key globally |
235
+ | `Upcrawl.setBaseUrl(url)` | Set custom base URL |
236
+ | `Upcrawl.setTimeout(ms)` | Set request timeout |
237
+ | `Upcrawl.configure(config)` | Configure multiple options |
238
+ | `Upcrawl.scrape(options)` | Scrape a single URL |
239
+ | `Upcrawl.batchScrape(options)` | Scrape multiple URLs |
240
+ | `Upcrawl.search(options)` | Search the web |
241
+
242
+ ### Scrape Options
243
+
244
+ | Option | Type | Description |
245
+ |--------|------|-------------|
246
+ | `url` | `string` | URL to scrape (required) |
247
+ | `type` | `'markdown' \| 'html'` | Output format |
248
+ | `onlyMainContent` | `boolean` | Remove nav, ads, footers |
249
+ | `extractMetadata` | `boolean` | Extract page metadata |
250
+ | `timeoutMs` | `number` | Request timeout (1000-120000) |
251
+ | `waitUntil` | `string` | Page load strategy |
252
+ | `summary` | `{ query: string }` | LLM summarization |
253
+
254
+ ### Search Options
255
+
256
+ | Option | Type | Description |
257
+ |--------|------|-------------|
258
+ | `queries` | `string[]` | Search queries (1-20) |
259
+ | `limit` | `number` | Results per query (1-100) |
260
+ | `location` | `string` | Search location (e.g., 'US') |
261
+ | `includeDomains` | `string[]` | Only include these domains |
262
+ | `excludeDomains` | `string[]` | Exclude these domains |
263
+
264
+ ## License
265
+
266
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,373 @@
1
+ /**
2
+ * Upcrawl SDK Types
3
+ * Type definitions for all API requests and responses
4
+ */
5
+ interface UpcrawlConfig {
6
+ apiKey?: string;
7
+ baseUrl?: string;
8
+ timeout?: number;
9
+ }
10
+ interface SummaryQuery {
11
+ /** Query/instruction for content summarization */
12
+ query: string;
13
+ }
14
+ interface ScrapeOptions {
15
+ /** URL to scrape (required) */
16
+ url: string;
17
+ /** Output format: html or markdown. Defaults to "html" */
18
+ type?: 'html' | 'markdown';
19
+ /** Extract only main content (removes nav, ads, footers). Defaults to true */
20
+ onlyMainContent?: boolean;
21
+ /** Whether to extract page metadata */
22
+ extractMetadata?: boolean;
23
+ /** Summary query for LLM summarization */
24
+ summary?: SummaryQuery;
25
+ /** Custom timeout in milliseconds (1000-120000) */
26
+ timeoutMs?: number;
27
+ /** Wait strategy for page load */
28
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
29
+ }
30
+ interface ScrapeMetadata {
31
+ title?: string;
32
+ description?: string;
33
+ canonicalUrl?: string;
34
+ finalUrl?: string;
35
+ contentType?: string;
36
+ contentLength?: number;
37
+ }
38
+ interface ScrapeResponse {
39
+ /** Original URL that was scraped */
40
+ url: string;
41
+ /** Rendered HTML content (when type is html) */
42
+ html?: string | null;
43
+ /** Content converted to Markdown (when type is markdown) */
44
+ markdown?: string | null;
45
+ /** HTTP status code */
46
+ statusCode: number | null;
47
+ /** Whether scraping was successful */
48
+ success: boolean;
49
+ /** Error message if scraping failed */
50
+ error?: string;
51
+ /** ISO timestamp when scraping completed */
52
+ timestamp: string;
53
+ /** Time taken to load and render the page in milliseconds */
54
+ loadTimeMs: number;
55
+ /** Additional page metadata */
56
+ metadata?: ScrapeMetadata;
57
+ /** Number of retry attempts made */
58
+ retryCount: number;
59
+ /** Cost in USD for this scrape operation */
60
+ cost?: number;
61
+ /** Content after summarization (when summary query provided) */
62
+ content?: string | null;
63
+ }
64
+ interface BatchScrapeOptions {
65
+ /** Array of URLs to scrape (strings or detailed request objects) */
66
+ urls: (string | ScrapeOptions)[];
67
+ /** Output format: html or markdown */
68
+ type?: 'html' | 'markdown';
69
+ /** Extract only main content (removes nav, ads, footers) */
70
+ onlyMainContent?: boolean;
71
+ /** Summary query for LLM summarization */
72
+ summary?: SummaryQuery;
73
+ /** Global timeout for entire batch operation in milliseconds (10000-600000) */
74
+ batchTimeoutMs?: number;
75
+ /** Whether to stop on first error */
76
+ failFast?: boolean;
77
+ }
78
+ interface BatchScrapeResponse {
79
+ /** Array of scrape results */
80
+ results: ScrapeResponse[];
81
+ /** Total number of URLs processed */
82
+ total: number;
83
+ /** Number of successful scrapes */
84
+ successful: number;
85
+ /** Number of failed scrapes */
86
+ failed: number;
87
+ /** Total time taken for batch operation in milliseconds */
88
+ totalTimeMs: number;
89
+ /** Timestamp when batch operation completed */
90
+ timestamp: string;
91
+ /** Total cost in USD for all scrape operations */
92
+ cost?: number;
93
+ }
94
+ interface SearchOptions {
95
+ /** Array of search queries to execute (1-20) */
96
+ queries: string[];
97
+ /** Number of results per query (1-100). Defaults to 10 */
98
+ limit?: number;
99
+ /** Location for search (e.g., "IN", "US") */
100
+ location?: string;
101
+ /** Domains to include (will add site: to query) */
102
+ includeDomains?: string[];
103
+ /** Domains to exclude (will add -site: to query) */
104
+ excludeDomains?: string[];
105
+ }
106
+ interface SearchResultWeb {
107
+ /** URL of the search result */
108
+ url: string;
109
+ /** Title of the search result */
110
+ title: string;
111
+ /** Description/snippet of the search result */
112
+ description: string;
113
+ }
114
+ interface SearchResultItem {
115
+ /** The search query */
116
+ query: string;
117
+ /** Whether the search was successful */
118
+ success: boolean;
119
+ /** Parsed search result links */
120
+ results: SearchResultWeb[];
121
+ /** Error message if failed */
122
+ error?: string;
123
+ /** Time taken in milliseconds */
124
+ loadTimeMs?: number;
125
+ /** Cost in USD for this query */
126
+ cost?: number;
127
+ }
128
+ interface SearchResponse {
129
+ /** Array of search results per query */
130
+ results: SearchResultItem[];
131
+ /** Total number of queries */
132
+ total: number;
133
+ /** Number of successful searches */
134
+ successful: number;
135
+ /** Number of failed searches */
136
+ failed: number;
137
+ /** Total time in milliseconds */
138
+ totalTimeMs: number;
139
+ /** ISO timestamp */
140
+ timestamp: string;
141
+ /** Total cost in USD */
142
+ cost?: number;
143
+ }
144
+ interface UpcrawlErrorResponse {
145
+ error: {
146
+ code: string;
147
+ message: string;
148
+ };
149
+ statusCode?: number;
150
+ }
151
+ declare class UpcrawlError extends Error {
152
+ readonly status: number;
153
+ readonly code: string;
154
+ constructor(message: string, status: number, code?: string);
155
+ }
156
+
157
+ /**
158
+ * Upcrawl API Client
159
+ * Handles all HTTP communication with the Upcrawl API
160
+ */
161
+
162
+ /**
163
+ * Set the API key globally
164
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
165
+ */
166
+ declare function setApiKey(apiKey: string): void;
167
+ /**
168
+ * Set a custom base URL (useful for self-hosted or testing)
169
+ * @param baseUrl - Custom API base URL
170
+ */
171
+ declare function setBaseUrl(baseUrl: string): void;
172
+ /**
173
+ * Set request timeout in milliseconds
174
+ * @param timeout - Timeout in milliseconds
175
+ */
176
+ declare function setTimeout(timeout: number): void;
177
+ /**
178
+ * Configure multiple options at once
179
+ * @param config - Configuration object
180
+ */
181
+ declare function configure(config: UpcrawlConfig): void;
182
+ /**
183
+ * Get current configuration (for debugging)
184
+ */
185
+ declare function getConfig(): Omit<UpcrawlConfig, 'apiKey'> & {
186
+ apiKeySet: boolean;
187
+ };
188
+ /**
189
+ * Reset configuration to defaults
190
+ */
191
+ declare function resetConfig(): void;
192
+ /**
193
+ * Scrape a single URL
194
+ * @param options - Scrape options including the URL to scrape
195
+ * @returns Promise with scrape response
196
+ *
197
+ * @example
198
+ * ```typescript
199
+ * import { scrape, setApiKey } from 'upcrawl';
200
+ *
201
+ * setApiKey('uc-your-api-key');
202
+ *
203
+ * const result = await scrape({
204
+ * url: 'https://example.com',
205
+ * type: 'markdown',
206
+ * onlyMainContent: true
207
+ * });
208
+ *
209
+ * console.log(result.markdown);
210
+ * ```
211
+ */
212
+ declare function scrape(options: ScrapeOptions): Promise<ScrapeResponse>;
213
+ /**
214
+ * Scrape multiple URLs in a batch
215
+ * @param options - Batch scrape options including URLs to scrape
216
+ * @returns Promise with batch scrape response
217
+ *
218
+ * @example
219
+ * ```typescript
220
+ * import { batchScrape, setApiKey } from 'upcrawl';
221
+ *
222
+ * setApiKey('uc-your-api-key');
223
+ *
224
+ * const result = await batchScrape({
225
+ * urls: [
226
+ * 'https://example.com/page1',
227
+ * 'https://example.com/page2',
228
+ * { url: 'https://example.com/page3', type: 'html' }
229
+ * ],
230
+ * type: 'markdown'
231
+ * });
232
+ *
233
+ * console.log(`Scraped ${result.successful} of ${result.total} pages`);
234
+ * ```
235
+ */
236
+ declare function batchScrape(options: BatchScrapeOptions): Promise<BatchScrapeResponse>;
237
+ /**
238
+ * Search the web
239
+ * @param options - Search options including queries
240
+ * @returns Promise with search response
241
+ *
242
+ * @example
243
+ * ```typescript
244
+ * import { search, setApiKey } from 'upcrawl';
245
+ *
246
+ * setApiKey('uc-your-api-key');
247
+ *
248
+ * const result = await search({
249
+ * queries: ['latest AI news 2025'],
250
+ * limit: 10,
251
+ * location: 'US'
252
+ * });
253
+ *
254
+ * result.results.forEach(queryResult => {
255
+ * console.log(`Query: ${queryResult.query}`);
256
+ * queryResult.results.forEach(item => {
257
+ * console.log(`- ${item.title}: ${item.url}`);
258
+ * });
259
+ * });
260
+ * ```
261
+ */
262
+ declare function search(options: SearchOptions): Promise<SearchResponse>;
263
+
264
+ /**
265
+ * Upcrawl SDK
266
+ * Official Node.js/Browser SDK for the Upcrawl API
267
+ *
268
+ * @example
269
+ * ```typescript
270
+ * // Using the Upcrawl namespace (recommended)
271
+ * import Upcrawl from 'upcrawl';
272
+ *
273
+ * Upcrawl.setApiKey('uc-your-api-key');
274
+ *
275
+ * const result = await Upcrawl.scrape({
276
+ * url: 'https://example.com',
277
+ * type: 'markdown'
278
+ * });
279
+ * ```
280
+ *
281
+ * @example
282
+ * ```typescript
283
+ * // Using named imports
284
+ * import { setApiKey, scrape, search } from 'upcrawl';
285
+ *
286
+ * setApiKey('uc-your-api-key');
287
+ *
288
+ * const result = await scrape({ url: 'https://example.com' });
289
+ * ```
290
+ */
291
+
292
+ /**
293
+ * Upcrawl namespace object
294
+ * Provides a convenient way to access all SDK functionality
295
+ *
296
+ * @example
297
+ * ```typescript
298
+ * import Upcrawl from 'upcrawl';
299
+ *
300
+ * // Set API key globally
301
+ * Upcrawl.setApiKey('uc-your-api-key');
302
+ *
303
+ * // Scrape a single URL
304
+ * const page = await Upcrawl.scrape({
305
+ * url: 'https://example.com',
306
+ * type: 'markdown'
307
+ * });
308
+ *
309
+ * // Batch scrape multiple URLs
310
+ * const pages = await Upcrawl.batchScrape({
311
+ * urls: ['https://example.com/1', 'https://example.com/2']
312
+ * });
313
+ *
314
+ * // Search the web
315
+ * const results = await Upcrawl.search({
316
+ * queries: ['AI trends 2025']
317
+ * });
318
+ * ```
319
+ */
320
+ declare const Upcrawl: {
321
+ /**
322
+ * Set the API key globally
323
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
324
+ */
325
+ readonly setApiKey: typeof setApiKey;
326
+ /**
327
+ * Set a custom base URL (useful for self-hosted or testing)
328
+ * @param baseUrl - Custom API base URL
329
+ */
330
+ readonly setBaseUrl: typeof setBaseUrl;
331
+ /**
332
+ * Set request timeout in milliseconds
333
+ * @param timeout - Timeout in milliseconds
334
+ */
335
+ readonly setTimeout: typeof setTimeout;
336
+ /**
337
+ * Configure multiple options at once
338
+ * @param config - Configuration object
339
+ */
340
+ readonly configure: typeof configure;
341
+ /**
342
+ * Get current configuration (for debugging)
343
+ */
344
+ readonly getConfig: typeof getConfig;
345
+ /**
346
+ * Reset configuration to defaults
347
+ */
348
+ readonly resetConfig: typeof resetConfig;
349
+ /**
350
+ * Scrape a single URL
351
+ * @param options - Scrape options including the URL to scrape
352
+ * @returns Promise with scrape response
353
+ */
354
+ readonly scrape: typeof scrape;
355
+ /**
356
+ * Scrape multiple URLs in a batch
357
+ * @param options - Batch scrape options including URLs to scrape
358
+ * @returns Promise with batch scrape response
359
+ */
360
+ readonly batchScrape: typeof batchScrape;
361
+ /**
362
+ * Search the web
363
+ * @param options - Search options including queries
364
+ * @returns Promise with search response
365
+ */
366
+ readonly search: typeof search;
367
+ /**
368
+ * Error class for Upcrawl API errors
369
+ */
370
+ readonly UpcrawlError: typeof UpcrawlError;
371
+ };
372
+
373
+ export { type BatchScrapeOptions, type BatchScrapeResponse, type ScrapeMetadata, type ScrapeOptions, type ScrapeResponse, type SearchOptions, type SearchResponse, type SearchResultItem, type SearchResultWeb, type SummaryQuery, type UpcrawlConfig, UpcrawlError, type UpcrawlErrorResponse, batchScrape, configure, Upcrawl as default, getConfig, resetConfig, scrape, search, setApiKey, setBaseUrl, setTimeout };