@upcrawl/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,373 @@
1
+ /**
2
+ * Upcrawl SDK Types
3
+ * Type definitions for all API requests and responses
4
+ */
5
+ interface UpcrawlConfig {
6
+ apiKey?: string;
7
+ baseUrl?: string;
8
+ timeout?: number;
9
+ }
10
+ interface SummaryQuery {
11
+ /** Query/instruction for content summarization */
12
+ query: string;
13
+ }
14
+ interface ScrapeOptions {
15
+ /** URL to scrape (required) */
16
+ url: string;
17
+ /** Output format: html or markdown. Defaults to "html" */
18
+ type?: 'html' | 'markdown';
19
+ /** Extract only main content (removes nav, ads, footers). Defaults to true */
20
+ onlyMainContent?: boolean;
21
+ /** Whether to extract page metadata */
22
+ extractMetadata?: boolean;
23
+ /** Summary query for LLM summarization */
24
+ summary?: SummaryQuery;
25
+ /** Custom timeout in milliseconds (1000-120000) */
26
+ timeoutMs?: number;
27
+ /** Wait strategy for page load */
28
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
29
+ }
30
+ interface ScrapeMetadata {
31
+ title?: string;
32
+ description?: string;
33
+ canonicalUrl?: string;
34
+ finalUrl?: string;
35
+ contentType?: string;
36
+ contentLength?: number;
37
+ }
38
+ interface ScrapeResponse {
39
+ /** Original URL that was scraped */
40
+ url: string;
41
+ /** Rendered HTML content (when type is html) */
42
+ html?: string | null;
43
+ /** Content converted to Markdown (when type is markdown) */
44
+ markdown?: string | null;
45
+ /** HTTP status code */
46
+ statusCode: number | null;
47
+ /** Whether scraping was successful */
48
+ success: boolean;
49
+ /** Error message if scraping failed */
50
+ error?: string;
51
+ /** ISO timestamp when scraping completed */
52
+ timestamp: string;
53
+ /** Time taken to load and render the page in milliseconds */
54
+ loadTimeMs: number;
55
+ /** Additional page metadata */
56
+ metadata?: ScrapeMetadata;
57
+ /** Number of retry attempts made */
58
+ retryCount: number;
59
+ /** Cost in USD for this scrape operation */
60
+ cost?: number;
61
+ /** Content after summarization (when summary query provided) */
62
+ content?: string | null;
63
+ }
64
+ interface BatchScrapeOptions {
65
+ /** Array of URLs to scrape (strings or detailed request objects) */
66
+ urls: (string | ScrapeOptions)[];
67
+ /** Output format: html or markdown */
68
+ type?: 'html' | 'markdown';
69
+ /** Extract only main content (removes nav, ads, footers) */
70
+ onlyMainContent?: boolean;
71
+ /** Summary query for LLM summarization */
72
+ summary?: SummaryQuery;
73
+ /** Global timeout for entire batch operation in milliseconds (10000-600000) */
74
+ batchTimeoutMs?: number;
75
+ /** Whether to stop on first error */
76
+ failFast?: boolean;
77
+ }
78
+ interface BatchScrapeResponse {
79
+ /** Array of scrape results */
80
+ results: ScrapeResponse[];
81
+ /** Total number of URLs processed */
82
+ total: number;
83
+ /** Number of successful scrapes */
84
+ successful: number;
85
+ /** Number of failed scrapes */
86
+ failed: number;
87
+ /** Total time taken for batch operation in milliseconds */
88
+ totalTimeMs: number;
89
+ /** Timestamp when batch operation completed */
90
+ timestamp: string;
91
+ /** Total cost in USD for all scrape operations */
92
+ cost?: number;
93
+ }
94
+ interface SearchOptions {
95
+ /** Array of search queries to execute (1-20) */
96
+ queries: string[];
97
+ /** Number of results per query (1-100). Defaults to 10 */
98
+ limit?: number;
99
+ /** Location for search (e.g., "IN", "US") */
100
+ location?: string;
101
+ /** Domains to include (will add site: to query) */
102
+ includeDomains?: string[];
103
+ /** Domains to exclude (will add -site: to query) */
104
+ excludeDomains?: string[];
105
+ }
106
+ interface SearchResultWeb {
107
+ /** URL of the search result */
108
+ url: string;
109
+ /** Title of the search result */
110
+ title: string;
111
+ /** Description/snippet of the search result */
112
+ description: string;
113
+ }
114
+ interface SearchResultItem {
115
+ /** The search query */
116
+ query: string;
117
+ /** Whether the search was successful */
118
+ success: boolean;
119
+ /** Parsed search result links */
120
+ results: SearchResultWeb[];
121
+ /** Error message if failed */
122
+ error?: string;
123
+ /** Time taken in milliseconds */
124
+ loadTimeMs?: number;
125
+ /** Cost in USD for this query */
126
+ cost?: number;
127
+ }
128
+ interface SearchResponse {
129
+ /** Array of search results per query */
130
+ results: SearchResultItem[];
131
+ /** Total number of queries */
132
+ total: number;
133
+ /** Number of successful searches */
134
+ successful: number;
135
+ /** Number of failed searches */
136
+ failed: number;
137
+ /** Total time in milliseconds */
138
+ totalTimeMs: number;
139
+ /** ISO timestamp */
140
+ timestamp: string;
141
+ /** Total cost in USD */
142
+ cost?: number;
143
+ }
144
+ interface UpcrawlErrorResponse {
145
+ error: {
146
+ code: string;
147
+ message: string;
148
+ };
149
+ statusCode?: number;
150
+ }
151
+ declare class UpcrawlError extends Error {
152
+ readonly status: number;
153
+ readonly code: string;
154
+ constructor(message: string, status: number, code?: string);
155
+ }
156
+
157
+ /**
158
+ * Upcrawl API Client
159
+ * Handles all HTTP communication with the Upcrawl API
160
+ */
161
+
162
+ /**
163
+ * Set the API key globally
164
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
165
+ */
166
+ declare function setApiKey(apiKey: string): void;
167
+ /**
168
+ * Set a custom base URL (useful for self-hosted or testing)
169
+ * @param baseUrl - Custom API base URL
170
+ */
171
+ declare function setBaseUrl(baseUrl: string): void;
172
+ /**
173
+ * Set request timeout in milliseconds
174
+ * @param timeout - Timeout in milliseconds
175
+ */
176
+ declare function setTimeout(timeout: number): void;
177
+ /**
178
+ * Configure multiple options at once
179
+ * @param config - Configuration object
180
+ */
181
+ declare function configure(config: UpcrawlConfig): void;
182
+ /**
183
+ * Get current configuration (for debugging)
184
+ */
185
+ declare function getConfig(): Omit<UpcrawlConfig, 'apiKey'> & {
186
+ apiKeySet: boolean;
187
+ };
188
+ /**
189
+ * Reset configuration to defaults
190
+ */
191
+ declare function resetConfig(): void;
192
+ /**
193
+ * Scrape a single URL
194
+ * @param options - Scrape options including the URL to scrape
195
+ * @returns Promise with scrape response
196
+ *
197
+ * @example
198
+ * ```typescript
199
+ * import { scrape, setApiKey } from 'upcrawl';
200
+ *
201
+ * setApiKey('uc-your-api-key');
202
+ *
203
+ * const result = await scrape({
204
+ * url: 'https://example.com',
205
+ * type: 'markdown',
206
+ * onlyMainContent: true
207
+ * });
208
+ *
209
+ * console.log(result.markdown);
210
+ * ```
211
+ */
212
+ declare function scrape(options: ScrapeOptions): Promise<ScrapeResponse>;
213
+ /**
214
+ * Scrape multiple URLs in a batch
215
+ * @param options - Batch scrape options including URLs to scrape
216
+ * @returns Promise with batch scrape response
217
+ *
218
+ * @example
219
+ * ```typescript
220
+ * import { batchScrape, setApiKey } from 'upcrawl';
221
+ *
222
+ * setApiKey('uc-your-api-key');
223
+ *
224
+ * const result = await batchScrape({
225
+ * urls: [
226
+ * 'https://example.com/page1',
227
+ * 'https://example.com/page2',
228
+ * { url: 'https://example.com/page3', type: 'html' }
229
+ * ],
230
+ * type: 'markdown'
231
+ * });
232
+ *
233
+ * console.log(`Scraped ${result.successful} of ${result.total} pages`);
234
+ * ```
235
+ */
236
+ declare function batchScrape(options: BatchScrapeOptions): Promise<BatchScrapeResponse>;
237
+ /**
238
+ * Search the web
239
+ * @param options - Search options including queries
240
+ * @returns Promise with search response
241
+ *
242
+ * @example
243
+ * ```typescript
244
+ * import { search, setApiKey } from 'upcrawl';
245
+ *
246
+ * setApiKey('uc-your-api-key');
247
+ *
248
+ * const result = await search({
249
+ * queries: ['latest AI news 2025'],
250
+ * limit: 10,
251
+ * location: 'US'
252
+ * });
253
+ *
254
+ * result.results.forEach(queryResult => {
255
+ * console.log(`Query: ${queryResult.query}`);
256
+ * queryResult.results.forEach(item => {
257
+ * console.log(`- ${item.title}: ${item.url}`);
258
+ * });
259
+ * });
260
+ * ```
261
+ */
262
+ declare function search(options: SearchOptions): Promise<SearchResponse>;
263
+
264
+ /**
265
+ * Upcrawl SDK
266
+ * Official Node.js/Browser SDK for the Upcrawl API
267
+ *
268
+ * @example
269
+ * ```typescript
270
+ * // Using the Upcrawl namespace (recommended)
271
+ * import Upcrawl from 'upcrawl';
272
+ *
273
+ * Upcrawl.setApiKey('uc-your-api-key');
274
+ *
275
+ * const result = await Upcrawl.scrape({
276
+ * url: 'https://example.com',
277
+ * type: 'markdown'
278
+ * });
279
+ * ```
280
+ *
281
+ * @example
282
+ * ```typescript
283
+ * // Using named imports
284
+ * import { setApiKey, scrape, search } from 'upcrawl';
285
+ *
286
+ * setApiKey('uc-your-api-key');
287
+ *
288
+ * const result = await scrape({ url: 'https://example.com' });
289
+ * ```
290
+ */
291
+
292
+ /**
293
+ * Upcrawl namespace object
294
+ * Provides a convenient way to access all SDK functionality
295
+ *
296
+ * @example
297
+ * ```typescript
298
+ * import Upcrawl from 'upcrawl';
299
+ *
300
+ * // Set API key globally
301
+ * Upcrawl.setApiKey('uc-your-api-key');
302
+ *
303
+ * // Scrape a single URL
304
+ * const page = await Upcrawl.scrape({
305
+ * url: 'https://example.com',
306
+ * type: 'markdown'
307
+ * });
308
+ *
309
+ * // Batch scrape multiple URLs
310
+ * const pages = await Upcrawl.batchScrape({
311
+ * urls: ['https://example.com/1', 'https://example.com/2']
312
+ * });
313
+ *
314
+ * // Search the web
315
+ * const results = await Upcrawl.search({
316
+ * queries: ['AI trends 2025']
317
+ * });
318
+ * ```
319
+ */
320
+ declare const Upcrawl: {
321
+ /**
322
+ * Set the API key globally
323
+ * @param apiKey - Your Upcrawl API key (starts with 'uc-')
324
+ */
325
+ readonly setApiKey: typeof setApiKey;
326
+ /**
327
+ * Set a custom base URL (useful for self-hosted or testing)
328
+ * @param baseUrl - Custom API base URL
329
+ */
330
+ readonly setBaseUrl: typeof setBaseUrl;
331
+ /**
332
+ * Set request timeout in milliseconds
333
+ * @param timeout - Timeout in milliseconds
334
+ */
335
+ readonly setTimeout: typeof setTimeout;
336
+ /**
337
+ * Configure multiple options at once
338
+ * @param config - Configuration object
339
+ */
340
+ readonly configure: typeof configure;
341
+ /**
342
+ * Get current configuration (for debugging)
343
+ */
344
+ readonly getConfig: typeof getConfig;
345
+ /**
346
+ * Reset configuration to defaults
347
+ */
348
+ readonly resetConfig: typeof resetConfig;
349
+ /**
350
+ * Scrape a single URL
351
+ * @param options - Scrape options including the URL to scrape
352
+ * @returns Promise with scrape response
353
+ */
354
+ readonly scrape: typeof scrape;
355
+ /**
356
+ * Scrape multiple URLs in a batch
357
+ * @param options - Batch scrape options including URLs to scrape
358
+ * @returns Promise with batch scrape response
359
+ */
360
+ readonly batchScrape: typeof batchScrape;
361
+ /**
362
+ * Search the web
363
+ * @param options - Search options including queries
364
+ * @returns Promise with search response
365
+ */
366
+ readonly search: typeof search;
367
+ /**
368
+ * Error class for Upcrawl API errors
369
+ */
370
+ readonly UpcrawlError: typeof UpcrawlError;
371
+ };
372
+
373
+ export { type BatchScrapeOptions, type BatchScrapeResponse, type ScrapeMetadata, type ScrapeOptions, type ScrapeResponse, type SearchOptions, type SearchResponse, type SearchResultItem, type SearchResultWeb, type SummaryQuery, type UpcrawlConfig, UpcrawlError, type UpcrawlErrorResponse, batchScrape, configure, Upcrawl as default, getConfig, resetConfig, scrape, search, setApiKey, setBaseUrl, setTimeout };
package/dist/index.js ADDED
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
// src/index.ts
// Registry of the module's public exports; populated with lazy getters via
// __export so the CommonJS export object mirrors the original ESM exports.
var index_exports = {};
__export(index_exports, {
  UpcrawlError: () => UpcrawlError,
  batchScrape: () => batchScrape,
  configure: () => configure,
  default: () => index_default,
  getConfig: () => getConfig,
  resetConfig: () => resetConfig,
  scrape: () => scrape,
  search: () => search,
  setApiKey: () => setApiKey,
  setBaseUrl: () => setBaseUrl,
  setTimeout: () => setTimeout
});
module.exports = __toCommonJS(index_exports);
46
+
47
+ // src/types.ts
48
+ var UpcrawlError = class extends Error {
49
+ constructor(message, status, code = "UNKNOWN_ERROR") {
50
+ super(message);
51
+ this.name = "UpcrawlError";
52
+ this.status = status;
53
+ this.code = code;
54
+ }
55
+ };
56
+
57
// src/client.ts
// axios is require()d and wrapped so it can be accessed ESM-style (.default).
var import_axios = __toESM(require("axios"));
var DEFAULT_BASE_URL = "https://api.upcrawl.dev/api/v1";
var DEFAULT_TIMEOUT = 12e4;
// Module-level mutable configuration shared by every SDK call.
var globalConfig = {
  apiKey: void 0,
  baseUrl: DEFAULT_BASE_URL,
  timeout: DEFAULT_TIMEOUT
};
// Build a fresh axios instance from the current configuration.
// Throws UpcrawlError(401, "API_KEY_NOT_SET") if no API key was registered,
// so every request path fails fast with a clear message.
function createClient() {
  if (!globalConfig.apiKey) {
    throw new UpcrawlError(
      "API key not set. Call Upcrawl.setApiKey(apiKey) before making requests.",
      401,
      "API_KEY_NOT_SET"
    );
  }
  return import_axios.default.create({
    baseURL: globalConfig.baseUrl,
    timeout: globalConfig.timeout,
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${globalConfig.apiKey}`
    }
  });
}
83
// Normalize any thrown value into an UpcrawlError and rethrow it.
// This function never returns normally.
function handleError(error) {
  if (import_axios.default.isAxiosError(error)) {
    const axiosError = error;
    // No response at all (network failure, timeout) is reported as 500.
    const status = axiosError.response?.status || 500;
    const data = axiosError.response?.data;
    // Prefer the API's structured { error: { code, message } } body.
    if (data?.error) {
      throw new UpcrawlError(data.error.message, status, data.error.code);
    }
    // Otherwise map well-known HTTP statuses to stable error codes.
    switch (status) {
      case 401:
        throw new UpcrawlError("Invalid or missing API key", 401, "UNAUTHORIZED");
      case 403:
        throw new UpcrawlError("Access forbidden", 403, "FORBIDDEN");
      case 404:
        throw new UpcrawlError("Resource not found", 404, "NOT_FOUND");
      case 429:
        throw new UpcrawlError("Rate limit exceeded", 429, "RATE_LIMIT_EXCEEDED");
      case 500:
        throw new UpcrawlError("Internal server error", 500, "INTERNAL_ERROR");
      case 503:
        throw new UpcrawlError("Service unavailable", 503, "SERVICE_UNAVAILABLE");
      default:
        throw new UpcrawlError(
          axiosError.message || "An unknown error occurred",
          status,
          "UNKNOWN_ERROR"
        );
    }
  }
  // Pass through errors the SDK already wrapped (e.g. from createClient).
  if (error instanceof UpcrawlError) {
    throw error;
  }
  // Anything else (including non-Error throws) becomes a generic 500.
  throw new UpcrawlError(
    error instanceof Error ? error.message : "An unknown error occurred",
    500,
    "UNKNOWN_ERROR"
  );
}
121
// Register the API key used for all subsequent requests.
function setApiKey(apiKey) {
  // Reject non-strings and empty strings alike.
  const valid = typeof apiKey === "string" && apiKey.length > 0;
  if (!valid) {
    throw new UpcrawlError("API key must be a non-empty string", 400, "INVALID_API_KEY");
  }
  globalConfig.apiKey = apiKey;
}
// Point the SDK at a different API endpoint (self-hosted or testing).
function setBaseUrl(baseUrl) {
  const valid = typeof baseUrl === "string" && baseUrl.length > 0;
  if (!valid) {
    throw new UpcrawlError("Base URL must be a non-empty string", 400, "INVALID_BASE_URL");
  }
  // Normalize away a single trailing slash so path joins stay predictable.
  globalConfig.baseUrl = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
}
133
// Set the per-request timeout in milliseconds (minimum 1000).
// NOTE: Number.isFinite also rejects NaN and ±Infinity, which the previous
// `typeof timeout !== "number" || timeout < 1e3` check let through
// (NaN < 1000 is false), silently corrupting the axios timeout setting.
function setTimeout(timeout) {
  if (typeof timeout !== "number" || !Number.isFinite(timeout) || timeout < 1e3) {
    throw new UpcrawlError("Timeout must be a number >= 1000ms", 400, "INVALID_TIMEOUT");
  }
  globalConfig.timeout = timeout;
}
139
// Apply several configuration options in one call.
// Uses explicit `!== undefined` checks (instead of truthiness) so that
// invalid-but-falsy values — an empty apiKey or a timeout of 0 — reach the
// setters and raise a clear UpcrawlError rather than being silently ignored.
function configure(config) {
  if (config.apiKey !== void 0) setApiKey(config.apiKey);
  if (config.baseUrl !== void 0) setBaseUrl(config.baseUrl);
  if (config.timeout !== void 0) setTimeout(config.timeout);
}
144
// Report the active configuration for debugging. The raw API key is never
// exposed — only a boolean indicating whether one has been set.
function getConfig() {
  const { apiKey, baseUrl, timeout } = globalConfig;
  return {
    apiKeySet: Boolean(apiKey),
    baseUrl,
    timeout
  };
}
// Drop the API key and restore the built-in endpoint and timeout.
function resetConfig() {
  globalConfig = {
    apiKey: undefined,
    baseUrl: DEFAULT_BASE_URL,
    timeout: DEFAULT_TIMEOUT
  };
}
158
// Shared POST helper: builds an authenticated client, sends the payload,
// and funnels every failure through handleError (which always throws an
// UpcrawlError). Consolidates the try/catch previously triplicated across
// scrape, batchScrape and search.
async function postRequest(path, payload) {
  try {
    const client = createClient();
    const response = await client.post(path, payload);
    return response.data;
  } catch (error) {
    handleError(error);
  }
}
/**
 * Scrape a single URL.
 * @param options - scrape options, including the URL to scrape
 * @returns the API's scrape response payload
 */
async function scrape(options) {
  return postRequest("/scrape/single", options);
}
/**
 * Scrape multiple URLs in one batch request.
 * @param options - batch scrape options, including the URLs to scrape
 * @returns the API's batch scrape response payload
 */
async function batchScrape(options) {
  return postRequest("/scrape/batch", options);
}
/**
 * Run one or more web searches.
 * @param options - search options, including the queries to execute
 * @returns the API's search response payload
 */
async function search(options) {
  return postRequest("/search", options);
}
185
+
186
// src/index.ts
// Namespace object: a single default-export entry point to the whole SDK.
var Upcrawl = {
  setApiKey,      // register the API key (keys start with 'uc-')
  setBaseUrl,     // override the API base URL
  setTimeout,     // set the per-request timeout (ms)
  configure,      // apply several options at once
  getConfig,      // inspect current configuration (key redacted)
  resetConfig,    // restore defaults
  scrape,         // scrape a single URL
  batchScrape,    // scrape many URLs in one call
  search,         // run web searches
  UpcrawlError    // error class thrown by the SDK
};
var index_default = Upcrawl;
240
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard makes this dead code at runtime; it exists only so
// static analyzers (node's cjs-module-lexer) can discover the named
// exports when this CJS bundle is imported from an ES module.
0 && (module.exports = {
  UpcrawlError,
  batchScrape,
  configure,
  getConfig,
  resetConfig,
  scrape,
  search,
  setApiKey,
  setBaseUrl,
  setTimeout
});