npm - ocr-ai - Versions diffs - 1.0.0 - Mend

ocr-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,320 @@
+# extracta-ai
+Multi-provider AI document extraction for Node.js. Extract text or structured JSON from documents using Gemini, OpenAI, Claude, Grok, or Vertex AI.
+## Installation
+```bash
+npm install extracta-ai
+```
+## Quick Start
+### Using Gemini
+```typescript
+import { ExtractaAI } from 'extracta-ai';
+const extracta = new ExtractaAI({
+  provider: 'gemini',
+  apiKey: 'YOUR_GEMINI_API_KEY',
+});
+const result = await extracta.extract('./invoice.png');
+if (result.success) {
+  const text = result.content;
+  console.log(text);
+}
+```
+### Using OpenAI
+```typescript
+import { ExtractaAI } from 'extracta-ai';
+const extracta = new ExtractaAI({
+  provider: 'openai',
+  apiKey: 'YOUR_OPENAI_API_KEY',
+});
+const result = await extracta.extract('./document.pdf');
+if (result.success) {
+  const text = result.content;
+  console.log(text);
+}
+```
+### From URL
+Extract directly from a URL:
+```typescript
+const result = await extracta.extract('https://example.com/invoice.png');
+if (result.success) {
+  console.log(result.content);
+}
+```
+### Custom Instructions
+You can provide custom instructions to guide the extraction:
+```typescript
+const result = await extracta.extract('./receipt.png', {
+  prompt: 'Extract only the total amount and date from this receipt',
+});
+if (result.success) {
+  console.log(result.content);
+  // Output: "Total: $154.06, Date: 11/02/2019"
+}
+```
+### Output Format
+By default, extraction returns text. You can also extract structured JSON:
+```typescript
+// Text output (default)
+const textResult = await extracta.extract('./invoice.png', {
+  format: 'text',
+});
+if (textResult.success) {
+  console.log(textResult.content); // string
+}
+// JSON output with schema
+const jsonResult = await extracta.extract('./invoice.png', {
+  format: 'json',
+  schema: {
+    invoice_number: 'string',
+    date: 'string',
+    total: 'number',
+    items: [{ name: 'string', quantity: 'number', price: 'number' }],
+  },
+});
+if (jsonResult.success) {
+  console.log(jsonResult.data); // { invoice_number: "US-001", date: "11/02/2019", total: 154.06, items: [...] }
+}
+```
+### JSON Schema
+The schema defines the structure of the data you want to extract. Use a simple object where keys are field names and values are types:
+**Basic types:**
+- `'string'` - Text values
+- `'number'` - Numeric values
+- `'boolean'` - True/false values
+**Nested objects:**
+```typescript
+const schema = {
+  company: {
+    name: 'string',
+    address: 'string',
+    phone: 'string',
+  },
+  customer: {
+    name: 'string',
+    email: 'string',
+  },
+};
+```
+**Arrays:**
+```typescript
+const schema = {
+  // Array of objects
+  items: [
+    {
+      description: 'string',
+      quantity: 'number',
+      unit_price: 'number',
+      total: 'number',
+    },
+  ],
+  // Simple array
+  tags: ['string'],
+};
+```
+**Complete example (invoice):**
+```typescript
+const invoiceSchema = {
+  invoice_number: 'string',
+  date: 'string',
+  due_date: 'string',
+  company: {
+    name: 'string',
+    address: 'string',
+    phone: 'string',
+    email: 'string',
+  },
+  bill_to: {
+    name: 'string',
+    address: 'string',
+  },
+  items: [
+    {
+      description: 'string',
+      quantity: 'number',
+      unit_price: 'number',
+      total: 'number',
+    },
+  ],
+  subtotal: 'number',
+  tax: 'number',
+  total: 'number',
+};
+const result = await extracta.extract('./invoice.png', {
+  format: 'json',
+  schema: invoiceSchema,
+  prompt: 'Extract all invoice data from this document.',
+});
+```
+### Model Configuration
+You can pass model-specific parameters like temperature, max tokens, and more:
+```typescript
+// Gemini with model config
+const result = await extracta.extract('./invoice.png', {
+  modelConfig: {
+    temperature: 0.2,
+    maxTokens: 4096,
+    topP: 0.8,
+    topK: 40,
+  },
+});
+// OpenAI with model config
+const result = await extracta.extract('./invoice.png', {
+  modelConfig: {
+    temperature: 0,
+    maxTokens: 2048,
+    topP: 1,
+  },
+});
+```
+Available options:
+| Option | Description | Supported Providers |
+|--------|-------------|---------------------|
+| temperature | Controls randomness (0.0-1.0+) | All |
+| maxTokens | Maximum tokens to generate | All |
+| topP | Nucleus sampling | All |
+| topK | Top-k sampling | Gemini, Claude, Vertex |
+| stopSequences | Stop generation at these strings | All |
+### Token Usage
+Access token usage information from the metadata:
+```typescript
+const result = await extracta.extract('./invoice.png');
+if (result.success) {
+  console.log(result.content);
+  // Access metadata
+  console.log(result.metadata.processingTimeMs); // 2351
+  console.log(result.metadata.tokens?.inputTokens); // 1855
+  console.log(result.metadata.tokens?.outputTokens); // 260
+  console.log(result.metadata.tokens?.totalTokens); // 2115
+}
+```
+## Supported Providers
+| Provider | Default Model | Auth |
+|----------|---------------|------|
+| gemini   | gemini-1.5-flash | API Key |
+| openai   | gpt-4o | API Key |
+| claude   | claude-sonnet-4-20250514 | API Key |
+| grok     | grok-2-vision-1212 | API Key |
+| vertex   | gemini-2.0-flash | Google Cloud |
+> **Note:** For enterprise OCR needs, see the [Advanced: Vertex AI](#advanced-vertex-ai-google-cloud) section below.
+## Supported Inputs
+- **Local files**: `./invoice.png`, `./document.pdf`
+- **URLs**: `https://example.com/invoice.png`
+## Supported Files
+- **Images**: jpg, png, gif, webp
+- **Documents**: pdf
+- **Text**: txt, md, csv, json, xml, html
+## Advanced: Vertex AI (Google Cloud)
+The `vertex` provider enables access to Google Cloud's AI infrastructure, which is useful for enterprise scenarios requiring:
+- **Compliance**: Data residency and regulatory requirements
+- **Integration**: Native integration with Google Cloud services (BigQuery, Cloud Storage, etc.)
+- **Specialized OCR**: Access to Google's Document AI and Vision AI processors
+### Basic Setup
+Vertex AI uses Google Cloud authentication instead of API keys:
+```typescript
+import { ExtractaAI } from 'extracta-ai';
+const extracta = new ExtractaAI({
+  provider: 'vertex',
+  vertexConfig: {
+    project: 'your-gcp-project-id',
+    location: 'us-central1',
+  },
+});
+const result = await extracta.extract('./invoice.png');
+```
+**Requirements:**
+1. Install the [gcloud CLI](https://cloud.google.com/sdk/docs/install)
+2. Run `gcloud auth application-default login`
+3. Enable the Vertex AI API in your GCP project
+### When to Use Vertex AI vs Gemini API
+| Scenario | Recommended |
+|----------|-------------|
+| Quick prototyping | Gemini (API Key) |
+| Personal projects | Gemini (API Key) |
+| Enterprise/production | Vertex AI |
+| Data residency requirements | Vertex AI |
+| High-volume processing | Vertex AI |
+### Related Google Cloud OCR Services
+For specialized document processing beyond what Gemini models offer, Google Cloud provides dedicated OCR services:
+**[Document AI](https://cloud.google.com/document-ai)** - Optimized for structured documents:
+- Invoice Parser, Receipt Parser, Form Parser
+- W2, 1040, Bank Statement processors
+- Custom extractors for domain-specific documents
+- Higher accuracy for tables, forms, and handwritten text
+**[Vision API](https://cloud.google.com/vision/docs/ocr)** - Optimized for images:
+- Real-time OCR with low latency
+- 80+ language support
+- Handwriting detection
+- Simple integration, ~98% accuracy on clean documents
+These services are separate from extracta-ai but can complement it for enterprise document pipelines.
+## License
+MIT

package/dist/index.d.mts ADDED Viewed

@@ -0,0 +1,355 @@
+/**
+ * Supported AI providers.
+ *
+ * These identifiers are the accepted values of `ExtractaConfig.provider` and
+ * are reported back in `ExtractionMetadata.provider`.
+ */
+type AIProvider = 'gemini' | 'openai' | 'claude' | 'grok' | 'vertex';
+/**
+ * Output format for extraction: `'text'` for plain text, `'json'` for
+ * structured JSON. Selected through `ExtractionOptions.format`.
+ */
+type OutputFormat = 'text' | 'json';
+/**
+ * Supported file types, expressed as coarse categories rather than file
+ * extensions.
+ *
+ * The package README lists jpg/png/gif/webp as images, pdf as documents and
+ * txt/md/csv/json/xml/html as text; the extension-to-category mapping itself
+ * is not visible in this declaration file.
+ */
+type SupportedFileType = 'pdf' | 'image' | 'text';
+/**
+ * Configuration for a specific AI provider.
+ *
+ * NOTE(review): this interface is exported but not referenced by any other
+ * declaration in this file; `ExtractaConfig` carries its own `apiKey` and
+ * `model` fields.
+ */
+interface ProviderConfig {
+    apiKey: string; // required here, unlike the optional ExtractaConfig.apiKey
+    model?: string; // optional model name
+}
+/**
+ * Vertex AI specific configuration.
+ *
+ * Exported from this module under the public name `VertexConfig`. The `$1`
+ * suffix only distinguishes it from the structurally identical
+ * `VertexConfig` interface declared next to `VertexProvider` later in this
+ * file.
+ */
+interface VertexConfig$1 {
+    project: string; // the README example uses 'your-gcp-project-id'
+    location: string; // the README example uses 'us-central1'
+}
+/**
+ * Main configuration for ExtractaAI.
+ *
+ * Accepted by the `ExtractaAI` constructor and by `createExtractaAI`.
+ */
+interface ExtractaConfig {
+    provider: AIProvider; // which provider to use
+    // Optional in the type: the README states that 'vertex' uses Google Cloud
+    // authentication instead of an API key. Whether a missing key is rejected
+    // for the other providers is not visible in this declaration file.
+    apiKey?: string;
+    // NOTE(review): presumably falls back to the provider's default model
+    // (listed in the README) when omitted - confirm in the implementation.
+    model?: string;
+    /**
+     * Vertex AI configuration (required when provider is 'vertex')
+     */
+    vertexConfig?: VertexConfig$1;
+}
+/**
+ * Model-specific configuration parameters.
+ *
+ * Passed through `ExtractionOptions.modelConfig`. Every field is optional;
+ * the values applied when a field is omitted are not visible in this
+ * declaration file.
+ */
+interface ModelConfig {
+    /**
+     * Controls randomness (0.0 = deterministic, 1.0+ = more random)
+     */
+    temperature?: number;
+    /**
+     * Maximum tokens to generate in the response
+     */
+    maxTokens?: number;
+    /**
+     * Top-p (nucleus) sampling
+     */
+    topP?: number;
+    /**
+     * Top-k sampling.
+     *
+     * The README lists Gemini, Claude and Vertex as supporting this option.
+     * NOTE(review): this comment previously read "Gemini/Claude only" -
+     * confirm Vertex support against the provider implementation.
+     */
+    topK?: number;
+    /**
+     * Stop sequences to end generation
+     */
+    stopSequences?: string[];
+}
+/**
+ * Options for extraction.
+ *
+ * Accepted as the optional last argument of `ExtractaAI.extract`,
+ * `extractFromBuffer` and `extractFromBase64`, and forwarded to the
+ * provider methods declared on `IAIProvider`.
+ */
+interface ExtractionOptions {
+    /**
+     * Output format: 'text' for plain text, 'json' for structured JSON
+     */
+    format?: OutputFormat;
+    /**
+     * JSON schema to validate/structure the output (only for format: 'json')
+     * Can be a JSON Schema object or a simple object describing the structure,
+     * e.g. `{ total: 'number', tags: ['string'] }` as shown in the README.
+     */
+    schema?: Record<string, unknown>;
+    /**
+     * Custom prompt to guide the extraction
+     */
+    prompt?: string;
+    /**
+     * Language for extraction (default: 'auto')
+     */
+    language?: string;
+    /**
+     * Output file path (if you want to save to disk)
+     */
+    outputPath?: string;
+    /**
+     * Model-specific configuration (temperature, maxTokens, etc.)
+     */
+    modelConfig?: ModelConfig;
+}
+/**
+ * Result of text extraction.
+ *
+ * The literal `success` and `format` fields act as discriminants within the
+ * `ExtractionResult` union.
+ */
+interface TextExtractionResult {
+    success: true;
+    format: 'text';
+    content: string; // the extracted text
+    metadata: ExtractionMetadata;
+}
+/**
+ * Result of JSON extraction.
+ *
+ * `T` is the type of the extracted object and defaults to
+ * `Record<string, unknown>`. The literal `success` and `format` fields act
+ * as discriminants within the `ExtractionResult` union.
+ */
+interface JsonExtractionResult<T = Record<string, unknown>> {
+    success: true;
+    format: 'json';
+    data: T; // the structured payload (this variant has no `content` field)
+    metadata: ExtractionMetadata;
+}
+/**
+ * Error result.
+ *
+ * The `success: false` member of the `ExtractionResult` union. It carries no
+ * `metadata`.
+ */
+interface ExtractionError {
+    success: false;
+    error: string; // presumably a human-readable message - confirm
+    code: string; // the set of possible codes is not visible in this file
+}
+/**
+ * Combined extraction result type.
+ *
+ * Check `success` first to exclude `ExtractionError`, then check `format`
+ * before reading `content` (text) or `data` (json): after narrowing on
+ * `success` alone the value is still either of the two success variants.
+ */
+type ExtractionResult<T = Record<string, unknown>> = TextExtractionResult | JsonExtractionResult<T> | ExtractionError;
+/**
+ * Token usage information.
+ *
+ * Exposed as `ExtractionMetadata.tokens` and `ProviderResult.tokens`, both
+ * of which are optional.
+ */
+interface TokenUsage {
+    inputTokens: number;
+    outputTokens: number;
+    // presumably inputTokens + outputTokens (the README sample values add
+    // up that way) - confirm per provider
+    totalTokens: number;
+}
+/**
+ * Metadata about the extraction.
+ *
+ * Attached to both success variants of `ExtractionResult`.
+ */
+interface ExtractionMetadata {
+    provider: AIProvider;
+    model: string;
+    fileType: SupportedFileType;
+    fileName: string;
+    processingTimeMs: number; // duration in milliseconds, per the field name
+    tokens?: TokenUsage; // optional; may be absent from a result
+}
+/**
+ * File information after loading.
+ *
+ * Returned by the `loadFile*` helpers and consumed by the provider
+ * `extractText` / `extractJson` methods.
+ */
+interface FileInfo {
+    path: string;
+    name: string;
+    type: SupportedFileType; // coarse category, see SupportedFileType
+    mimeType: string;
+    size: number; // presumably the content length in bytes - confirm
+    content: Buffer;
+    base64?: string; // optional; when it is populated is not visible here
+}
+/**
+ * Result from provider extraction including tokens.
+ *
+ * `T` defaults to `string`, which is what `extractText` resolves with;
+ * `extractJson` resolves with `ProviderResult<T>` for its own type
+ * parameter.
+ */
+interface ProviderResult<T = string> {
+    content: T;
+    tokens?: TokenUsage; // optional token usage
+}
+/**
+ * Interface that all AI providers must implement.
+ *
+ * `BaseProvider` in this file implements it, and the concrete provider
+ * classes extend `BaseProvider`.
+ */
+interface IAIProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    /**
+     * Extract text from a file
+     *
+     * @param file - The loaded file to process.
+     * @param options - Optional extraction options.
+     * @returns A promise of the extracted text plus optional token usage.
+     */
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    /**
+     * Extract structured JSON from a file
+     *
+     * @param file - The loaded file to process.
+     * @param schema - Description of the structure to extract.
+     * @param options - Optional extraction options.
+     * @returns A promise of the extracted object, typed as `T`, plus optional
+     * token usage.
+     */
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    /**
+     * Check if the provider supports the given file type
+     */
+    supportsFileType(type: SupportedFileType): boolean;
+}
+/**
+ * Main class for document extraction using AI.
+ *
+ * The three `extract*` methods resolve with an `ExtractionResult`, whose
+ * `ExtractionError` member models failures; whether they can also reject is
+ * not visible in this declaration file.
+ *
+ * NOTE(review): the `extract*` methods are not generic, so a JSON result's
+ * `data` is always typed `Record<string, unknown>` here.
+ */
+declare class ExtractaAI {
+    private provider;
+    private config;
+    constructor(config: ExtractaConfig);
+    /**
+     * Create a provider instance based on configuration
+     */
+    private createProvider;
+    /**
+     * Extract content from a file path or URL
+     *
+     * @param source - A local file path or a URL (the README shows both).
+     * @param options - Optional extraction options.
+     */
+    extract(source: string, options?: ExtractionOptions): Promise<ExtractionResult>;
+    /**
+     * Extract content from a Buffer
+     *
+     * @param buffer - The file contents.
+     * @param fileName - Name to associate with the contents.
+     * @param options - Optional extraction options.
+     */
+    extractFromBuffer(buffer: Buffer, fileName: string, options?: ExtractionOptions): Promise<ExtractionResult>;
+    /**
+     * Extract content from a base64 string
+     *
+     * @param base64 - The file contents as a base64 string.
+     * @param fileName - Name to associate with the contents.
+     * @param options - Optional extraction options.
+     */
+    extractFromBase64(base64: string, fileName: string, options?: ExtractionOptions): Promise<ExtractionResult>;
+    /**
+     * Process the extraction based on format
+     */
+    private processExtraction;
+    /**
+     * Create an error result
+     */
+    private createErrorResult;
+    /**
+     * Get current provider name
+     */
+    getProvider(): AIProvider;
+    /**
+     * Get current model
+     */
+    getModel(): string;
+    /**
+     * Change the AI provider
+     *
+     * NOTE(review): `apiKey` is required and there is no parameter for a
+     * Vertex configuration, although `ExtractaConfig` documents
+     * `vertexConfig` as required for 'vertex' - confirm how switching to
+     * 'vertex' is meant to work.
+     */
+    setProvider(provider: AIProvider, apiKey: string, model?: string): void;
+}
+/**
+ * Factory function to create ExtractaAI instance
+ *
+ * @param config - The same configuration object the `ExtractaAI`
+ * constructor accepts.
+ * @returns An `ExtractaAI` instance.
+ */
+declare function createExtractaAI(config: ExtractaConfig): ExtractaAI;
+/**
+ * Load a file from disk and prepare it for AI processing
+ *
+ * @param filePath - Path of the file to load.
+ * @returns A promise of the loaded file's `FileInfo`.
+ */
+declare function loadFile(filePath: string): Promise<FileInfo>;
+/**
+ * Load a file from a Buffer
+ *
+ * Synchronous, unlike `loadFile` and `loadFileFromUrl`.
+ *
+ * @param buffer - The file contents.
+ * @param fileName - Name to associate with the contents.
+ * @param mimeType - Optional MIME type; presumably derived from `fileName`
+ * when omitted - confirm in the implementation.
+ * @returns The resulting `FileInfo`.
+ */
+declare function loadFileFromBuffer(buffer: Buffer, fileName: string, mimeType?: string): FileInfo;
+/**
+ * Load a file from base64 string
+ *
+ * Synchronous, unlike `loadFile` and `loadFileFromUrl`.
+ *
+ * @param base64 - The file contents as a base64 string.
+ * @param fileName - Name to associate with the contents.
+ * @param mimeType - Optional MIME type; presumably derived from `fileName`
+ * when omitted - confirm in the implementation.
+ * @returns The resulting `FileInfo`.
+ */
+declare function loadFileFromBase64(base64: string, fileName: string, mimeType?: string): FileInfo;
+/**
+ * Save content to a file
+ *
+ * @param filePath - Destination path.
+ * @param content - Data to write, as a string or a Buffer.
+ * @returns A promise that resolves with no value.
+ */
+declare function saveToFile(filePath: string, content: string | Buffer): Promise<void>;
+/**
+ * Get supported file extensions
+ *
+ * @returns The supported extensions as strings; whether they include a
+ * leading dot is not visible in this declaration file.
+ */
+declare function getSupportedExtensions(): string[];
+/**
+ * Check if a file extension is supported
+ *
+ * @param ext - The extension to check; whether a leading dot or a specific
+ * letter case is expected is not visible in this declaration file.
+ * @returns `true` when the extension is supported.
+ */
+declare function isExtensionSupported(ext: string): boolean;
+/**
+ * Check if a string is a URL
+ *
+ * @param str - The string to test.
+ * @returns `true` when `str` is considered a URL; which schemes qualify is
+ * not visible here (the README example uses `https://`).
+ */
+declare function isUrl(str: string): boolean;
+/**
+ * Load a file from a URL
+ *
+ * @param url - The URL to load the file from.
+ * @returns A promise of the loaded file's `FileInfo`.
+ */
+declare function loadFileFromUrl(url: string): Promise<FileInfo>;
+/**
+ * Base class for AI providers with common functionality.
+ *
+ * Subclasses supply `name`, `model`, `extractText` and `extractJson`. The
+ * file-type check and the prompt / JSON-parsing helpers are declared here as
+ * concrete members.
+ */
+declare abstract class BaseProvider implements IAIProvider {
+    abstract readonly name: AIProvider;
+    abstract readonly model: string;
+    protected apiKey: string; // accessible to subclasses
+    constructor(apiKey: string);
+    abstract extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    abstract extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    // Concrete implementation; ClaudeProvider redeclares this method.
+    supportsFileType(type: SupportedFileType): boolean;
+    /**
+     * Build the text extraction prompt
+     */
+    protected buildTextPrompt(options?: ExtractionOptions): string;
+    /**
+     * Build the JSON extraction prompt
+     */
+    protected buildJsonPrompt(schema: Record<string, unknown>, options?: ExtractionOptions): string;
+    /**
+     * Parse JSON response from AI, handling potential formatting issues
+     *
+     * NOTE(review): `T` appears only in the return type, so the result is
+     * effectively an unchecked cast - confirm whether any validation happens.
+     */
+    protected parseJsonResponse<T>(response: string): T;
+}
+/**
+ * Gemini provider built on `BaseProvider`.
+ *
+ * NOTE(review): the README lists `gemini-1.5-flash` as the default model for
+ * 'gemini'; the value used when `model` is omitted is not visible in this
+ * declaration file.
+ */
+declare class GeminiProvider extends BaseProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    private client;
+    constructor(apiKey: string, model?: string);
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    private buildGenerationConfig;
+    private extractTokenUsage;
+    private buildContent;
+}
+/**
+ * OpenAI provider built on `BaseProvider`.
+ *
+ * NOTE(review): the README lists `gpt-4o` as the default model for 'openai';
+ * the value used when `model` is omitted is not visible in this declaration
+ * file.
+ */
+declare class OpenAIProvider extends BaseProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    private client;
+    constructor(apiKey: string, model?: string);
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    private buildCompletionOptions;
+    private extractTokenUsage;
+    private buildMessages;
+}
+/**
+ * Claude provider built on `BaseProvider`.
+ *
+ * The only provider in this file that redeclares `supportsFileType`; which
+ * file types it accepts or rejects is not visible here.
+ *
+ * NOTE(review): the README lists `claude-sonnet-4-20250514` as the default
+ * model for 'claude'; the value used when `model` is omitted is not visible
+ * in this declaration file.
+ */
+declare class ClaudeProvider extends BaseProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    private client;
+    constructor(apiKey: string, model?: string);
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    private buildMessageOptions;
+    supportsFileType(type: SupportedFileType): boolean;
+    private extractTokenUsage;
+    private buildContent;
+    private getMediaType;
+}
+/**
+ * Grok provider built on `BaseProvider`.
+ *
+ * NOTE(review): the README lists `grok-2-vision-1212` as the default model
+ * for 'grok'; the value used when `model` is omitted is not visible in this
+ * declaration file.
+ */
+declare class GrokProvider extends BaseProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    private client;
+    constructor(apiKey: string, model?: string);
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    private buildCompletionOptions;
+    private extractTokenUsage;
+    private buildMessages;
+}
+/**
+ * Vertex AI configuration accepted by the `VertexProvider` constructor.
+ *
+ * Structurally identical to `VertexConfig$1` above. This declaration is not
+ * itself exported; the module's public `VertexConfig` export is an alias of
+ * `VertexConfig$1`.
+ */
+interface VertexConfig {
+    project: string; // the README example uses 'your-gcp-project-id'
+    location: string; // the README example uses 'us-central1'
+}
+/**
+ * Vertex AI provider built on `BaseProvider`.
+ *
+ * Unlike the other providers, its constructor takes a `VertexConfig`
+ * (project and location) instead of an API key. What it passes to the
+ * `BaseProvider` constructor, which requires an `apiKey` string, is not
+ * visible in this declaration file.
+ *
+ * NOTE(review): the README lists `gemini-2.0-flash` as the default model for
+ * 'vertex'; the value used when `model` is omitted is not visible here.
+ */
+declare class VertexProvider extends BaseProvider {
+    readonly name: AIProvider;
+    readonly model: string;
+    private client;
+    constructor(config: VertexConfig, model?: string);
+    extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
+    extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
+    private buildGenerationConfig;
+    private extractTokenUsage;
+    private buildContents;
+}
+export { type AIProvider, BaseProvider, ClaudeProvider, ExtractaAI, type ExtractaConfig, type ExtractionError, type ExtractionMetadata, type ExtractionOptions, type ExtractionResult, type FileInfo, GeminiProvider, GrokProvider, type IAIProvider, type JsonExtractionResult, type ModelConfig, OpenAIProvider, type OutputFormat, type ProviderConfig, type SupportedFileType, type TextExtractionResult, type TokenUsage, type VertexConfig$1 as VertexConfig, VertexProvider, createExtractaAI, getSupportedExtensions, isExtensionSupported, isUrl, loadFile, loadFileFromBase64, loadFileFromBuffer, loadFileFromUrl, saveToFile };