npm - @minded-ai/mindedjs - Versions diffs - 3.1.14 → 3.1.15 - Mend

@minded-ai/mindedjs 3.1.14 → 3.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -2
package/dist/index.js.map +1 -1
package/dist/internalTools/documentExtraction/documentExtraction.d.ts +28 -80
package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
package/dist/internalTools/documentExtraction/documentExtraction.js +39 -105
package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
package/dist/internalTools/documentExtraction/parseDocument.d.ts +66 -0
package/dist/internalTools/documentExtraction/parseDocument.d.ts.map +1 -0
package/dist/internalTools/documentExtraction/parseDocument.js +92 -0
package/dist/internalTools/documentExtraction/parseDocument.js.map +1 -0
package/dist/nodes/addAppToolNode.js +1 -1
package/dist/nodes/addAppToolNode.js.map +1 -1
package/dist/nodes/addToolNode.js +1 -1
package/dist/nodes/addToolNode.js.map +1 -1
package/dist/nodes/compilePrompt.d.ts +8 -0
package/dist/nodes/compilePrompt.d.ts.map +1 -1
package/dist/nodes/compilePrompt.js +56 -15
package/dist/nodes/compilePrompt.js.map +1 -1
package/dist/toolsLibrary/parseDocument.d.ts +6 -5
package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
package/dist/toolsLibrary/parseDocument.js +16 -8
package/dist/toolsLibrary/parseDocument.js.map +1 -1
package/docs/low-code-editor/nodes.md +101 -1
package/docs/low-code-editor/tools.md +24 -3
package/docs/tooling/document-processing.md +103 -19
package/package.json +4 -2
package/src/index.ts +2 -1
package/src/internalTools/documentExtraction/documentExtraction.ts +51 -131
package/src/internalTools/documentExtraction/parseDocument.ts +107 -0
package/src/nodes/addAppToolNode.ts +2 -2
package/src/nodes/addToolNode.ts +2 -2
package/src/nodes/compilePrompt.ts +56 -16
package/src/toolsLibrary/parseDocument.ts +17 -8

package/docs/tooling/document-processing.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Document Processing
-Parse and extract data from images, PDFs, Word documents, spreadsheets, and more using AI-powered document processing. This tool handles both document parsing and data extraction.
+Parse and extract data from images, PDFs, Word documents, spreadsheets, and more using AI-powered document processing. This tool handles both single and multiple document parsing with optional data extraction.
 ## Overview
@@ -11,6 +11,7 @@ Parse and extract data from images, PDFs, Word documents, spreadsheets, and more
 1. **Structured Extraction with Schema**: Extract data into a predefined Zod schema using AI
 2. **Structured Extraction with Prompt**: Guide extraction using custom prompts
 3. **Raw Text Extraction**: Parse document and extract plain text without AI processing
+4. **Multiple Document Processing**: Process multiple documents at once - content is concatenated for extraction
 ## Processing Modes
@@ -29,7 +30,7 @@ Document processing includes built-in AI extraction - use the node's `prompt` an
 **Available properties:**
-- `parameters.documentSource` (string, required): URL or file path to the document (auto-detected)
+- `parameters.documentSource` (string or array, required): URL or file path to a single document, or array of URLs/file paths to process multiple documents. When an array is provided, documents are parsed and concatenated with double newlines.
 - `parameters.returnStructuredOutput` (boolean, optional, default: `false`): Set to `true` to enable AI-powered extraction, `false` for raw text only. When `true`, requires either `prompt` or `outputSchema` (or both)
 - `prompt` (string, optional): Instructions for AI-powered extraction. Ignored when `returnStructuredOutput` is `false`
 - `outputSchema` (schema object, optional): Define the structure of extracted data for structured extraction. Ignored when `returnStructuredOutput` is `false`
@@ -99,15 +100,48 @@ nodes:
     prompt: 'Extract all names and addresses from this document'
 ```
+Processing multiple documents with structured extraction:
+```yaml
+name: Main flow
+nodes:
+  - name: 'parse-multiple-invoices'
+    type: appTool
+    displayName: 'Parse Multiple Invoices'
+    actionKey: 'minded-parse-documents'
+    actionName: 'Parse Document'
+    appName: 'Minded'
+    parameters:
+      documentSource: '{state.memory.invoiceUrls}'
+      # Also possible to pass an array as JSON with different items, including items from other arrays:
+      # documentSource: '["{state.memory.invoiceFilePath}", "{state.memory.invoiceUrls[0]}"]'
+      returnStructuredOutput: true
+    prompt: 'Extract all invoice data from the provided documents'
+    outputSchema:
+      - name: invoices
+        type: array
+        description: Array of invoice data
+        items:
+          - name: invoiceNumber
+            type: string
+            description: Invoice number
+          - name: amount
+            type: number
+            description: Total amount
+          - name: date
+            type: string
+            description: Invoice date
+```
 ## Programmatic Usage
 The SDK provides three main functions for document processing:
-1. **`parseDocumentAndExtractStructuredData`** - Parse and optionally extract structured data with AI
+1. **`parseDocumentAndExtractStructuredData`** - Parse one or more documents and optionally extract structured data with AI
    ```typescript
-   parseDocumentAndExtractStructuredData<T>({
-     documentSource: string,                  // Required: URL or file path
+   type parseDocumentAndExtractStructuredData = <T>(options: {
+     documentSources: string[],               // Required: Array of URLs/file paths. The results of multiple documents are concatenated.
      sessionId: string,                       // Required: Session identifier
      returnStructuredOutput: boolean,         // Required: Enable/disable AI extraction
      llm?: BaseLanguageModel,                 // Optional: LLM instance (required when returnStructuredOutput is true)
@@ -115,22 +149,21 @@ The SDK provides three main functions for document processing:
      outputSchemaPrompt?: string,             // Optional: Instructions for extraction
      processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
      llamaCloudApiKey?: string                // Optional: API key for local mode
-   }): Promise<{
-     rawContent?: string,
-     structuredContent?: T | string,
-     metadata?: { fileSize?: number, fileType: string, processingTime: number, contentLength: number }
+   }) => Promise<{
+     rawContent?: string,                     // Concatenated content when multiple documents provided
+     structuredContent?: T | string           // Extracted from concatenated content when multiple documents provided
    }>
    ```
 2. **`parseDocument`** - Parse document and extract raw text only
    ```typescript
-   parseDocument({
+   type parseDocument = (options: {
      documentSource: string,                  // Required: URL or file path
      sessionId: string,                       // Required: Session identifier
      processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
      llamaCloudApiKey?: string                // Optional: API key for local mode
-   }): Promise<{
+   }) => Promise<{
      rawContent?: string,
      metadata?: { fileSize?: number, fileType: string, processingTime: number, contentLength: number }
    }>
@@ -139,13 +172,13 @@ The SDK provides three main functions for document processing:
 3. **`extractStructuredDataFromString`** - Extract structured data from already parsed text
    ```typescript
-   extractStructuredDataFromString<T>({
+   type extractStructuredDataFromString = <T>(options: {
      content: string,                  // Required: Text content to extract from
      llm: BaseLanguageModel,           // Required: LLM instance
      sessionId: string,                // Required: Session identifier
      schema?: ZodType<T>,              // Optional: Zod schema for structured extraction
      prompt?: string                   // Optional: Instructions for extraction
-   }): Promise<T | string>
+   }) => Promise<T | string>
    ```
 ### Structured Extraction with Schema
@@ -176,7 +209,7 @@ const invoiceSchema = z.object({
 });
 const result = await parseDocumentAndExtractStructuredData({
-  documentSource: './invoice.pdf',
+  documentSources: ['./invoice.pdf'],
   sessionId: state.sessionId,
   returnStructuredOutput: true,
   llm: agent.llm,
@@ -186,7 +219,6 @@ const result = await parseDocumentAndExtractStructuredData({
 console.log(result.structuredContent); // Typed data matching your schema
 console.log(result.rawContent); // Original raw text
-console.log(result.metadata); // Processing metadata
 ```
 ### Structured Extraction with Prompt Only
@@ -198,7 +230,7 @@ import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
 // Contract analysis with prompt guidance
 const result = await parseDocumentAndExtractStructuredData({
-  documentSource: './contract.pdf',
+  documentSources: ['./contract.pdf'],
   sessionId: state.sessionId,
   returnStructuredOutput: true,
   llm: agent.llm,
@@ -226,14 +258,14 @@ console.log(result.metadata); // File size, type, processing time, content lengt
 ### Using URLs
-All functions accept URLs in addition to file paths via the `documentSource` parameter.
+Both parsing functions accept URLs in addition to file paths: `parseDocument` via its `documentSource` parameter, and `parseDocumentAndExtractStructuredData` via its `documentSources` parameter.
 ```typescript
 import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
 import { z } from 'zod';
 const result = await parseDocumentAndExtractStructuredData({
-  documentSource: 'https://example.com/invoice.pdf',
+  documentSources: ['https://example.com/invoice.pdf'],
   sessionId: state.sessionId,
   returnStructuredOutput: true,
   llm: agent.llm,
@@ -245,6 +277,58 @@ const result = await parseDocumentAndExtractStructuredData({
 });
 ```
+### Processing Multiple Documents
+Process multiple documents by providing an array of URLs or file paths. Documents are parsed in parallel and their content is concatenated with double newlines before optional structured extraction.
+```typescript
+import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
+import { z } from 'zod';
+// Extract data from multiple invoices into an array
+const invoiceSchema = z.array(
+  z.object({
+    invoiceNumber: z.string(),
+    vendor: z.string(),
+    amount: z.number(),
+    date: z.string(),
+  })
+);
+const result = await parseDocumentAndExtractStructuredData({
+  documentSources: [
+    './invoice1.pdf',
+    './invoice2.pdf',
+    './invoice3.pdf',
+  ],
+  sessionId: state.sessionId,
+  returnStructuredOutput: true,
+  llm: agent.llm,
+  outputSchema: invoiceSchema,
+  outputSchemaPrompt: 'Extract invoice data from all provided invoices',
+});
+console.log(result.structuredContent); // Array of invoice data
+console.log(result.rawContent); // Concatenated text from all invoices
+```
+You can also extract raw text from multiple documents without structured extraction:
+```typescript
+import { parseDocumentAndExtractStructuredData } from '@minded-ai/mindedjs';
+const result = await parseDocumentAndExtractStructuredData({
+  documentSources: [
+    'https://example.com/doc1.pdf',
+    'https://example.com/doc2.pdf',
+  ],
+  sessionId: state.sessionId,
+  returnStructuredOutput: false,
+});
+console.log(result.rawContent); // Concatenated text from both documents
+```
 ### Identity Document Verification
 Extract personal information from ID documents with structured validation.
@@ -263,7 +347,7 @@ const idSchema = z.object({
 });
 const result = await parseDocumentAndExtractStructuredData({
-  documentSource: './id-card.jpg',
+  documentSources: ['./id-card.jpg'],
   sessionId: state.sessionId,
   returnStructuredOutput: true,
   llm: agent.llm,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@minded-ai/mindedjs",
-  "version": "3.1.14",
+  "version": "3.1.15",
   "description": "MindedJS is a TypeScript library for building agents.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -33,6 +33,7 @@
     "@types/aws-lambda": "^8.10.150",
     "@types/chai": "^4.3.11",
     "@types/ejs": "^3.1.5",
+    "@types/lodash": "^4.17.21",
     "@types/mocha": "^10.0.6",
     "@types/node": "^20.11.19",
     "@types/sinon": "^17.0.4",
@@ -63,6 +64,7 @@
     "flatted": "^3.3.3",
     "js-yaml": "^4.1.0",
     "langchain": "^0.3.25",
+    "lodash": "^4.17.21",
     "pino": "^9.7.0",
     "pino-pretty": "^13.0.0",
     "socket.io-client": "^4.8.1",
@@ -74,4 +76,4 @@
   "peerDependencies": {
     "playwright": "^1.55.0"
   }
-}
+}

package/src/index.ts CHANGED Viewed

@@ -71,7 +71,8 @@ export type { Tool, ToolExecuteInput } from './types/Tools.types';
 export { ToolType, ProxyType } from './types/Tools.types';
 // Document processing utilities
-export { parseDocumentAndExtractStructuredData, parseDocument, DocumentProcessingMode } from './internalTools/documentExtraction/documentExtraction';
+export { parseDocumentAndExtractStructuredData } from './internalTools/documentExtraction/documentExtraction';
+export { parseDocument, DocumentProcessingMode } from './internalTools/documentExtraction/parseDocument';
 export { extractStructuredDataFromString } from './internalTools/documentExtraction/extractStructuredData';
 export type {
   DocumentProcessorConfig,

package/src/internalTools/documentExtraction/documentExtraction.ts CHANGED Viewed

@@ -1,31 +1,20 @@
 import { ZodType } from 'zod';
 import { BaseLanguageModel } from '@langchain/core/language_models/base';
 import { extractStructuredDataFromString } from './extractStructuredData';
-import { DocumentProcessResponse } from '../../platform/mindedConnectionTypes';
-import { parseDocumentWithManagedService } from './parseDocumentManaged';
-import { parseDocumentWithLocalService } from './parseDocumentLocal';
-/**
- * Document processing mode
- */
-export enum DocumentProcessingMode {
-  /** Process documents using Minded cloud service (default) */
-  MANAGED = 'managed',
-  /** Process documents locally using LlamaCloud */
-  LOCAL = 'local',
-}
+import { DocumentProcessingMode, parseDocument } from './parseDocument';
 /**
  * Parse document and extract structured data using AI.
  *
- * This function provides a flexible way to process documents with optional AI-powered extraction:
- * - Raw text extraction: Parse document without LLM processing
+ * This function provides a flexible way to process one or more documents with optional AI-powered extraction:
+ * - Raw text extraction: Parse document(s) without LLM processing
  * - Structured extraction with schema: Extract data matching a Zod schema
  * - Structured extraction with prompt: Guide extraction using custom prompts
+ * - Multiple document support: Process multiple documents by passing an array - their content will be concatenated
  * - Processing modes: Use DocumentProcessingMode.LOCAL (requires LlamaCloud API key) or DocumentProcessingMode.MANAGED (backend service)
  *
  * @param options - Document processing options
- * @param options.documentSource - URL or file path to the document
+ * @param options.documentSources - Array of URLs/file paths. When multiple documents are provided, they are parsed and concatenated with double newlines before optional structured extraction.
  * @param options.processingMode - Document parsing mode: DocumentProcessingMode.MANAGED (default, backend service) or DocumentProcessingMode.LOCAL (requires llamaCloudApiKey)
  * @param options.sessionId - Unique session identifier for logging and tracking
  * @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
@@ -35,11 +24,10 @@ export enum DocumentProcessingMode {
  * @param options.outputSchemaPrompt - Optional prompt guiding the LLM on how to extract the data
  *
  * @returns Promise resolving to an object containing:
- *   - rawContent: The raw extracted text from the document
- *   - structuredContent: AI-extracted structured data (if returnStructuredOutput is true)
- *   - metadata: Document metadata from processing
+ *   - rawContent: The raw extracted text from the document. When multiple documents are provided, contains their concatenated content separated by double newlines.
+ *   - structuredContent: AI-extracted structured data (only present if returnStructuredOutput is true). When multiple documents are provided, extracts from their concatenated content.
  *
- * @throws {Error} If documentSource is not provided
+ * @throws {Error} If documentSources is not provided
  * @throws {Error} If returnStructuredOutput is true but llm is not provided
  * @throws {Error} If document parsing or extraction fails
  *
@@ -48,9 +36,9 @@ export enum DocumentProcessingMode {
  * import { parseDocumentAndExtractStructuredData, DocumentProcessingMode } from '@minded-ai/mindedjs';
  * import { z } from 'zod';
  *
- * // Parse document and extract structured data using a schema
+ * // Parse single document and extract structured data using a schema
  * const result1 = await parseDocumentAndExtractStructuredData({
- *   documentSource: './invoice.pdf',
+ *   documentSources: ['./invoice.pdf'],
  *   processingMode: DocumentProcessingMode.MANAGED,
  *   sessionId: state.sessionId,
  *   returnStructuredOutput: true,
@@ -62,9 +50,23 @@ export enum DocumentProcessingMode {
  *   }),
  * });
  *
- * // Parse document only
+ * // Parse multiple documents and extract structured data from their combined content
+ * const result2 = await parseDocumentAndExtractStructuredData({
+ *   documentSources: ['./invoice1.pdf', './invoice2.pdf', './invoice3.pdf'],
+ *   processingMode: DocumentProcessingMode.MANAGED,
+ *   sessionId: state.sessionId,
+ *   returnStructuredOutput: true,
+ *   llm: agent.llm,
+ *   outputSchema: z.array(z.object({
+ *     invoiceNumber: z.string(),
+ *     totalAmount: z.number(),
+ *     date: z.string(),
+ *   })),
+ * });
+ *
+ * // Parse document without structured extraction
  * const result3 = await parseDocumentAndExtractStructuredData({
- *   documentSource: './document.pdf',
+ *   documentSources: ['./document.pdf'],
  *   processingMode: DocumentProcessingMode.MANAGED,
  *   sessionId: state.sessionId,
  *   returnStructuredOutput: false,
@@ -72,7 +74,7 @@ export enum DocumentProcessingMode {
  * ```
  */
 export async function parseDocumentAndExtractStructuredData<T extends Record<string, any>>({
-  documentSource,
+  documentSources,
   processingMode,
   sessionId,
   llamaCloudApiKey,
@@ -81,7 +83,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
   outputSchema,
   outputSchemaPrompt,
 }: {
-  documentSource: string;
+  documentSources: string[];
   processingMode?: DocumentProcessingMode;
   sessionId: string;
   llamaCloudApiKey?: string;
@@ -92,15 +94,28 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
 }): Promise<{
   rawContent?: string;
   structuredContent?: T | string;
-  metadata?: DocumentProcessResponse['metadata'];
 }> {
-  // Parse document
-  const result = await parseDocument({
-    documentSource,
-    processingMode,
-    sessionId,
-    llamaCloudApiKey,
-  });
+  // Parse each document and concatenate results
+  const results = await Promise.all(
+    documentSources.map((documentSource) =>
+      parseDocument({
+        documentSource,
+        processingMode,
+        sessionId,
+        llamaCloudApiKey,
+      }),
+    ),
+  );
+  // Concatenate the raw content of the parsed documents with two newlines
+  const concatenatedContent = results
+    .map((r) => r.rawContent)
+    .filter(Boolean)
+    .join('\n\n');
+  const result = {
+    rawContent: concatenatedContent,
+  };
   if (!returnStructuredOutput || !result.rawContent) {
     return result;
@@ -112,7 +127,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
   // Extract structured data from the parsed document
   const structuredContent = await extractStructuredDataFromString<T>({
-    content: result.rawContent,
+    content: result.rawContent!,
     llm,
     schema: outputSchema,
     prompt: outputSchemaPrompt,
@@ -120,102 +135,7 @@ export async function parseDocumentAndExtractStructuredData<T extends Record<str
   });
   return {
-    ...result,
+    rawContent: result.rawContent,
     structuredContent,
   };
 }
-/**
- * Parse document and extract raw text content.
- *
- * This function processes various document types (PDFs, images, Word docs, etc.) and extracts
- * raw text content using either local processing with LlamaCloud or managed backend service.
- * Use this for raw text extraction without AI-powered data extraction.
- *
- * @param options - Document parsing options
- * @param options.documentSource - URL or file path to the document
- * @param options.processingMode - Parsing mode: DocumentProcessingMode.LOCAL (requires llamaCloudApiKey) or DocumentProcessingMode.MANAGED (backend service, default)
- * @param options.sessionId - Unique session identifier for logging and tracking
- * @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
- *
- * @returns Promise resolving to an object containing:
- *   - rawContent: The raw extracted text from the document
- *   - metadata: Document processing metadata (file size, type, processing time, content length)
- *
- * @throws {Error} If documentSource is not provided
- * @throws {Error} If document processing fails
- *
- * @example
- * ```typescript
- * import { parseDocument, DocumentProcessingMode } from '@minded-ai/mindedjs';
- *
- * // Parse document using managed service
- * const result1 = await parseDocument({
- *   documentSource: 'https://example.com/invoice.pdf',
- *   processingMode: DocumentProcessingMode.MANAGED,
- *   sessionId: state.sessionId,
- * });
- * // result1: { rawContent: "Invoice text...", metadata: {...} }
- *
- * // Parse local document using LlamaCloud
- * const result2 = await parseDocument({
- *   documentSource: './contract.pdf',
- *   processingMode: DocumentProcessingMode.LOCAL,
- *   sessionId: state.sessionId,
- *   llamaCloudApiKey: process.env.LLAMA_CLOUD_API_KEY,
- * });
- * // result2: { rawContent: "Contract text...", metadata: {...} }
- * ```
- */
-export async function parseDocument({
-  documentSource,
-  processingMode = DocumentProcessingMode.MANAGED,
-  sessionId,
-  llamaCloudApiKey,
-}: {
-  documentSource: string;
-  processingMode?: DocumentProcessingMode;
-  sessionId: string;
-  llamaCloudApiKey?: string;
-}): Promise<{
-  rawContent?: string;
-  metadata?: {
-    fileSize?: number;
-    fileType: string;
-    processingTime: number;
-    contentLength: number;
-  };
-}> {
-  if (!documentSource) {
-    throw new Error('documentSource is required - provide a URL or file path');
-  }
-  const isDocumentUrl = isUrl(documentSource);
-  if (processingMode === DocumentProcessingMode.MANAGED) {
-    return parseDocumentWithManagedService({
-      documentSource,
-      isDocumentUrl,
-      sessionId,
-    });
-  } else {
-    return parseDocumentWithLocalService({
-      documentSource,
-      isDocumentUrl,
-      sessionId,
-      llamaCloudApiKey: llamaCloudApiKey ?? process.env.LLAMA_CLOUD_API_KEY,
-    });
-  }
-}
-/**
- * Check if a string is a URL
- */
-function isUrl(source: string): boolean {
-  try {
-    const url = new URL(source);
-    return url.protocol === 'http:' || url.protocol === 'https:';
-  } catch {
-    return false;
-  }
-}

package/src/internalTools/documentExtraction/parseDocument.ts ADDED Viewed

@@ -0,0 +1,107 @@
+import { parseDocumentWithManagedService } from './parseDocumentManaged';
+import { parseDocumentWithLocalService } from './parseDocumentLocal';
+/**
+ * Document processing mode
+ */
+export enum DocumentProcessingMode {
+  /** Process documents using Minded cloud service (default) */
+  MANAGED = 'managed',
+  /** Process documents locally using LlamaCloud */
+  LOCAL = 'local',
+}
+/**
+ * Parse document and extract raw text content.
+ *
+ * This function processes various document types (PDFs, images, Word docs, etc.) and extracts
+ * raw text content using either local processing with LlamaCloud or managed backend service.
+ * Use this for raw text extraction without AI-powered data extraction.
+ *
+ * @param options - Document parsing options
+ * @param options.documentSource - URL or file path to the document
+ * @param options.processingMode - Parsing mode: DocumentProcessingMode.LOCAL (requires llamaCloudApiKey) or DocumentProcessingMode.MANAGED (backend service, default)
+ * @param options.sessionId - Unique session identifier for logging and tracking
+ * @param options.llamaCloudApiKey - LlamaCloud API key for local processing. Required when processingMode is DocumentProcessingMode.LOCAL. Can be provided as parameter or via LLAMA_CLOUD_API_KEY environment variable
+ *
+ * @returns Promise resolving to an object containing:
+ *   - rawContent: The raw extracted text from the document
+ *   - metadata: Document processing metadata (file size, type, processing time, content length)
+ *
+ * @throws {Error} If documentSource is not provided
+ * @throws {Error} If document processing fails
+ *
+ * @example
+ * ```typescript
+ * import { parseDocument, DocumentProcessingMode } from '@minded-ai/mindedjs';
+ *
+ * // Parse document using managed service
+ * const result1 = await parseDocument({
+ *   documentSource: 'https://example.com/invoice.pdf',
+ *   processingMode: DocumentProcessingMode.MANAGED,
+ *   sessionId: state.sessionId,
+ * });
+ * // result1: { rawContent: "Invoice text...", metadata: {...} }
+ *
+ * // Parse local document using LlamaCloud
+ * const result2 = await parseDocument({
+ *   documentSource: './contract.pdf',
+ *   processingMode: DocumentProcessingMode.LOCAL,
+ *   sessionId: state.sessionId,
+ *   llamaCloudApiKey: process.env.LLAMA_CLOUD_API_KEY,
+ * });
+ * // result2: { rawContent: "Contract text...", metadata: {...} }
+ * ```
+ */
+export async function parseDocument({
+  documentSource,
+  processingMode = DocumentProcessingMode.MANAGED,
+  sessionId,
+  llamaCloudApiKey,
+}: {
+  documentSource: string;
+  processingMode?: DocumentProcessingMode;
+  sessionId: string;
+  llamaCloudApiKey?: string;
+}): Promise<{
+  rawContent?: string;
+  metadata?: {
+    fileSize?: number;
+    fileType: string;
+    processingTime: number;
+    contentLength: number;
+  };
+}> {
+  if (!documentSource) {
+    throw new Error('documentSource is required - provide a URL or file path');
+  }
+  const isDocumentUrl = isUrl(documentSource);
+  if (processingMode === DocumentProcessingMode.MANAGED) {
+    return parseDocumentWithManagedService({
+      documentSource,
+      isDocumentUrl,
+      sessionId,
+    });
+  } else {
+    return parseDocumentWithLocalService({
+      documentSource,
+      isDocumentUrl,
+      sessionId,
+      llamaCloudApiKey: llamaCloudApiKey ?? process.env.LLAMA_CLOUD_API_KEY,
+    });
+  }
+}
+/**
+ * Check if a string is a URL
+ */
+function isUrl(source: string): boolean {
+  try {
+    const url = new URL(source);
+    return url.protocol === 'http:' || url.protocol === 'https:';
+  } catch {
+    return false;
+  }
+}

package/src/nodes/addAppToolNode.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import { logger } from '../utils/logger';
 import { createHistoryStep } from '../utils/history';
 import { DefaultTool, Tool } from '../types/Tools.types';
 import { combinePlaybooks } from '../playbooks/playbooks';
-import { compilePrompt } from './compilePrompt';
+import { compileParameter, compilePrompt } from './compilePrompt';
 import { ToolMessage } from '@langchain/core/messages';
 import { AnalyticsEventName } from '../types/Analytics.types';
 import { trackAnalyticsEvent } from '../internalTools/analytics';
@@ -57,7 +57,7 @@ export const addAppToolNode = async ({
       if (value !== '') {
         // If the value is a string, compile it to allow variable injection
         if (typeof value === 'string') {
-          compiledParameters[key] = compilePrompt(value, compileContext);
+          compiledParameters[key] = compileParameter(value, compileContext);
         } else {
           compiledParameters[key] = value;
         }

package/src/nodes/addToolNode.ts CHANGED Viewed

@@ -10,7 +10,7 @@ import { Agent } from '../agent';
 import { createHistoryStep } from '../utils/history';
 import { HistoryStep } from '../types/Agent.types';
 import { combinePlaybooks } from '../playbooks/playbooks';
-import { compilePrompt } from './compilePrompt';
+import { compileParameter, compilePrompt } from './compilePrompt';
 import { AnalyticsEventName } from '../types/Analytics.types';
 import { trackAnalyticsEvent } from '../internalTools/analytics';
 import { z } from 'zod';
@@ -58,7 +58,7 @@ export const addToolNode = async ({
       if (value !== '') {
         // If the value is a string, compile it to allow variable injection
         if (typeof value === 'string') {
-          compiledParameters[key] = compilePrompt(value, compileContext);
+          compiledParameters[key] = compileParameter(value, compileContext);
         } else {
           compiledParameters[key] = value;
         }