PyPI - memra - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl - Mend

memra 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

memra/demos/etl_invoice_processing/test_conversion.py ADDED Viewed

@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""
+Test script to verify vision response conversion
+"""
+import json
+def convert_vision_response_to_extracted_data(vision_response: str) -> dict:
+    """Convert a vision model's JSON reply into the extracted_data layout.
+
+    The reply may be bare JSON or JSON wrapped in a Markdown code fence
+    (with or without a ``json`` language tag, and with surrounding
+    whitespace). Each field is looked up under several spellings.
+
+    Args:
+        vision_response: Raw text returned by the vision model.
+
+    Returns:
+        A dict with ``headerSection``, ``billingDetails``, ``chargesSummary``
+        and ``status``. ``status`` is ``"processed"`` on success. If the text
+        cannot be parsed or converted, a zero/empty structure with status
+        ``"conversion_error"`` is returned and the error is printed.
+    """
+    def first_present(record, keys, default):
+        """Return the first truthy value stored under any of ``keys``."""
+        for key in keys:
+            value = record.get(key)
+            if value:
+                return value
+        return default
+    def to_number(value):
+        """Coerce a number or a string such as "$197.79" to ``float``."""
+        if isinstance(value, str):
+            # Only a leading currency sign is dropped; separators are left
+            # alone because their meaning depends on the locale.
+            value = value.strip().lstrip("$").strip()
+        return float(value)
+    try:
+        # Strip whitespace first so a fence preceded by a newline or spaces is
+        # still recognised; accept a bare fence as well as a json-tagged one.
+        text = vision_response.strip()
+        if text.startswith("```"):
+            text = text.replace("```json", "").replace("```", "").strip()
+        # Parse the JSON response
+        data = json.loads(text)
+        # Extract fields with fallback to different naming conventions
+        invoice_number = first_present(
+            data, ("invoice_number", "InvoiceNumber", "invoiceNumber"), ""
+        )
+        invoice_date = first_present(
+            data, ("invoice_date", "InvoiceDate", "invoiceDate"), ""
+        )
+        # Rewrite M/D/Y dates as YYYY-MM-DD; any other value passes through
+        # unchanged instead of failing the whole conversion.
+        date_parts = invoice_date.split("/") if isinstance(invoice_date, str) else []
+        if len(date_parts) == 3:
+            month, day, year = date_parts
+            if len(year) == 2:
+                # Two-digit years are taken to be in the 2000s.
+                year = "20" + year
+            invoice_date = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
+        amount = to_number(
+            first_present(data, ("amount", "InvoiceTotal", "invoiceTotal", "total"), 0)
+        )
+        vendor_name = first_present(
+            data, ("vendor_name", "VendorName", "vendorName", "Company", "company"), ""
+        )
+        tax_amount = to_number(
+            first_present(data, ("tax_amount", "TaxAmount", "taxAmount"), 0)
+        )
+        line_items = first_present(
+            data, ("line_items", "Order", "order", "LineItems", "lineItems"), []
+        )
+        # Convert to expected format
+        return {
+            "headerSection": {
+                "vendorName": vendor_name,
+                "subtotal": amount
+            },
+            "billingDetails": {
+                "invoiceNumber": invoice_number,
+                "invoiceDate": invoice_date,
+                "dueDate": ""
+            },
+            "chargesSummary": {
+                "document_total": amount,
+                "secondary_tax": tax_amount,
+                "lineItemsBreakdown": line_items
+            },
+            "status": "processed"
+        }
+    except (AttributeError, TypeError, ValueError) as e:
+        # Covers invalid JSON (JSONDecodeError is a ValueError), a reply that
+        # is not a JSON object, non-text input and non-numeric amounts.
+        print(f"⚠️  Error converting vision response: {e}")
+        return {
+            "headerSection": {"vendorName": "", "subtotal": 0.0},
+            "billingDetails": {"invoiceNumber": "", "invoiceDate": "", "dueDate": ""},
+            "chargesSummary": {"document_total": 0.0, "secondary_tax": 0.0, "lineItemsBreakdown": []},
+            "status": "conversion_error"
+        }
+def test_conversion():
+    """Run the converter on a captured vision reply and report the outcome.
+
+    Prints the raw reply, the converted structure and the key fields, then a
+    success or failure line depending on whether the invoice number and the
+    total match the values in the sample.
+    """
+    # Reply captured from an earlier run, Markdown fence included.
+    sample_reply = '''```json
+{
+  "InvoiceDate": "09/16/24",
+  "InvoiceNumber": "50482291",
+  "AccountNumber": "203258",
+  "InvoiceTotal": "197.79",
+  "Order": [
+    {
+      "Date": "09/16/24",
+      "OrderNumber": "923785",
+      "QuantityDelivered": "6.0",
+      "Description": "331L CYL AL/BOUT 331L",
+      "UnitPrice": "22.174",
+      "ExtendedTotal": "133.04"
+    },
+    {
+      "Description": "CARBURN TAX",
+      "ExtendedTotal": "11.94"
+    },
+    {
+      "Description": "FUEL CHARGE",
+      "ExtendedTotal": "22.06"
+    },
+    {
+      "Description": "DANG - HST / F/P/ PRAIS MAT.",
+      "ExtendedTotal": "4.95"
+    },
+    {
+      "Description": "GST - HST / TPS - TVH",
+      "ExtendedTotal": "25.80"
+    }
+  ]
+}
+```'''
+    print("🧪 Testing Vision Response Conversion")
+    print("=" * 50)
+    print("\n📄 Original Vision Response:")
+    print(sample_reply)
+    print("\n🔄 Converting to extracted_data format...")
+    converted = convert_vision_response_to_extracted_data(sample_reply)
+    print("\n✅ Converted Data:")
+    print(json.dumps(converted, indent=2))
+    # Pull out the fields that the final check relies on.
+    vendor = converted["headerSection"]["vendorName"]
+    number = converted["billingDetails"]["invoiceNumber"]
+    total = converted["chargesSummary"]["document_total"]
+    print("\n🎯 Key Fields:")
+    print(f"   Vendor: '{vendor}'")
+    print(f"   Invoice #: '{number}'")
+    print(f"   Amount: ${total}")
+    matches_sample = number == "50482291" and total == 197.79
+    print("✅ Conversion successful!" if matches_sample else "❌ Conversion failed!")
+if __name__ == "__main__":
+    test_conversion()

memra/demos/etl_invoice_processing/test_debug.py ADDED Viewed

@@ -0,0 +1,41 @@
+import os
+import requests
+import json
+# Debug script: run PDFProcessor on an already-uploaded file and show the
+# shape of the reply plus a preview of the vision prompt.
+# The key falls back to a development placeholder when MEMRA_API_KEY is unset.
+api_url = "https://api.memra.co"
+api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
+schema = [
+    {"column_name": "vendor_name", "data_type": "character varying"}
+]
+# requests applies no timeout by default, so bound the wait explicitly
+# rather than risk hanging forever on a stalled connection.
+resp = requests.post(
+    f"{api_url}/tools/execute",
+    json={
+        "tool_name": "PDFProcessor",
+        "hosted_by": "memra",
+        "input_data": {
+            "file": "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf",
+            "schema": schema
+        }
+    },
+    headers={"X-API-Key": api_key},
+    timeout=120
+)
+print(f"Status: {resp.status_code}")
+if resp.status_code == 200:
+    result = resp.json()
+    print(f"Success: {result.get('success')}")
+    print(f"Keys: {list(result.keys())}")
+    # Walk result -> data -> data, stopping quietly at any level that is
+    # missing or is not a JSON object (e.g. null on a failed call).
+    data = result.get('data')
+    if isinstance(data, dict):
+        print(f"Data keys: {list(data.keys())}")
+        inner_data = data.get('data')
+        if isinstance(inner_data, dict):
+            print(f"Inner data keys: {list(inner_data.keys())}")
+            if 'vision_prompt' in inner_data:
+                prompt = inner_data['vision_prompt']
+                print(f"\nPrompt length: {len(prompt)}")
+                print("Prompt preview:")
+                print(prompt[:200])
+else:
+    print(f"Error: {resp.text}")

memra/demos/etl_invoice_processing/test_direct_vision.py ADDED Viewed

@@ -0,0 +1,114 @@
+import os
+import requests
+import json
+import base64
+# Upload a test PDF and process it
+# The key falls back to a development placeholder when MEMRA_API_KEY is unset.
+api_url = "https://api.memra.co"
+api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
+# First, upload a PDF file
+pdf_path = "data/invoices/10352259401.PDF"  # Superior Propane invoice
+# Read and encode the PDF
+with open(pdf_path, 'rb') as f:
+    pdf_content = f.read()
+pdf_b64 = base64.b64encode(pdf_content).decode('utf-8')
+# Upload the file (requests has no default timeout, so set one explicitly)
+print("📤 Uploading PDF file...")
+upload_resp = requests.post(
+    f"{api_url}/upload",
+    json={
+        "filename": os.path.basename(pdf_path),
+        "content": pdf_b64,
+        "content_type": "application/pdf"
+    },
+    headers={"X-API-Key": api_key},
+    timeout=120
+)
+if upload_resp.status_code != 200:
+    print(f"Upload failed: {upload_resp.text}")
+    # The bare `exit` helper is only injected by the site module;
+    # SystemExit is always available.
+    raise SystemExit(1)
+upload_result = upload_resp.json()
+remote_path = upload_result["data"]["remote_path"]
+print(f"✅ Uploaded to: {remote_path}")
+# Now process with PDFProcessor
+print("\n🔍 Processing with PDFProcessor...")
+schema = [
+    {"column_name": "vendor_name", "data_type": "character varying"},
+    {"column_name": "invoice_number", "data_type": "character varying"},
+    {"column_name": "invoice_date", "data_type": "date"},
+    {"column_name": "due_date", "data_type": "date"},
+    {"column_name": "total_amount", "data_type": "numeric"},
+    {"column_name": "tax_amount", "data_type": "numeric"},
+    {"column_name": "line_items", "data_type": "jsonb"}
+]
+process_resp = requests.post(
+    f"{api_url}/tools/execute",
+    json={
+        "tool_name": "PDFProcessor",
+        "hosted_by": "memra",
+        "input_data": {
+            "file": remote_path,
+            "schema": schema
+        }
+    },
+    headers={"X-API-Key": api_key},
+    timeout=120
+)
+if process_resp.status_code == 200:
+    result = process_resp.json()
+    # The payload of interest sits at result["data"]["data"]; tolerate either
+    # level being absent or null.
+    outer_data = result.get('data')
+    inner_data = outer_data.get('data') if isinstance(outer_data, dict) else None
+    if result.get('success') and isinstance(inner_data, dict):
+        print("\n=== VISION MODEL RAW RESPONSE ===")
+        if 'vision_response' in inner_data:
+            vision_resp = inner_data['vision_response']
+            print(f"Raw response: {vision_resp[:200]}...")
+            try:
+                # The model may wrap its JSON in a Markdown code fence;
+                # strip it before parsing.
+                cleaned = vision_resp.strip()
+                if cleaned.startswith("```"):
+                    cleaned = cleaned.replace("```json", "").replace("```", "").strip()
+                vision_data = json.loads(cleaned)
+                print("\nVision model extracted:")
+                for key, value in vision_data.items():
+                    print(f"  {key}: {value}")
+            except (AttributeError, TypeError, ValueError) as e:
+                print(f"Could not parse vision response: {e}")
+        print("\n=== VISION PROMPT USED ===")
+        if 'vision_prompt' in inner_data:
+            print(inner_data['vision_prompt'][:500] + "...")
+        print("\n=== TRANSFORMED DATA (MCP Format) ===")
+        if 'extracted_data' in inner_data:
+            extracted = inner_data['extracted_data']
+            header = extracted.get('headerSection', {})
+            billing = extracted.get('billingDetails', {})
+            charges = extracted.get('chargesSummary', {})
+            print(f"Vendor: {header.get('vendorName', 'MISSING')}")
+            print(f"Invoice Number: {billing.get('invoiceNumber', 'MISSING')}")
+            print(f"Invoice Date: {billing.get('invoiceDate', 'MISSING')}")
+            print(f"Due Date: {billing.get('dueDate', 'MISSING')}")
+            print(f"Total Amount: ${charges.get('document_total', 'MISSING')}")
+            print(f"Tax Amount: ${charges.get('secondary_tax', 'MISSING')}")
+            print("\n=== ANALYSIS ===")
+            # A field counts as missing when it is absent, empty or zero.
+            checks = (
+                ('due_date', billing.get('dueDate')),
+                ('total_amount', charges.get('document_total')),
+                ('tax_amount', charges.get('secondary_tax')),
+            )
+            missing_fields = [name for name, value in checks if not value]
+            if missing_fields:
+                print(f"❌ Missing fields: {', '.join(missing_fields)}")
+            else:
+                print("✅ All fields extracted successfully!")
+    else:
+        # Make an unsuccessful or oddly shaped 200 reply visible instead of
+        # ending without any output.
+        print(f"Unexpected response: {json.dumps(result, indent=2)}")
+else:
+    print(f"Processing failed: {process_resp.text}")

memra/demos/etl_invoice_processing/test_full_response.py ADDED Viewed

@@ -0,0 +1,22 @@
+import os
+import requests
+import json
+# Debug script: run PDFProcessor on an already-uploaded file and dump the
+# complete reply.
+# The key falls back to a development placeholder when MEMRA_API_KEY is unset.
+api_url = "https://api.memra.co"
+api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
+# requests applies no timeout by default, so bound the wait explicitly.
+resp = requests.post(
+    f"{api_url}/tools/execute",
+    json={
+        "tool_name": "PDFProcessor",
+        "hosted_by": "memra",
+        "input_data": {
+            "file": "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf",
+            "schema": [{"column_name": "vendor_name", "data_type": "character varying"}]
+        }
+    },
+    headers={"X-API-Key": api_key},
+    timeout=120
+)
+print("Full response:")
+try:
+    print(json.dumps(resp.json(), indent=2))
+except ValueError:
+    # Error pages and empty bodies are not JSON; show them verbatim rather
+    # than crashing with a decode error.
+    print(resp.text)

memra/demos/etl_invoice_processing/test_memra_response.py ADDED Viewed

@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+Test script to see what the memra library receives from PDFProcessor
+"""
+import os
+import sys
+from pathlib import Path
+# Set API key
+os.environ['MEMRA_API_KEY'] = 'test-secret-for-development'
+os.environ['MEMRA_API_URL'] = 'https://api.memra.co'
+# Add the parent directory to the path so we can import memra
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from memra import ExecutionEngine, Agent, LLM
+import base64
+import requests
+def test_memra_pdf_processor():
+    """Show what ExecutionEngine.execute_agent returns for a PDFProcessor agent.
+
+    Uploads data/invoices/10352260169.PDF to the hosted /upload endpoint,
+    runs a one-tool agent on the returned remote path and prints the result.
+    Returns None; returns early, after printing the reason, when the upload
+    fails. Nothing is asserted.
+    """
+    print("🧪 Testing Memra Library PDFProcessor Response")
+    print("=" * 50)
+    # Create a simple agent to test PDFProcessor
+    test_llm = LLM(
+        model="llama-3.2-11b-vision-preview",
+        temperature=0.1,
+        max_tokens=2000
+    )
+    test_agent = Agent(
+        role="Test Parser",
+        job="Test PDFProcessor response",
+        llm=test_llm,
+        tools=[
+            {"name": "PDFProcessor", "hosted_by": "memra"}
+        ],
+        input_keys=["file"],
+        output_key="test_result"
+    )
+    # First upload a file
+    print("\n1️⃣ Uploading file...")
+    # Relative path: the script must be started from a directory that
+    # contains data/invoices/.
+    pdf_path = "data/invoices/10352260169.PDF"
+    with open(pdf_path, 'rb') as f:
+        file_content = f.read()
+    # The upload body is JSON, so the PDF bytes travel base64-encoded.
+    file_b64 = base64.b64encode(file_content).decode('utf-8')
+    upload_data = {
+        "filename": os.path.basename(pdf_path),
+        "content": file_b64,
+        "content_type": "application/pdf"
+    }
+    # The URL and key are literals here; this call does not read the
+    # MEMRA_API_URL / MEMRA_API_KEY environment variables.
+    response = requests.post(
+        "https://api.memra.co/upload",
+        json=upload_data,
+        headers={
+            "X-API-Key": "test-secret-for-development",
+            "Content-Type": "application/json"
+        }
+    )
+    if response.status_code != 200:
+        print(f"❌ Upload failed: {response.status_code}")
+        return
+    result = response.json()
+    if not result.get("success"):
+        print(f"❌ Upload failed: {result.get('error')}")
+        return
+    remote_path = result["data"]["remote_path"]
+    print(f"✅ File uploaded: {remote_path}")
+    # Now test with memra library
+    print("\n2️⃣ Testing with memra library...")
+    engine = ExecutionEngine()
+    input_data = {
+        "file": remote_path
+    }
+    # `result` is rebound here from the upload reply to the agent result.
+    result = engine.execute_agent(test_agent, input_data)
+    print(f"\n📄 Memra Library Result:")
+    print(f"Success: {result.get('success')}")
+    print(f"Error: {result.get('error')}")
+    if result.get('success'):
+        test_result = result.get('result', {})
+        print(f"\n🔍 Test Result Keys: {list(test_result.keys())}")
+        # Check if there are tool results
+        # NOTE(review): `result` is read with .get() above but probed for a
+        # `trace` attribute here — confirm what execute_agent returns; if it
+        # is a plain dict this branch never runs.
+        if hasattr(result, 'trace') and result.trace and hasattr(result.trace, 'tool_results'):
+            tool_results = result.trace.tool_results
+            print(f"\n🔧 Tool Results:")
+            for tool_name, tool_result in tool_results.items():
+                print(f"\n📊 Tool: {tool_name}")
+                print(f"Success: {tool_result.get('success')}")
+                print(f"Data Keys: {list(tool_result.get('data', {}).keys())}")
+                data = tool_result.get('data', {})
+                # The payload is looked up one level further down, at data["data"].
+                if 'data' in data:
+                    inner_data = data['data']
+                    print(f"Inner Data Keys: {list(inner_data.keys())}")
+                    if 'vision_response' in inner_data:
+                        print(f"✅ Vision Response Found!")
+                        vision_response = inner_data['vision_response']
+                        print(f"Vision Response (first 200 chars): {vision_response[:200]}...")
+                    if 'extracted_data' in inner_data:
+                        print(f"✅ Extracted Data Found!")
+                        extracted_data = inner_data['extracted_data']
+                        print(f"Extracted Data: {extracted_data}")
+if __name__ == "__main__":
+    test_memra_pdf_processor()

memra/demos/etl_invoice_processing/test_pdf_processor_response.py ADDED Viewed

@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+Test script to see what PDFProcessor actually returns
+"""
+import os
+import base64
+import requests
+import json
+def test_pdf_processor_response():
+    """Upload a sample invoice, run PDFProcessor on it and dump the reply.
+
+    Prints the full JSON reply, the keys of its ``data`` member, and the
+    ``vision_response`` / ``extracted_data`` members when present. Returns
+    early, after printing the reason, if the upload or the tool call fails.
+    """
+    # Set API key
+    os.environ['MEMRA_API_KEY'] = 'test-secret-for-development'
+    base_url = "https://api.memra.co"
+    # Both requests send the same headers, so build them once.
+    request_headers = {
+        "X-API-Key": "test-secret-for-development",
+        "Content-Type": "application/json"
+    }
+    print("🧪 Testing PDFProcessor Response Structure")
+    print("=" * 50)
+    # Step 1: Upload a file
+    print("\n1️⃣ Uploading file...")
+    pdf_path = "data/invoices/10352260169.PDF"
+    with open(pdf_path, 'rb') as pdf_file:
+        encoded_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
+    upload_reply = requests.post(
+        f"{base_url}/upload",
+        json={
+            "filename": os.path.basename(pdf_path),
+            "content": encoded_pdf,
+            "content_type": "application/pdf"
+        },
+        headers=request_headers
+    )
+    if upload_reply.status_code != 200:
+        print(f"❌ Upload failed: {upload_reply.status_code}")
+        return
+    upload_body = upload_reply.json()
+    if not upload_body.get("success"):
+        print(f"❌ Upload failed: {upload_body.get('error')}")
+        return
+    remote_path = upload_body["data"]["remote_path"]
+    print(f"✅ File uploaded: {remote_path}")
+    # Step 2: Call PDFProcessor
+    print("\n2️⃣ Calling PDFProcessor...")
+    tool_reply = requests.post(
+        f"{base_url}/tools/execute",
+        json={
+            "tool_name": "PDFProcessor",
+            "hosted_by": "memra",
+            "input_data": {"file": remote_path}
+        },
+        headers=request_headers
+    )
+    print(f"Response status: {tool_reply.status_code}")
+    if tool_reply.status_code != 200:
+        print(f"❌ Request failed: {tool_reply.status_code}")
+        print(f"Response: {tool_reply.text}")
+        return
+    tool_body = tool_reply.json()
+    print("\n📄 Full Response Structure:")
+    print(json.dumps(tool_body, indent=2))
+    if not tool_body.get("success"):
+        print(f"❌ API call failed: {tool_body.get('error')}")
+        return
+    payload = tool_body.get("data", {})
+    print(f"\n🔍 Data Keys: {list(payload.keys())}")
+    # Check for vision_response
+    if "vision_response" in payload:
+        print("\n🎯 Vision Response Found!")
+        raw_vision = payload["vision_response"]
+        print(f"Vision Response (first 200 chars): {raw_vision[:200]}...")
+        # Try to parse as JSON, removing a Markdown fence when one leads the text
+        try:
+            if raw_vision.startswith("```json"):
+                raw_vision = raw_vision.replace("```json", "").replace("```", "").strip()
+            parsed_vision = json.loads(raw_vision)
+            print("\n✅ Vision Response Parsed Successfully:")
+            print(json.dumps(parsed_vision, indent=2))
+        except Exception as e:
+            print(f"❌ Failed to parse vision response: {e}")
+    # Check for extracted_data
+    if "extracted_data" in payload:
+        print("\n📊 Extracted Data Found:")
+        print(json.dumps(payload["extracted_data"], indent=2))
+    else:
+        print("\n❌ No extracted_data found")
+if __name__ == "__main__":
+    test_pdf_processor_response()

memra/demos/etl_invoice_processing/test_pdfprocessor_direct.py ADDED Viewed

@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""
+Test PDFProcessor directly using the same mechanism as ETL workflow
+"""
+import os
+import sys
+import requests
+import json
+# Add the parent directory to the path to import memra modules
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from memra.execution import ExecutionEngine
+from memra.models import Agent, ExecutionTrace
+# Set environment variables
+os.environ["MEMRA_API_URL"] = "https://api.memra.co"
+os.environ["MEMRA_API_KEY"] = "test-secret-for-development"
+def test_pdfprocessor_direct():
+    """Run a one-tool PDFProcessor agent through the engine and print the result.
+
+    Builds an agent without an LLM, executes it via
+    ``ExecutionEngine._execute_agent`` against a fixed uploaded file path and
+    prints the result at each level, stopping with a message at the first
+    level that is missing.
+    """
+    print("🧪 Testing PDFProcessor Direct Call")
+    print("=" * 50)
+    # Create a simple agent that just calls PDFProcessor
+    pdf_tool = {"name": "PDFProcessor", "hosted_by": "memra"}
+    test_agent = Agent(
+        role="Test Agent",
+        job="Test PDFProcessor directly",
+        llm=None,  # No LLM needed for direct tool call
+        tools=[pdf_tool],
+        input_keys=["file"],
+        output_key="result"
+    )
+    # Create execution engine
+    engine = ExecutionEngine()
+    # Test with the same file path that the ETL workflow uses
+    file_path = "/uploads/bd4b5a42-ff4b-4659-b050-e1f2d59f521a.PDF"  # From the upload test
+    print(f"📄 Testing with file: {file_path}")
+    # Prepare context and trace
+    context = {
+        "input": {"file": file_path},
+        "department_context": {},
+        "results": {}
+    }
+    trace = ExecutionTrace()
+    # Execute the agent
+    result = engine._execute_agent(test_agent, context, trace)
+    print("\n📊 Result:")
+    print(f"Success: {result.success}")
+    print(f"Error: {result.error}")
+    if not (result.success and result.data):
+        print(f"❌ Agent execution failed: {result.error}")
+        return
+    print(f"\n📄 Data keys: {list(result.data.keys())}")
+    if 'result' not in result.data:
+        print("❌ No 'result' field in agent output")
+        return
+    tool_result = result.data['result']
+    print("\n🔧 Tool Result:")
+    print(f"Success: {tool_result.get('success')}")
+    print(f"Error: {tool_result.get('error')}")
+    if 'data' not in tool_result:
+        print("❌ No 'data' field in tool result")
+        return
+    data = tool_result['data']
+    print(f"Data keys: {list(data.keys())}")
+    if 'vision_response' in data:
+        print("\n📝 Vision Response found!")
+        print(f"Length: {len(data['vision_response'])} characters")
+        try:
+            # Remove any Markdown fence markers before parsing.
+            unfenced = data['vision_response'].replace('```json','').replace('```','').strip()
+            parsed = json.loads(unfenced)
+            print("✅ Valid JSON response:")
+            print(json.dumps(parsed, indent=2))
+        except Exception as e:
+            print(f"❌ JSON parsing error: {e}")
+            print(f"Raw response: {data['vision_response'][:500]}...")
+    if 'extracted_data' in data:
+        print("\n🎯 Extracted Data found!")
+        print(json.dumps(data['extracted_data'], indent=2))
+if __name__ == "__main__":
+    test_pdfprocessor_direct()

memra 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

memra 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl