PyPI - memra - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl - Mend

memra 0.2.12py3-none-any.whl → 0.2.13py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

memra/cli.py CHANGED Viewed

@@ -79,15 +79,121 @@ def setup_demo_environment():
 def extract_bundled_files(demo_dir):
     """Extract files bundled with the PyPI package"""
     try:
-        # Extract from package data
-        with pkg_resources.path('memra', 'demo_files') as demo_files_path:
-            if demo_files_path.exists():
-                # Copy all files from the bundled demo_files directory
-                shutil.copytree(demo_files_path, demo_dir, dirs_exist_ok=True)
+        import pkg_resources
+        import shutil
+        from pathlib import Path
+        # Extract demo files from package data
+        demo_dir.mkdir(exist_ok=True)
+        # Copy the main ETL demo script
+        try:
+            demo_script = pkg_resources.resource_filename('memra', 'demos/etl_invoice_processing/etl_invoice_demo.py')
+            if Path(demo_script).exists():
+                shutil.copy2(demo_script, demo_dir / "etl_invoice_demo.py")
+                print("✅ Copied ETL demo script")
+            else:
+                print("⚠️  ETL demo script not found in package")
+        except Exception as e:
+            print(f"⚠️  Could not copy ETL demo script: {e}")
+        # Copy supporting Python files
+        demo_files = [
+            "database_monitor_agent.py",
+            "simple_pdf_processor.py",
+            "setup_demo_data.py"
+        ]
+        for file_name in demo_files:
+            try:
+                file_path = pkg_resources.resource_filename('memra', f'demos/etl_invoice_processing/{file_name}')
+                if Path(file_path).exists():
+                    shutil.copy2(file_path, demo_dir / file_name)
+                    print(f"✅ Copied {file_name}")
+                else:
+                    print(f"⚠️  {file_name} not found in package")
+            except Exception as e:
+                print(f"⚠️  Could not copy {file_name}: {e}")
+        # Copy sample data directory
+        try:
+            data_source = pkg_resources.resource_filename('memra', 'demos/etl_invoice_processing/data')
+            if Path(data_source).exists():
+                data_dir = demo_dir / "data"
+                shutil.copytree(data_source, data_dir, dirs_exist_ok=True)
+                print("✅ Copied sample invoice data")
             else:
-                # Fallback: create minimal demo structure
-                create_minimal_demo(demo_dir)
+                print("⚠️  Sample data not found in package")
+        except Exception as e:
+            print(f"⚠️  Could not copy sample data: {e}")
+        # Create memra-ops directory with docker-compose
+        ops_dir = demo_dir / "memra-ops"
+        ops_dir.mkdir(exist_ok=True)
+        # Create basic docker-compose.yml
+        compose_content = """version: '3.8'
+services:
+  postgres:
+    image: postgres:15
+    environment:
+      POSTGRES_DB: local_workflow
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+    ports:
+      - "5432:5432"
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+volumes:
+  postgres_data:
+"""
+        with open(ops_dir / "docker-compose.yml", "w") as f:
+            f.write(compose_content)
+        # Create basic MCP bridge server
+        mcp_content = """#!/usr/bin/env python3
+import asyncio
+import aiohttp
+from aiohttp import web
+import json
+async def health_handler(request):
+    return web.json_response({"status": "healthy"})
+async def execute_tool_handler(request):
+    data = await request.json()
+    tool_name = data.get('tool_name', 'unknown')
+    # Mock responses for demo
+    if tool_name == 'SQLExecutor':
+        return web.json_response({
+            "success": True,
+            "results": [{"message": "Demo SQL executed"}]
+        })
+    elif tool_name == 'PostgresInsert':
+        return web.json_response({
+            "success": True,
+            "id": 1
+        })
+    else:
+        return web.json_response({
+            "success": True,
+            "message": f"Demo {tool_name} executed"
+        })
+app = web.Application()
+app.router.add_get('/health', health_handler)
+app.router.add_post('/execute_tool', execute_tool_handler)
+if __name__ == '__main__':
+    web.run_app(app, host='0.0.0.0', port=8081)
+"""
+        with open(ops_dir / "mcp_bridge_server.py", "w") as f:
+            f.write(mcp_content)
     except Exception as e:
         print(f"⚠️  Could not extract bundled files: {e}")
         print("Creating minimal demo structure...")

memra/demos/etl_invoice_processing/check_after_workflow.py ADDED Viewed

@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""
+Check database after workflow
+"""
+import requests
+import json
+def check_database_after():
+    """Print the contents of the ``invoices`` table after the workflow has run.
+
+    Sends a ``SQLExecutor`` request to the MCP bridge at
+    ``http://localhost:8081`` and prints every returned row. Transport
+    failures, non-200 responses, unsuccessful results and responses without
+    a ``data`` envelope are reported on stdout instead of raising.
+    """
+    bridge_url = "http://localhost:8081"
+    bridge_secret = "test-secret-for-development"
+    headers = {
+        "Content-Type": "application/json",
+        "X-Bridge-Secret": bridge_secret
+    }
+    print("📊 Database contents AFTER workflow:")
+    payload = {
+        "tool_name": "SQLExecutor",
+        "input_data": {
+            "sql_query": "SELECT * FROM invoices ORDER BY id;"
+        }
+    }
+    try:
+        # Without a timeout the script would hang forever on a stalled bridge.
+        response = requests.post(
+            f"{bridge_url}/execute_tool", json=payload, headers=headers, timeout=30
+        )
+    except requests.RequestException as exc:
+        print(f"Request failed: {exc}")
+        return
+    if response.status_code != 200:
+        print(f"HTTP Error: {response.status_code}")
+        return
+    result = response.json()
+    if not result.get("success"):
+        print(f"Error: {result.get('error')}")
+        return
+    data = result.get("data")
+    if not isinstance(data, dict):
+        # A bridge may answer with a different shape (for example a top-level
+        # "results" list and no "data" envelope); report it rather than crash.
+        print(f"Error: unexpected response format: {result}")
+        return
+    print(f"Query: {data.get('query')}")
+    print(f"Total Rows: {data.get('row_count')}")
+    print("\nAll records:")
+    for i, row in enumerate(data.get('results', []), 1):
+        print(f"\n{i}. ID: {row.get('id')}")
+        print(f"   Invoice Number: {row.get('invoice_number')}")
+        print(f"   Vendor: {row.get('vendor_name')}")
+        print(f"   Date: {row.get('invoice_date')}")
+        print(f"   Amount: ${row.get('total_amount')}")
+        print(f"   Status: {row.get('status')}")
+        print(f"   Created: {row.get('created_at')}")
+if __name__ == "__main__":
+    check_database_after()

memra/demos/etl_invoice_processing/check_database.py ADDED Viewed

@@ -0,0 +1,44 @@
+import psycopg2
+import json
+# Print the five most recently created rows of the invoices table.
+# NOTE(review): columns are read by position (row[0]..row[9]); this presumes
+# the table's column order matches the labels printed below - confirm against
+# the schema.
+try:
+    conn = psycopg2.connect(
+        host='localhost',
+        port=5432,
+        database='memra_invoice_db',
+        user='memra',
+        password='memra123'
+    )
+    try:
+        cursor = conn.cursor()
+        # Get detailed records
+        cursor.execute('SELECT * FROM invoices ORDER BY created_at DESC LIMIT 5;')
+        rows = cursor.fetchall()
+        print('📄 Detailed Invoice Records:')
+        for row in rows:
+            print(f'\nID: {row[0]}')
+            print(f'  Invoice Number: {row[1]}')
+            print(f'  Vendor: {row[2]}')
+            print(f'  Invoice Date: {row[3]}')
+            print(f'  Due Date: {row[4]}')
+            print(f'  Total Amount: ${row[5]}')
+            print(f'  Tax Amount: ${row[6]}')
+            print(f'  Status: {row[8]}')
+            print(f'  Created: {row[9]}')
+            if row[7]:  # line_items
+                try:
+                    raw_items = row[7]
+                    # Only text needs parsing; a value the driver already
+                    # decoded (e.g. from a json/jsonb column) is used as-is.
+                    if isinstance(raw_items, (str, bytes, bytearray)):
+                        line_items = json.loads(raw_items)
+                    else:
+                        line_items = raw_items
+                    print(f'  Line Items: {len(line_items)} items')
+                    for i, item in enumerate(line_items[:2]):  # Show first 2 items
+                        print(f'    {i+1}. {item.get("description", "N/A")} - Qty: {item.get("quantity", "N/A")} @ ${item.get("unit_price", "N/A")}')
+                except (TypeError, ValueError, AttributeError, KeyError):
+                    # Best effort: malformed or unexpectedly shaped line items
+                    # are shown raw instead of aborting the whole report.
+                    print(f'  Line Items: {row[7]}')
+        cursor.close()
+    finally:
+        # Release the connection even when the query or printing fails.
+        conn.close()
+except Exception as e:
+    print(f'❌ Database connection failed: {e}')

memra/demos/etl_invoice_processing/check_recent_db.py ADDED Viewed

@@ -0,0 +1,42 @@
+import psycopg2
+from datetime import datetime
+def _format_money(amount):
+    """Render an amount as ``$x.xx``, or ``N/A`` when the column is NULL."""
+    return "N/A" if amount is None else f"${amount:.2f}"
+# Database connection
+conn = psycopg2.connect(
+    host="localhost",
+    database="local_workflow",
+    user="postgres",
+    password="postgres",
+    port=5433
+)
+try:
+    with conn.cursor() as cur:
+        # Get the 10 most recent records
+        cur.execute("""
+            SELECT id, vendor_name, invoice_number, invoice_date, due_date,
+                   total_amount, tax_amount, created_at
+            FROM invoices
+            ORDER BY id DESC
+            LIMIT 10
+        """)
+        records = cur.fetchall()
+        if records:
+            print("🔍 Most Recent Invoice Records:\n")
+            for record in records:
+                # "invoice_id" rather than "id" so the builtin is not shadowed.
+                invoice_id, vendor, invoice_num, inv_date, due_date, total, tax, created = record
+                print(f"ID: {invoice_id} (Created: {created})")
+                print(f"  Vendor: {vendor}")
+                print(f"  Invoice #: {invoice_num}")
+                print(f"  Invoice Date: {inv_date}")
+                print(f"  Due Date: {due_date}")
+                # Amount columns may be NULL; formatting None with :.2f would raise.
+                print(f"  Total: {_format_money(total)}")
+                print(f"  Tax: {_format_money(tax)}")
+                print("-" * 40)
+        else:
+            print("No records found in database")
+finally:
+    # Always release the connection, even if the query fails.
+    conn.close()

memra/demos/etl_invoice_processing/data/README.md ADDED Viewed

@@ -0,0 +1,112 @@
+# Demo Data Files
+This directory contains sample data files for the ETL Invoice Processing demo.
+## 📁 Directory Structure
+```
+data/
+├── invoices/           # PDF invoice files for processing
+│   ├── invoice_001.pdf
+│   ├── invoice_002.pdf
+│   └── ...
+└── README.md          # This file
+```
+## 📄 Invoice Files
+The `invoices/` directory contains sample PDF invoice files that demonstrate:
+- **Various invoice formats** from different vendors
+- **Different data structures** (line items, totals, tax calculations)
+- **Real-world scenarios** (missing data, edge cases)
+- **Multiple currencies** and payment terms
+## 🚀 Using the Demo Data
+### Option 1: Auto-Discovery (Recommended)
+The demo will automatically scan the `invoices/` directory:
+```bash
+# The demo will find and process all PDF files
+python etl_invoice_demo.py
+```
+### Option 2: Specific File Processing
+Process a specific invoice file:
+```python
+# Update the demo to process a specific file
+input_data = {
+    "invoice_file": "data/invoices/invoice_001.pdf",
+    "connection": "postgresql://memra:memra123@localhost:5432/memra_invoice_db"
+}
+```
+### Option 3: External File Processing
+Copy files from external locations:
+```python
+# The demo can copy files from Downloads or other locations
+input_data = {
+    "source_path": "~/Downloads/new_invoice.pdf",
+    "connection": "postgresql://memra:memra123@localhost:5432/memra_invoice_db"
+}
+```
+## 📊 Expected Data Structure
+Each invoice file should contain:
+- **Vendor Information**: Company name, address, contact details
+- **Invoice Details**: Invoice number, date, due date
+- **Line Items**: Description, quantity, unit price, total
+- **Totals**: Subtotal, tax, shipping, grand total
+- **Payment Terms**: Due date, payment methods
+## 🔧 Customizing the Data
+### Adding New Invoice Files
+1. Place new PDF files in the `invoices/` directory
+2. Ensure they include the fields listed under "Expected Data Structure" (the page layout itself can vary)
+3. Test with the demo to verify processing
+### Modifying Existing Files
+- Files are processed using AI vision models
+- No specific format requirements
+- The system adapts to different invoice layouts
+## 📈 Demo Scenarios
+The included files demonstrate:
+| Scenario | Description |
+|----------|-------------|
+| **Standard Invoice** | Typical business invoice with line items |
+| **Complex Invoice** | Multiple pages, detailed line items |
+| **Simple Invoice** | Basic invoice with minimal details |
+| **International** | Different currencies and formats |
+| **Edge Cases** | Missing data, unusual formats |
+## 🚨 Important Notes
+- **File Size**: Each file is approximately 1MB
+- **Total Size**: ~20MB for all demo files
+- **Git LFS**: Not required for these file sizes
+- **Version Control**: Files are tracked in Git for demo consistency
+## 🔄 Updating Demo Data
+When adding new invoice files:
+1. **Test locally** first
+2. **Verify processing** with the demo
+3. **Update this README** if adding new scenarios
+4. **Commit changes** with descriptive messages
+## 📚 Related Documentation
+- [ETL Demo Guide](../README.md)
+- [Database Schema](../../../docs/database_schema.sql)
+- [Sample Data](../../../docs/sample_data.sql)
+- [Quick Start Guide](../../../QUICK_START.md)

memra/demos/etl_invoice_processing/data/invoices/10352259401.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352259823.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352260169.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352260417.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352260599.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352260912.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352261134.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352261563.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352261647.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352261720.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352261811.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352262025.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352262454.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352262702.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352262884.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352263346.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/data/invoices/10352263429.PDF ADDED Viewed

Binary file

memra/demos/etl_invoice_processing/database_monitor_agent.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""
+Database Monitor Agent
+Monitors database state before and after ETL processes
+"""
+from memra import Agent, LLM
+def create_database_monitor_agent():
+    """Build and return the "Database Monitor" agent.
+
+    The agent is configured with a low-temperature LLM, the MCP-hosted
+    ``SQLExecutor`` tool, and the input keys ``table_name``, ``connection``
+    and ``monitoring_phase``; its output key is ``monitoring_report``.
+    """
+    return Agent(
+        role="Database Monitor",
+        job="Monitor database state and validate data integrity",
+        llm=LLM(
+            model="llama-3.2-11b-vision-preview",
+            temperature=0.1,
+            max_tokens=1000
+        ),
+        sops=[
+            "Connect to database using provided credentials",
+            "Execute monitoring queries to count rows and validate data",
+            "Generate comprehensive monitoring report with statistics",
+            "Flag any data integrity issues or anomalies",
+            "Return structured monitoring results"
+        ],
+        systems=["Database"],
+        tools=[
+            {"name": "SQLExecutor", "hosted_by": "mcp"}
+        ],
+        input_keys=["table_name", "connection", "monitoring_phase"],
+        output_key="monitoring_report"
+    )
+def get_monitoring_queries(table_name: str, phase: str):
+    """Return the list of SQL statements to run for a monitoring phase.
+
+    ``phase`` "before" yields the row count plus three NULL-column counts.
+    "after" yields those same four statements followed by a duplicate check,
+    amount statistics and a recent-records count. Any other phase falls back
+    to the "after" list.
+    """
+    # table_name is interpolated straight into the SQL text, so it must be a
+    # trusted identifier supplied by the caller.
+    null_columns = (
+        ("vendor", "vendor_name"),
+        ("invoice", "invoice_number"),
+        ("amount", "total_amount"),
+    )
+    baseline = [f"SELECT COUNT(*) as row_count FROM {table_name}"]
+    baseline.extend(
+        f"SELECT COUNT(*) as null_{label}_count FROM {table_name} WHERE {column} IS NULL"
+        for label, column in null_columns
+    )
+    post_run_only = [
+        f"SELECT COUNT(*) as duplicate_invoices FROM (SELECT invoice_number, COUNT(*) as cnt FROM {table_name} GROUP BY invoice_number HAVING COUNT(*) > 1) as dups",
+        f"SELECT MIN(total_amount) as min_amount, MAX(total_amount) as max_amount, AVG(total_amount) as avg_amount FROM {table_name}",
+        f"SELECT COUNT(*) as recent_records FROM {table_name} WHERE created_at >= NOW() - INTERVAL '1 hour'",
+    ]
+    by_phase = {
+        "before": baseline,
+        "after": baseline + post_run_only,
+    }
+    return by_phase.get(phase, by_phase["after"])
+def create_simple_monitor_agent():
+    """Build and return the single-query variant of the "Database Monitor" agent.
+
+    Compared with ``create_database_monitor_agent`` this one accepts an
+    explicit ``sql_query`` input, declares it on the ``SQLExecutor`` tool,
+    and allows a larger LLM token budget.
+    """
+    llm = LLM(
+        model="llama-3.2-11b-vision-preview",
+        temperature=0.1,
+        max_tokens=1500
+    )
+    procedures = [
+        "Connect to database using provided credentials",
+        "Execute monitoring query using sql_query input",
+        "Generate monitoring report with current statistics",
+        "Flag any data integrity issues",
+        "Return structured monitoring results"
+    ]
+    sql_tool = {"name": "SQLExecutor", "hosted_by": "mcp", "input_keys": ["sql_query"]}
+    return Agent(
+        role="Database Monitor",
+        job="Monitor database state and validate data integrity",
+        llm=llm,
+        sops=procedures,
+        systems=["Database"],
+        tools=[sql_tool],
+        input_keys=["table_name", "connection", "monitoring_phase", "sql_query"],
+        output_key="monitoring_report"
+    )

memra/demos/etl_invoice_processing/debug_mcp.py ADDED Viewed

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""
+Debug script to test MCP bridge connection
+"""
+import httpx
+import json
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def test_mcp_bridge():
+    """Send one ``SQLExecutor`` request to the local MCP bridge and log the outcome.
+
+    Returns:
+        True when the bridge answers with HTTP 200, False on any other
+        status, on timeout, on a transport error, or on an unexpected error.
+    """
+    bridge_url = "http://localhost:8081"
+    bridge_secret = "test-secret-for-development"
+    # Prepare request
+    payload = {
+        "tool_name": "SQLExecutor",
+        "input_data": {
+            "sql_query": "SELECT COUNT(*) as row_count FROM invoices"
+        }
+    }
+    headers = {
+        "Content-Type": "application/json",
+        "X-Bridge-Secret": bridge_secret
+    }
+    logger.info("Testing MCP bridge at %s", bridge_url)
+    logger.info("Payload: %s", json.dumps(payload, indent=2))
+    # Keep the shared secret out of the log; only show that the header is set.
+    logger.info("Headers: %s", {**headers, "X-Bridge-Secret": "***"})
+    try:
+        with httpx.Client(timeout=60.0) as client:
+            logger.info("Making HTTP request...")
+            response = client.post(f"{bridge_url}/execute_tool", json=payload, headers=headers)
+            logger.info("Response status: %s", response.status_code)
+            logger.info("Response headers: %s", dict(response.headers))
+            if response.status_code == 200:
+                result = response.json()
+                logger.info("Success! Result: %s", json.dumps(result, indent=2))
+                return True
+            logger.error("HTTP error: %s", response.status_code)
+            logger.error("Response text: %s", response.text)
+            return False
+    except httpx.TimeoutException:
+        logger.error("Request timed out")
+        return False
+    except httpx.RequestError as e:
+        # Transport-level failures such as a refused connection. The previous
+        # HTTPStatusError handler could never fire because raise_for_status()
+        # is not called; status codes are checked explicitly above.
+        logger.error("Request failed: %s", e)
+        return False
+    except Exception as e:
+        logger.error("Unexpected error: %s", e)
+        return False
+if __name__ == "__main__":
+    success = test_mcp_bridge()
+    print(f"\n{'✅ SUCCESS' if success else '❌ FAILED'}")

memra/demos/etl_invoice_processing/debug_schema.py ADDED Viewed

@@ -0,0 +1,45 @@
+import os
+import requests
+import json
+# Debug helper: posts a fixed schema to the hosted PDFProcessor tool and
+# prints what comes back.
+api_url = "https://api.memra.co"
+api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
+# Test the exact format we're sending
+schema = {
+    "columns": [
+        {"name": "invoice_number", "type": "character varying"},
+        {"name": "vendor_name", "type": "character varying"},
+        {"name": "invoice_date", "type": "date"},
+        {"name": "total_amount", "type": "numeric"}
+    ]
+}
+print("Schema being sent:")
+print(json.dumps(schema, indent=2))
+resp = requests.post(
+    f"{api_url}/tools/execute",
+    json={
+        "tool_name": "PDFProcessor",
+        "hosted_by": "memra",
+        "input_data": {
+            "file": "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf",
+            "schema": schema
+        }
+    },
+    headers={"X-API-Key": api_key},
+    # Without a timeout a stalled API call would block the script forever.
+    timeout=60
+)
+print(f"\nResponse status: {resp.status_code}")
+if resp.status_code == 200:
+    result = resp.json()
+    if result.get('success') and 'data' in result and 'data' in result['data']:
+        data = result['data']['data']
+        print(f"\nVision prompt length: {len(data.get('vision_prompt', ''))}")
+        print("Vision prompt preview:")
+        print(data.get('vision_prompt', '')[:500])
+        # Check if vendor was mentioned in response
+        vision_resp = data.get('vision_response', '')
+        print(f"\nVision response includes 'vendor': {'vendor' in vision_resp.lower()}")
+    else:
+        # Surface failures and unexpected payload shapes instead of exiting silently.
+        print("\nUnexpected response payload:")
+        print(json.dumps(result, indent=2))
+else:
+    print(f"Response text: {resp.text}")

memra 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

memra 0.2.12py3-none-any.whl → 0.2.13py3-none-any.whl