memra 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. memra/cli.py +114 -8
  2. memra/demos/etl_invoice_processing/check_after_workflow.py +50 -0
  3. memra/demos/etl_invoice_processing/check_database.py +44 -0
  4. memra/demos/etl_invoice_processing/check_recent_db.py +42 -0
  5. memra/demos/etl_invoice_processing/data/README.md +112 -0
  6. memra/demos/etl_invoice_processing/data/invoices/10352259401.PDF +0 -0
  7. memra/demos/etl_invoice_processing/data/invoices/10352259823.PDF +0 -0
  8. memra/demos/etl_invoice_processing/data/invoices/10352260169.PDF +0 -0
  9. memra/demos/etl_invoice_processing/data/invoices/10352260417.PDF +0 -0
  10. memra/demos/etl_invoice_processing/data/invoices/10352260599.PDF +0 -0
  11. memra/demos/etl_invoice_processing/data/invoices/10352260912.PDF +0 -0
  12. memra/demos/etl_invoice_processing/data/invoices/10352261134.PDF +0 -0
  13. memra/demos/etl_invoice_processing/data/invoices/10352261563.PDF +0 -0
  14. memra/demos/etl_invoice_processing/data/invoices/10352261647.PDF +0 -0
  15. memra/demos/etl_invoice_processing/data/invoices/10352261720.PDF +0 -0
  16. memra/demos/etl_invoice_processing/data/invoices/10352261811.PDF +0 -0
  17. memra/demos/etl_invoice_processing/data/invoices/10352262025.PDF +0 -0
  18. memra/demos/etl_invoice_processing/data/invoices/10352262454.PDF +0 -0
  19. memra/demos/etl_invoice_processing/data/invoices/10352262702.PDF +0 -0
  20. memra/demos/etl_invoice_processing/data/invoices/10352262884.PDF +0 -0
  21. memra/demos/etl_invoice_processing/data/invoices/10352263346.PDF +0 -0
  22. memra/demos/etl_invoice_processing/data/invoices/10352263429.PDF +0 -0
  23. memra/demos/etl_invoice_processing/database_monitor_agent.py +89 -0
  24. memra/demos/etl_invoice_processing/debug_mcp.py +66 -0
  25. memra/demos/etl_invoice_processing/debug_schema.py +45 -0
  26. memra/demos/etl_invoice_processing/etl_invoice_demo.py +1233 -0
  27. memra/demos/etl_invoice_processing/modify_database.py +65 -0
  28. memra/demos/etl_invoice_processing/run_etl_batch.py +60 -0
  29. memra/demos/etl_invoice_processing/setup_demo_data.py +154 -0
  30. memra/demos/etl_invoice_processing/simple_pdf_processor.py +181 -0
  31. memra/demos/etl_invoice_processing/test_agent3.py +56 -0
  32. memra/demos/etl_invoice_processing/test_agent3_v2.py +32 -0
  33. memra/demos/etl_invoice_processing/test_api.py +28 -0
  34. memra/demos/etl_invoice_processing/test_api_client_direct.py +89 -0
  35. memra/demos/etl_invoice_processing/test_conversion.py +172 -0
  36. memra/demos/etl_invoice_processing/test_debug.py +41 -0
  37. memra/demos/etl_invoice_processing/test_direct_vision.py +114 -0
  38. memra/demos/etl_invoice_processing/test_full_response.py +22 -0
  39. memra/demos/etl_invoice_processing/test_memra_response.py +124 -0
  40. memra/demos/etl_invoice_processing/test_pdf_processor_response.py +118 -0
  41. memra/demos/etl_invoice_processing/test_pdfprocessor_direct.py +96 -0
  42. memra/demos/etl_invoice_processing/test_postgres_insert.py +120 -0
  43. memra/demos/etl_invoice_processing/test_remote_upload.py +143 -0
  44. memra/demos/etl_invoice_processing/test_schema_format.py +39 -0
  45. memra/demos/etl_invoice_processing/test_sql_executor.py +58 -0
  46. memra/demos/etl_invoice_processing/test_sql_executor_extra_fields.py +61 -0
  47. memra/demos/etl_invoice_processing/test_sql_executor_fix.py +40 -0
  48. memra/demos/etl_invoice_processing/test_updated_server.py +50 -0
  49. memra/demos/etl_invoice_processing/test_upload_functionality.py +156 -0
  50. memra/demos/etl_invoice_processing/test_upload_server.py +232 -0
  51. memra/demos/etl_invoice_processing/test_vision_output.py +75 -0
  52. memra/demos/etl_invoice_processing/test_vision_prompt.py +43 -0
  53. memra/demos/etl_invoice_processing/test_vision_simple.py +60 -0
  54. {memra-0.2.12.dist-info → memra-0.2.13.dist-info}/METADATA +53 -78
  55. memra-0.2.13.dist-info/RECORD +120 -0
  56. {memra-0.2.12.dist-info → memra-0.2.13.dist-info}/WHEEL +1 -1
  57. memra-0.2.12.dist-info/RECORD +0 -68
  58. {memra-0.2.12.dist-info/licenses → memra-0.2.13.dist-info}/LICENSE +0 -0
  59. {memra-0.2.12.dist-info → memra-0.2.13.dist-info}/entry_points.txt +0 -0
  60. {memra-0.2.12.dist-info → memra-0.2.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify vision response conversion
4
+ """
5
+
6
+ import json
7
+
8
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if present.

    Only the opening fence (with an optional ``json`` tag) and the closing
    fence are removed; backticks inside the payload are left untouched.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text
    text = text[3:]
    # Drop the optional language tag ("json", "JSON", ...).
    if text[:4].lower() == "json":
        text = text[4:]
    text = text.rstrip()
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def _first_truthy(data: dict, keys: tuple, default):
    """Return the first truthy ``data.get(key)`` among *keys*, else *default*."""
    for key in keys:
        value = data.get(key)
        if value:
            return value
    return default


def _to_float(value) -> float:
    """Convert a number or a money-like string (``"$1,234.56"``) to ``float``.

    Commas are removed only when they form valid thousands grouping, so a
    decimal-comma value such as ``"1.234,56"`` still raises ``ValueError``
    instead of being silently misread.
    """
    import re  # local import: the module itself only imports json

    if isinstance(value, str):
        text = value.replace("$", "").strip()
        if re.fullmatch(r"[+-]?\d{1,3}(,\d{3})+(\.\d+)?", text):
            text = text.replace(",", "")
        value = text
    return float(value)


def convert_vision_response_to_extracted_data(vision_response: str) -> dict:
    """Convert a vision-model reply into the ``extracted_data`` structure.

    The reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence. Field names are looked up under several naming
    conventions (snake_case, PascalCase, camelCase).

    Args:
        vision_response: Raw text returned by the vision model.

    Returns:
        A dict with ``headerSection``, ``billingDetails``, ``chargesSummary``
        and ``status`` keys. ``status`` is ``"processed"`` on success. On any
        failure an empty skeleton with ``status == "conversion_error"`` is
        returned and the error is printed; the function does not raise.
    """
    try:
        data = json.loads(_strip_code_fence(vision_response))

        invoice_number = _first_truthy(
            data, ("invoice_number", "InvoiceNumber", "invoiceNumber"), ""
        )
        invoice_date = _first_truthy(
            data, ("invoice_date", "InvoiceDate", "invoiceDate"), ""
        )

        # Rewrite M/D/Y dates as YYYY-MM-DD; two-digit years are assumed
        # to be 20xx. Any other format is passed through unchanged.
        if invoice_date:
            if "/" in invoice_date and len(invoice_date.split("/")) == 3:
                month, day, year = invoice_date.split("/")
                if len(year) == 2:
                    year = "20" + year
                invoice_date = f"{year}-{month.zfill(2)}-{day.zfill(2)}"

        amount = _first_truthy(
            data, ("amount", "InvoiceTotal", "invoiceTotal", "total"), 0
        )
        vendor_name = _first_truthy(
            data,
            ("vendor_name", "VendorName", "vendorName", "Company", "company"),
            "",
        )
        tax_amount = _first_truthy(
            data, ("tax_amount", "TaxAmount", "taxAmount"), 0
        )
        line_items = _first_truthy(
            data,
            ("line_items", "Order", "order", "LineItems", "lineItems"),
            [],
        )

        total = _to_float(amount)
        return {
            "headerSection": {
                "vendorName": vendor_name,
                "subtotal": total
            },
            "billingDetails": {
                "invoiceNumber": invoice_number,
                "invoiceDate": invoice_date,
                "dueDate": ""
            },
            "chargesSummary": {
                "document_total": total,
                "secondary_tax": _to_float(tax_amount),
                "lineItemsBreakdown": line_items
            },
            "status": "processed"
        }

    except Exception as e:
        # Deliberate catch-all boundary: any malformed reply (bad JSON,
        # non-object payload, unparseable number, non-string input) maps
        # to the same error skeleton so callers never see an exception.
        print(f"⚠️ Error converting vision response: {e}")
        return {
            "headerSection": {"vendorName": "", "subtotal": 0.0},
            "billingDetails": {"invoiceNumber": "", "invoiceDate": "", "dueDate": ""},
            "chargesSummary": {"document_total": 0.0, "secondary_tax": 0.0, "lineItemsBreakdown": []},
            "status": "conversion_error"
        }
104
+
105
def test_conversion():
    """Run the converter on a captured vision-model reply and print the outcome.

    Prints the raw reply, the converted structure, the key fields, and a
    pass/fail line based on the invoice number and total.
    """
    # Captured reply from the vision model, Markdown fence included.
    vision_response = '''```json
{
"InvoiceDate": "09/16/24",
"InvoiceNumber": "50482291",
"AccountNumber": "203258",
"InvoiceTotal": "197.79",
"Order": [
{
"Date": "09/16/24",
"OrderNumber": "923785",
"QuantityDelivered": "6.0",
"Description": "331L CYL AL/BOUT 331L",
"UnitPrice": "22.174",
"ExtendedTotal": "133.04"
},
{
"Description": "CARBURN TAX",
"ExtendedTotal": "11.94"
},
{
"Description": "FUEL CHARGE",
"ExtendedTotal": "22.06"
},
{
"Description": "DANG - HST / F/P/ PRAIS MAT.",
"ExtendedTotal": "4.95"
},
{
"Description": "GST - HST / TPS - TVH",
"ExtendedTotal": "25.80"
}
]
}
```'''

    print("🧪 Testing Vision Response Conversion")
    print("=" * 50)

    print("\n📄 Original Vision Response:")
    print(vision_response)

    print("\n🔄 Converting to extracted_data format...")
    converted = convert_vision_response_to_extracted_data(vision_response)

    print("\n✅ Converted Data:")
    print(json.dumps(converted, indent=2))

    # Pull out the fields the pass/fail check relies on.
    vendor_name = converted["headerSection"]["vendorName"]
    number = converted["billingDetails"]["invoiceNumber"]
    total = converted["chargesSummary"]["document_total"]

    print("\n🎯 Key Fields:")
    print(f" Vendor: '{vendor_name}'")
    print(f" Invoice #: '{number}'")
    print(f" Amount: ${total}")

    matched = (number, total) == ("50482291", 197.79)
    print("✅ Conversion successful!" if matched else "❌ Conversion failed!")


if __name__ == "__main__":
    test_conversion()
@@ -0,0 +1,41 @@
1
+ import os
2
+ import requests
3
+ import json
4
+
5
# Endpoint and credentials; the key falls back to the development secret
# when MEMRA_API_KEY is not exported.
api_url = "https://api.memra.co"
api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")

# Seconds to wait for the server; without a timeout requests can block forever.
request_timeout = 120

schema = [
    {"column_name": "vendor_name", "data_type": "character varying"}
]

# Ask the hosted PDFProcessor tool to process a previously uploaded file.
resp = requests.post(
    f"{api_url}/tools/execute",
    json={
        "tool_name": "PDFProcessor",
        "hosted_by": "memra",
        "input_data": {
            "file": "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf",
            "schema": schema
        }
    },
    headers={"X-API-Key": api_key},
    timeout=request_timeout,
)

print(f"Status: {resp.status_code}")
if resp.status_code == 200:
    result = resp.json()
    print(f"Success: {result.get('success')}")
    print(f"Keys: {list(result.keys())}")
    # Walk result["data"]["data"] defensively: a null or non-object value at
    # either level is skipped instead of raising on .keys().
    outer_data = result.get('data')
    if isinstance(outer_data, dict):
        print(f"Data keys: {list(outer_data.keys())}")
        inner_data = outer_data.get('data')
        if isinstance(inner_data, dict):
            print(f"Inner data keys: {list(inner_data.keys())}")
            if 'vision_prompt' in inner_data:
                prompt = inner_data['vision_prompt']
                print(f"\nPrompt length: {len(prompt)}")
                print("Prompt preview:")
                print(prompt[:200])
else:
    print(f"Error: {resp.text}")
@@ -0,0 +1,114 @@
1
+ import os
2
+ import requests
3
+ import json
4
+ import base64
5
+
6
# Upload a test PDF and process it
api_url = "https://api.memra.co"
api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")

# Seconds to wait for each HTTP call; without a timeout requests can block forever.
request_timeout = 120

# First, upload a PDF file (path is relative to the current working directory)
pdf_path = "data/invoices/10352259401.PDF"  # Superior Propane invoice

# Read and encode the PDF
with open(pdf_path, 'rb') as f:
    pdf_content = f.read()

pdf_b64 = base64.b64encode(pdf_content).decode('utf-8')

# Upload the file
print("📤 Uploading PDF file...")
upload_resp = requests.post(
    f"{api_url}/upload",
    json={
        "filename": os.path.basename(pdf_path),
        "content": pdf_b64,
        "content_type": "application/pdf"
    },
    headers={"X-API-Key": api_key},
    timeout=request_timeout,
)

if upload_resp.status_code != 200:
    print(f"Upload failed: {upload_resp.text}")
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(1)

upload_result = upload_resp.json()
remote_path = upload_result["data"]["remote_path"]
print(f"✅ Uploaded to: {remote_path}")

# Now process with PDFProcessor
print("\n🔍 Processing with PDFProcessor...")
schema = [
    {"column_name": "vendor_name", "data_type": "character varying"},
    {"column_name": "invoice_number", "data_type": "character varying"},
    {"column_name": "invoice_date", "data_type": "date"},
    {"column_name": "due_date", "data_type": "date"},
    {"column_name": "total_amount", "data_type": "numeric"},
    {"column_name": "tax_amount", "data_type": "numeric"},
    {"column_name": "line_items", "data_type": "jsonb"}
]

process_resp = requests.post(
    f"{api_url}/tools/execute",
    json={
        "tool_name": "PDFProcessor",
        "hosted_by": "memra",
        "input_data": {
            "file": remote_path,
            "schema": schema
        }
    },
    headers={"X-API-Key": api_key},
    timeout=request_timeout,
)

if process_resp.status_code == 200:
    result = process_resp.json()

    if result.get('success') and 'data' in result and 'data' in result['data']:
        inner_data = result['data']['data']

        print("\n=== VISION MODEL RAW RESPONSE ===")
        if 'vision_response' in inner_data:
            vision_resp = inner_data['vision_response']
            print(f"Raw response: {vision_resp[:200]}...")
            try:
                # The model may wrap its JSON in a Markdown code fence;
                # remove it before parsing.
                payload = vision_resp.strip()
                if payload.startswith("```"):
                    payload = payload.replace("```json", "").replace("```", "").strip()
                vision_data = json.loads(payload)
                print("\nVision model extracted:")
                for key, value in vision_data.items():
                    print(f" {key}: {value}")
            except Exception as e:
                print(f"Could not parse vision response: {e}")

        print("\n=== VISION PROMPT USED ===")
        if 'vision_prompt' in inner_data:
            print(inner_data['vision_prompt'][:500] + "...")

        print("\n=== TRANSFORMED DATA (MCP Format) ===")
        if 'extracted_data' in inner_data:
            extracted = inner_data['extracted_data']
            header = extracted.get('headerSection', {})
            billing = extracted.get('billingDetails', {})
            charges = extracted.get('chargesSummary', {})

            print(f"Vendor: {header.get('vendorName', 'MISSING')}")
            print(f"Invoice Number: {billing.get('invoiceNumber', 'MISSING')}")
            print(f"Invoice Date: {billing.get('invoiceDate', 'MISSING')}")
            print(f"Due Date: {billing.get('dueDate', 'MISSING')}")
            print(f"Total Amount: ${charges.get('document_total', 'MISSING')}")
            print(f"Tax Amount: ${charges.get('secondary_tax', 'MISSING')}")

            # The analysis reads billing/charges, so it stays inside this
            # guard where those names are defined.
            print("\n=== ANALYSIS ===")
            missing_fields = []
            if not billing.get('dueDate'):
                missing_fields.append('due_date')
            if not charges.get('document_total'):
                missing_fields.append('total_amount')
            if not charges.get('secondary_tax'):
                missing_fields.append('tax_amount')

            if missing_fields:
                print(f"❌ Missing fields: {', '.join(missing_fields)}")
            else:
                print("✅ All fields extracted successfully!")
else:
    print(f"Processing failed: {process_resp.text}")
@@ -0,0 +1,22 @@
1
+ import os
2
+ import requests
3
+ import json
4
+
5
# Endpoint and credentials; the key falls back to the development secret
# when MEMRA_API_KEY is not exported.
api_url = "https://api.memra.co"
api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")

# Run PDFProcessor on a previously uploaded file with a one-column schema.
resp = requests.post(
    f"{api_url}/tools/execute",
    json={
        "tool_name": "PDFProcessor",
        "hosted_by": "memra",
        "input_data": {
            "file": "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf",
            "schema": [{"column_name": "vendor_name", "data_type": "character varying"}]
        }
    },
    headers={"X-API-Key": api_key},
    # Without a timeout requests can block forever on an unresponsive server.
    timeout=120,
)

print("Full response:")
try:
    print(json.dumps(resp.json(), indent=2))
except ValueError:
    # The body was not JSON (for example an HTML error page); show it raw
    # together with the status code instead of crashing.
    print(f"(non-JSON body, status {resp.status_code})")
    print(resp.text)
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to see what the memra library receives from PDFProcessor
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
# Provide development defaults for the API key and URL without overwriting
# values the caller has already exported.
os.environ.setdefault('MEMRA_API_KEY', 'test-secret-for-development')
os.environ.setdefault('MEMRA_API_URL', 'https://api.memra.co')
13
+
14
+ # Add the parent directory to the path so we can import memra
15
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
16
+
17
+ from memra import ExecutionEngine, Agent, LLM
18
+ import base64
19
+ import requests
20
+
21
def test_memra_pdf_processor():
    """Upload a sample invoice, run PDFProcessor via the memra engine, and print the result.

    The upload goes straight to the HTTP API; processing goes through
    ``ExecutionEngine.execute_agent``. The API URL and key are read from
    MEMRA_API_URL / MEMRA_API_KEY, falling back to the development values.
    Returns ``None``; all findings are printed.
    """
    api_url = os.environ.get("MEMRA_API_URL", "https://api.memra.co")
    api_key = os.environ.get("MEMRA_API_KEY", "test-secret-for-development")

    print("🧪 Testing Memra Library PDFProcessor Response")
    print("=" * 50)

    # Create a simple agent to test PDFProcessor
    test_llm = LLM(
        model="llama-3.2-11b-vision-preview",
        temperature=0.1,
        max_tokens=2000
    )

    test_agent = Agent(
        role="Test Parser",
        job="Test PDFProcessor response",
        llm=test_llm,
        tools=[
            {"name": "PDFProcessor", "hosted_by": "memra"}
        ],
        input_keys=["file"],
        output_key="test_result"
    )

    # First upload a file (path is relative to the current working directory)
    print("\n1️⃣ Uploading file...")
    pdf_path = "data/invoices/10352260169.PDF"

    with open(pdf_path, 'rb') as f:
        file_content = f.read()

    file_b64 = base64.b64encode(file_content).decode('utf-8')

    upload_data = {
        "filename": os.path.basename(pdf_path),
        "content": file_b64,
        "content_type": "application/pdf"
    }

    response = requests.post(
        f"{api_url}/upload",
        json=upload_data,
        headers={
            "X-API-Key": api_key,
            "Content-Type": "application/json"
        },
        # Without a timeout requests can block forever on an unresponsive server.
        timeout=120,
    )

    if response.status_code != 200:
        print(f"❌ Upload failed: {response.status_code}")
        return

    result = response.json()
    if not result.get("success"):
        print(f"❌ Upload failed: {result.get('error')}")
        return

    remote_path = result["data"]["remote_path"]
    print(f"✅ File uploaded: {remote_path}")

    # Now test with memra library
    print("\n2️⃣ Testing with memra library...")
    engine = ExecutionEngine()

    input_data = {
        "file": remote_path
    }

    result = engine.execute_agent(test_agent, input_data)

    print("\n📄 Memra Library Result:")
    print(f"Success: {result.get('success')}")
    print(f"Error: {result.get('error')}")

    if result.get('success'):
        test_result = result.get('result', {})
        print(f"\n🔍 Test Result Keys: {list(test_result.keys())}")

        # Check if there are tool results.
        # NOTE(review): result is read with .get() above but probed with
        # hasattr() here; confirm which interface execute_agent returns.
        if hasattr(result, 'trace') and result.trace and hasattr(result.trace, 'tool_results'):
            tool_results = result.trace.tool_results
            print("\n🔧 Tool Results:")
            for tool_name, tool_result in tool_results.items():
                print(f"\n📊 Tool: {tool_name}")
                print(f"Success: {tool_result.get('success')}")
                print(f"Data Keys: {list(tool_result.get('data', {}).keys())}")

                data = tool_result.get('data', {})
                if 'data' in data:
                    inner_data = data['data']
                    print(f"Inner Data Keys: {list(inner_data.keys())}")

                    if 'vision_response' in inner_data:
                        print("✅ Vision Response Found!")
                        vision_response = inner_data['vision_response']
                        print(f"Vision Response (first 200 chars): {vision_response[:200]}...")

                    if 'extracted_data' in inner_data:
                        print("✅ Extracted Data Found!")
                        extracted_data = inner_data['extracted_data']
                        print(f"Extracted Data: {extracted_data}")


if __name__ == "__main__":
    test_memra_pdf_processor()
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to see what PDFProcessor actually returns
4
+ """
5
+
6
+ import os
7
+ import base64
8
+ import requests
9
+ import json
10
+
11
def test_pdf_processor_response():
    """Upload a sample invoice, call PDFProcessor over HTTP, and print the response structure.

    The API key is taken from MEMRA_API_KEY; when the variable is not set
    it is initialised to the development secret. Returns ``None``; all
    findings are printed.
    """
    # Respect a key the caller exported; otherwise fall back to the dev secret.
    API_KEY = os.environ.setdefault('MEMRA_API_KEY', 'test-secret-for-development')
    API_BASE = "https://api.memra.co"
    # Seconds to wait per HTTP call; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 120
    headers = {
        "X-API-Key": API_KEY,
        "Content-Type": "application/json"
    }

    print("🧪 Testing PDFProcessor Response Structure")
    print("=" * 50)

    # Step 1: Upload a file (path is relative to the current working directory)
    print("\n1️⃣ Uploading file...")
    pdf_path = "data/invoices/10352260169.PDF"

    with open(pdf_path, 'rb') as f:
        file_content = f.read()

    file_b64 = base64.b64encode(file_content).decode('utf-8')

    upload_data = {
        "filename": os.path.basename(pdf_path),
        "content": file_b64,
        "content_type": "application/pdf"
    }

    response = requests.post(
        f"{API_BASE}/upload",
        json=upload_data,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code != 200:
        print(f"❌ Upload failed: {response.status_code}")
        return

    result = response.json()
    if not result.get("success"):
        print(f"❌ Upload failed: {result.get('error')}")
        return

    remote_path = result["data"]["remote_path"]
    print(f"✅ File uploaded: {remote_path}")

    # Step 2: Call PDFProcessor
    print("\n2️⃣ Calling PDFProcessor...")
    process_data = {
        "tool_name": "PDFProcessor",
        "hosted_by": "memra",
        "input_data": {
            "file": remote_path
        }
    }

    response = requests.post(
        f"{API_BASE}/tools/execute",
        json=process_data,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )

    print(f"Response status: {response.status_code}")

    if response.status_code == 200:
        result = response.json()
        print("\n📄 Full Response Structure:")
        print(json.dumps(result, indent=2))

        if result.get("success"):
            data = result.get("data", {})
            print(f"\n🔍 Data Keys: {list(data.keys())}")

            # Check for vision_response
            if "vision_response" in data:
                print("\n🎯 Vision Response Found!")
                vision_response = data["vision_response"]
                print(f"Vision Response (first 200 chars): {vision_response[:200]}...")

                # Try to parse as JSON
                try:
                    # Strip first so a fence preceded by whitespace is still
                    # recognised; bare fences without a language tag also match.
                    vision_response = vision_response.strip()
                    if vision_response.startswith("```"):
                        vision_response = vision_response.replace("```json", "").replace("```", "").strip()
                    vision_data = json.loads(vision_response)
                    print("\n✅ Vision Response Parsed Successfully:")
                    print(json.dumps(vision_data, indent=2))
                except Exception as e:
                    print(f"❌ Failed to parse vision response: {e}")

            # Check for extracted_data
            if "extracted_data" in data:
                print("\n📊 Extracted Data Found:")
                extracted_data = data["extracted_data"]
                print(json.dumps(extracted_data, indent=2))
            else:
                print("\n❌ No extracted_data found")
        else:
            print(f"❌ API call failed: {result.get('error')}")
    else:
        print(f"❌ Request failed: {response.status_code}")
        print(f"Response: {response.text}")


if __name__ == "__main__":
    test_pdf_processor_response()
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test PDFProcessor directly using the same mechanism as ETL workflow
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import requests
9
+ import json
10
+
11
+ # Add the parent directory to the path to import memra modules
12
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
13
+
14
+ from memra.execution import ExecutionEngine
15
+ from memra.models import Agent, ExecutionTrace
16
+
17
# Provide development defaults for the API URL and key without overwriting
# values the caller has already exported.
os.environ.setdefault("MEMRA_API_URL", "https://api.memra.co")
os.environ.setdefault("MEMRA_API_KEY", "test-secret-for-development")
20
+
21
def test_pdfprocessor_direct():
    """Run PDFProcessor through the engine's agent path and print what it returns.

    Builds a one-tool agent, executes it against a fixed uploaded file path,
    then walks the result (agent data -> tool result -> tool data) and
    prints each level. Returns ``None``.
    """
    print("🧪 Testing PDFProcessor Direct Call")
    print("=" * 50)

    # Agent whose only job is to invoke the hosted PDFProcessor tool.
    pdf_tool = {"name": "PDFProcessor", "hosted_by": "memra"}
    test_agent = Agent(
        role="Test Agent",
        job="Test PDFProcessor directly",
        llm=None,  # No LLM needed for direct tool call
        tools=[pdf_tool],
        input_keys=["file"],
        output_key="result"
    )

    engine = ExecutionEngine()

    # Remote path produced by an earlier upload test.
    file_path = "/uploads/bd4b5a42-ff4b-4659-b050-e1f2d59f521a.PDF"
    print(f"📄 Testing with file: {file_path}")

    # Context and trace in the shape the engine's agent runner is called with.
    context = {
        "input": {"file": file_path},
        "department_context": {},
        "results": {}
    }
    trace = ExecutionTrace()

    result = engine._execute_agent(test_agent, context, trace)

    print("\n📊 Result:")
    print(f"Success: {result.success}")
    print(f"Error: {result.error}")

    if not (result.success and result.data):
        print(f"❌ Agent execution failed: {result.error}")
        return

    print(f"\n📄 Data keys: {list(result.data.keys())}")

    if 'result' not in result.data:
        print("❌ No 'result' field in agent output")
        return

    tool_result = result.data['result']
    print("\n🔧 Tool Result:")
    print(f"Success: {tool_result.get('success')}")
    print(f"Error: {tool_result.get('error')}")

    if 'data' not in tool_result:
        print("❌ No 'data' field in tool result")
        return

    data = tool_result['data']
    print(f"Data keys: {list(data.keys())}")

    if 'vision_response' in data:
        print("\n📝 Vision Response found!")
        print(f"Length: {len(data['vision_response'])} characters")
        try:
            # Remove Markdown fence markers before parsing the JSON payload.
            unfenced = data['vision_response'].replace('```json', '').replace('```', '').strip()
            parsed = json.loads(unfenced)
            print("✅ Valid JSON response:")
            print(json.dumps(parsed, indent=2))
        except Exception as e:
            print(f"❌ JSON parsing error: {e}")
            print(f"Raw response: {data['vision_response'][:500]}...")

    if 'extracted_data' in data:
        print("\n🎯 Extracted Data found!")
        print(json.dumps(data['extracted_data'], indent=2))


if __name__ == "__main__":
    test_pdfprocessor_direct()