memra 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memra/cli.py +143 -9
- memra/demos/etl_invoice_processing/check_after_workflow.py +50 -0
- memra/demos/etl_invoice_processing/check_database.py +44 -0
- memra/demos/etl_invoice_processing/check_recent_db.py +42 -0
- memra/demos/etl_invoice_processing/data/README.md +112 -0
- memra/demos/etl_invoice_processing/data/invoices/10352259401.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352259823.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352260169.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352260417.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352260599.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352260912.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352261134.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352261563.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352261647.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352261720.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352261811.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352262025.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352262454.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352262702.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352262884.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352263346.PDF +0 -0
- memra/demos/etl_invoice_processing/data/invoices/10352263429.PDF +0 -0
- memra/demos/etl_invoice_processing/database_monitor_agent.py +89 -0
- memra/demos/etl_invoice_processing/debug_mcp.py +66 -0
- memra/demos/etl_invoice_processing/debug_schema.py +45 -0
- memra/demos/etl_invoice_processing/etl_invoice_demo.py +1233 -0
- memra/demos/etl_invoice_processing/modify_database.py +65 -0
- memra/demos/etl_invoice_processing/run_etl_batch.py +60 -0
- memra/demos/etl_invoice_processing/setup_demo_data.py +154 -0
- memra/demos/etl_invoice_processing/simple_pdf_processor.py +181 -0
- memra/demos/etl_invoice_processing/test_agent3.py +56 -0
- memra/demos/etl_invoice_processing/test_agent3_v2.py +32 -0
- memra/demos/etl_invoice_processing/test_api.py +28 -0
- memra/demos/etl_invoice_processing/test_api_client_direct.py +89 -0
- memra/demos/etl_invoice_processing/test_conversion.py +172 -0
- memra/demos/etl_invoice_processing/test_debug.py +41 -0
- memra/demos/etl_invoice_processing/test_direct_vision.py +114 -0
- memra/demos/etl_invoice_processing/test_full_response.py +22 -0
- memra/demos/etl_invoice_processing/test_memra_response.py +124 -0
- memra/demos/etl_invoice_processing/test_pdf_processor_response.py +118 -0
- memra/demos/etl_invoice_processing/test_pdfprocessor_direct.py +96 -0
- memra/demos/etl_invoice_processing/test_postgres_insert.py +120 -0
- memra/demos/etl_invoice_processing/test_remote_upload.py +143 -0
- memra/demos/etl_invoice_processing/test_schema_format.py +39 -0
- memra/demos/etl_invoice_processing/test_sql_executor.py +58 -0
- memra/demos/etl_invoice_processing/test_sql_executor_extra_fields.py +61 -0
- memra/demos/etl_invoice_processing/test_sql_executor_fix.py +40 -0
- memra/demos/etl_invoice_processing/test_updated_server.py +50 -0
- memra/demos/etl_invoice_processing/test_upload_functionality.py +156 -0
- memra/demos/etl_invoice_processing/test_upload_server.py +232 -0
- memra/demos/etl_invoice_processing/test_vision_output.py +75 -0
- memra/demos/etl_invoice_processing/test_vision_prompt.py +43 -0
- memra/demos/etl_invoice_processing/test_vision_simple.py +60 -0
- {memra-0.2.12.dist-info → memra-0.2.14.dist-info}/METADATA +53 -78
- memra-0.2.14.dist-info/RECORD +66 -0
- {memra-0.2.12.dist-info → memra-0.2.14.dist-info}/WHEEL +1 -1
- memra-0.2.14.dist-info/top_level.txt +1 -0
- memra-0.2.12.dist-info/RECORD +0 -68
- memra-0.2.12.dist-info/top_level.txt +0 -4
- memra-ops/app.py +0 -808
- memra-ops/config/config.py +0 -25
- memra-ops/config.py +0 -34
- memra-ops/logic/__init__.py +0 -1
- memra-ops/logic/file_tools.py +0 -43
- memra-ops/logic/invoice_tools.py +0 -668
- memra-ops/logic/invoice_tools_fix.py +0 -66
- memra-ops/mcp_bridge_server.py +0 -1178
- memra-ops/scripts/check_database.py +0 -37
- memra-ops/scripts/clear_database.py +0 -48
- memra-ops/scripts/monitor_database.py +0 -67
- memra-ops/scripts/release.py +0 -133
- memra-ops/scripts/reset_database.py +0 -65
- memra-ops/scripts/start_memra.py +0 -334
- memra-ops/scripts/stop_memra.py +0 -132
- memra-ops/server_tool_registry.py +0 -190
- memra-ops/tests/test_llm_text_to_sql.py +0 -115
- memra-ops/tests/test_llm_vs_pattern.py +0 -130
- memra-ops/tests/test_mcp_schema_aware.py +0 -124
- memra-ops/tests/test_schema_aware_sql.py +0 -139
- memra-ops/tests/test_schema_aware_sql_simple.py +0 -66
- memra-ops/tests/test_text_to_sql_demo.py +0 -140
- memra-ops/tools/mcp_bridge_server.py +0 -851
- memra-sdk/examples/accounts_payable.py +0 -215
- memra-sdk/examples/accounts_payable_client.py +0 -217
- memra-sdk/examples/accounts_payable_mcp.py +0 -200
- memra-sdk/examples/ask_questions.py +0 -123
- memra-sdk/examples/invoice_processing.py +0 -116
- memra-sdk/examples/propane_delivery.py +0 -87
- memra-sdk/examples/simple_text_to_sql.py +0 -158
- memra-sdk/memra/__init__.py +0 -31
- memra-sdk/memra/discovery.py +0 -15
- memra-sdk/memra/discovery_client.py +0 -49
- memra-sdk/memra/execution.py +0 -481
- memra-sdk/memra/models.py +0 -99
- memra-sdk/memra/tool_registry.py +0 -343
- memra-sdk/memra/tool_registry_client.py +0 -106
- memra-sdk/scripts/release.py +0 -133
- memra-sdk/setup.py +0 -52
- memra-workflows/accounts_payable/accounts_payable.py +0 -215
- memra-workflows/accounts_payable/accounts_payable_client.py +0 -216
- memra-workflows/accounts_payable/accounts_payable_mcp.py +0 -200
- memra-workflows/accounts_payable/accounts_payable_smart.py +0 -221
- memra-workflows/invoice_processing/invoice_processing.py +0 -116
- memra-workflows/invoice_processing/smart_invoice_processor.py +0 -220
- memra-workflows/logic/__init__.py +0 -1
- memra-workflows/logic/file_tools.py +0 -50
- memra-workflows/logic/invoice_tools.py +0 -501
- memra-workflows/logic/propane_agents.py +0 -52
- memra-workflows/mcp_bridge_server.py +0 -230
- memra-workflows/propane_delivery/propane_delivery.py +0 -87
- memra-workflows/text_to_sql/complete_invoice_workflow_with_queries.py +0 -208
- memra-workflows/text_to_sql/complete_text_to_sql_system.py +0 -266
- memra-workflows/text_to_sql/file_discovery_demo.py +0 -156
- {memra-0.2.12.dist-info/licenses → memra-0.2.14.dist-info}/LICENSE +0 -0
- {memra-0.2.12.dist-info → memra-0.2.14.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Modify database through MCP bridge
|
4
|
+
"""
|
5
|
+
|
6
|
+
import requests
|
7
|
+
import json
|
8
|
+
|
9
|
+
def _run_bridge_sql(bridge_url, headers, sql_query, timeout=30):
    """Send one SQL statement to the bridge's ``SQLExecutor`` tool.

    Args:
        bridge_url: Base URL of the MCP bridge (no trailing slash).
        headers: HTTP headers to send, including the bridge secret.
        sql_query: SQL text placed in ``input_data.sql_query``.
        timeout: Seconds to wait for the bridge before giving up.

    Returns:
        The decoded JSON body when the bridge answers HTTP 200, otherwise
        ``None`` (after printing the HTTP status code).

    Raises:
        requests.RequestException: connection failures and timeouts are not
            swallowed here; they propagate to the caller.
    """
    payload = {
        "tool_name": "SQLExecutor",
        "input_data": {
            "sql_query": sql_query
        }
    }

    # Without a timeout, requests waits indefinitely on an unresponsive bridge.
    response = requests.post(
        f"{bridge_url}/execute_tool",
        json=payload,
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"HTTP Error: {response.status_code}")
        return None
    return response.json()


def modify_database():
    """Show the ``invoices`` table, then drop its invoice_number unique constraint.

    Both statements are sent to the MCP bridge at ``http://localhost:8081``
    using the ``X-Bridge-Secret`` header. Results and errors are printed;
    nothing is returned.
    """
    bridge_url = "http://localhost:8081"
    bridge_secret = "test-secret-for-development"

    headers = {
        "Content-Type": "application/json",
        "X-Bridge-Secret": bridge_secret
    }

    print("🔧 Modifying database through MCP bridge...")

    # 1. Show current table contents
    print("\n📊 Current table contents:")
    result = _run_bridge_sql(bridge_url, headers, "SELECT * FROM invoices;")
    if result is not None:
        if result.get("success"):
            data = result["data"]
            print(f"Query: {data['query']}")
            print(f"Rows: {data['row_count']}")
            for row in data['results']:
                print(f" {row}")
        else:
            print(f"Error: {result.get('error')}")

    # 2. Remove the unique constraint
    print("\n🔧 Removing unique constraint on invoice_number...")
    result = _run_bridge_sql(
        bridge_url,
        headers,
        "ALTER TABLE invoices DROP CONSTRAINT IF EXISTS invoices_invoice_number_key;",
    )
    if result is not None:
        if result.get("success"):
            print("✅ Constraint removed successfully!")
        else:
            print(f"Error: {result.get('error')}")


if __name__ == "__main__":
    modify_database()
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Run ETL workflow in batch mode
|
4
|
+
"""
|
5
|
+
|
6
|
+
import os
|
7
|
+
import sys
|
8
|
+
from pathlib import Path
|
9
|
+
|
10
|
+
# Add the parent directory to the path so we can import memra
|
11
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
12
|
+
|
13
|
+
from memra import Agent, Department, LLM, check_api_health, get_api_status
|
14
|
+
from memra.execution import ExecutionEngine
|
15
|
+
from etl_invoice_demo import etl_department, upload_file_to_api
|
16
|
+
|
17
|
+
# Set API key for authentication
os.environ['MEMRA_API_KEY'] = 'test-secret-for-development'
os.environ['MEMRA_API_URL'] = 'https://api.memra.co'

# Check API health
print("🔍 Checking Memra API status...")
api_status = get_api_status()
print(f"API Health: {'✅ Healthy' if api_status['api_healthy'] else '❌ Unavailable'}")

if not api_status['api_healthy']:
    print("❌ Cannot proceed - Memra API is not available")
    sys.exit(1)

# Process one invoice
invoice_path = "data/invoices/10352259401.PDF"  # Superior Propane invoice
if not os.path.exists(invoice_path):
    # The relative path only resolves when the script is launched from its own
    # directory; otherwise fall back to the copy that sits next to this file.
    invoice_path = str(Path(__file__).parent / invoice_path)
print(f"\n📄 Processing invoice: {invoice_path}")

# Upload file to remote API
remote_path = upload_file_to_api(invoice_path)
if not remote_path:
    print("❌ Failed to upload file")
    sys.exit(1)

# Database connection info
connection = "postgresql://postgres:postgres@localhost:5433/local_workflow"

# Prepare input for department
input_data = {
    "file": remote_path,
    "file_path": invoice_path,
    "table_name": "invoices",
    "connection": connection,
    "sql_query": "SELECT column_name, data_type, is_nullable, column_default FROM information_schema.columns WHERE table_name = 'invoices' ORDER BY ordinal_position"
}

# Execute department
engine = ExecutionEngine()
result = engine.execute_department(etl_department, input_data)

if result.success:
    print("\n✅ ETL workflow completed successfully!")
    print(f"Data written to database: {result.data.get('write_confirmation', {})}")
else:
    print(f"\n❌ ETL workflow failed: {result.error}")
    # Match the earlier failure paths so batch callers see a non-zero status.
    sys.exit(1)
|
@@ -0,0 +1,154 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Setup Demo Data Script
|
4
|
+
Helps users set up invoice files for the ETL demo
|
5
|
+
"""
|
6
|
+
|
7
|
+
import os
|
8
|
+
import shutil
|
9
|
+
import sys
|
10
|
+
from pathlib import Path
|
11
|
+
|
12
|
+
def create_demo_structure():
    """Ensure ``data/invoices`` exists beside this script and return its path.

    Returns:
        pathlib.Path: the ``data/invoices`` directory under this file's folder.
    """
    data_dir = Path(__file__).parent / "data"
    invoices_dir = data_dir / "invoices"

    # parents=True creates data_dir as well; exist_ok makes reruns harmless.
    invoices_dir.mkdir(parents=True, exist_ok=True)

    print("📁 Created demo data structure:")
    for created in (data_dir, invoices_dir):
        print(f" {created}")

    return invoices_dir
|
28
|
+
|
29
|
+
def check_existing_files(invoices_dir):
    """Report whether *invoices_dir* already holds PDF invoice files.

    The extension is matched case-insensitively, so ``*.PDF`` files count
    as well as ``*.pdf``.

    Args:
        invoices_dir: ``pathlib.Path`` of the directory to inspect.

    Returns:
        ``True`` if at least one PDF was found (each is listed with its size),
        ``False`` otherwise.
    """
    # glob("*.pdf") is case-sensitive on POSIX and would miss "*.PDF" names,
    # so compare the lowercased name instead. Sorted for stable output.
    pdf_files = sorted(
        entry for entry in invoices_dir.glob("*")
        if entry.name.lower().endswith(".pdf")
    )

    if not pdf_files:
        print(f"\n📄 No invoice files found in {invoices_dir}")
        return False

    print(f"\n📄 Found {len(pdf_files)} existing invoice files:")
    for pdf_file in pdf_files:
        size_mb = pdf_file.stat().st_size / (1024 * 1024)
        print(f" - {pdf_file.name} ({size_mb:.1f} MB)")
    return True
|
43
|
+
|
44
|
+
def copy_sample_files(invoices_dir):
    """Copy up to five PDFs from a common user folder into *invoices_dir*.

    Searches ``~/Downloads``, ``~/Desktop``, ``~/Documents`` and
    ``./invoices`` in that order. The first location that yields at least one
    successful copy ends the search. Copies are renamed ``invoice_001.pdf``,
    ``invoice_002.pdf``, ...

    Args:
        invoices_dir: ``pathlib.Path`` of the destination directory.

    Returns:
        ``True`` if any file was copied, ``False`` otherwise.
    """
    # Common locations where users might have invoice files
    sample_locations = [
        Path.home() / "Downloads",
        Path.home() / "Desktop",
        Path.home() / "Documents",
        Path.cwd() / "invoices",  # If they have an invoices folder in current directory
    ]

    print("\n🔍 Looking for sample invoice files...")

    for location in sample_locations:
        if not location.exists():
            continue

        # Match the extension case-insensitively ("*.PDF" as well as "*.pdf")
        # and sort so the same files are picked on every run.
        pdf_files = sorted(
            entry for entry in location.glob("*")
            if entry.name.lower().endswith(".pdf")
        )
        if not pdf_files:
            continue

        print(f" Found {len(pdf_files)} PDF files in {location}")

        # Copy first few files (up to 5); the slice already enforces the cap.
        copied = 0
        for pdf_file in pdf_files[:5]:
            dest_file = invoices_dir / f"invoice_{copied+1:03d}.pdf"
            try:
                shutil.copy2(pdf_file, dest_file)
                size_mb = pdf_file.stat().st_size / (1024 * 1024)
                print(f" ✅ Copied {pdf_file.name} -> {dest_file.name} ({size_mb:.1f} MB)")
                copied += 1
            except OSError as e:
                # Best effort: report the failure and try the next candidate.
                print(f" ❌ Failed to copy {pdf_file.name}: {e}")

        if copied > 0:
            return True

    return False
|
82
|
+
|
83
|
+
def create_placeholder_files(invoices_dir):
    """Drop a ``README.txt`` into *invoices_dir* explaining how to add invoices.

    Args:
        invoices_dir: ``pathlib.Path`` of the directory to write into.

    Returns:
        Always ``False``: a README is not a usable invoice file.
    """
    print("\n📝 Creating placeholder files for testing...")

    # Instructions written verbatim into the README.
    placeholder_content = """
This is a placeholder file for the ETL Invoice Processing demo.

To use real invoice files:
1. Place your PDF invoice files in this directory
2. Rename them to invoice_001.pdf, invoice_002.pdf, etc.
3. Run the demo: python etl_invoice_demo.py

The demo will automatically discover and process all PDF files in this directory.
"""

    placeholder_file = invoices_dir / "README.txt"
    placeholder_file.write_text(placeholder_content)

    print(f" ✅ Created {placeholder_file}")
    return False
|
106
|
+
|
107
|
+
def main():
    """Prepare the demo data folder and print follow-up guidance.

    Creates the directory layout, then falls back step by step: use existing
    invoices, else copy samples from user folders, else write a placeholder
    README.

    Returns:
        Always ``True``.
    """
    print("🚀 Setting up ETL Invoice Processing Demo Data")
    print("=" * 50)

    invoices_dir = create_demo_structure()

    # Each step runs only if the previous one found or copied nothing.
    if not check_existing_files(invoices_dir):
        if not copy_sample_files(invoices_dir):
            create_placeholder_files(invoices_dir)

    closing_notes = (
        # Next steps
        "\n🎯 Next Steps:",
        "1. Add your invoice PDF files to the data/invoices/ directory",
        "2. Run the demo: python etl_invoice_demo.py",
        "3. Check the demo output and database results",
        # Pointers to further documentation
        "\n📚 For more information:",
        "- See data/README.md for detailed usage instructions",
        "- Check the main demo README for workflow details",
        # File size recommendations
        "\n💡 File Size Recommendations:",
        "- Individual files: 1-5 MB each",
        "- Total demo data: 20-50 MB",
        "- GitHub limit: 100 MB per file",
        "- Repository limit: 1 GB total",
    )
    for note in closing_notes:
        print(note)

    return True


if __name__ == "__main__":
    # Turn Ctrl-C and any unexpected failure into a short message and exit 1.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⏹️ Setup cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Setup failed: {e}")
        sys.exit(1)
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Simple PDF Processor - Direct API call without complex post-processing
|
4
|
+
"""
|
5
|
+
|
6
|
+
import os
|
7
|
+
import sys
|
8
|
+
import json
|
9
|
+
import requests
|
10
|
+
import base64
|
11
|
+
|
12
|
+
# Add the parent directory to the path to import memra modules
|
13
|
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
14
|
+
|
15
|
+
from memra.tool_registry_client import ToolRegistryClient
|
16
|
+
|
17
|
+
# Set the Memra API endpoint and key in this process's environment.
os.environ.update(
    {
        "MEMRA_API_URL": "https://api.memra.co",
        "MEMRA_API_KEY": "test-secret-for-development",
    }
)
|
20
|
+
|
21
|
+
def upload_file_to_api(file_path: str, api_url: str = "https://api.memra.co", timeout: float = 60) -> str:
    """Upload a local file to the remote API as base64-encoded JSON.

    Args:
        file_path: Path of the local file to send.
        api_url: Base URL of the API; the file is POSTed to ``{api_url}/upload``.
        timeout: Seconds to wait for the upload request before giving up.

    Returns:
        The ``remote_path`` reported by the API on success. On any failure
        (unreadable file, network error, non-200 status, ``success`` false)
        the original *file_path* is returned so callers can carry on with
        the local path.
    """
    try:
        filename = os.path.basename(file_path)
        print(f"📤 Uploading {filename} to remote API")

        # Read the file and encode as base64
        with open(file_path, 'rb') as f:
            file_b64 = base64.b64encode(f.read()).decode('utf-8')

        # Prepare upload data
        upload_data = {
            "filename": filename,
            "content": file_b64,
            "content_type": "application/pdf"
        }

        # Upload to remote API; the timeout stops a stalled server from
        # hanging the script indefinitely.
        api_key = os.getenv("MEMRA_API_KEY")
        response = requests.post(
            f"{api_url}/upload",
            json=upload_data,
            headers={
                "X-API-Key": api_key,
                "Content-Type": "application/json"
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"❌ Upload request failed: {response.status_code}")
            return file_path

        result = response.json()
        if not result.get("success"):
            print(f"❌ Upload failed: {result.get('error')}")
            return file_path

        remote_path = result["data"]["remote_path"]
        print(f"✅ File uploaded successfully")
        print(f" Remote path: {remote_path}")
        return remote_path

    except Exception as e:
        # Deliberate best-effort boundary: any failure falls back to the local path.
        print(f"⚠️ Upload error: {e}")
        return file_path
|
67
|
+
|
68
|
+
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    cleaned = text.strip()
    if cleaned.startswith('```'):
        cleaned = cleaned[3:]
        # Optional language tag right after the opening fence (```json / ```JSON).
        if cleaned[:4].lower() == 'json':
            cleaned = cleaned[4:]
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def process_pdf_simple(file_path: str) -> dict:
    """Upload a PDF, run the hosted ``PDFProcessor`` tool and parse its reply.

    Args:
        file_path: Local path of the PDF to process.

    Returns:
        On success: ``{"success": True, "raw_vision_response", "parsed_json",
        "extracted_data"}``. On failure: ``{"success": False, "error", ...}``
        plus whichever of ``raw_response`` / ``data`` / ``result`` applies.
    """
    print(f"\n🔍 Processing PDF: {file_path}")
    print("=" * 50)

    # Step 1: Upload file to API
    remote_path = upload_file_to_api(file_path)

    # Step 2: Call PDFProcessor directly
    client = ToolRegistryClient()

    print(f"\n📄 Calling PDFProcessor with remote path: {remote_path}")

    result = client.execute_tool(
        tool_name="PDFProcessor",
        hosted_by="memra",
        input_data={"file": remote_path},
        config=None
    )

    print(f"\n📊 API Response:")
    print(f"Success: {result.get('success')}")
    print(f"Error: {result.get('error')}")

    # Debug: Print the full response structure
    print(f"\n🔍 DEBUG: Full API Response Structure:")
    print(f"Result keys: {list(result.keys())}")
    print(f"Result: {json.dumps(result, indent=2, default=str)}")

    if not (result.get('success') and 'data' in result):
        print(f"❌ API call failed: {result.get('error')}")
        return {
            "success": False,
            "error": result.get('error', 'Unknown error'),
            "result": result
        }

    data = result['data']
    print(f"\n🎯 Raw JSON Response from Vision Model:")

    # NOTE(review): the payload may sit one level deeper, under data['data'];
    # accept that shape too when the top level has no vision_response.
    payload = data
    if isinstance(data, dict) and 'vision_response' not in data:
        inner = data.get('data')
        if isinstance(inner, dict) and 'vision_response' in inner:
            payload = inner

    if 'vision_response' not in payload:
        print("❌ No vision_response in data")
        return {
            "success": False,
            "error": "No vision_response in API response",
            "data": data
        }

    vision_response = payload['vision_response']
    print(f"📝 Vision Response (raw):")
    print(vision_response)

    # Parse the JSON response after removing any Markdown code fence.
    try:
        parsed_json = json.loads(_strip_code_fence(vision_response))
    except json.JSONDecodeError as e:
        print(f"❌ JSON parsing error: {e}")
        return {
            "success": False,
            "error": f"JSON parsing failed: {e}",
            "raw_response": vision_response
        }

    print(f"\n✅ Parsed JSON Response:")
    print(json.dumps(parsed_json, indent=2))

    return {
        "success": True,
        "raw_vision_response": vision_response,
        "parsed_json": parsed_json,
        "extracted_data": payload.get('extracted_data', {})
    }
|
148
|
+
|
149
|
+
def main():
    """Command-line entry point: process the PDF named in ``sys.argv[1]``.

    Exits with status 1 on wrong usage, a missing file, or a failed
    processing run. On success prints a short summary and writes the full
    result to ``<pdf stem>_result.json`` in the current directory.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 simple_pdf_processor.py <pdf_file_path>")
        sys.exit(1)

    file_path = sys.argv[1]
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        sys.exit(1)

    # Process the PDF
    result = process_pdf_simple(file_path)

    if not result.get('success'):
        print(f"\n❌ FAILED: {result.get('error')}")
        sys.exit(1)

    invoice = result['parsed_json']
    print(f"\n🎉 SUCCESS! PDF processed successfully")
    print(f"📄 Invoice Number: {invoice.get('InvoiceNumber', 'N/A')}")
    print(f"💰 Total Amount: ${invoice.get('InvoiceTotal', 'N/A')}")
    print(f"📅 Date: {invoice.get('InvoiceDate', 'N/A')}")

    # Save the result to a JSON file for later use
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_file = f"{stem}_result.json"
    with open(output_file, 'w') as f:
        json.dump(result, f, indent=2)
    print(f"\n💾 Result saved to: {output_file}")


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,56 @@
|
|
1
|
+
import os
|
2
|
+
import requests
|
3
|
+
import base64
|
4
|
+
|
5
|
+
# Test the PDFProcessor API directly
import json

api_url = "https://api.memra.co"
api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
# Seconds to wait per HTTP call so a stalled server cannot hang the script.
request_timeout = 60

# Upload a test file
with open("data/invoices/10352260169.PDF", "rb") as f:
    content = base64.b64encode(f.read()).decode('utf-8')

upload_resp = requests.post(
    f"{api_url}/upload",
    json={
        "filename": "test.pdf",
        "content": content,
        "content_type": "application/pdf"
    },
    headers={"X-API-Key": api_key},
    timeout=request_timeout,
)

if upload_resp.status_code == 200:
    remote_path = upload_resp.json()["data"]["remote_path"]
    print(f"✅ Uploaded to: {remote_path}")

    # Try different parameter formats
    print("\n1. Testing with input_data format:")
    resp = requests.post(
        f"{api_url}/tools/execute",
        json={
            "tool_name": "PDFProcessor",
            "hosted_by": "memra",
            "input_data": {"file_path": remote_path}
        },
        headers={"X-API-Key": api_key},
        timeout=request_timeout,
    )
    print(f"Status: {resp.status_code}")
    try:
        body = resp.json()
    except ValueError:
        # Error responses may not be JSON; show the raw text instead of crashing.
        body = resp.text
    print(f"Response: {body}")

    print("\n2. Testing with parameters format:")
    resp = requests.post(
        f"{api_url}/tools/execute",
        json={
            "tool_name": "PDFProcessor",
            "hosted_by": "memra",
            "parameters": {"file_path": remote_path}
        },
        headers={"X-API-Key": api_key},
        timeout=request_timeout,
    )
    print(f"Status: {resp.status_code}")
    if resp.status_code == 200:
        print(f"Response: {json.dumps(resp.json(), indent=2)}")
    else:
        print(f"Response: {resp.text}")
else:
    # Previously a failed upload ended the script without any output.
    print(f"❌ Upload failed: {upload_resp.status_code}")
    print(f"Response: {upload_resp.text}")
|
@@ -0,0 +1,32 @@
|
|
1
|
+
import os
|
2
|
+
import requests
|
3
|
+
|
4
|
+
api_url = "https://api.memra.co"
api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")

# Try with 'file' instead of 'file_path'
remote_path = "/uploads/6f4538c0-8fce-4488-be49-1a78afc58a4a.pdf"

print("Testing with 'file' parameter:")
resp = requests.post(
    f"{api_url}/tools/execute",
    json={
        "tool_name": "PDFProcessor",
        "hosted_by": "memra",
        "input_data": {"file": remote_path}
    },
    headers={"X-API-Key": api_key},
    timeout=60,  # do not wait forever on a stalled server
)
print(f"Status: {resp.status_code}")
if resp.status_code == 200:
    result = resp.json()
    print(f"Success: {result.get('success')}")
    if result.get('success') and 'data' in result:
        data = result['data']
        # The tool payload is looked for one level down, under data['data'].
        if 'data' in data:
            print("Found nested data structure")
            inner_data = data['data']
            if 'vision_response' in inner_data:
                # "\!" was an invalid escape sequence and printed a stray backslash.
                print("✅ Found vision_response!")
                print(inner_data['vision_response'][:200])
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""Test API connectivity"""
|
3
|
+
|
4
|
+
import os
|
5
|
+
import requests
|
6
|
+
import json
|
7
|
+
|
8
|
+
def test_api():
    """Probe the remote and local ``/health`` endpoints and print the outcome.

    Each probe prints the HTTP status and body, or the error if the request
    fails. Nothing is returned or raised.
    """
    # (banner, label used in messages, health URL, timeout in seconds)
    targets = (
        ("Testing remote API...", "Remote", "https://api.memra.co/health", 10),
        ("\nTesting local API...", "Local", "http://127.0.0.1:8081/health", 5),
    )

    for banner, label, url, timeout in targets:
        print(banner)
        try:
            response = requests.get(url, timeout=timeout)
            print(f"{label} API Status: {response.status_code}")
            print(f"Response: {response.text}")
        except Exception as e:
            print(f"{label} API Error: {e}")


if __name__ == "__main__":
    test_api()
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Test API client directly to see what PDFProcessor returns
|
4
|
+
"""
|
5
|
+
|
6
|
+
import os
|
7
|
+
import sys
|
8
|
+
import json
|
9
|
+
|
10
|
+
# Add the parent directory to the path to import memra modules
|
11
|
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
12
|
+
|
13
|
+
from memra.tool_registry_client import ToolRegistryClient
|
14
|
+
|
15
|
+
# Set the Memra API endpoint and key in this process's environment.
os.environ.update(
    {
        "MEMRA_API_URL": "https://api.memra.co",
        "MEMRA_API_KEY": "test-secret-for-development",
    }
)
|
18
|
+
|
19
|
+
def test_api_client_direct():
    """Call ``PDFProcessor`` through ``ToolRegistryClient`` and dump the reply.

    Prints the result layer by layer: the top-level result, its ``data``
    field, the nested ``data['data']`` field, and finally any
    ``vision_response`` / ``extracted_data`` found there. Stops at the first
    layer that is missing or not a dict. Returns nothing.
    """
    print("🧪 Testing API Client Direct Call")
    print("=" * 50)

    # Create API client
    client = ToolRegistryClient()

    # Test with the same file path that the ETL workflow uses
    file_path = "/uploads/22526d2e-dfcf-45eb-9e1a-47f093cd05ab.PDF"  # From the latest ETL upload
    print(f"📄 Testing with file: {file_path}")

    # Execute the PDFProcessor tool
    result = client.execute_tool(
        tool_name="PDFProcessor",
        hosted_by="memra",
        input_data={"file": file_path},
        config=None
    )

    print(f"\n📊 Raw API Client Result:")
    print(f"Success: {result.get('success')}")
    print(f"Error: {result.get('error')}")

    if not result.get('success'):
        print(f"❌ API call failed: {result.get('error')}")
        return

    print(f"\n📄 Data keys: {list(result.keys())}")

    if 'data' not in result:
        print("❌ No 'data' field in result")
        return

    data = result['data']
    print(f"\n🔧 Data field:")
    print(f"Type: {type(data)}")
    print(f"Keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")

    if not isinstance(data, dict):
        return

    print(f"\n📄 Data content:")
    print(json.dumps(data, indent=2, default=str))

    # Check for nested data
    if 'data' not in data:
        return

    nested_data = data['data']
    print(f"\n🔧 Nested data:")
    print(f"Type: {type(nested_data)}")
    print(f"Keys: {list(nested_data.keys()) if isinstance(nested_data, dict) else 'not a dict'}")

    if not isinstance(nested_data, dict):
        return

    print(f"\n📄 Nested data content:")
    print(json.dumps(nested_data, indent=2, default=str))

    # Check for vision_response and extracted_data
    if 'vision_response' in nested_data:
        print(f"\n📝 Vision Response found!")
        print(f"Length: {len(nested_data['vision_response'])} characters")
        try:
            # Strip Markdown fence markers before parsing.
            parsed = json.loads(nested_data['vision_response'].replace('```json','').replace('```','').strip())
            print("✅ Valid JSON response:")
            print(json.dumps(parsed, indent=2))
        except Exception as e:
            print(f"❌ JSON parsing error: {e}")
            print(f"Raw response: {nested_data['vision_response'][:500]}...")

    if 'extracted_data' in nested_data:
        print(f"\n🎯 Extracted Data found!")
        print(json.dumps(nested_data['extracted_data'], indent=2))


if __name__ == "__main__":
    test_api_client_direct()
|