memra 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {memra-0.2.9 → memra-0.2.11}/PKG-INFO +1 -1
  2. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/etl_invoice_demo.py +406 -251
  3. {memra-0.2.9 → memra-0.2.11}/memra/__init__.py +1 -1
  4. {memra-0.2.9 → memra-0.2.11}/memra/cli.py +41 -1
  5. {memra-0.2.9 → memra-0.2.11}/pyproject.toml +1 -1
  6. {memra-0.2.9 → memra-0.2.11}/setup.py +1 -1
  7. {memra-0.2.9 → memra-0.2.11}/CHANGELOG.md +0 -0
  8. {memra-0.2.9 → memra-0.2.11}/LICENSE +0 -0
  9. {memra-0.2.9 → memra-0.2.11}/MANIFEST.in +0 -0
  10. {memra-0.2.9 → memra-0.2.11}/README.md +0 -0
  11. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/.DS_Store +0 -0
  12. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/README.md +0 -0
  13. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/.DS_Store +0 -0
  14. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352259401.PDF +0 -0
  15. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352259823.PDF +0 -0
  16. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352260169.PDF +0 -0
  17. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352260417.PDF +0 -0
  18. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352260599.PDF +0 -0
  19. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352260912.PDF +0 -0
  20. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352261134.PDF +0 -0
  21. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352261563.PDF +0 -0
  22. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352261647.PDF +0 -0
  23. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352261720.PDF +0 -0
  24. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352261811.PDF +0 -0
  25. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352262025.PDF +0 -0
  26. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352262454.PDF +0 -0
  27. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352262702.PDF +0 -0
  28. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352262884.PDF +0 -0
  29. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352263346.PDF +0 -0
  30. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/10352263429.PDF +0 -0
  31. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/data/invoices/invoice_005.pdf +0 -0
  32. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/database_monitor_agent.py +0 -0
  33. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/setup_demo_data.py +0 -0
  34. {memra-0.2.9 → memra-0.2.11}/demos/etl_invoice_processing/simple_pdf_processor.py +0 -0
  35. {memra-0.2.9 → memra-0.2.11}/mcp_bridge_server.py +0 -0
  36. {memra-0.2.9 → memra-0.2.11}/memra/discovery.py +0 -0
  37. {memra-0.2.9 → memra-0.2.11}/memra/discovery_client.py +0 -0
  38. {memra-0.2.9 → memra-0.2.11}/memra/execution.py +0 -0
  39. {memra-0.2.9 → memra-0.2.11}/memra/models.py +0 -0
  40. {memra-0.2.9 → memra-0.2.11}/memra/tool_registry.py +0 -0
  41. {memra-0.2.9 → memra-0.2.11}/memra/tool_registry_client.py +0 -0
  42. {memra-0.2.9 → memra-0.2.11}/memra-ops/app.py +0 -0
  43. {memra-0.2.9 → memra-0.2.11}/memra-ops/config/config.py +0 -0
  44. {memra-0.2.9 → memra-0.2.11}/memra-ops/config.py +0 -0
  45. {memra-0.2.9 → memra-0.2.11}/memra-ops/logic/__init__.py +0 -0
  46. {memra-0.2.9 → memra-0.2.11}/memra-ops/logic/file_tools.py +0 -0
  47. {memra-0.2.9 → memra-0.2.11}/memra-ops/logic/invoice_tools.py +0 -0
  48. {memra-0.2.9 → memra-0.2.11}/memra-ops/logic/invoice_tools_fix.py +0 -0
  49. {memra-0.2.9 → memra-0.2.11}/memra-ops/mcp_bridge_server.py +0 -0
  50. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/check_database.py +0 -0
  51. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/clear_database.py +0 -0
  52. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/monitor_database.py +0 -0
  53. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/release.py +0 -0
  54. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/reset_database.py +0 -0
  55. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/start_memra.py +0 -0
  56. {memra-0.2.9 → memra-0.2.11}/memra-ops/scripts/stop_memra.py +0 -0
  57. {memra-0.2.9 → memra-0.2.11}/memra-ops/server_tool_registry.py +0 -0
  58. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_llm_text_to_sql.py +0 -0
  59. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_llm_vs_pattern.py +0 -0
  60. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_mcp_schema_aware.py +0 -0
  61. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_schema_aware_sql.py +0 -0
  62. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_schema_aware_sql_simple.py +0 -0
  63. {memra-0.2.9 → memra-0.2.11}/memra-ops/tests/test_text_to_sql_demo.py +0 -0
  64. {memra-0.2.9 → memra-0.2.11}/memra-ops/tools/mcp_bridge_server.py +0 -0
  65. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/accounts_payable.py +0 -0
  66. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/accounts_payable_client.py +0 -0
  67. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/accounts_payable_mcp.py +0 -0
  68. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/ask_questions.py +0 -0
  69. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/invoice_processing.py +0 -0
  70. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/propane_delivery.py +0 -0
  71. {memra-0.2.9 → memra-0.2.11}/memra-sdk/examples/simple_text_to_sql.py +0 -0
  72. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/__init__.py +0 -0
  73. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/discovery.py +0 -0
  74. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/discovery_client.py +0 -0
  75. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/execution.py +0 -0
  76. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/models.py +0 -0
  77. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/tool_registry.py +0 -0
  78. {memra-0.2.9 → memra-0.2.11}/memra-sdk/memra/tool_registry_client.py +0 -0
  79. {memra-0.2.9 → memra-0.2.11}/memra-sdk/scripts/release.py +0 -0
  80. {memra-0.2.9 → memra-0.2.11}/memra-sdk/setup.py +0 -0
  81. {memra-0.2.9 → memra-0.2.11}/memra-workflows/accounts_payable/accounts_payable.py +0 -0
  82. {memra-0.2.9 → memra-0.2.11}/memra-workflows/accounts_payable/accounts_payable_client.py +0 -0
  83. {memra-0.2.9 → memra-0.2.11}/memra-workflows/accounts_payable/accounts_payable_mcp.py +0 -0
  84. {memra-0.2.9 → memra-0.2.11}/memra-workflows/accounts_payable/accounts_payable_smart.py +0 -0
  85. {memra-0.2.9 → memra-0.2.11}/memra-workflows/invoice_processing/invoice_processing.py +0 -0
  86. {memra-0.2.9 → memra-0.2.11}/memra-workflows/invoice_processing/smart_invoice_processor.py +0 -0
  87. {memra-0.2.9 → memra-0.2.11}/memra-workflows/logic/__init__.py +0 -0
  88. {memra-0.2.9 → memra-0.2.11}/memra-workflows/logic/file_tools.py +0 -0
  89. {memra-0.2.9 → memra-0.2.11}/memra-workflows/logic/invoice_tools.py +0 -0
  90. {memra-0.2.9 → memra-0.2.11}/memra-workflows/logic/propane_agents.py +0 -0
  91. {memra-0.2.9 → memra-0.2.11}/memra-workflows/mcp_bridge_server.py +0 -0
  92. {memra-0.2.9 → memra-0.2.11}/memra-workflows/propane_delivery/propane_delivery.py +0 -0
  93. {memra-0.2.9 → memra-0.2.11}/memra-workflows/text_to_sql/complete_invoice_workflow_with_queries.py +0 -0
  94. {memra-0.2.9 → memra-0.2.11}/memra-workflows/text_to_sql/complete_text_to_sql_system.py +0 -0
  95. {memra-0.2.9 → memra-0.2.11}/memra-workflows/text_to_sql/file_discovery_demo.py +0 -0
  96. {memra-0.2.9 → memra-0.2.11}/memra.egg-info/SOURCES.txt +0 -0
  97. {memra-0.2.9 → memra-0.2.11}/requirements.txt +0 -0
  98. {memra-0.2.9 → memra-0.2.11}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: memra
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Declarative framework for enterprise workflows with MCP integration - Client SDK
5
5
  Home-page: https://github.com/memra/memra-sdk
6
6
  Author: Memra
@@ -6,6 +6,8 @@ Complete ETL workflow with database monitoring before and after
6
6
 
7
7
  import os
8
8
  import sys
9
+ import time
10
+ import random
9
11
  from pathlib import Path
10
12
  from memra import Agent, Department, LLM, check_api_health, get_api_status
11
13
  from memra.execution import ExecutionEngine, ExecutionTrace
@@ -35,6 +37,35 @@ os.environ["MEMRA_API_URL"] = "https://api.memra.co"
35
37
  # Store the remote API URL for PDF processing
36
38
  REMOTE_API_URL = "https://api.memra.co"
37
39
 
40
+ # Define the specific 15 files to process
41
+ TARGET_FILES = [
42
+ "10352259401.PDF",
43
+ "10352259823.PDF",
44
+ "10352260169.PDF",
45
+ "10352260417.PDF",
46
+ "10352260599.PDF",
47
+ "10352260912.PDF",
48
+ "10352261134.PDF",
49
+ "10352261563.PDF",
50
+ "10352261647.PDF",
51
+ "10352261720.PDF",
52
+ "10352261811.PDF",
53
+ "10352262025.PDF",
54
+ "10352262454.PDF",
55
+ "10352262702.PDF",
56
+ "10352262884.PDF"
57
+ ]
58
+
59
+ # Configuration for robust processing
60
+ PROCESSING_CONFIG = {
61
+ "delay_between_files": 2.5, # seconds
62
+ "max_retries": 3,
63
+ "retry_delay_base": 2, # seconds
64
+ "retry_delay_max": 30, # seconds
65
+ "timeout_seconds": 120,
66
+ "rate_limit_delay": 5 # additional delay if rate limited
67
+ }
68
+
38
69
  # Check API health before starting
39
70
  print("🔍 Checking Memra API status...")
40
71
  api_status = get_api_status()
@@ -409,7 +440,7 @@ def fix_pdfprocessor_response(agent, result_data, **kwargs):
409
440
  return result_data
410
441
 
411
442
  def direct_vision_processing(agent, result_data, **kwargs):
412
- """Direct vision model processing without using tools"""
443
+ """Direct vision model processing without using tools with retry logic"""
413
444
  print(f"\n[DEBUG] direct_vision_processing called for {agent.role}")
414
445
  print(f"[DEBUG] Result data type: {type(result_data)}")
415
446
  print(f"[DEBUG] Result data: {result_data}")
@@ -430,170 +461,214 @@ def direct_vision_processing(agent, result_data, **kwargs):
430
461
  print("❌ No file path provided")
431
462
  return result_data
432
463
 
433
- try:
434
- import requests
435
- import json
436
- import os
437
- import base64
438
-
439
- # Use the remote API for PDF processing
440
- api_url = "https://api.memra.co"
441
- api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
442
-
443
- # Check if file is already a remote path
444
- if file_path.startswith('/uploads/'):
445
- print(f"✅ File already uploaded to remote API: {file_path}")
446
- remote_path = file_path
447
- else:
448
- # Local file - need to upload
449
- print(f"📤 Uploading file to remote API...")
464
+ # Retry logic for vision processing
465
+ for attempt in range(PROCESSING_CONFIG["max_retries"] + 1):
466
+ try:
467
+ import requests
468
+ import json
469
+ import os
470
+ import base64
450
471
 
451
- # Read the file and encode as base64
452
- with open(file_path, 'rb') as f:
453
- file_content = f.read()
454
-
455
- file_b64 = base64.b64encode(file_content).decode('utf-8')
472
+ # Use the remote API for PDF processing
473
+ api_url = "https://api.memra.co"
474
+ api_key = os.getenv("MEMRA_API_KEY", "test-secret-for-development")
456
475
 
457
- # Prepare upload data
458
- upload_data = {
459
- "filename": os.path.basename(file_path),
460
- "content": file_b64,
461
- "content_type": "application/pdf"
462
- }
476
+ # Check if file is already a remote path
477
+ if file_path.startswith('/uploads/'):
478
+ print(f"✅ File already uploaded to remote API: {file_path}")
479
+ remote_path = file_path
480
+ else:
481
+ # Local file - need to upload
482
+ print(f"📤 Uploading file to remote API (attempt {attempt + 1})...")
483
+
484
+ # Read the file and encode as base64
485
+ with open(file_path, 'rb') as f:
486
+ file_content = f.read()
487
+
488
+ file_b64 = base64.b64encode(file_content).decode('utf-8')
489
+
490
+ # Prepare upload data
491
+ upload_data = {
492
+ "filename": os.path.basename(file_path),
493
+ "content": file_b64,
494
+ "content_type": "application/pdf"
495
+ }
496
+
497
+ # Upload to remote API with timeout
498
+ response = requests.post(
499
+ f"{api_url}/upload",
500
+ json=upload_data,
501
+ headers={
502
+ "X-API-Key": api_key,
503
+ "Content-Type": "application/json"
504
+ },
505
+ timeout=PROCESSING_CONFIG["timeout_seconds"]
506
+ )
507
+
508
+ if response.status_code != 200:
509
+ print(f"❌ Upload failed: {response.status_code}")
510
+ print(f" Response: {response.text}")
511
+
512
+ # Check for rate limiting
513
+ if response.status_code == 429:
514
+ delay = PROCESSING_CONFIG["rate_limit_delay"] * (2 ** attempt)
515
+ print(f"⏳ Rate limited, waiting {delay}s before retry...")
516
+ time.sleep(delay)
517
+ continue
518
+ else:
519
+ return result_data
520
+
521
+ upload_result = response.json()
522
+ if not upload_result.get("success"):
523
+ print(f"❌ Upload failed: {upload_result.get('error')}")
524
+ return result_data
525
+
526
+ remote_path = upload_result["data"]["remote_path"]
527
+ print(f"✅ File uploaded successfully")
528
+ print(f" Remote path: {remote_path}")
529
+
530
+ # Now call the PDFProcessor with the remote path
531
+ print(f"🔍 Calling PDFProcessor with remote path (attempt {attempt + 1})...")
532
+
533
+ # Convert schema to format expected by PDFProcessor
534
+ schema_for_pdf = None
535
+ if schema_results:
536
+ # Send the raw schema array - server now handles both formats
537
+ schema_for_pdf = [
538
+ col for col in schema_results
539
+ if col["column_name"] not in ["id", "created_at", "updated_at", "status", "raw_json"]
540
+ ]
541
+ print(f"📋 Passing schema with {len(schema_for_pdf)} fields to PDFProcessor")
542
+ print(f"📋 Schema fields: {[c['column_name'] for c in schema_for_pdf]}")
463
543
 
464
- # Upload to remote API
465
544
  response = requests.post(
466
- f"{api_url}/upload",
467
- json=upload_data,
545
+ f"{api_url}/tools/execute",
546
+ json={
547
+ "tool_name": "PDFProcessor",
548
+ "hosted_by": "memra",
549
+ "input_data": {
550
+ "file": remote_path,
551
+ "schema": schema_for_pdf
552
+ }
553
+ },
468
554
  headers={
469
555
  "X-API-Key": api_key,
470
556
  "Content-Type": "application/json"
471
- }
557
+ },
558
+ timeout=PROCESSING_CONFIG["timeout_seconds"]
472
559
  )
473
560
 
474
561
  if response.status_code != 200:
475
- print(f"❌ Upload failed: {response.status_code}")
562
+ print(f"❌ PDFProcessor call failed: {response.status_code}")
476
563
  print(f" Response: {response.text}")
477
- return result_data
478
-
479
- upload_result = response.json()
480
- if not upload_result.get("success"):
481
- print(f" Upload failed: {upload_result.get('error')}")
482
- return result_data
483
-
484
- remote_path = upload_result["data"]["remote_path"]
485
- print(f"✅ File uploaded successfully")
486
- print(f" Remote path: {remote_path}")
487
-
488
- # Now call the PDFProcessor with the remote path
489
- print(f"🔍 Calling PDFProcessor with remote path...")
490
-
491
- # Convert schema to format expected by PDFProcessor
492
- schema_for_pdf = None
493
- if schema_results:
494
- # Send the raw schema array - server now handles both formats
495
- schema_for_pdf = [
496
- col for col in schema_results
497
- if col["column_name"] not in ["id", "created_at", "updated_at", "status", "raw_json"]
498
- ]
499
- print(f"📋 Passing schema with {len(schema_for_pdf)} fields to PDFProcessor")
500
- print(f"📋 Schema fields: {[c['column_name'] for c in schema_for_pdf]}")
501
-
502
- response = requests.post(
503
- f"{api_url}/tools/execute",
504
- json={
505
- "tool_name": "PDFProcessor",
506
- "hosted_by": "memra",
507
- "input_data": {
508
- "file": remote_path,
509
- "schema": schema_for_pdf
510
- }
511
- },
512
- headers={
513
- "X-API-Key": api_key,
514
- "Content-Type": "application/json"
515
- }
516
- )
517
-
518
- if response.status_code != 200:
519
- print(f"❌ PDFProcessor call failed: {response.status_code}")
520
- print(f" Response: {response.text}")
521
- return result_data
522
-
523
- pdf_result = response.json()
524
- print(f"\n🎯 AGENT 3 - FULL PDFPROCESSOR RESPONSE:")
525
- print("=" * 60)
526
- print(json.dumps(pdf_result, indent=2, default=str))
527
- print("=" * 60)
528
-
529
- # Extract the vision response from the nested structure
530
- vision_response = None
531
- if pdf_result.get("success") and "data" in pdf_result:
532
- data = pdf_result["data"]
564
+
565
+ # Check for rate limiting
566
+ if response.status_code == 429:
567
+ delay = PROCESSING_CONFIG["rate_limit_delay"] * (2 ** attempt)
568
+ print(f" Rate limited, waiting {delay}s before retry...")
569
+ time.sleep(delay)
570
+ continue
571
+ else:
572
+ return result_data
533
573
 
534
- # Check for nested data structure
535
- if isinstance(data, dict) and "data" in data:
536
- actual_data = data["data"]
537
- if "vision_response" in actual_data:
538
- vision_response = actual_data["vision_response"]
539
- elif "vision_response" in data:
540
- vision_response = data["vision_response"]
541
-
542
- if vision_response:
543
- print(f"\n🎯 AGENT 3 - RAW VISION MODEL JSON:")
574
+ pdf_result = response.json()
575
+ print(f"\n🎯 AGENT 3 - FULL PDFPROCESSOR RESPONSE:")
544
576
  print("=" * 60)
545
- print(vision_response)
577
+ print(json.dumps(pdf_result, indent=2, default=str))
546
578
  print("=" * 60)
547
579
 
548
- # Try to parse the JSON response
549
- try:
550
- # Clean up the response - remove markdown code blocks if present
551
- cleaned_response = vision_response
552
- if cleaned_response.startswith("```json"):
553
- cleaned_response = cleaned_response.replace("```json", "").replace("```", "").strip()
554
- elif cleaned_response.startswith("```"):
555
- cleaned_response = cleaned_response.replace("```", "").strip()
580
+ # Extract the vision response from the nested structure
581
+ vision_response = None
582
+ if pdf_result.get("success") and "data" in pdf_result:
583
+ data = pdf_result["data"]
556
584
 
557
- parsed_data = json.loads(cleaned_response)
558
- print(f"\n✅ [AGENT 3] Successfully parsed JSON:")
559
- print(json.dumps(parsed_data, indent=2))
560
-
561
- # Convert to the expected format
562
- extracted_data = convert_vision_response_to_extracted_data(cleaned_response)
563
-
564
- # Debug vendor extraction
565
- print(f"\n🔍 [AGENT 3] Extracted vendor: '{extracted_data['headerSection']['vendorName']}'")
566
- print(f" Invoice #: {extracted_data['billingDetails']['invoiceNumber']}")
567
- print(f" Amount: ${extracted_data['chargesSummary']['document_total']}")
585
+ # Check for nested data structure
586
+ if isinstance(data, dict) and "data" in data:
587
+ actual_data = data["data"]
588
+ if "vision_response" in actual_data:
589
+ vision_response = actual_data["vision_response"]
590
+ elif "vision_response" in data:
591
+ vision_response = data["vision_response"]
592
+
593
+ if vision_response:
594
+ print(f"\n🎯 AGENT 3 - RAW VISION MODEL JSON:")
595
+ print("=" * 60)
596
+ print(vision_response)
597
+ print("=" * 60)
568
598
 
569
- # Update the result_data
570
- result_data = {
571
- "success": True,
572
- "data": {
573
- "vision_response": vision_response,
574
- "extracted_data": extracted_data
575
- },
576
- "_memra_metadata": {
577
- "agent_role": agent.role,
578
- "tools_real_work": ["PDFProcessor"],
579
- "tools_mock_work": [],
580
- "work_quality": "real"
599
+ # Try to parse the JSON response
600
+ try:
601
+ # Clean up the response - remove markdown code blocks if present
602
+ cleaned_response = vision_response
603
+ if cleaned_response.startswith("```json"):
604
+ cleaned_response = cleaned_response.replace("```json", "").replace("```", "").strip()
605
+ elif cleaned_response.startswith("```"):
606
+ cleaned_response = cleaned_response.replace("```", "").strip()
607
+
608
+ parsed_data = json.loads(cleaned_response)
609
+ print(f"\n✅ [AGENT 3] Successfully parsed JSON:")
610
+ print(json.dumps(parsed_data, indent=2))
611
+
612
+ # Convert to the expected format
613
+ extracted_data = convert_vision_response_to_extracted_data(cleaned_response)
614
+
615
+ # Debug vendor extraction
616
+ print(f"\n🔍 [AGENT 3] Extracted vendor: '{extracted_data['headerSection']['vendorName']}'")
617
+ print(f" Invoice #: {extracted_data['billingDetails']['invoiceNumber']}")
618
+ print(f" Amount: ${extracted_data['chargesSummary']['document_total']}")
619
+
620
+ # Update the result_data
621
+ result_data = {
622
+ "success": True,
623
+ "data": {
624
+ "vision_response": vision_response,
625
+ "extracted_data": extracted_data
626
+ },
627
+ "_memra_metadata": {
628
+ "agent_role": agent.role,
629
+ "tools_real_work": ["PDFProcessor"],
630
+ "tools_mock_work": [],
631
+ "work_quality": "real"
632
+ }
581
633
  }
582
- }
583
-
584
- return result_data
634
+
635
+ return result_data
636
+
637
+ except json.JSONDecodeError as e:
638
+ print(f"❌ JSON parsing error: {e}")
639
+ print(f"Raw response: {vision_response}")
640
+
641
+ # Don't retry on JSON parsing errors
642
+ return result_data
643
+ else:
644
+ print(f"❌ No vision_response found in PDFProcessor result")
585
645
 
586
- except json.JSONDecodeError as e:
587
- print(f"❌ JSON parsing error: {e}")
588
- print(f"Raw response: {vision_response}")
589
- return result_data
590
- else:
591
- print(f"❌ No vision_response found in PDFProcessor result")
592
- return result_data
593
-
594
- except Exception as e:
595
- print(f"❌ Error in PDF processing: {e}")
596
- return result_data
646
+ # Retry if no vision response (might be temporary API issue)
647
+ if attempt < PROCESSING_CONFIG["max_retries"]:
648
+ delay = PROCESSING_CONFIG["retry_delay_base"] * (2 ** attempt)
649
+ print(f"⏳ No vision response, waiting {delay}s before retry...")
650
+ time.sleep(delay)
651
+ continue
652
+ else:
653
+ return result_data
654
+
655
+ except requests.exceptions.Timeout:
656
+ print(f"⏰ Vision processing timeout (attempt {attempt + 1})")
657
+ if attempt < PROCESSING_CONFIG["max_retries"]:
658
+ delay = PROCESSING_CONFIG["retry_delay_base"] * (2 ** attempt)
659
+ print(f"⏳ Waiting {delay}s before retry...")
660
+ time.sleep(delay)
661
+ continue
662
+ except Exception as e:
663
+ print(f"❌ Error in PDF processing (attempt {attempt + 1}): {e}")
664
+ if attempt < PROCESSING_CONFIG["max_retries"]:
665
+ delay = PROCESSING_CONFIG["retry_delay_base"] * (2 ** attempt)
666
+ print(f"⏳ Waiting {delay}s before retry...")
667
+ time.sleep(delay)
668
+ continue
669
+
670
+ print(f"❌ Failed to process vision after {PROCESSING_CONFIG['max_retries'] + 1} attempts")
671
+ return result_data
597
672
 
598
673
  # Create a new Agent 3 that bypasses the tool system
599
674
  direct_vision_agent = Agent(
@@ -792,54 +867,86 @@ etl_department = Department(
792
867
  }
793
868
  )
794
869
 
795
- def upload_file_to_api(file_path: str, api_url: str = "https://api.memra.co") -> str:
796
- """Upload a file to the remote API for vision-based PDF processing"""
797
- try:
798
- print(f"📤 Uploading {os.path.basename(file_path)} to remote API")
799
- print(f" File path: {file_path}")
800
-
801
- # Read the file and encode as base64
802
- with open(file_path, 'rb') as f:
803
- file_content = f.read()
804
-
805
- file_b64 = base64.b64encode(file_content).decode('utf-8')
806
-
807
- # Prepare upload data
808
- upload_data = {
809
- "filename": os.path.basename(file_path),
810
- "content": file_b64,
811
- "content_type": "application/pdf"
812
- }
813
-
814
- # Upload to remote API
815
- api_key = os.getenv("MEMRA_API_KEY")
816
- response = requests.post(
817
- f"{api_url}/upload",
818
- json=upload_data,
819
- headers={
820
- "X-API-Key": api_key,
821
- "Content-Type": "application/json"
870
+ def upload_file_to_api(file_path: str, api_url: str = "https://api.memra.co", max_retries: int = 3) -> str:
871
+ """Upload a file to the remote API for vision-based PDF processing with retry logic"""
872
+
873
+ for attempt in range(max_retries + 1):
874
+ try:
875
+ print(f"📤 Uploading {os.path.basename(file_path)} to remote API (attempt {attempt + 1}/{max_retries + 1})")
876
+ print(f" File path: {file_path}")
877
+
878
+ # Read the file and encode as base64
879
+ with open(file_path, 'rb') as f:
880
+ file_content = f.read()
881
+
882
+ file_b64 = base64.b64encode(file_content).decode('utf-8')
883
+
884
+ # Prepare upload data
885
+ upload_data = {
886
+ "filename": os.path.basename(file_path),
887
+ "content": file_b64,
888
+ "content_type": "application/pdf"
822
889
  }
823
- )
824
-
825
- if response.status_code == 200:
826
- result = response.json()
827
- if result.get("success"):
828
- remote_path = result["data"]["remote_path"]
829
- print(f"✅ File uploaded successfully")
830
- print(f" Remote path: {remote_path}")
831
- return remote_path
890
+
891
+ # Upload to remote API
892
+ api_key = os.getenv("MEMRA_API_KEY")
893
+ response = requests.post(
894
+ f"{api_url}/upload",
895
+ json=upload_data,
896
+ headers={
897
+ "X-API-Key": api_key,
898
+ "Content-Type": "application/json"
899
+ },
900
+ timeout=PROCESSING_CONFIG["timeout_seconds"]
901
+ )
902
+
903
+ if response.status_code == 200:
904
+ result = response.json()
905
+ if result.get("success"):
906
+ remote_path = result["data"]["remote_path"]
907
+ print(f"✅ File uploaded successfully")
908
+ print(f" Remote path: {remote_path}")
909
+ return remote_path
910
+ else:
911
+ error_msg = result.get('error', 'Unknown error')
912
+ print(f"❌ Upload failed: {error_msg}")
913
+
914
+ # Check if it's a rate limit error
915
+ if "rate limit" in error_msg.lower() or "too many requests" in error_msg.lower():
916
+ delay = PROCESSING_CONFIG["rate_limit_delay"] * (2 ** attempt)
917
+ print(f"⏳ Rate limited, waiting {delay}s before retry...")
918
+ time.sleep(delay)
919
+ continue
920
+ elif response.status_code == 429: # Rate limited
921
+ delay = PROCESSING_CONFIG["rate_limit_delay"] * (2 ** attempt)
922
+ print(f"⏳ Rate limited (HTTP 429), waiting {delay}s before retry...")
923
+ time.sleep(delay)
924
+ continue
832
925
  else:
833
- print(f"❌ Upload failed: {result.get('error')}")
834
- return file_path
835
- else:
836
- print(f"❌ Upload request failed: {response.status_code}")
837
- print(f" Response: {response.text}")
838
- return file_path
926
+ print(f"❌ Upload request failed: {response.status_code}")
927
+ print(f" Response: {response.text}")
839
928
 
840
- except Exception as e:
841
- print(f"⚠️ Upload error: {e}")
842
- return file_path
929
+ # Don't retry on client errors (4xx) except 429
930
+ if 400 <= response.status_code < 500 and response.status_code != 429:
931
+ break
932
+
933
+ except requests.exceptions.Timeout:
934
+ print(f"⏰ Upload timeout (attempt {attempt + 1})")
935
+ if attempt < max_retries:
936
+ delay = PROCESSING_CONFIG["retry_delay_base"] * (2 ** attempt)
937
+ print(f"⏳ Waiting {delay}s before retry...")
938
+ time.sleep(delay)
939
+ continue
940
+ except Exception as e:
941
+ print(f"⚠️ Upload error (attempt {attempt + 1}): {e}")
942
+ if attempt < max_retries:
943
+ delay = PROCESSING_CONFIG["retry_delay_base"] * (2 ** attempt)
944
+ print(f"⏳ Waiting {delay}s before retry...")
945
+ time.sleep(delay)
946
+ continue
947
+
948
+ print(f"❌ Failed to upload {os.path.basename(file_path)} after {max_retries + 1} attempts")
949
+ return file_path
843
950
 
844
951
  def print_vision_model_data(agent, tool_results):
845
952
  """Print out the JSON data returned by vision model tools"""
@@ -976,18 +1083,18 @@ def validate_agent_configuration(department):
976
1083
  return True
977
1084
 
978
1085
  def main():
979
- """Run the ETL demo workflow"""
1086
+ """Run the ETL demo workflow with robust processing"""
980
1087
  print("\n🚀 Starting ETL Invoice Processing Demo...")
981
1088
  print("📊 This demo includes comprehensive database monitoring")
982
1089
  print("📡 Tools will execute on Memra API server")
983
- print("📝 Note: Processing real PDF files from data/invoices/ directory")
984
- print(" The remote API will take photos of PDF pages and process with vision model")
1090
+ print("📝 Processing 15 specific invoice files with robust error handling")
1091
+ print("⏱️ Includes delays between files and retry logic for API resilience")
1092
+ print("🎯 Target files:", ", ".join(TARGET_FILES))
985
1093
 
986
- # Configuration - Make these configurable via environment variables or config file
1094
+ # Configuration
987
1095
  config = {
988
1096
  "table_name": os.getenv("MEMRA_TABLE_NAME", "invoices"),
989
1097
  "data_directory": os.getenv("MEMRA_DATA_DIR", "data/invoices"),
990
- "file_patterns": ["*.PDF", "*.pdf"], # Could be env var: "*.PDF,*.pdf"
991
1098
  "company_id": os.getenv("MEMRA_COMPANY_ID", "acme_corp"),
992
1099
  "fiscal_year": os.getenv("MEMRA_FISCAL_YEAR", "2024"),
993
1100
  "database_url": os.getenv("MEMRA_DATABASE_URL", "postgresql://memra:memra123@localhost:5432/memra_invoice_db")
@@ -1008,71 +1115,119 @@ def main():
1008
1115
  current_dir = os.path.dirname(os.path.abspath(__file__))
1009
1116
  data_dir = os.path.join(current_dir, config["data_directory"])
1010
1117
 
1011
- # Use configurable file patterns
1118
+ # Find only the target files
1012
1119
  invoice_files = []
1013
- for pattern in config["file_patterns"]:
1014
- invoice_files.extend(glob.glob(os.path.join(data_dir, pattern)))
1120
+ missing_files = []
1121
+
1122
+ for target_file in TARGET_FILES:
1123
+ file_path = os.path.join(data_dir, target_file)
1124
+ if os.path.exists(file_path):
1125
+ invoice_files.append(file_path)
1126
+ else:
1127
+ missing_files.append(target_file)
1128
+
1129
+ if missing_files:
1130
+ print(f"⚠️ Missing files: {', '.join(missing_files)}")
1015
1131
 
1016
1132
  if not invoice_files:
1017
- print(f"⚠️ No PDF files found in {config['data_directory']}/ directory")
1018
- print("📝 Demo will use mock data")
1019
- invoice_files = [os.path.join(data_dir, "sample_invoice.pdf")]
1133
+ print(f"No target files found in {config['data_directory']}/ directory")
1134
+ print("📝 Available files:")
1135
+ available_files = glob.glob(os.path.join(data_dir, "*.PDF"))
1136
+ for file in available_files[:10]: # Show first 10
1137
+ print(f" - {os.path.basename(file)}")
1138
+ if len(available_files) > 10:
1139
+ print(f" ... and {len(available_files) - 10} more")
1140
+ sys.exit(1)
1020
1141
 
1021
- batch_mode = False
1142
+ print(f"\n📁 Found {len(invoice_files)} target files to process")
1143
+ print(f"⏱️ Estimated processing time: {len(invoice_files) * PROCESSING_CONFIG['delay_between_files']:.1f} seconds (plus processing time)")
1144
+
1145
+ # Process files with robust error handling
1146
+ successful_processing = 0
1147
+ failed_processing = 0
1148
+ skipped_processing = 0
1149
+
1022
1150
  for idx, invoice_file in enumerate(invoice_files):
1023
- if not batch_mode:
1024
- print(f"\n📄 Next invoice: {os.path.basename(invoice_file)}")
1025
- user_input = input("Process this document? (Y/n) or process all remaining in batch mode? (B): ").strip().lower()
1026
- if user_input == 'b':
1027
- batch_mode = True
1028
- elif user_input == 'n':
1029
- print(f"⏭️ Skipping {os.path.basename(invoice_file)}")
1151
+ filename = os.path.basename(invoice_file)
1152
+ print(f"\n{'='*60}")
1153
+ print(f"📄 Processing file {idx + 1}/{len(invoice_files)}: {filename}")
1154
+ print(f"{'='*60}")
1155
+
1156
+ # Add delay between files (except for the first one)
1157
+ if idx > 0:
1158
+ delay = PROCESSING_CONFIG["delay_between_files"] + random.uniform(0, 1) # Add some randomness
1159
+ print(f"⏳ Waiting {delay:.1f}s between files...")
1160
+ time.sleep(delay)
1161
+
1162
+ try:
1163
+ # Upload file with retry logic
1164
+ remote_file_path = upload_file_to_api(invoice_file, max_retries=PROCESSING_CONFIG["max_retries"])
1165
+
1166
+ if remote_file_path == invoice_file:
1167
+ print(f"❌ Failed to upload {filename}, skipping...")
1168
+ failed_processing += 1
1030
1169
  continue
1031
- print(f"\n🚀 Processing: {os.path.basename(invoice_file)}")
1032
- remote_file_path = upload_file_to_api(invoice_file)
1033
1170
 
1034
- # Run the full ETL workflow with configurable parameters
1035
- input_data = {
1036
- "file": remote_file_path,
1037
- "connection": config["database_url"],
1038
- "table_name": config["table_name"],
1039
- "sql_query": schema_query
1040
- }
1041
- result = engine.execute_department(etl_department, input_data)
1042
- if result.success:
1043
- print("\n✅ ETL process completed successfully!")
1044
- if 'etl_summary' in result.data:
1045
- summary = result.data['etl_summary']
1046
- print(f"\n📋 ETL Summary Report:")
1047
- print(f"Status: {summary.get('status', 'unknown')}")
1048
- print(f"Summary: {summary.get('summary', 'No summary available')}")
1049
- if 'monitoring_comparison' in summary:
1050
- comparison = summary['monitoring_comparison']
1051
- print(f"\n📊 Database State Comparison:")
1052
- print(f"Pre-ETL Rows: {comparison.get('pre_rows', 'N/A')}")
1053
- print(f"Post-ETL Rows: {comparison.get('post_rows', 'N/A')}")
1054
- print(f"New Records: {comparison.get('new_records', 'N/A')}")
1055
- print(f"Data Quality: {comparison.get('data_quality', 'N/A')}")
1056
- else:
1057
- print("\n📋 ETL Summary Report:")
1058
- print("Status: success")
1059
- print("Summary: ETL process completed with database monitoring")
1060
- if 'invoice_data' in result.data:
1061
- invoice_data = result.data['invoice_data']
1062
- if isinstance(invoice_data, dict) and 'headerSection' in invoice_data:
1063
- vendor = invoice_data['headerSection'].get('vendorName', 'Unknown')
1064
- amount = invoice_data.get('totalAmount', 'Unknown')
1065
- print(f" Processed Invoice: {vendor} - ${amount}")
1171
+ # Run the full ETL workflow with configurable parameters
1172
+ input_data = {
1173
+ "file": remote_file_path,
1174
+ "connection": config["database_url"],
1175
+ "table_name": config["table_name"],
1176
+ "sql_query": schema_query
1177
+ }
1178
+
1179
+ result = engine.execute_department(etl_department, input_data)
1180
+
1181
+ if result.success:
1182
+ successful_processing += 1
1183
+ print(f"\n Successfully processed: {filename}")
1184
+
1185
+ # Show summary if available
1186
+ if 'etl_summary' in result.data:
1187
+ summary = result.data['etl_summary']
1188
+ print(f"📋 Status: {summary.get('status', 'success')}")
1066
1189
  if 'write_confirmation' in result.data:
1067
1190
  write_conf = result.data['write_confirmation']
1068
1191
  if isinstance(write_conf, dict) and 'record_id' in write_conf:
1069
- print(f"💾 Database Record: ID {write_conf['record_id']}")
1070
- else:
1071
- print(f"\n❌ ETL process failed: {result.error}")
1072
- if result.trace and result.trace.errors:
1073
- print("🔍 Error details:")
1074
- for error in result.trace.errors:
1075
- print(f" - {error}")
1192
+ print(f"💾 Database Record ID: {write_conf['record_id']}")
1193
+ else:
1194
+ failed_processing += 1
1195
+ print(f"\n❌ Failed to process: {filename}")
1196
+ print(f" Error: {result.error}")
1197
+ if result.trace and result.trace.errors:
1198
+ print(" Details:")
1199
+ for error in result.trace.errors:
1200
+ print(f" - {error}")
1201
+
1202
+ except Exception as e:
1203
+ failed_processing += 1
1204
+ print(f"\n💥 Unexpected error processing {filename}: {e}")
1205
+ print(" Continuing with next file...")
1206
+ continue
1207
+
1208
+ # Final summary
1209
+ print(f"\n{'='*60}")
1210
+ print(f"🎯 ETL DEMO COMPLETED")
1211
+ print(f"{'='*60}")
1212
+ print(f"📊 Processing Summary:")
1213
+ print(f" ✅ Successful: {successful_processing}")
1214
+ print(f" ❌ Failed: {failed_processing}")
1215
+ print(f" ⏭️ Skipped: {skipped_processing}")
1216
+ print(f" 📄 Total: {len(invoice_files)}")
1217
+
1218
+ if successful_processing > 0:
1219
+ print(f"\n🎉 Demo completed successfully!")
1220
+ print(f" Processed {successful_processing} invoices with robust error handling")
1221
+ print(f" This demonstrates real-world API resilience and rate limiting")
1222
+ else:
1223
+ print(f"\n⚠️ No files were processed successfully")
1224
+ print(f" Check API connectivity and file availability")
1225
+
1226
+ print(f"\n💡 This demo shows realistic production scenarios:")
1227
+ print(f" - API rate limiting and retry logic")
1228
+ print(f" - Graceful error handling and file skipping")
1229
+ print(f" - Delays between requests to avoid overwhelming APIs")
1230
+ print(f" - Exponential backoff for failed requests")
1076
1231
 
1077
1232
  if __name__ == "__main__":
1078
1233
  main()
@@ -6,7 +6,7 @@ Think of it as "Kubernetes for business logic" where agents are the pods and
6
6
  departments are the deployments.
7
7
  """
8
8
 
9
- __version__ = "0.2.9"
9
+ __version__ = "0.2.11"
10
10
 
11
11
  # Core imports
12
12
  from .models import Agent, Department, Tool, LLM
@@ -24,6 +24,9 @@ def run_demo():
24
24
  print("🔧 Configuring environment...")
25
25
  setup_environment()
26
26
 
27
+ # Step 2.5: Install dependencies
28
+ install_dependencies()
29
+
27
30
  # Step 3: Start Docker containers
28
31
  print("🐳 Starting Docker services...")
29
32
  if not start_docker_services(demo_dir):
@@ -279,6 +282,38 @@ def setup_environment():
279
282
  os.environ['DATABASE_URL'] = 'postgresql://postgres:postgres@localhost:5432/local_workflow'
280
283
  print("✅ Set DATABASE_URL")
281
284
 
285
+ def install_dependencies():
286
+ """Install required dependencies for the demo"""
287
+ try:
288
+ print("📦 Installing demo dependencies...")
289
+ dependencies = [
290
+ 'requests==2.31.0',
291
+ 'fastapi==0.104.1',
292
+ 'uvicorn[standard]==0.24.0',
293
+ 'pydantic==2.5.0',
294
+ 'aiohttp',
295
+ 'psycopg2-binary',
296
+ 'httpx',
297
+ 'huggingface_hub'
298
+ ]
299
+
300
+ for dep in dependencies:
301
+ print(f" Installing {dep}...")
302
+ result = subprocess.run([
303
+ sys.executable, '-m', 'pip', 'install', dep
304
+ ], capture_output=True, text=True)
305
+
306
+ if result.returncode != 0:
307
+ print(f"⚠️ Warning: Failed to install {dep}: {result.stderr}")
308
+ else:
309
+ print(f" ✅ {dep} installed")
310
+
311
+ print("✅ Dependencies installed")
312
+
313
+ except Exception as e:
314
+ print(f"⚠️ Warning: Could not install dependencies: {e}")
315
+ print(" You may need to install them manually: pip install requests fastapi uvicorn pydantic")
316
+
282
317
  def start_docker_services(demo_dir):
283
318
  """Start Docker containers using docker-compose"""
284
319
  try:
@@ -343,7 +378,8 @@ def run_etl_workflow(demo_dir):
343
378
  real_demo_script = demo_dir / "etl_invoice_demo.py"
344
379
  if real_demo_script.exists():
345
380
  print("🎯 Running real ETL workflow...")
346
- result = subprocess.run([sys.executable, str(real_demo_script)], cwd=demo_dir)
381
+ print("⏱️ Processing 15 files with delays - this may take 10-15 minutes")
382
+ result = subprocess.run([sys.executable, str(real_demo_script)], cwd=demo_dir, timeout=1800) # 30 minute timeout
347
383
  return result.returncode == 0
348
384
  else:
349
385
  # Fallback to simplified demo
@@ -356,6 +392,10 @@ def run_etl_workflow(demo_dir):
356
392
  print("❌ No demo script found")
357
393
  return False
358
394
 
395
+ except subprocess.TimeoutExpired:
396
+ print("⏰ ETL workflow timed out after 30 minutes")
397
+ print("This is normal for large batches - the demo processes 15 files with delays")
398
+ return False
359
399
  except Exception as e:
360
400
  print(f"❌ Error running ETL workflow: {e}")
361
401
  return False
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "memra"
7
- version = "0.2.9"
7
+ version = "0.2.11"
8
8
  description = "Declarative framework for enterprise workflows with MCP integration - Client SDK"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="memra",
8
- version="0.2.9",
8
+ version="0.2.11",
9
9
  author="Memra",
10
10
  author_email="support@memra.com",
11
11
  description="Declarative framework for enterprise workflows with MCP integration - Client SDK",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes