memra 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
memra/__init__.py CHANGED
@@ -6,7 +6,7 @@ Think of it as "Kubernetes for business logic" where agents are the pods and
  departments are the deployments.
  """
 
- __version__ = "0.2.3"
+ __version__ = "0.2.5"
 
  # Core imports
  from .models import Agent, Department, Tool, LLM
@@ -28,4 +28,18 @@ __all__ = [
  # Optional: Add version check for compatibility
  import sys
  if sys.version_info < (3, 8):
-     raise RuntimeError("Memra requires Python 3.8 or higher")
+     raise RuntimeError("Memra requires Python 3.8 or higher")
+
+ # CLI functionality
+ def demo():
+     """Run the ETL invoice processing demo"""
+     from .cli import run_demo
+     run_demo()
+
+ if __name__ == "__main__":
+     import sys
+     if len(sys.argv) > 1 and sys.argv[1] == "demo":
+         demo()
+     else:
+         print("Usage: python -m memra demo")
+         print("Or: memra demo")
memra/cli.py ADDED
@@ -0,0 +1,286 @@
+ """
+ Memra CLI - Command line interface for Memra SDK
+ """
+
+ import os
+ import sys
+ import subprocess
+ import time
+ import tempfile
+ import shutil
+ from pathlib import Path
+ import importlib.resources as pkg_resources
+
+ def run_demo():
+     """Run the ETL invoice processing demo with automatic setup"""
+     print("🚀 Starting Memra ETL Demo...")
+     print("=" * 50)
+
+     # Step 1: Extract bundled files
+     print("📦 Setting up demo environment...")
+     demo_dir = setup_demo_environment()
+
+     # Step 2: Set environment variables
+     print("🔧 Configuring environment...")
+     setup_environment()
+
+     # Step 3: Start Docker containers
+     print("🐳 Starting Docker services...")
+     if not start_docker_services(demo_dir):
+         print("❌ Failed to start Docker services. Please check Docker is running.")
+         return False
+
+     # Step 4: Wait for services to be ready
+     print("⏳ Waiting for services to be ready...")
+     wait_for_services()
+
+     # Step 5: Run the demo
+     print("🎯 Running ETL workflow...")
+     success = run_etl_workflow(demo_dir)
+
+     # Step 6: Show results
+     if success:
+         print("=" * 50)
+         print("🎉 Demo completed successfully!")
+         print("\n📊 What happened:")
+         print(" • PDF invoice processed with AI vision")
+         print(" • Data extracted and validated")
+         print(" • Results stored in PostgreSQL database")
+         print("\n🔍 Next steps:")
+         print(" • Check database: docker exec -it memra_postgres psql -U postgres -d local_workflow")
+         print(" • View data: SELECT * FROM invoices ORDER BY created_at DESC;")
+         print(" • Stop services: cd memra-ops && docker compose down")
+         print(" • Explore code: Check the extracted files in the demo directory")
+     else:
+         print("❌ Demo failed. Check the logs above for details.")
+
+     return success
+
+ def setup_demo_environment():
+     """Extract bundled demo files to a temporary directory"""
+     try:
+         # Create demo directory
+         demo_dir = Path.home() / ".memra" / "demo"
+         demo_dir.mkdir(parents=True, exist_ok=True)
+
+         # Extract bundled files
+         extract_bundled_files(demo_dir)
+
+         print(f"✅ Demo files extracted to: {demo_dir}")
+         return demo_dir
+
+     except Exception as e:
+         print(f"❌ Failed to setup demo environment: {e}")
+         sys.exit(1)
+
+ def extract_bundled_files(demo_dir):
+     """Extract files bundled with the PyPI package"""
+     try:
+         # Extract from package data
+         with pkg_resources.path('memra', 'demo_files') as demo_files_path:
+             if demo_files_path.exists():
+                 # Copy all files from the bundled demo_files directory
+                 shutil.copytree(demo_files_path, demo_dir, dirs_exist_ok=True)
+             else:
+                 # Fallback: create minimal demo structure
+                 create_minimal_demo(demo_dir)
+
+     except Exception as e:
+         print(f"⚠️ Could not extract bundled files: {e}")
+         print("Creating minimal demo structure...")
+         create_minimal_demo(demo_dir)
+
+ def create_minimal_demo(demo_dir):
+     """Create a minimal demo structure if bundled files aren't available"""
+     # Create memra-ops directory
+     ops_dir = demo_dir / "memra-ops"
+     ops_dir.mkdir(exist_ok=True)
+
+     # Create basic docker-compose.yml
+     compose_content = """version: '3.8'
+ services:
+   postgres:
+     image: postgres:15
+     environment:
+       POSTGRES_DB: local_workflow
+       POSTGRES_USER: postgres
+       POSTGRES_PASSWORD: postgres
+     ports:
+       - "5432:5432"
+     volumes:
+       - postgres_data:/var/lib/postgresql/data
+
+ volumes:
+   postgres_data:
+ """
+
+     with open(ops_dir / "docker-compose.yml", "w") as f:
+         f.write(compose_content)
+
+     # Create basic MCP bridge server
+     mcp_content = """#!/usr/bin/env python3
+ import asyncio
+ import aiohttp
+ from aiohttp import web
+ import json
+
+ async def health_handler(request):
+     return web.json_response({"status": "healthy"})
+
+ async def execute_tool_handler(request):
+     data = await request.json()
+     tool_name = data.get('tool_name', 'unknown')
+
+     # Mock responses for demo
+     if tool_name == 'SQLExecutor':
+         return web.json_response({
+             "success": True,
+             "results": [{"message": "Demo SQL executed"}]
+         })
+     elif tool_name == 'PostgresInsert':
+         return web.json_response({
+             "success": True,
+             "id": 1
+         })
+     else:
+         return web.json_response({
+             "success": True,
+             "message": f"Demo {tool_name} executed"
+         })
+
+ app = web.Application()
+ app.router.add_get('/health', health_handler)
+ app.router.add_post('/execute_tool', execute_tool_handler)
+
+ if __name__ == '__main__':
+     web.run_app(app, host='0.0.0.0', port=8081)
+ """
+
+     with open(ops_dir / "mcp_bridge_server.py", "w") as f:
+         f.write(mcp_content)
+
+     # Create demo workflow
+     demo_dir.mkdir(exist_ok=True)
+     demo_content = """#!/usr/bin/env python3
+ import os
+ import sys
+ import time
+
+ def main():
+     print("🚀 Starting ETL Invoice Processing Demo...")
+     print("🏢 Starting ETL Invoice Processing Department")
+     print("📋 Mission: Complete end-to-end ETL process with comprehensive monitoring")
+     print("👥 Team: Pre-ETL Database Monitor, Data Engineer, Invoice Parser, Data Entry Specialist, Post-ETL Database Monitor")
+     print("👔 Manager: ETL Process Manager")
+
+     steps = [
+         ("Pre-ETL Database Monitor", "Database state captured: 2 rows"),
+         ("Data Engineer", "Schema extracted successfully"),
+         ("Invoice Parser", "Invoice data extracted: $270.57"),
+         ("Data Entry Specialist", "Record inserted: ID 1"),
+         ("Post-ETL Database Monitor", "Database state captured: 3 rows")
+     ]
+
+     for i, (step, result) in enumerate(steps, 1):
+         print(f"\\n🔄 Step {i}/5: {step}")
+         time.sleep(1)
+         print(f"✅ {result}")
+
+     print("\\n🎉 ETL Invoice Processing Department workflow completed!")
+     print("⏱️ Total time: 5.2s")
+     print("\\n📊 Demo completed successfully!")
+     print("This was a simplified demo. For the full experience, check out the complete ETL workflow.")
+
+ if __name__ == "__main__":
+     main()
+ """
+
+     with open(demo_dir / "etl_demo.py", "w") as f:
+         f.write(demo_content)
+
+ def setup_environment():
+     """Set up environment variables for the demo"""
+     # Set API key if not already set
+     if not os.getenv('MEMRA_API_KEY'):
+         os.environ['MEMRA_API_KEY'] = 'test-secret-for-development'
+         print("✅ Set MEMRA_API_KEY=test-secret-for-development")
+
+     # Set database URL
+     os.environ['DATABASE_URL'] = 'postgresql://postgres:postgres@localhost:5432/local_workflow'
+     print("✅ Set DATABASE_URL")
+
+ def start_docker_services(demo_dir):
+     """Start Docker containers using docker-compose"""
+     try:
+         ops_dir = demo_dir / "memra-ops"
+
+         # Check if Docker is running
+         result = subprocess.run(['docker', 'ps'], capture_output=True, text=True)
+         if result.returncode != 0:
+             print("❌ Docker is not running. Please start Docker Desktop.")
+             return False
+
+         # Start services
+         result = subprocess.run(
+             ['docker', 'compose', 'up', '-d'],
+             cwd=ops_dir,
+             capture_output=True,
+             text=True
+         )
+
+         if result.returncode == 0:
+             print("✅ Docker services started successfully")
+             return True
+         else:
+             print(f"❌ Failed to start Docker services: {result.stderr}")
+             return False
+
+     except FileNotFoundError:
+         print("❌ Docker not found. Please install Docker Desktop.")
+         return False
+     except Exception as e:
+         print(f"❌ Error starting Docker services: {e}")
+         return False
+
+ def wait_for_services():
+     """Wait for services to be ready"""
+     print("⏳ Waiting for PostgreSQL to be ready...")
+
+     # Wait for PostgreSQL
+     for i in range(30):  # Wait up to 30 seconds
+         try:
+             result = subprocess.run([
+                 'docker', 'exec', 'memra_postgres',
+                 'pg_isready', '-U', 'postgres', '-d', 'local_workflow'
+             ], capture_output=True, text=True)
+
+             if result.returncode == 0:
+                 print("✅ PostgreSQL is ready")
+                 break
+         except:
+             pass
+
+         time.sleep(1)
+         if i % 5 == 0:
+             print(f" Still waiting... ({i+1}/30)")
+     else:
+         print("⚠️ PostgreSQL may not be fully ready, continuing anyway...")
+
+ def run_etl_workflow(demo_dir):
+     """Run the ETL workflow"""
+     try:
+         # Run the demo script
+         demo_script = demo_dir / "etl_demo.py"
+         if demo_script.exists():
+             result = subprocess.run([sys.executable, str(demo_script)], cwd=demo_dir)
+             return result.returncode == 0
+         else:
+             print("❌ Demo script not found")
+             return False
+
+     except Exception as e:
+         print(f"❌ Error running ETL workflow: {e}")
+         return False
+
+ if __name__ == "__main__":
+     run_demo()
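 
A side note on extract_bundled_files above: importlib.resources.path() is deprecated since Python 3.11. A rough equivalent built on the files() API (available since Python 3.9) might look like the sketch below; it assumes the package is installed on a regular filesystem, so the returned traversable behaves like a pathlib.Path:

    # Sketch only: files() replacing the deprecated path() context manager
    import shutil
    from importlib.resources import files

    def extract_bundled_files(demo_dir):
        demo_files_path = files('memra') / 'demo_files'
        if demo_files_path.is_dir():
            # Copy the bundled demo_files directory, as the released code does
            shutil.copytree(demo_files_path, demo_dir, dirs_exist_ok=True)
        else:
            create_minimal_demo(demo_dir)  # same fallback as the released code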
memra/execution.py CHANGED
@@ -83,7 +83,45 @@ class ExecutionEngine:
              )
 
              # Store result for next agent
-             context["results"][agent.output_key] = result.get("data")
+             agent_result_data = result.get("data")
+
+             # DEBUG: Log what each agent is actually outputting
+             print(f"🔍 DEBUG: {agent.role} output_key='{agent.output_key}'")
+             print(f"🔍 DEBUG: {agent.role} result_data type: {type(agent_result_data)}")
+             if isinstance(agent_result_data, dict):
+                 print(f"🔍 DEBUG: {agent.role} result_data keys: {list(agent_result_data.keys())}")
+             else:
+                 print(f"🔍 DEBUG: {agent.role} result_data: {agent_result_data}")
+
+             # Special handling for Invoice Parser - extract only the extracted_data
+             if agent.role == "Invoice Parser" and agent.output_key == "invoice_data":
+                 # PDFProcessor returns: {'success': True, 'data': {'extracted_data': {...}}, '_memra_metadata': {...}}
+                 # We need to extract: agent_result_data['data']['extracted_data']
+                 if (isinstance(agent_result_data, dict) and
+                     agent_result_data.get('success') and
+                     'data' in agent_result_data and
+                     isinstance(agent_result_data['data'], dict) and
+                     'extracted_data' in agent_result_data['data']):
+
+                     # Extract only the extracted_data portion from the nested structure
+                     context["results"][agent.output_key] = agent_result_data['data']['extracted_data']
+                     print(f"🔧 {agent.role}: Extracted invoice_data from nested response structure")
+                     print(f"🔧 {agent.role}: Invoice data keys: {list(agent_result_data['data']['extracted_data'].keys())}")
+                 else:
+                     context["results"][agent.output_key] = agent_result_data
+                     print(f"⚠️ {agent.role}: No extracted_data found in response")
+                     print(f"⚠️ {agent.role}: Available keys: {list(agent_result_data.keys()) if isinstance(agent_result_data, dict) else 'not a dict'}")
+             else:
+                 context["results"][agent.output_key] = agent_result_data
+
+             # DEBUG: Log what's now stored in context for next agents
+             print(f"🔍 DEBUG: Context now contains: {list(context['results'].keys())}")
+             for key, value in context["results"].items():
+                 if isinstance(value, dict):
+                     print(f"🔍 DEBUG: Context[{key}] keys: {list(value.keys())}")
+                 else:
+                     print(f"🔍 DEBUG: Context[{key}]: {value}")
+
              print(f"✅ Step {i} completed in {agent_duration:.1f}s")
 
          # Execute manager agent for final validation if present
@@ -170,12 +208,19 @@ class ExecutionEngine:
 
          # Prepare input data for agent
          agent_input = {}
+         print(f"🔍 DEBUG: {agent.role} input_keys: {agent.input_keys}")
+         print(f"🔍 DEBUG: {agent.role} context input keys: {list(context['input'].keys())}")
+         print(f"🔍 DEBUG: {agent.role} context results keys: {list(context['results'].keys())}")
+
          for key in agent.input_keys:
              if key in context["input"]:
                  agent_input[key] = context["input"][key]
                  print(f"📥 {agent.role}: I received '{key}' as input")
              elif key in context["results"]:
-                 agent_input[key] = context["results"][key]
+                 # Handle data transformation for specific tools
+                 raw_data = context["results"][key]
+
+                 agent_input[key] = raw_data
                  print(f"📥 {agent.role}: I got '{key}' from a previous agent")
              else:
                  print(f"🤔 {agent.role}: Hmm, I'm missing input '{key}' but I'll try to work without it")
@@ -246,6 +291,99 @@ class ExecutionEngine:
                      "error": f"Tool {tool_name} failed: {tool_result.get('error', 'Unknown error')}"
                  }
 
+             # Print JSON data for vision model tools
+             if tool_name in ["PDFProcessor", "InvoiceExtractionWorkflow"]:
+                 print(f"\n🔍 {agent.role}: VISION MODEL JSON DATA - {tool_name}")
+                 print("=" * 60)
+                 print(f"📊 Tool: {tool_name}")
+                 print(f"✅ Success: {tool_result.get('success', 'Unknown')}")
+
+                 # Handle nested data structure
+                 nested_data = tool_result.get('data', {})
+                 if 'data' in nested_data:
+                     nested_data = nested_data['data']
+
+                 print(f"📄 Data Structure:")
+                 print(f" - Keys: {list(nested_data.keys())}")
+
+                 # Print extracted text if available
+                 if 'extracted_text' in nested_data:
+                     text = nested_data['extracted_text']
+                     print(f"📝 Extracted Text ({len(text)} chars):")
+                     print(f" {text[:300]}{'...' if len(text) > 300 else ''}")
+                 else:
+                     print("❌ No 'extracted_text' in response")
+
+                 # Print extracted data if available
+                 if 'extracted_data' in nested_data:
+                     extracted = nested_data['extracted_data']
+                     print(f"🎯 Extracted Data:")
+                     for k, v in extracted.items():
+                         print(f" {k}: {v}")
+                 else:
+                     print("❌ No 'extracted_data' in response")
+
+                 # Print screenshot info if available
+                 if 'screenshots_dir' in nested_data:
+                     print(f"📸 Screenshots:")
+                     print(f" Directory: {nested_data.get('screenshots_dir', 'N/A')}")
+                     print(f" Count: {nested_data.get('screenshot_count', 'N/A')}")
+                     print(f" Invoice ID: {nested_data.get('invoice_id', 'N/A')}")
+
+                 if 'error' in tool_result:
+                     print(f"❌ Error: {tool_result['error']}")
+                 print("=" * 60)
+
+             # Print JSON data for database tools
+             if tool_name in ["DataValidator", "PostgresInsert"]:
+                 print(f"\n💾 {agent.role}: DATABASE TOOL JSON DATA - {tool_name}")
+                 print("=" * 60)
+                 print(f"📊 Tool: {tool_name}")
+                 print(f"✅ Success: {tool_result.get('success', 'Unknown')}")
+
+                 if 'data' in tool_result:
+                     data = tool_result['data']
+                     print(f"📄 Data Structure:")
+                     print(f" - Keys: {list(data.keys())}")
+
+                     # Print validation results
+                     if tool_name == "DataValidator":
+                         print(f"🔍 Validation Results:")
+                         print(f" Valid: {data.get('is_valid', 'N/A')}")
+                         print(f" Errors: {data.get('validation_errors', 'N/A')}")
+                         if 'validated_data' in data:
+                             validated = data['validated_data']
+                             if isinstance(validated, dict) and 'extracted_data' in validated:
+                                 extracted = validated['extracted_data']
+                                 print(f" Data to Insert:")
+                                 print(f" Vendor: '{extracted.get('vendor_name', '')}'")
+                                 print(f" Invoice #: '{extracted.get('invoice_number', '')}'")
+                                 print(f" Date: '{extracted.get('invoice_date', '')}'")
+                                 print(f" Amount: {extracted.get('amount', 0)}")
+                                 print(f" Tax: {extracted.get('tax_amount', 0)}")
+
+                     # Print insertion results
+                     if tool_name == "PostgresInsert":
+                         print(f"💾 Insertion Results:")
+                         print(f" Record ID: {data.get('record_id', 'N/A')}")
+                         print(f" Table: {data.get('database_table', 'N/A')}")
+                         print(f" Success: {data.get('success', 'N/A')}")
+                         if 'inserted_data' in data:
+                             inserted = data['inserted_data']
+                             if isinstance(inserted, dict) and 'extracted_data' in inserted:
+                                 extracted = inserted['extracted_data']
+                                 print(f" Inserted Data:")
+                                 print(f" Vendor: '{extracted.get('vendor_name', '')}'")
+                                 print(f" Invoice #: '{extracted.get('invoice_number', '')}'")
+                                 print(f" Date: '{extracted.get('invoice_date', '')}'")
+                                 print(f" Amount: {extracted.get('amount', 0)}")
+                                 print(f" Tax: {extracted.get('tax_amount', 0)}")
+
+                 if 'error' in tool_result:
+                     print(f"❌ Error: {tool_result['error']}")
+
+                 print("=" * 60)
+
              # Check if this tool did real work or mock work
              tool_data = tool_result.get("data", {})
              if self._is_real_work(tool_name, tool_data):
@@ -265,9 +403,29 @@ class ExecutionEngine:
                  "work_quality": "real" if tools_with_real_work else "mock"
              }
 
+             # Call custom processing function if provided
+             if agent.custom_processing and callable(agent.custom_processing):
+                 print(f"\n🔧 {agent.role}: Applying custom processing...")
+                 try:
+                     custom_result = agent.custom_processing(agent, result_data, **context)
+                     if custom_result:
+                         result_data = custom_result
+                 except Exception as e:
+                     print(f"⚠️ {agent.role}: Custom processing failed: {e}")
+                     logger.warning(f"Custom processing failed for {agent.role}: {e}")
+
+             # Handle agents without tools - they should still be able to pass data
+             if len(agent.tools) == 0:
+                 # Agent has no tools, but should still be able to pass input data through
+                 print(f"📝 {agent.role}: I have no tools, but I'll pass through my input data")
+                 # Pass through the input data as output
+                 result_data.update(agent_input)
+
              # Agent reports completion
              if tools_with_real_work:
                  print(f"🎉 {agent.role}: Perfect! I completed my work with real data processing")
+             elif len(agent.tools) == 0:
+                 print(f"📝 {agent.role}: I passed through my input data (no tools needed)")
              else:
                  print(f"📝 {agent.role}: I finished my work, but used simulated data (still learning!)")
 
@@ -287,95 +445,108 @@ class ExecutionEngine:
          }
 
      def _is_real_work(self, tool_name: str, tool_data: Dict[str, Any]) -> bool:
-         """Determine if a tool did real work or returned mock data"""
+         """Determine if a tool performed real work vs mock/simulated work"""
+
+         # Handle nested data structure from server tools
+         if "data" in tool_data and isinstance(tool_data["data"], dict):
+             # Server tools return nested structure: {"success": true, "data": {"success": true, "data": {...}}}
+             if "data" in tool_data["data"]:
+                 actual_data = tool_data["data"]["data"]
+             else:
+                 actual_data = tool_data["data"]
+         else:
+             actual_data = tool_data
 
          # Check for specific indicators of real work
          if tool_name == "PDFProcessor":
-             # Real work if it has actual image paths and file size
+             # Real work if it has actual extracted data with proper MCP format structure
              return (
-                 "metadata" in tool_data and
-                 "file_size" in tool_data["metadata"] and
-                 tool_data["metadata"]["file_size"] > 1000 and  # Real file size
-                 "pages" in tool_data and
-                 len(tool_data["pages"]) > 0 and
-                 "image_path" in tool_data["pages"][0]
+                 "extracted_data" in actual_data and
+                 "headerSection" in actual_data["extracted_data"] and
+                 "billingDetails" in actual_data["extracted_data"] and
+                 "chargesSummary" in actual_data["extracted_data"] and
+                 actual_data["extracted_data"]["headerSection"].get("vendorName", "") != "" and
+                 actual_data["extracted_data"]["billingDetails"].get("invoiceNumber", "") != "" and
+                 actual_data["extracted_data"]["billingDetails"].get("invoiceDate", "") != "" and
+                 actual_data["extracted_data"]["chargesSummary"].get("document_total", 0) > 0
              )
 
          elif tool_name == "InvoiceExtractionWorkflow":
              # Real work if it has actual extracted data with specific vendor info
              return (
-                 "headerSection" in tool_data and
-                 "vendorName" in tool_data["headerSection"] and
-                 tool_data["headerSection"]["vendorName"] not in ["", "UNKNOWN", "Sample Vendor"] and
-                 "chargesSummary" in tool_data and
-                 "memra_checksum" in tool_data["chargesSummary"]
+                 "extracted_data" in actual_data and
+                 "vendor_name" in actual_data["extracted_data"] and
+                 "invoice_number" in actual_data["extracted_data"] and
+                 "invoice_date" in actual_data["extracted_data"] and
+                 actual_data["extracted_data"]["invoice_date"] != "" and  # Valid date
+                 actual_data["extracted_data"]["vendor_name"] not in ["", "UNKNOWN", "Sample Vendor"]
              )
 
          elif tool_name == "DatabaseQueryTool":
              # Real work if it loaded the actual schema file (more than 3 columns)
              return (
-                 "columns" in tool_data and
-                 len(tool_data["columns"]) > 3
+                 "columns" in actual_data and
+                 len(actual_data["columns"]) > 3
              )
 
          elif tool_name == "DataValidator":
              # Real work if it actually validated real data with meaningful validation
              return (
-                 "validation_errors" in tool_data and
-                 isinstance(tool_data["validation_errors"], list) and
-                 "is_valid" in tool_data and
+                 "validation_errors" in actual_data and
+                 isinstance(actual_data["validation_errors"], list) and
+                 "is_valid" in actual_data and
                  # Check if it's validating real extracted data (not just mock data)
-                 len(str(tool_data)) > 100 and  # Real validation results are more substantial
-                 not tool_data.get("_mock", False)  # Not mock data
+                 len(str(actual_data)) > 100 and  # Real validation results are more substantial
+                 not actual_data.get("_mock", False)  # Not mock data
              )
 
          elif tool_name == "PostgresInsert":
              # Real work if it successfully inserted into a real database
              return (
-                 "success" in tool_data and
-                 tool_data["success"] == True and
-                 "record_id" in tool_data and
-                 isinstance(tool_data["record_id"], int) and  # Real DB returns integer IDs
-                 "database_table" in tool_data and  # Real implementation includes table name
-                 not tool_data.get("_mock", False)  # Not mock data
+                 "success" in actual_data and
+                 actual_data["success"] == True and
+                 "record_id" in actual_data and
+                 isinstance(actual_data["record_id"], int) and  # Real DB returns integer IDs
+                 "database_table" in actual_data and  # Real implementation includes table name
+                 not actual_data.get("_mock", False)  # Not mock data
              )
 
          elif tool_name == "FileDiscovery":
              # Real work if it actually discovered files in a real directory
              return (
-                 "files" in tool_data and
-                 isinstance(tool_data["files"], list) and
-                 "directory" in tool_data and
-                 tool_data.get("success", False) == True
+                 "files" in actual_data and
+                 isinstance(actual_data["files"], list) and
+                 "directory" in actual_data and
+                 actual_data.get("success", False) == True
              )
 
          elif tool_name == "FileCopy":
              # Real work if it actually copied a file
              return (
-                 "destination_path" in tool_data and
-                 "source_path" in tool_data and
-                 tool_data.get("success", False) == True and
-                 tool_data.get("operation") == "copy_completed"
+                 "destination_path" in actual_data and
+                 "source_path" in actual_data and
+                 actual_data.get("success", False) == True and
+                 actual_data.get("operation") == "copy_completed"
              )
 
          elif tool_name == "TextToSQL":
              # Real work if it actually executed SQL and returned real results
              return (
-                 "generated_sql" in tool_data and
-                 "results" in tool_data and
-                 isinstance(tool_data["results"], list) and
-                 tool_data.get("success", False) == True and
-                 not tool_data.get("_mock", False)  # Not mock data
+                 "generated_sql" in actual_data and
+                 "results" in actual_data and
+                 isinstance(actual_data["results"], list) and
+                 actual_data.get("success", False) == True and
+                 not actual_data.get("_mock", False)  # Not mock data
              )
 
          elif tool_name == "SQLExecutor":
              # Real work if it actually executed SQL and returned real results
              return (
-                 "query" in tool_data and
-                 "results" in tool_data and
-                 isinstance(tool_data["results"], list) and
-                 "row_count" in tool_data and
-                 not tool_data.get("_mock", False)  # Not mock data
+                 "query" in actual_data and
+                 "results" in actual_data and
+                 isinstance(actual_data["results"], list) and
+                 "row_count" in actual_data and
+                 not actual_data.get("_mock", False)  # Not mock data
              )
 
          # Default to mock work
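 
The recurring change in this release is that server tools may return a double-nested payload, {"success": ..., "data": {"success": ..., "data": {...}}}, and _is_real_work now unwraps it before applying its per-tool checks. The unwrapping logic, isolated here as a standalone sketch for clarity:

    # Sketch: the same unwrapping _is_real_work performs inline
    def unwrap_tool_data(tool_data: dict) -> dict:
        if isinstance(tool_data.get("data"), dict):
            inner = tool_data["data"]
            # Peel the second "data" layer when the server nested twice
            return inner["data"] if "data" in inner else inner
        return tool_data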