memra 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. memra/cli.py +322 -51
  2. {memra-0.2.13.dist-info → memra-0.2.15.dist-info}/METADATA +1 -1
  3. {memra-0.2.13.dist-info → memra-0.2.15.dist-info}/RECORD +7 -61
  4. memra-0.2.15.dist-info/top_level.txt +1 -0
  5. memra-0.2.13.dist-info/top_level.txt +0 -4
  6. memra-ops/app.py +0 -808
  7. memra-ops/config/config.py +0 -25
  8. memra-ops/config.py +0 -34
  9. memra-ops/logic/__init__.py +0 -1
  10. memra-ops/logic/file_tools.py +0 -43
  11. memra-ops/logic/invoice_tools.py +0 -668
  12. memra-ops/logic/invoice_tools_fix.py +0 -66
  13. memra-ops/mcp_bridge_server.py +0 -1178
  14. memra-ops/scripts/check_database.py +0 -37
  15. memra-ops/scripts/clear_database.py +0 -48
  16. memra-ops/scripts/monitor_database.py +0 -67
  17. memra-ops/scripts/release.py +0 -133
  18. memra-ops/scripts/reset_database.py +0 -65
  19. memra-ops/scripts/start_memra.py +0 -334
  20. memra-ops/scripts/stop_memra.py +0 -132
  21. memra-ops/server_tool_registry.py +0 -190
  22. memra-ops/tests/test_llm_text_to_sql.py +0 -115
  23. memra-ops/tests/test_llm_vs_pattern.py +0 -130
  24. memra-ops/tests/test_mcp_schema_aware.py +0 -124
  25. memra-ops/tests/test_schema_aware_sql.py +0 -139
  26. memra-ops/tests/test_schema_aware_sql_simple.py +0 -66
  27. memra-ops/tests/test_text_to_sql_demo.py +0 -140
  28. memra-ops/tools/mcp_bridge_server.py +0 -851
  29. memra-sdk/examples/accounts_payable.py +0 -215
  30. memra-sdk/examples/accounts_payable_client.py +0 -217
  31. memra-sdk/examples/accounts_payable_mcp.py +0 -200
  32. memra-sdk/examples/ask_questions.py +0 -123
  33. memra-sdk/examples/invoice_processing.py +0 -116
  34. memra-sdk/examples/propane_delivery.py +0 -87
  35. memra-sdk/examples/simple_text_to_sql.py +0 -158
  36. memra-sdk/memra/__init__.py +0 -31
  37. memra-sdk/memra/discovery.py +0 -15
  38. memra-sdk/memra/discovery_client.py +0 -49
  39. memra-sdk/memra/execution.py +0 -481
  40. memra-sdk/memra/models.py +0 -99
  41. memra-sdk/memra/tool_registry.py +0 -343
  42. memra-sdk/memra/tool_registry_client.py +0 -106
  43. memra-sdk/scripts/release.py +0 -133
  44. memra-sdk/setup.py +0 -52
  45. memra-workflows/accounts_payable/accounts_payable.py +0 -215
  46. memra-workflows/accounts_payable/accounts_payable_client.py +0 -216
  47. memra-workflows/accounts_payable/accounts_payable_mcp.py +0 -200
  48. memra-workflows/accounts_payable/accounts_payable_smart.py +0 -221
  49. memra-workflows/invoice_processing/invoice_processing.py +0 -116
  50. memra-workflows/invoice_processing/smart_invoice_processor.py +0 -220
  51. memra-workflows/logic/__init__.py +0 -1
  52. memra-workflows/logic/file_tools.py +0 -50
  53. memra-workflows/logic/invoice_tools.py +0 -501
  54. memra-workflows/logic/propane_agents.py +0 -52
  55. memra-workflows/mcp_bridge_server.py +0 -230
  56. memra-workflows/propane_delivery/propane_delivery.py +0 -87
  57. memra-workflows/text_to_sql/complete_invoice_workflow_with_queries.py +0 -208
  58. memra-workflows/text_to_sql/complete_text_to_sql_system.py +0 -266
  59. memra-workflows/text_to_sql/file_discovery_demo.py +0 -156
  60. {memra-0.2.13.dist-info → memra-0.2.15.dist-info}/LICENSE +0 -0
  61. {memra-0.2.13.dist-info → memra-0.2.15.dist-info}/WHEEL +0 -0
  62. {memra-0.2.13.dist-info → memra-0.2.15.dist-info}/entry_points.txt +0 -0
@@ -1,501 +0,0 @@
1
- import os
2
- import sys
3
- import subprocess
4
- import base64
5
- import json
6
- import re
7
- import logging
8
- from typing import Dict, Any
9
- from pathlib import Path
10
- from huggingface_hub import InferenceClient
11
- import psycopg2
12
- from psycopg2.extras import RealDictCursor
13
- from datetime import datetime, date
14
-
15
- # Add project root to path to import config
16
- sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
-
18
- # Try to import local config, fallback to environment variables
19
- try:
20
- from config import API_CONFIG
21
- except ImportError:
22
- # Server deployment - use environment variables
23
- API_CONFIG = {
24
- "huggingface": {
25
- "api_key": os.getenv("HUGGINGFACE_API_KEY", ""),
26
- "model": os.getenv("HUGGINGFACE_MODEL", "meta-llama/Llama-4-Maverick-17B-128E-Instruct"),
27
- "max_tokens": int(os.getenv("HUGGINGFACE_MAX_TOKENS", "2000"))
28
- }
29
- }
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
- # Propane invoice data model
34
- PROPANE_DATA_MODEL = {
35
- "headerSection": {
36
- "vendorName": "string",
37
- "customerName": "string",
38
- "serviceAddress": "string",
39
- "subtotal": "number or null"
40
- },
41
- "billingDetails": {
42
- "invoiceDate": "string",
43
- "invoiceNumber": "string",
44
- "accountNumber": "string",
45
- "referenceNumber": "string or null",
46
- "Service Address": "string or null",
47
- "Subtotal": "number or null"
48
- },
49
- "chargesSummary": {
50
- "lineItemsBreakdown": [
51
- {
52
- "description": "string",
53
- "quantity": "number or null",
54
- "unit_price": "number or null",
55
- "amount": "number",
56
- "main_product": "boolean"
57
- }
58
- ],
59
- "extended_total": "number",
60
- "calculated_subtotal": "number",
61
- "secondary_tax": "number",
62
- "calculated_total": "number",
63
- "document_total": "number",
64
- "invoiceNumber": "string",
65
- "accountNumber": "string",
66
- "memra_checksum": "string"
67
- },
68
- "paymentInstructions": {
69
- "dueOnInvoiceAmount": "number or null",
70
- "payInFullByDate": "string or null",
71
- "remitToAddress": "string or null",
72
- "barcodeFooter": "string or null",
73
- "vendor_name": "string or null"
74
- }
75
- }
76
-
77
- class DatabaseQueryTool:
78
- """Tool for querying database schemas and data"""
79
-
80
- def __init__(self, credentials: Dict[str, Any]):
81
- self.credentials = credentials
82
- # TODO: Initialize database connection
83
-
84
- def get_schema(self, table_name: str) -> Dict[str, Any]:
85
- """Get schema information for a table"""
86
- logger.info(f"Getting schema for table: {table_name}")
87
- # TODO: Implement actual database query
88
- # For now, return the schema from our local file
89
- try:
90
- schema_path = Path(__file__).parent.parent / "local" / "dependencies" / "data_model.json"
91
- with open(schema_path, 'r') as f:
92
- return json.load(f)
93
- except Exception as e:
94
- logger.error(f"Failed to load schema: {e}")
95
- return {
96
- "columns": [
97
- {"name": "id", "type": "integer", "nullable": False},
98
- {"name": "invoice_number", "type": "varchar", "nullable": False},
99
- {"name": "total_amount", "type": "decimal", "nullable": False},
100
- ],
101
- "constraints": [
102
- {"type": "primary_key", "columns": ["id"]},
103
- {"type": "unique", "columns": ["invoice_number"]}
104
- ]
105
- }
106
-
107
- class PDFProcessor:
108
- """Tool for processing PDF files"""
109
-
110
- def process_pdf(self, file_path: str) -> Dict[str, Any]:
111
- """Process a PDF file and convert to high-resolution images"""
112
- logger.info(f"Processing PDF: {file_path}")
113
-
114
- try:
115
- # Create output directory for this PDF
116
- pdf_path = Path(file_path)
117
- if not pdf_path.exists():
118
- raise FileNotFoundError(f"PDF file not found: {file_path}")
119
-
120
- pdf_name = pdf_path.stem
121
- output_dir = Path("temp_processing") / pdf_name
122
- output_dir.mkdir(parents=True, exist_ok=True)
123
-
124
- # Convert PDF to images using pdftoppm
125
- cmd = [
126
- "pdftoppm",
127
- "-png",
128
- "-r", "300", # 300 DPI for high resolution
129
- str(pdf_path),
130
- str(output_dir / "page")
131
- ]
132
-
133
- result = subprocess.run(cmd, capture_output=True, text=True)
134
- if result.returncode != 0:
135
- logger.error(f"Error converting PDF {file_path}: {result.stderr}")
136
- raise Exception(f"PDF conversion failed: {result.stderr}")
137
-
138
- # Get list of generated images
139
- image_files = list(output_dir.glob("*.png"))
140
-
141
- return {
142
- "pages": [
143
- {
144
- "page_number": i+1,
145
- "image_path": str(img_path),
146
- "content": f"Page {i+1} converted to image"
147
- }
148
- for i, img_path in enumerate(sorted(image_files))
149
- ],
150
- "metadata": {
151
- "page_count": len(image_files),
152
- "file_size": pdf_path.stat().st_size,
153
- "output_directory": str(output_dir)
154
- }
155
- }
156
-
157
- except Exception as e:
158
- logger.error(f"PDF processing failed: {str(e)}")
159
- raise
160
-
161
- class OCRTool:
162
- """Tool for performing OCR on images"""
163
-
164
- def extract_text(self, image_data: Dict[str, Any]) -> str:
165
- """Extract text from an image using OCR"""
166
- logger.info("Performing OCR on image")
167
-
168
- # For now, we'll skip OCR since the LLM can process images directly
169
- # In a full implementation, you could use pytesseract here
170
- return "OCR text extraction - delegated to LLM vision processing"
171
-
172
- class InvoiceExtractionWorkflow:
173
- """Workflow for extracting structured data from invoices using LLM"""
174
-
175
- def __init__(self):
176
- self.client = InferenceClient(
177
- provider="fireworks-ai",
178
- api_key=API_CONFIG["huggingface"]["api_key"],
179
- )
180
-
181
- def encode_image(self, image_path: str) -> str:
182
- """Encode image to base64"""
183
- with open(image_path, "rb") as image_file:
184
- return base64.b64encode(image_file.read()).decode('utf-8')
185
-
186
- def extract_json_from_markdown(self, text: str) -> str:
187
- """Extract JSON from markdown response"""
188
- # Remove markdown code block markers
189
- text = re.sub(r'```json\n?', '', text)
190
- text = re.sub(r'\n?```', '', text)
191
- # Remove any newlines and extra spaces
192
- text = re.sub(r'\s+', ' ', text)
193
- # Convert Python dict syntax to JSON
194
- text = text.replace("'", '"')
195
- # Try to find JSON content
196
- match = re.search(r'\{.*\}', text, re.DOTALL)
197
- if match:
198
- json_text = match.group(0)
199
- # Ensure the JSON is properly closed
200
- if not json_text.strip().endswith('}'):
201
- last_brace = json_text.rfind('}')
202
- if last_brace != -1:
203
- json_text = json_text[:last_brace+1]
204
- else:
205
- json_text = json_text.rstrip() + '}}'
206
- return json_text
207
- return text.strip()
208
-
209
- def validate_calculations(self, data: Dict[str, Any]) -> Dict[str, Any]:
210
- """Validate and calculate invoice totals"""
211
- try:
212
- charges = data.get("chargesSummary", {})
213
- line_items = charges.get("lineItemsBreakdown", [])
214
-
215
- # Find main product line item
216
- main_product = None
217
- additional_items_total = 0
218
-
219
- for item in line_items:
220
- if item.get("main_product"):
221
- main_product = item
222
- elif item.get("amount") is not None:
223
- additional_items_total += float(item["amount"])
224
-
225
- if main_product and main_product.get("unit_price") and main_product.get("quantity"):
226
- # Calculate extended total (price * quantity for main product)
227
- extended_total = float(main_product["unit_price"]) * float(main_product["quantity"])
228
- # Calculate subtotal (extended total + sum of additional items)
229
- calculated_subtotal = extended_total + additional_items_total
230
- # Get the document total
231
- document_total = float(charges.get("document_total", 0))
232
-
233
- # Calculate secondary tax
234
- secondary_tax = document_total - calculated_subtotal
235
- if abs(secondary_tax) < 0.01:
236
- secondary_tax = 0.0
237
-
238
- # Update the data with calculated values
239
- charges["extended_total"] = round(extended_total, 2)
240
- charges["calculated_subtotal"] = round(calculated_subtotal, 2)
241
- charges["secondary_tax"] = round(secondary_tax, 2)
242
- charges["calculated_total"] = round(document_total, 2)
243
-
244
- # Set checksum
245
- if abs((calculated_subtotal + secondary_tax) - document_total) <= 0.01:
246
- charges["memra_checksum"] = "pass"
247
- else:
248
- charges["memra_checksum"] = "fail"
249
- else:
250
- charges["memra_checksum"] = "fail"
251
-
252
- except Exception as e:
253
- logger.error(f"Error in calculations: {e}")
254
- data.get("chargesSummary", {})["memra_checksum"] = "fail"
255
-
256
- return data
257
-
258
- def extract_data(self, text: str, schema: Dict[str, Any]) -> Dict[str, Any]:
259
- """Extract structured data from invoice using LLM vision"""
260
- logger.info("Extracting structured data from invoice using LLM")
261
-
262
- # Get the first image from the PDF processing results
263
- # This is a simplified approach - in practice you'd handle multiple pages
264
- temp_dir = Path("temp_processing")
265
- image_files = list(temp_dir.rglob("*.png"))
266
-
267
- if not image_files:
268
- raise Exception("No processed images found for extraction")
269
-
270
- # Use the first image
271
- image_path = image_files[0]
272
- base64_image = self.encode_image(str(image_path))
273
-
274
- # Create the completion with both text and image
275
- completion = self.client.chat.completions.create(
276
- model=API_CONFIG["huggingface"]["model"],
277
- messages=[
278
- {
279
- "role": "user",
280
- "content": [
281
- {
282
- "type": "text",
283
- "text": f"""Please analyze this propane invoice image and fill out the following data model with the information you find.
284
- Pay special attention to:
285
-
286
- 1. For each line item in chargesSummary, include:
287
- - description (keep full description on one line)
288
- - quantity (if available, otherwise null)
289
- - unit_price (if available, otherwise null)
290
- - amount (total for the line, must be a number)
291
- - main_product: true for the main product (usually bulk propane), false for additional charges
292
-
293
- 2. The main product will typically be the bulk propane line item
294
- - This should have main_product = true
295
- - All other items (taxes, fees, etc.) should have main_product = false
296
- - Make sure to include quantity and unit_price for the main product
297
-
298
- 3. Look for a barcode in the footer of the invoice
299
- - Include it in paymentInstructions.barcodeFooter if found
300
- - Set to null if not found
301
-
302
- Return the data in proper JSON format with double quotes around property names and string values.
303
- Do not include any additional text or explanation.
304
- Here is the data model structure:
305
- {json.dumps(PROPANE_DATA_MODEL)}
306
- """
307
- },
308
- {
309
- "type": "image_url",
310
- "image_url": {
311
- "url": f"data:image/png;base64,{base64_image}"
312
- }
313
- }
314
- ]
315
- }
316
- ],
317
- max_tokens=API_CONFIG["huggingface"]["max_tokens"],
318
- )
319
-
320
- # Extract and parse the JSON response
321
- try:
322
- json_text = self.extract_json_from_markdown(completion.choices[0].message.content)
323
- response_data = json.loads(json_text)
324
- # Validate calculations
325
- response_data = self.validate_calculations(response_data)
326
- return response_data
327
- except json.JSONDecodeError as e:
328
- logger.error(f"Error parsing JSON response: {e}")
329
- logger.error(f"Raw response: {completion.choices[0].message.content}")
330
- raise Exception(f"Failed to parse LLM response: {e}")
331
-
332
- class DataValidator:
333
- """Tool for validating extracted data"""
334
-
335
- def validate(self, data: Dict[str, Any], schema: Dict[str, Any]) -> Dict[str, Any]:
336
- """Validate data against schema"""
337
- logger.info("Validating extracted data")
338
-
339
- validation_errors = []
340
-
341
- try:
342
- # Check required fields
343
- required_fields = [
344
- "headerSection.vendorName",
345
- "billingDetails.invoiceNumber",
346
- "chargesSummary.document_total"
347
- ]
348
-
349
- for field_path in required_fields:
350
- keys = field_path.split('.')
351
- current = data
352
- try:
353
- for key in keys:
354
- current = current[key]
355
- if current is None or current == "":
356
- validation_errors.append(f"Required field {field_path} is missing or empty")
357
- except KeyError:
358
- validation_errors.append(f"Required field {field_path} not found")
359
-
360
- # Validate calculations checksum
361
- checksum = data.get("chargesSummary", {}).get("memra_checksum")
362
- if checksum != "pass":
363
- validation_errors.append("Invoice calculations do not match (checksum failed)")
364
-
365
- return {
366
- "is_valid": len(validation_errors) == 0,
367
- "validation_errors": validation_errors
368
- }
369
-
370
- except Exception as e:
371
- logger.error(f"Validation error: {e}")
372
- return {
373
- "is_valid": False,
374
- "validation_errors": [f"Validation process failed: {str(e)}"]
375
- }
376
-
377
- class PostgresInsert:
378
- """Tool for inserting data into Postgres"""
379
-
380
- def __init__(self, credentials: Dict[str, Any]):
381
- self.credentials = credentials
382
- self.connection = None
383
-
384
- def _connect(self):
385
- """Establish database connection"""
386
- if self.connection is None:
387
- try:
388
- # Build connection string from credentials
389
- host = self.credentials.get("host", "localhost")
390
- port = self.credentials.get("port", 5432)
391
- database = self.credentials.get("database", "memra_invoice_db")
392
- user = self.credentials.get("user", "tarpus")
393
- password = self.credentials.get("password", "")
394
-
395
- if password:
396
- conn_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
397
- else:
398
- conn_string = f"postgresql://{user}@{host}:{port}/{database}"
399
-
400
- self.connection = psycopg2.connect(conn_string)
401
- logger.info(f"Connected to database: {database}")
402
-
403
- except Exception as e:
404
- logger.error(f"Database connection failed: {e}")
405
- raise
406
-
407
- def _parse_invoice_date(self, date_str: str) -> date:
408
- """Parse invoice date from various formats"""
409
- if not date_str:
410
- return datetime.now().date()
411
-
412
- # Try common date formats
413
- formats = ["%m/%d/%Y", "%Y-%m-%d", "%m-%d-%Y", "%d/%m/%Y"]
414
- for fmt in formats:
415
- try:
416
- return datetime.strptime(date_str, fmt).date()
417
- except ValueError:
418
- continue
419
-
420
- # If all formats fail, return today's date
421
- logger.warning(f"Could not parse date '{date_str}', using today's date")
422
- return datetime.now().date()
423
-
424
- def insert_record(self, table: str, data: Dict[str, Any]) -> Dict[str, Any]:
425
- """Insert a record into the database"""
426
- logger.info(f"Inserting record into {table}")
427
-
428
- try:
429
- self._connect()
430
-
431
- # Extract key fields from the invoice data
432
- header = data.get("headerSection", {})
433
- billing = data.get("billingDetails", {})
434
- charges = data.get("chargesSummary", {})
435
-
436
- invoice_number = charges.get("invoiceNumber") or billing.get("invoiceNumber", "UNKNOWN")
437
- vendor_name = header.get("vendorName", "UNKNOWN")
438
- total_amount = charges.get("document_total", 0)
439
- tax_amount = charges.get("secondary_tax", 0)
440
- invoice_date = self._parse_invoice_date(billing.get("invoiceDate", ""))
441
-
442
- # Prepare line items as JSONB
443
- line_items = charges.get("lineItemsBreakdown", [])
444
-
445
- # Insert query
446
- insert_query = """
447
- INSERT INTO invoices (
448
- invoice_number, vendor_name, invoice_date, total_amount,
449
- tax_amount, line_items, status
450
- ) VALUES (
451
- %s, %s, %s, %s, %s, %s, %s
452
- ) RETURNING id;
453
- """
454
-
455
- with self.connection.cursor() as cursor:
456
- cursor.execute(insert_query, (
457
- invoice_number,
458
- vendor_name,
459
- invoice_date,
460
- float(total_amount),
461
- float(tax_amount) if tax_amount else None,
462
- json.dumps(line_items),
463
- 'pending'
464
- ))
465
-
466
- record_id = cursor.fetchone()[0]
467
- self.connection.commit()
468
-
469
- logger.info(f"Successfully inserted invoice {invoice_number} with ID {record_id}")
470
-
471
- return {
472
- "success": True,
473
- "record_id": record_id,
474
- "invoice_number": invoice_number,
475
- "total_amount": total_amount,
476
- "vendor_name": vendor_name,
477
- "database_table": table
478
- }
479
-
480
- except psycopg2.IntegrityError as e:
481
- logger.error(f"Database integrity error: {e}")
482
- if self.connection:
483
- self.connection.rollback()
484
- return {
485
- "success": False,
486
- "error": f"Database integrity error (possibly duplicate invoice): {str(e)}"
487
- }
488
-
489
- except Exception as e:
490
- logger.error(f"Database insert failed: {str(e)}")
491
- if self.connection:
492
- self.connection.rollback()
493
- return {
494
- "success": False,
495
- "error": str(e)
496
- }
497
-
498
- def __del__(self):
499
- """Close database connection when object is destroyed"""
500
- if self.connection:
501
- self.connection.close()
@@ -1,52 +0,0 @@
1
- from typing import Dict, Any
2
- import logging
3
-
4
- logger = logging.getLogger(__name__)
5
-
6
- class PropaneDataExtractor:
7
- """Agent responsible for extracting propane-related data from various sources"""
8
-
9
- def extract_data(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
10
- """
11
- Extract propane-related data from the input.
12
- This would typically involve API calls, database queries, etc.
13
- """
14
- logger.info("Extracting propane data...")
15
- # TODO: Implement actual data extraction logic
16
- return {
17
- "propane_levels": [],
18
- "delivery_schedule": [],
19
- "customer_info": {}
20
- }
21
-
22
- class PropaneDeliveryPlanner:
23
- """Agent responsible for planning propane deliveries"""
24
-
25
- def plan_deliveries(self, data: Dict[str, Any]) -> Dict[str, Any]:
26
- """
27
- Plan propane deliveries based on extracted data.
28
- This would involve optimization algorithms, scheduling logic, etc.
29
- """
30
- logger.info("Planning propane deliveries...")
31
- # TODO: Implement actual delivery planning logic
32
- return {
33
- "delivery_routes": [],
34
- "estimated_times": [],
35
- "resource_allocation": {}
36
- }
37
-
38
- class PropaneDeliveryExecutor:
39
- """Agent responsible for executing propane deliveries"""
40
-
41
- def execute_deliveries(self, plan: Dict[str, Any]) -> Dict[str, Any]:
42
- """
43
- Execute the planned propane deliveries.
44
- This would involve communication with delivery systems, tracking, etc.
45
- """
46
- logger.info("Executing propane deliveries...")
47
- # TODO: Implement actual delivery execution logic
48
- return {
49
- "delivery_status": "pending",
50
- "tracking_info": {},
51
- "completion_estimates": {}
52
- }