banko-ai-assistant 1.0.12__py3-none-any.whl → 1.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banko_ai/__main__.py +18 -5
- banko_ai/ai_providers/watsonx_provider.py +232 -21
- banko_ai/config/settings.py +24 -7
- banko_ai/utils/cache_manager.py +1 -1
- banko_ai/vector_search/enrichment.py +11 -71
- banko_ai/vector_search/generator.py +56 -82
- banko_ai/vector_search/search.py +43 -1
- banko_ai/web/app.py +167 -82
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/METADATA +1 -1
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/RECORD +14 -14
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/WHEEL +0 -0
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/entry_points.txt +0 -0
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/licenses/LICENSE +0 -0
- {banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/top_level.txt +0 -0
banko_ai/__main__.py
CHANGED
@@ -1,10 +1,23 @@
|
|
1
1
|
"""
|
2
2
|
Main entry point for Banko AI Assistant.
|
3
|
-
|
4
|
-
This module allows the package to be run as a module: python -m banko_ai
|
3
|
+
This matches the original app.py behavior.
|
5
4
|
"""
|
6
5
|
|
7
|
-
from .cli import main
|
8
|
-
|
9
6
|
if __name__ == '__main__':
|
10
|
-
|
7
|
+
import os
|
8
|
+
from .web.app import create_app
|
9
|
+
|
10
|
+
print("🏦 === Banko AI Assistant Starting === 🏦")
|
11
|
+
|
12
|
+
# Create the Flask app
|
13
|
+
app = create_app()
|
14
|
+
|
15
|
+
# Get port from environment variable or default to 5000 (matching original)
|
16
|
+
port = int(os.environ.get("PORT", 5000))
|
17
|
+
|
18
|
+
print(f"🚀 Starting server on http://localhost:{port}")
|
19
|
+
print("🎉 Banko AI is ready to help with your finances!")
|
20
|
+
print("=" * 45)
|
21
|
+
|
22
|
+
# Run the app on all interfaces, using the configured port (matching original)
|
23
|
+
app.run(host='0.0.0.0', port=port, debug=True)
|
@@ -76,10 +76,64 @@ class WatsonxProvider(AIProvider):
|
|
76
76
|
limit: int = 10,
|
77
77
|
threshold: float = 0.7
|
78
78
|
) -> List[SearchResult]:
|
79
|
-
"""Search for expenses using vector similarity."""
|
80
|
-
|
81
|
-
|
82
|
-
|
79
|
+
"""Search for expenses using vector similarity - matches original implementation."""
|
80
|
+
try:
|
81
|
+
# Use the same simple search logic as the original watsonx.py
|
82
|
+
from sentence_transformers import SentenceTransformer
|
83
|
+
from sqlalchemy import create_engine, text
|
84
|
+
import json
|
85
|
+
|
86
|
+
# Database connection (matching original)
|
87
|
+
DB_URI = "cockroachdb://root@localhost:26257/defaultdb?sslmode=disable"
|
88
|
+
engine = create_engine(DB_URI)
|
89
|
+
|
90
|
+
# Generate embedding (matching original)
|
91
|
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
92
|
+
raw_embedding = model.encode(query)
|
93
|
+
search_embedding = json.dumps(raw_embedding.flatten().tolist())
|
94
|
+
|
95
|
+
# Use exact same query as original
|
96
|
+
search_query = text("""
|
97
|
+
SELECT
|
98
|
+
description,
|
99
|
+
merchant,
|
100
|
+
shopping_type,
|
101
|
+
expense_amount,
|
102
|
+
embedding <=> :search_embedding as similarity_score
|
103
|
+
FROM expenses
|
104
|
+
ORDER BY embedding <=> :search_embedding
|
105
|
+
LIMIT :limit
|
106
|
+
""")
|
107
|
+
|
108
|
+
with engine.connect() as conn:
|
109
|
+
results = conn.execute(search_query,
|
110
|
+
{'search_embedding': search_embedding, 'limit': limit})
|
111
|
+
search_results = [dict(row._mapping) for row in results]
|
112
|
+
|
113
|
+
# Convert to SearchResult objects
|
114
|
+
results_list = []
|
115
|
+
for result in search_results:
|
116
|
+
results_list.append(SearchResult(
|
117
|
+
expense_id="", # Original doesn't have expense_id
|
118
|
+
user_id="", # Original doesn't have user_id
|
119
|
+
description=result['description'],
|
120
|
+
merchant=result['merchant'],
|
121
|
+
amount=result['expense_amount'],
|
122
|
+
date="", # Original doesn't have date
|
123
|
+
similarity_score=result['similarity_score'],
|
124
|
+
metadata={
|
125
|
+
'shopping_type': result['shopping_type'],
|
126
|
+
'payment_method': 'Unknown', # Original doesn't have this
|
127
|
+
'recurring': False,
|
128
|
+
'tags': []
|
129
|
+
}
|
130
|
+
))
|
131
|
+
|
132
|
+
return results_list
|
133
|
+
|
134
|
+
except Exception as e:
|
135
|
+
print(f"Error in search_expenses: {e}")
|
136
|
+
return []
|
83
137
|
|
84
138
|
def get_available_models(self) -> List[str]:
|
85
139
|
"""Get list of available Watsonx models."""
|
@@ -240,6 +294,143 @@ class WatsonxProvider(AIProvider):
|
|
240
294
|
|
241
295
|
return "\n".join(recommendations) if recommendations else ""
|
242
296
|
|
297
|
+
def simple_rag_response(self, prompt: str, search_results: List[Dict[str, Any]]) -> str:
|
298
|
+
"""
|
299
|
+
Simple RAG response that matches the original implementation exactly.
|
300
|
+
Takes a prompt and list of dictionaries (like original search results).
|
301
|
+
"""
|
302
|
+
try:
|
303
|
+
print(f"\n🤖 SIMPLE WATSONX RAG:")
|
304
|
+
print(f"1. Query: '{prompt[:60]}...'")
|
305
|
+
|
306
|
+
# Check for cached response first
|
307
|
+
if self.cache_manager:
|
308
|
+
cached_response = self.cache_manager.get_cached_response(
|
309
|
+
prompt, search_results, "watsonx"
|
310
|
+
)
|
311
|
+
if cached_response:
|
312
|
+
print(f"2. ✅ Response cache HIT! Returning cached response")
|
313
|
+
return cached_response
|
314
|
+
print(f"2. ❌ Response cache MISS, generating fresh response")
|
315
|
+
else:
|
316
|
+
print(f"2. No cache manager available, generating fresh response")
|
317
|
+
|
318
|
+
# Generate financial insights and categorization analysis (matching original)
|
319
|
+
insights = self._get_financial_insights_from_dicts(search_results)
|
320
|
+
budget_recommendations = self._generate_budget_recommendations(insights, prompt)
|
321
|
+
|
322
|
+
# Prepare the search results context with enhanced analysis (matching original)
|
323
|
+
search_results_text = ""
|
324
|
+
if search_results:
|
325
|
+
search_results_text = "\n".join(
|
326
|
+
f"• **{result['shopping_type']}** at {result['merchant']}: ${result['expense_amount']} - {result['description']}"
|
327
|
+
for result in search_results
|
328
|
+
)
|
329
|
+
|
330
|
+
# Add financial summary (matching original)
|
331
|
+
if insights:
|
332
|
+
search_results_text += f"\n\n**📊 Financial Summary:**\n"
|
333
|
+
search_results_text += f"• Total Amount: **${insights['total_amount']:.2f}**\n"
|
334
|
+
search_results_text += f"• Number of Transactions: **{insights['num_transactions']}**\n"
|
335
|
+
search_results_text += f"• Average Transaction: **${insights['avg_transaction']:.2f}**\n"
|
336
|
+
if insights.get('top_category'):
|
337
|
+
cat, amt = insights['top_category']
|
338
|
+
search_results_text += f"• Top Category: **{cat}** (${amt:.2f})\n"
|
339
|
+
else:
|
340
|
+
search_results_text = "No specific expense records found for this query."
|
341
|
+
|
342
|
+
# Create optimized prompt (matching original)
|
343
|
+
enhanced_prompt = f"""You are Banko, a financial assistant. Answer based on this expense data:
|
344
|
+
|
345
|
+
Q: {prompt}
|
346
|
+
|
347
|
+
Data:
|
348
|
+
{search_results_text}
|
349
|
+
|
350
|
+
{budget_recommendations if budget_recommendations else ''}
|
351
|
+
|
352
|
+
Provide helpful insights with numbers, markdown formatting, and actionable advice."""
|
353
|
+
|
354
|
+
# Prepare messages for chat format (matching original)
|
355
|
+
messages = [
|
356
|
+
{
|
357
|
+
"role": "user",
|
358
|
+
"content": enhanced_prompt
|
359
|
+
}
|
360
|
+
]
|
361
|
+
|
362
|
+
# Call Watsonx API (matching original implementation)
|
363
|
+
print(f"3. 🔄 Calling Watsonx API...")
|
364
|
+
response = self._call_watsonx_api(messages)
|
365
|
+
print(f"4. ✅ Watsonx response generated successfully")
|
366
|
+
|
367
|
+
# Cache the response for future similar queries
|
368
|
+
if self.cache_manager and response:
|
369
|
+
# Estimate token usage (rough approximation)
|
370
|
+
prompt_tokens = len(enhanced_prompt.split()) * 1.3 # ~1.3 tokens per word
|
371
|
+
response_tokens = len(response.split()) * 1.3
|
372
|
+
|
373
|
+
self.cache_manager.cache_response(
|
374
|
+
prompt, response, search_results, "watsonx",
|
375
|
+
int(prompt_tokens), int(response_tokens)
|
376
|
+
)
|
377
|
+
print(f"5. ✅ Cached response (est. {int(prompt_tokens + response_tokens)} tokens)")
|
378
|
+
|
379
|
+
return response
|
380
|
+
|
381
|
+
except Exception as e:
|
382
|
+
error_msg = f"❌ Error generating Watsonx response: {str(e)}"
|
383
|
+
print(error_msg)
|
384
|
+
|
385
|
+
# Check if it's a network connectivity issue
|
386
|
+
if "Failed to resolve" in str(e) or "nodename nor servname provided" in str(e) or "Network connectivity issue" in str(e):
|
387
|
+
return f"""I apologize, but I'm experiencing network connectivity issues with IBM Watsonx AI.
|
388
|
+
|
389
|
+
**🔧 Troubleshooting suggestions:**
|
390
|
+
- Check your internet connection
|
391
|
+
- Try switching to AWS Bedrock by setting `AI_SERVICE=aws` in your environment
|
392
|
+
- Verify your network allows access to `iam.cloud.ibm.com`
|
393
|
+
|
394
|
+
**💡 Quick fix:** You can switch AI providers by running:
|
395
|
+
```bash
|
396
|
+
export AI_SERVICE=aws
|
397
|
+
```
|
398
|
+
|
399
|
+
(Network Error: {str(e)})"""
|
400
|
+
else:
|
401
|
+
return f"I apologize, but I'm experiencing technical difficulties with IBM Watsonx AI. Please try again later or consider switching to AWS Bedrock. (Error: {str(e)})"
|
402
|
+
|
403
|
+
def _get_financial_insights_from_dicts(self, search_results: List[Dict[str, Any]]) -> dict:
|
404
|
+
"""Generate financial insights from dictionary format (matching original)."""
|
405
|
+
if not search_results:
|
406
|
+
return {}
|
407
|
+
|
408
|
+
total_amount = sum(float(result['expense_amount']) for result in search_results)
|
409
|
+
categories = {}
|
410
|
+
merchants = {}
|
411
|
+
|
412
|
+
for result in search_results:
|
413
|
+
# Category analysis
|
414
|
+
category = result['shopping_type']
|
415
|
+
categories[category] = categories.get(category, 0) + float(result['expense_amount'])
|
416
|
+
|
417
|
+
# Merchant analysis
|
418
|
+
merchant = result['merchant']
|
419
|
+
merchants[merchant] = merchants.get(merchant, 0) + float(result['expense_amount'])
|
420
|
+
|
421
|
+
# Find top categories and merchants
|
422
|
+
top_category = max(categories.items(), key=lambda x: x[1]) if categories else None
|
423
|
+
top_merchant = max(merchants.items(), key=lambda x: x[1]) if merchants else None
|
424
|
+
|
425
|
+
return {
|
426
|
+
'total_amount': total_amount,
|
427
|
+
'num_transactions': len(search_results),
|
428
|
+
'avg_transaction': total_amount / len(search_results) if search_results else 0,
|
429
|
+
'categories': categories,
|
430
|
+
'top_category': top_category,
|
431
|
+
'top_merchant': top_merchant
|
432
|
+
}
|
433
|
+
|
243
434
|
def rag_response(
|
244
435
|
self,
|
245
436
|
query: str,
|
@@ -420,9 +611,21 @@ Provide helpful insights with numbers, markdown formatting, and actionable advic
|
|
420
611
|
print(f"2. ❌ Response cache MISS, generating fresh response")
|
421
612
|
else:
|
422
613
|
print(f"2. No cache manager available, generating fresh response")
|
423
|
-
|
614
|
+
|
615
|
+
# Initialize ai_response to avoid variable scope issues
|
616
|
+
ai_response = ""
|
617
|
+
|
618
|
+
# FIXED: Use AI with actual search results instead of bypassing it completely
|
619
|
+
print(f"🔍 DEBUG: Using AI with REAL search results for query: {query}")
|
620
|
+
if context:
|
621
|
+
print(f"🔍 DEBUG: Processing {len(context)} REAL search results for AI context")
|
622
|
+
for i, result in enumerate(context):
|
623
|
+
print(f"🔍 DEBUG: Real Result {i+1}: {result.merchant} - ${result.amount} - {result.description[:50]}...")
|
624
|
+
|
625
|
+
# Re-enable AI generation with real search results
|
626
|
+
if True: # Re-enabled AI generation with real data
|
424
627
|
# Return structured demo response if no API credentials
|
425
|
-
if not
|
628
|
+
if not self.api_key or not self.project_id:
|
426
629
|
ai_response = f"""## Financial Analysis for: "{query}"
|
427
630
|
|
428
631
|
### 📋 Transaction Details
|
@@ -438,21 +641,21 @@ I couldn't find any relevant expense records for your query. Please try:
|
|
438
641
|
- Time periods (e.g., "last month", "this week")
|
439
642
|
|
440
643
|
**Note**: I need API credentials to generate more detailed AI-powered insights."""
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
644
|
+
else:
|
645
|
+
# Generate financial insights from search results
|
646
|
+
insights = self._get_financial_insights(context)
|
647
|
+
budget_recommendations = self._generate_budget_recommendations(insights, query)
|
648
|
+
|
649
|
+
# Create table text from search results
|
650
|
+
table_text = ""
|
651
|
+
if context:
|
652
|
+
table_text = "\n".join([
|
653
|
+
f"• **{result.metadata.get('shopping_type', 'Unknown')}** at {result.merchant}: ${result.amount} ({result.metadata.get('payment_method', 'Unknown')}) - {result.description}"
|
654
|
+
for result in context
|
655
|
+
])
|
656
|
+
|
657
|
+
# Create context text with financial summary
|
658
|
+
context_text = f"""**📊 Financial Summary:**
|
456
659
|
• Total Amount: ${insights.get('total_amount', 0):.2f}
|
457
660
|
• Number of Transactions: {insights.get('num_transactions', 0)}
|
458
661
|
• Average Transaction: ${insights.get('avg_transaction', 0):.2f}
|
@@ -463,6 +666,14 @@ I couldn't find any relevant expense records for your query. Please try:
|
|
463
666
|
{budget_recommendations if budget_recommendations else '• Consider reviewing your spending patterns regularly' + chr(10) + '• Set up budget alerts for high-value categories'}
|
464
667
|
|
465
668
|
**Note**: I can see {len(context)} relevant expense records, but I need API credentials to generate more detailed AI-powered insights."""
|
669
|
+
|
670
|
+
ai_response = f"""## Financial Analysis for: "{query}"
|
671
|
+
|
672
|
+
### 📋 Transaction Details
|
673
|
+
{table_text}
|
674
|
+
|
675
|
+
### 📊 Financial Summary
|
676
|
+
{context_text}"""
|
466
677
|
else:
|
467
678
|
# Make actual Watsonx API call with enhanced prompt (copied from original)
|
468
679
|
try:
|
banko_ai/config/settings.py
CHANGED
@@ -60,20 +60,37 @@ class Config:
|
|
60
60
|
@classmethod
|
61
61
|
def from_env(cls) -> "Config":
|
62
62
|
"""Create configuration from environment variables."""
|
63
|
-
# Database configuration
|
64
|
-
database_url = os.getenv("DATABASE_URL", "cockroachdb://root@localhost:26257/
|
63
|
+
# Database configuration - match original app.py
|
64
|
+
database_url = os.getenv("DATABASE_URL", "cockroachdb://root@localhost:26257/defaultdb?sslmode=disable")
|
65
65
|
|
66
66
|
# Parse database URL for individual components
|
67
67
|
db_host = os.getenv("DATABASE_HOST", "localhost")
|
68
68
|
db_port = int(os.getenv("DATABASE_PORT", "26257"))
|
69
|
-
db_name = os.getenv("DATABASE_NAME", "
|
69
|
+
db_name = os.getenv("DATABASE_NAME", "defaultdb") # Match original
|
70
70
|
db_user = os.getenv("DATABASE_USER", "root")
|
71
71
|
db_password = os.getenv("DATABASE_PASSWORD", "")
|
72
72
|
ssl_mode = os.getenv("DATABASE_SSL_MODE", "disable")
|
73
73
|
|
74
|
-
# AI Service configuration
|
74
|
+
# AI Service configuration - match original app.py
|
75
75
|
ai_service = os.getenv("AI_SERVICE", "watsonx").lower()
|
76
76
|
|
77
|
+
# Try to load from config.py like the original app.py does
|
78
|
+
try:
|
79
|
+
import sys
|
80
|
+
# Add parent directory to path to import config.py
|
81
|
+
parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
82
|
+
if parent_dir not in sys.path:
|
83
|
+
sys.path.insert(0, parent_dir)
|
84
|
+
from config import WATSONX_API_KEY, WATSONX_PROJECT_ID, WATSONX_MODEL_ID
|
85
|
+
watsonx_api_key = WATSONX_API_KEY
|
86
|
+
watsonx_project_id = WATSONX_PROJECT_ID
|
87
|
+
watsonx_model = WATSONX_MODEL_ID
|
88
|
+
except ImportError:
|
89
|
+
# Fall back to environment variables
|
90
|
+
watsonx_api_key = os.getenv("WATSONX_API_KEY")
|
91
|
+
watsonx_project_id = os.getenv("WATSONX_PROJECT_ID")
|
92
|
+
watsonx_model = os.getenv("WATSONX_MODEL", "openai/gpt-oss-120b")
|
93
|
+
|
77
94
|
return cls(
|
78
95
|
# Database
|
79
96
|
database_url=database_url,
|
@@ -92,9 +109,9 @@ class Config:
|
|
92
109
|
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
93
110
|
aws_region=os.getenv("AWS_REGION", "us-east-1"),
|
94
111
|
aws_model=os.getenv("AWS_MODEL", "us.anthropic.claude-3-5-sonnet-20241022-v2:0"),
|
95
|
-
watsonx_api_key=
|
96
|
-
watsonx_project_id=
|
97
|
-
watsonx_model=
|
112
|
+
watsonx_api_key=watsonx_api_key,
|
113
|
+
watsonx_project_id=watsonx_project_id,
|
114
|
+
watsonx_model=watsonx_model,
|
98
115
|
google_project_id=os.getenv("GOOGLE_PROJECT_ID"),
|
99
116
|
google_location=os.getenv("GOOGLE_LOCATION", "us-central1"),
|
100
117
|
google_model=os.getenv("GOOGLE_MODEL", "gemini-1.5-pro"),
|
banko_ai/utils/cache_manager.py
CHANGED
@@ -25,7 +25,7 @@ from sqlalchemy.dialects.postgresql import JSONB
|
|
25
25
|
import os
|
26
26
|
|
27
27
|
# Database configuration
|
28
|
-
DB_URI = os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/
|
28
|
+
DB_URI = os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/defaultdb?sslmode=disable")
|
29
29
|
|
30
30
|
# Apply CockroachDB version parsing workaround
|
31
31
|
from sqlalchemy.dialects.postgresql.base import PGDialect
|
@@ -43,7 +43,7 @@ class DataEnricher:
|
|
43
43
|
**kwargs
|
44
44
|
) -> str:
|
45
45
|
"""
|
46
|
-
|
46
|
+
Create a simple description that matches the original CSV format.
|
47
47
|
|
48
48
|
Args:
|
49
49
|
description: Original expense description
|
@@ -55,49 +55,10 @@ class DataEnricher:
|
|
55
55
|
**kwargs: Additional metadata
|
56
56
|
|
57
57
|
Returns:
|
58
|
-
|
58
|
+
Simple description string matching original CSV format
|
59
59
|
"""
|
60
|
-
#
|
61
|
-
|
62
|
-
|
63
|
-
# Add merchant name and amount prominently
|
64
|
-
enriched_parts.append(f"at {merchant} for ${amount:.2f}")
|
65
|
-
|
66
|
-
# Add merchant context
|
67
|
-
merchant_context = self._get_merchant_context(merchant, amount)
|
68
|
-
if merchant_context:
|
69
|
-
enriched_parts.append(merchant_context)
|
70
|
-
|
71
|
-
# Add amount context
|
72
|
-
amount_context = self._get_amount_context(amount)
|
73
|
-
if amount_context:
|
74
|
-
enriched_parts.append(amount_context)
|
75
|
-
|
76
|
-
# Add category context
|
77
|
-
category_context = self._get_category_context(category, merchant)
|
78
|
-
if category_context:
|
79
|
-
enriched_parts.append(category_context)
|
80
|
-
|
81
|
-
# Add payment method context
|
82
|
-
payment_context = self._get_payment_context(payment_method)
|
83
|
-
if payment_context:
|
84
|
-
enriched_parts.append(payment_context)
|
85
|
-
|
86
|
-
# Add temporal context
|
87
|
-
temporal_context = self._get_temporal_context(date)
|
88
|
-
if temporal_context:
|
89
|
-
enriched_parts.append(temporal_context)
|
90
|
-
|
91
|
-
# Add merchant category context
|
92
|
-
merchant_category = self._get_merchant_category(merchant)
|
93
|
-
if merchant_category:
|
94
|
-
enriched_parts.append(f"at {merchant_category} store")
|
95
|
-
|
96
|
-
# Combine all parts
|
97
|
-
enriched_description = " ".join(enriched_parts)
|
98
|
-
|
99
|
-
# Clean up and format
|
100
|
-
enriched_description = self._clean_description(enriched_description)
|
60
|
+
# Create the exact same format as the original CSV
|
61
|
+
enriched_description = f"Spent ${amount:.2f} on {category.lower()} at {merchant} using {payment_method}."
|
101
62
|
|
102
63
|
return enriched_description
|
103
64
|
|
@@ -243,36 +204,15 @@ class DataEnricher:
|
|
243
204
|
**kwargs
|
244
205
|
) -> str:
|
245
206
|
"""
|
246
|
-
Create a
|
207
|
+
Create a simple searchable text that matches the original CSV format.
|
247
208
|
|
248
|
-
This creates
|
249
|
-
|
209
|
+
This creates the exact same format as the original CSV:
|
210
|
+
"Spent $X.XX on [category] at [merchant] using [payment_method]."
|
250
211
|
"""
|
251
212
|
# Extract required parameters from kwargs
|
252
|
-
payment_method = kwargs.get('payment_method', '')
|
253
|
-
date = kwargs.get('date', datetime.now())
|
254
|
-
|
255
|
-
# Remove these from kwargs to avoid conflicts
|
256
|
-
filtered_kwargs = {k: v for k, v in kwargs.items() if k not in ['payment_method', 'date']}
|
257
|
-
|
258
|
-
enriched_description = self.enrich_expense_description(
|
259
|
-
description, merchant, amount, category, payment_method, date, **filtered_kwargs
|
260
|
-
)
|
261
|
-
|
262
|
-
# Create a comprehensive searchable text
|
263
|
-
searchable_parts = [
|
264
|
-
f"Spent ${amount:.2f} on {enriched_description}",
|
265
|
-
f"Merchant: {merchant}",
|
266
|
-
f"Category: {category}",
|
267
|
-
f"Amount: ${amount:.2f}"
|
268
|
-
]
|
269
|
-
|
270
|
-
# Add any additional context
|
271
|
-
if kwargs.get('payment_method'):
|
272
|
-
searchable_parts.append(f"Payment: {kwargs['payment_method']}")
|
213
|
+
payment_method = kwargs.get('payment_method', 'Credit Card')
|
273
214
|
|
274
|
-
|
275
|
-
|
276
|
-
searchable_parts.append(f"Tags: {tags}")
|
215
|
+
# Create the exact same format as the original CSV
|
216
|
+
searchable_text = f"Spent ${amount:.2f} on {category.lower()} at {merchant} using {payment_method}."
|
277
217
|
|
278
|
-
return
|
218
|
+
return searchable_text
|
@@ -19,7 +19,7 @@ class EnhancedExpenseGenerator:
|
|
19
19
|
|
20
20
|
def __init__(self, database_url: Optional[str] = None):
|
21
21
|
"""Initialize the enhanced expense generator."""
|
22
|
-
self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/
|
22
|
+
self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/defaultdb?sslmode=disable")
|
23
23
|
self._engine = None
|
24
24
|
self.enricher = DataEnricher()
|
25
25
|
self._embedding_model = None
|
@@ -73,88 +73,77 @@ class EnhancedExpenseGenerator:
|
|
73
73
|
return self._user_ids
|
74
74
|
|
75
75
|
def _init_merchants_and_categories(self):
|
76
|
-
"""Initialize merchants and categories data."""
|
77
|
-
#
|
78
|
-
self._merchants =
|
79
|
-
"
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
"
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
"Starbucks", "McDonald's", "Chipotle", "Subway", "Pizza Hut",
|
89
|
-
"Domino's", "Panera Bread", "Dunkin' Donuts", "Taco Bell", "KFC"
|
90
|
-
],
|
91
|
-
"transportation": [
|
92
|
-
"Shell Gas Station", "Exxon", "Chevron", "Uber", "Lyft",
|
93
|
-
"Metro", "Parking Garage", "Toll Road", "Car Wash", "Auto Repair"
|
94
|
-
],
|
95
|
-
"healthcare": [
|
96
|
-
"CVS Pharmacy", "Walgreens", "Rite Aid", "Hospital", "Clinic",
|
97
|
-
"Dentist", "Optometrist", "Pharmacy", "Medical Center", "Urgent Care"
|
98
|
-
],
|
99
|
-
"entertainment": [
|
100
|
-
"Netflix", "Spotify", "Movie Theater", "Concert Hall", "Gaming Store",
|
101
|
-
"Bookstore", "Museum", "Theme Park", "Sports Venue", "Theater"
|
102
|
-
],
|
103
|
-
"utilities": [
|
104
|
-
"Electric Company", "Internet Provider", "Phone Company", "Water Company",
|
105
|
-
"Gas Company", "Cable Company", "Trash Service", "Security System", "Insurance", "Bank"
|
106
|
-
]
|
107
|
-
}
|
76
|
+
"""Initialize merchants and categories data - matches original CSV exactly."""
|
77
|
+
# Use the exact merchants from the original CSV
|
78
|
+
self._merchants = [
|
79
|
+
"Starbucks", "Local Market", "McDonald's", "IKEA", "Amazon", "Whole Foods",
|
80
|
+
"Italian Bistro", "Uber", "Lyft", "Spotify", "Delta Airlines", "Costco",
|
81
|
+
"Home Depot", "Shell Gas Station", "Lowe's", "Tesla Supercharger", "Planet Fitness",
|
82
|
+
"Apple Store", "Walmart", "Target", "Netflix", "Best Buy", "CVS Pharmacy",
|
83
|
+
"Walgreens", "Rite Aid", "Chipotle", "Subway", "Pizza Hut", "Domino's",
|
84
|
+
"Exxon", "Chevron", "BP", "Dunkin' Donuts", "Peet's Coffee", "Ace Hardware",
|
85
|
+
"Movie Theater", "Concert Venue", "Gaming Store", "Electric Company",
|
86
|
+
"Internet Provider", "Phone Company", "Water Company"
|
87
|
+
]
|
108
88
|
|
89
|
+
# Use the exact categories from the original CSV
|
109
90
|
self._categories = {
|
110
91
|
"Groceries": {
|
111
92
|
"items": ["Fresh produce", "Dairy products", "Meat and poultry", "Pantry staples", "Organic foods", "Beverages", "Snacks"],
|
112
|
-
"merchants": self.merchants["grocery"],
|
113
93
|
"amount_range": (10, 150)
|
114
94
|
},
|
115
|
-
"
|
116
|
-
"items": ["
|
117
|
-
"
|
118
|
-
"amount_range": (5, 100)
|
119
|
-
},
|
120
|
-
"Dining": {
|
121
|
-
"items": ["Coffee and pastry", "Lunch meeting", "Dinner date", "Fast food", "Food delivery", "Restaurant meal", "Catering"],
|
122
|
-
"merchants": self.merchants["dining"],
|
123
|
-
"amount_range": (8, 80)
|
95
|
+
"Home Improvement": {
|
96
|
+
"items": ["Tools", "Hardware", "Paint", "Lumber", "Electrical supplies", "Plumbing supplies", "Garden supplies"],
|
97
|
+
"amount_range": (20, 500)
|
124
98
|
},
|
125
|
-
"
|
126
|
-
"items": ["
|
127
|
-
"
|
128
|
-
"amount_range": (5, 200)
|
99
|
+
"Electronics": {
|
100
|
+
"items": ["Smartphone", "Laptop", "Tablet", "Headphones", "Camera", "Gaming console", "Smart home device"],
|
101
|
+
"amount_range": (50, 1000)
|
129
102
|
},
|
130
|
-
"
|
131
|
-
"items": ["
|
132
|
-
"
|
133
|
-
"amount_range": (15, 500)
|
103
|
+
"Subscription": {
|
104
|
+
"items": ["Streaming service", "Software subscription", "Gym membership", "News subscription", "Cloud storage", "Music service"],
|
105
|
+
"amount_range": (10, 50)
|
134
106
|
},
|
135
107
|
"Shopping": {
|
136
|
-
"items": ["Clothing", "
|
137
|
-
"
|
138
|
-
|
108
|
+
"items": ["Clothing", "Shoes", "Accessories", "Home decor", "Books", "Toys", "Beauty products"],
|
109
|
+
"amount_range": (15, 200)
|
110
|
+
},
|
111
|
+
"Restaurant": {
|
112
|
+
"items": ["Dinner", "Lunch", "Breakfast", "Takeout", "Delivery", "Catering", "Fine dining"],
|
113
|
+
"amount_range": (15, 100)
|
114
|
+
},
|
115
|
+
"Transport": {
|
116
|
+
"items": ["Uber ride", "Lyft ride", "Taxi", "Bus fare", "Train ticket", "Flight", "Car rental"],
|
117
|
+
"amount_range": (5, 500)
|
118
|
+
},
|
119
|
+
"Fuel": {
|
120
|
+
"items": ["Gas fill-up", "Electric charging", "Diesel fuel", "Premium gas", "Regular gas"],
|
121
|
+
"amount_range": (20, 100)
|
122
|
+
},
|
123
|
+
"Travel": {
|
124
|
+
"items": ["Flight", "Hotel", "Car rental", "Travel insurance", "Airport parking", "Baggage fee"],
|
125
|
+
"amount_range": (100, 2000)
|
139
126
|
},
|
140
|
-
"
|
141
|
-
"items": ["
|
142
|
-
"
|
143
|
-
"amount_range": (30, 300)
|
127
|
+
"Coffee": {
|
128
|
+
"items": ["Coffee", "Espresso", "Latte", "Cappuccino", "Pastry", "Sandwich", "Breakfast"],
|
129
|
+
"amount_range": (3, 25)
|
144
130
|
}
|
145
131
|
}
|
146
132
|
|
147
|
-
|
133
|
+
# Use the exact payment methods from the original CSV
|
134
|
+
self._payment_methods = [
|
135
|
+
"Debit Card", "PayPal", "Apple Pay", "Bank Transfer", "Credit Card"
|
136
|
+
]
|
148
137
|
self._user_ids = [str(uuid.uuid4()) for _ in range(100)] # Generate 100 user IDs
|
149
138
|
|
150
139
|
def generate_expense(self, user_id: Optional[str] = None) -> Dict[str, Any]:
|
151
|
-
"""Generate a single enriched expense record."""
|
140
|
+
"""Generate a single enriched expense record that matches the original CSV format."""
|
152
141
|
# Select category and get associated data
|
153
142
|
category = random.choice(list(self.categories.keys()))
|
154
143
|
category_data = self.categories[category]
|
155
144
|
|
156
|
-
# Select merchant from
|
157
|
-
merchant = random.choice(
|
145
|
+
# Select merchant from the full merchant list (matching original CSV)
|
146
|
+
merchant = random.choice(self.merchants)
|
158
147
|
|
159
148
|
# Generate amount within category range
|
160
149
|
amount = round(random.uniform(*category_data["amount_range"]), 2)
|
@@ -171,29 +160,14 @@ class EnhancedExpenseGenerator:
|
|
171
160
|
|
172
161
|
# Generate additional metadata
|
173
162
|
payment_method = random.choice(self.payment_methods)
|
174
|
-
recurring = random.choice([True, False]) if category in ["
|
163
|
+
recurring = random.choice([True, False]) if category in ["Subscription", "Coffee"] else False
|
175
164
|
tags = [category.lower(), merchant.lower().replace(" ", "_")]
|
176
165
|
|
177
|
-
#
|
178
|
-
enriched_description =
|
179
|
-
description=basic_description,
|
180
|
-
merchant=merchant,
|
181
|
-
amount=amount,
|
182
|
-
category=category,
|
183
|
-
payment_method=payment_method,
|
184
|
-
date=expense_date,
|
185
|
-
tags=tags
|
186
|
-
)
|
166
|
+
# Create the exact same description format as the original CSV
|
167
|
+
enriched_description = f"Spent ${amount:.2f} on {category.lower()} at {merchant} using {payment_method}."
|
187
168
|
|
188
|
-
# Create searchable text for embedding
|
189
|
-
searchable_text =
|
190
|
-
description=basic_description,
|
191
|
-
merchant=merchant,
|
192
|
-
amount=amount,
|
193
|
-
category=category,
|
194
|
-
payment_method=payment_method,
|
195
|
-
tags=tags
|
196
|
-
)
|
169
|
+
# Create searchable text for embedding (same as description for simplicity)
|
170
|
+
searchable_text = enriched_description
|
197
171
|
|
198
172
|
# Generate embedding
|
199
173
|
embedding = self.embedding_model.encode([searchable_text])[0].tolist()
|
banko_ai/vector_search/search.py
CHANGED
@@ -19,7 +19,7 @@ class VectorSearchEngine:
|
|
19
19
|
|
20
20
|
def __init__(self, database_url: Optional[str] = None, cache_manager=None):
|
21
21
|
"""Initialize the vector search engine."""
|
22
|
-
self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/
|
22
|
+
self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/defaultdb?sslmode=disable")
|
23
23
|
self.cache_manager = cache_manager
|
24
24
|
|
25
25
|
# Apply version parsing workaround for CockroachDB
|
@@ -46,6 +46,48 @@ class VectorSearchEngine:
|
|
46
46
|
)
|
47
47
|
self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
48
48
|
|
49
|
+
def simple_search_expenses(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
50
|
+
"""
|
51
|
+
Simple search function that matches the original implementation exactly.
|
52
|
+
Returns list of dictionaries like the original search_expenses function.
|
53
|
+
"""
|
54
|
+
try:
|
55
|
+
print(f"\n🔍 SIMPLE VECTOR SEARCH:")
|
56
|
+
print(f"1. Query: '{query}' | Limit: {limit}")
|
57
|
+
|
58
|
+
# Generate embedding
|
59
|
+
raw_embedding = self.embedding_model.encode(query)
|
60
|
+
print(f"2. Generated embedding with {len(raw_embedding)} dimensions")
|
61
|
+
|
62
|
+
# Convert to PostgreSQL vector format (matching original implementation)
|
63
|
+
import json
|
64
|
+
search_embedding = json.dumps(raw_embedding.flatten().tolist())
|
65
|
+
|
66
|
+
# Use the exact same query as the original implementation
|
67
|
+
search_query = text("""
|
68
|
+
SELECT
|
69
|
+
description,
|
70
|
+
merchant,
|
71
|
+
shopping_type,
|
72
|
+
expense_amount,
|
73
|
+
embedding <=> :search_embedding as similarity_score
|
74
|
+
FROM expenses
|
75
|
+
ORDER BY embedding <=> :search_embedding
|
76
|
+
LIMIT :limit
|
77
|
+
""")
|
78
|
+
|
79
|
+
with self.engine.connect() as conn:
|
80
|
+
results = conn.execute(search_query,
|
81
|
+
{'search_embedding': search_embedding, 'limit': limit})
|
82
|
+
search_results = [dict(row._mapping) for row in results]
|
83
|
+
print(f"3. Database query returned {len(search_results)} expense records")
|
84
|
+
|
85
|
+
return search_results
|
86
|
+
|
87
|
+
except Exception as e:
|
88
|
+
print(f"❌ Error executing simple expense search query: {e}")
|
89
|
+
return []
|
90
|
+
|
49
91
|
def search_expenses(
|
50
92
|
self,
|
51
93
|
query: str,
|
banko_ai/web/app.py
CHANGED
@@ -28,6 +28,109 @@ from ..utils.cache_manager import BankoCacheManager
|
|
28
28
|
from .auth import UserManager
|
29
29
|
|
30
30
|
|
31
|
+
def check_database_connection(database_url: str):
    """
    Check that the database is reachable and inspect the expenses table.

    Matches the original app.py implementation, with one fix: the temporary
    SQLAlchemy engine is now disposed in a ``finally`` block so the probe
    does not leak its connection pool on every startup.

    Args:
        database_url: SQLAlchemy database URL to probe.

    Returns:
        tuple: (success: bool, message: str, table_exists: bool, record_count: int)
    """
    engine = None
    try:
        engine = create_engine(database_url)

        with engine.connect() as conn:
            # Test basic connection
            result = conn.execute(text('SELECT version()'))
            version = result.fetchone()[0]

            # Check if expenses table exists
            result = conn.execute(text("""
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public' AND table_name = 'expenses'
            """))
            table_exists = result.fetchone() is not None

            record_count = 0
            if table_exists:
                result = conn.execute(text('SELECT COUNT(*) FROM expenses'))
                record_count = result.fetchone()[0]

            # NOTE(review): split()[1] assumes the second token of the
            # version string is the meaningful one — confirm this holds for
            # the target database (CockroachDB vs. PostgreSQL banners differ).
            return True, f"Connected to {version.split()[1]}", table_exists, record_count

    except Exception as e:
        return False, f"Database connection failed: {str(e)}", False, 0
    finally:
        # This helper builds a throwaway engine; release its pool explicitly.
        if engine is not None:
            engine.dispose()
|
64
|
+
|
65
|
+
|
66
|
+
def auto_setup_data_if_needed(database_url: str):
    """
    Ensure the expenses table exists and holds enough demo data.

    Runs at app startup (mirrors the original app.py): verifies the database
    connection, creates the expenses table via the external create_table.py
    script when it is missing, and seeds 5,000 sample records when fewer
    than 100 rows are present.

    Args:
        database_url: SQLAlchemy database URL for the target database.

    Returns:
        bool: True when the database is ready (or was just seeded), False on
        any failure along the way.
    """
    try:
        connected, status_msg, has_table, row_count = check_database_connection(database_url)

        if not connected:
            print(f"❌ Database connection failed: {status_msg}")
            return False

        if not has_table:
            print("🔧 Creating expenses table...")
            try:
                import subprocess
                import sys
                import os
                # Walk four levels up from this file to reach the project root.
                # NOTE(review): fragile when installed as a wheel — the script
                # lives outside the package; confirm this path in deployment.
                project_root = os.path.abspath(__file__)
                for _ in range(4):
                    project_root = os.path.dirname(project_root)
                script_path = os.path.join(project_root, 'vector_search', 'create_table.py')

                if not os.path.exists(script_path):
                    print(f"❌ Table creation script not found: {script_path}")
                    return False

                outcome = subprocess.run([sys.executable, script_path],
                                         capture_output=True, text=True, cwd=project_root)
                if outcome.returncode != 0:
                    print(f"❌ Failed to create table: {outcome.stderr}")
                    return False

                print("✅ Expenses table created successfully")
                # Refresh the status now that the table exists.
                connected, status_msg, has_table, row_count = check_database_connection(database_url)
            except Exception as e:
                print(f"❌ Table creation error: {e}")
                return False

        if row_count < 100:
            print(f"🔍 Found {row_count} expense records")
            print("🎯 Generating sample data for better demo experience...")
            try:
                # Seed the table through the unified data generator.
                from ..vector_search.generator import EnhancedExpenseGenerator

                seeder = EnhancedExpenseGenerator(database_url)

                # 5K rows is enough for a convincing demo without a long wait.
                seeder.generate_and_save(5000, user_id=None, clear_existing=False)

                print("✅ Generated 5,000 realistic expense records")
                return True
            except Exception as e:
                print(f"⚠️ Data generation failed: {e}")
                return False

        return True

    except Exception as e:
        print(f"⚠️ Auto-setup error: {e}")
        return False
|
132
|
+
|
133
|
+
|
31
134
|
def create_app() -> Flask:
|
32
135
|
"""Create and configure the Flask application."""
|
33
136
|
# Get the directory containing this file
|
@@ -74,6 +177,10 @@ def create_app() -> Flask:
|
|
74
177
|
print(f"Warning: Could not initialize AI provider: {e}")
|
75
178
|
ai_provider = None
|
76
179
|
|
180
|
+
# Auto-setup data if needed (matching original app.py)
|
181
|
+
print("🔍 Checking database setup...")
|
182
|
+
auto_setup_data_if_needed(config.database_url)
|
183
|
+
|
77
184
|
@app.route('/')
|
78
185
|
def index():
|
79
186
|
"""Main application page."""
|
@@ -111,35 +218,17 @@ def create_app() -> Flask:
|
|
111
218
|
query = data.get('query', '')
|
112
219
|
limit = data.get('limit', 10)
|
113
220
|
threshold = data.get('threshold', 0.7)
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
if user_id is None:
|
122
|
-
results = search_engine.search_expenses(
|
123
|
-
query=query,
|
124
|
-
user_id=None,
|
125
|
-
limit=limit,
|
126
|
-
threshold=threshold
|
127
|
-
)
|
128
|
-
else:
|
129
|
-
results = search_engine.search_expenses(
|
130
|
-
query=query,
|
131
|
-
user_id=user_id,
|
132
|
-
limit=limit,
|
133
|
-
threshold=threshold
|
134
|
-
)
|
135
|
-
|
136
|
-
print(f"🔍 API Search Debug: search_engine returned {len(results) if results else 0} results")
|
221
|
+
# Use original simple logic - no user filtering
|
222
|
+
results = search_engine.search_expenses(
|
223
|
+
query=query,
|
224
|
+
user_id=None, # No user filtering like original
|
225
|
+
limit=limit,
|
226
|
+
threshold=threshold
|
227
|
+
)
|
137
228
|
|
138
229
|
# Convert to serializable format
|
139
230
|
search_results = []
|
140
|
-
|
141
|
-
for i, result in enumerate(results):
|
142
|
-
print(f"🔍 API Search Debug: Processing result {i+1}: {result.description[:50]}... (score: {result.similarity_score})")
|
231
|
+
for result in results:
|
143
232
|
search_results.append({
|
144
233
|
'expense_id': result.expense_id,
|
145
234
|
'user_id': result.user_id,
|
@@ -151,13 +240,11 @@ def create_app() -> Flask:
|
|
151
240
|
'metadata': result.metadata
|
152
241
|
})
|
153
242
|
|
154
|
-
print(f"🔍 API Search Debug: Final search_results has {len(search_results)} items")
|
155
|
-
|
156
243
|
return jsonify({
|
157
244
|
'success': True,
|
158
245
|
'results': search_results,
|
159
246
|
'query': query,
|
160
|
-
'user_id':
|
247
|
+
'user_id': None
|
161
248
|
})
|
162
249
|
|
163
250
|
except Exception as e:
|
@@ -179,30 +266,19 @@ def create_app() -> Flask:
|
|
179
266
|
data = request.get_json()
|
180
267
|
query = data.get('query', '')
|
181
268
|
language = data.get('language', 'en')
|
182
|
-
|
269
|
+
# Use original simple logic - no user filtering
|
270
|
+
search_results = search_engine.search_expenses(
|
271
|
+
query=query,
|
272
|
+
user_id=None, # No user filtering like original
|
273
|
+
limit=5,
|
274
|
+
threshold=0.7
|
275
|
+
)
|
183
276
|
|
184
|
-
#
|
185
|
-
# This ensures we get results from the database
|
186
|
-
if user_id is None:
|
187
|
-
search_results = search_engine.search_expenses(
|
188
|
-
query=query,
|
189
|
-
user_id=None,
|
190
|
-
limit=5,
|
191
|
-
threshold=0.7
|
192
|
-
)
|
193
|
-
else:
|
194
|
-
search_results = search_engine.search_expenses(
|
195
|
-
query=query,
|
196
|
-
user_id=user_id,
|
197
|
-
limit=5,
|
198
|
-
threshold=0.7
|
199
|
-
)
|
200
|
-
|
201
|
-
# Generate RAG response
|
277
|
+
# Generate RAG response - use original simple logic
|
202
278
|
rag_response = ai_provider.generate_rag_response(
|
203
279
|
query=query,
|
204
280
|
context=search_results,
|
205
|
-
user_id=
|
281
|
+
user_id=None, # No user filtering like original
|
206
282
|
language=language
|
207
283
|
)
|
208
284
|
|
@@ -396,16 +472,15 @@ def create_app() -> Flask:
|
|
396
472
|
|
397
473
|
@app.route('/banko', methods=['GET', 'POST'])
|
398
474
|
def chat():
|
399
|
-
"""Main chat interface."""
|
475
|
+
"""Main chat interface - using original simple logic."""
|
400
476
|
if 'chat' not in session:
|
401
477
|
session['chat'] = []
|
402
478
|
|
403
479
|
# Get AI provider info for display
|
404
|
-
provider_info = ai_provider.get_provider_info() if ai_provider else {'name': 'Unknown', 'current_model': 'Unknown'}
|
405
480
|
ai_provider_display = {
|
406
|
-
'name':
|
407
|
-
'current_service':
|
408
|
-
'icon': '🧠'
|
481
|
+
'name': 'IBM Watsonx',
|
482
|
+
'current_service': 'WATSONX',
|
483
|
+
'icon': '🧠'
|
409
484
|
}
|
410
485
|
|
411
486
|
if request.method == 'POST':
|
@@ -434,42 +509,52 @@ def create_app() -> Flask:
|
|
434
509
|
target_language = language_map.get(response_language, 'English')
|
435
510
|
|
436
511
|
try:
|
437
|
-
#
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
# If no user is logged in, search all expenses (no user filtering)
|
442
|
-
# This ensures we get results from the database
|
443
|
-
if user_id is None:
|
444
|
-
result = search_engine.search_expenses(
|
445
|
-
query=prompt,
|
446
|
-
user_id=None,
|
447
|
-
limit=10,
|
448
|
-
threshold=0.7
|
449
|
-
)
|
512
|
+
# Use simple search that matches original implementation
|
513
|
+
if hasattr(ai_provider, 'search_expenses'):
|
514
|
+
# Use the simple search method that returns dictionaries like original
|
515
|
+
search_results = ai_provider.search_expenses(prompt, limit=10)
|
450
516
|
else:
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
threshold=0.7
|
456
|
-
)
|
457
|
-
print(f"Using {provider_info['name']} for response generation in {target_language}")
|
517
|
+
# Fallback to search engine simple method
|
518
|
+
search_results = search_engine.simple_search_expenses(prompt, limit=10)
|
519
|
+
|
520
|
+
print(f"Using watsonx for response generation in {target_language}")
|
458
521
|
|
459
|
-
#
|
522
|
+
# Convert SearchResult objects to dictionaries if needed
|
523
|
+
if search_results and hasattr(search_results[0], 'description'):
|
524
|
+
# Convert SearchResult objects to dict format
|
525
|
+
search_results_dict = []
|
526
|
+
for result in search_results:
|
527
|
+
search_results_dict.append({
|
528
|
+
'description': result.description,
|
529
|
+
'merchant': result.merchant,
|
530
|
+
'shopping_type': result.metadata.get('shopping_type', 'Unknown'),
|
531
|
+
'expense_amount': result.amount,
|
532
|
+
'similarity_score': result.similarity_score
|
533
|
+
})
|
534
|
+
search_results = search_results_dict
|
535
|
+
|
536
|
+
# Generate RAG response with language preference - use original simple approach
|
460
537
|
if target_language != 'English':
|
461
538
|
enhanced_prompt = f"{user_message}\n\nPlease respond in {target_language}."
|
462
|
-
|
539
|
+
if hasattr(ai_provider, 'simple_rag_response'):
|
540
|
+
rag_response_text = ai_provider.simple_rag_response(enhanced_prompt, search_results)
|
541
|
+
else:
|
542
|
+
rag_response = ai_provider.generate_rag_response(enhanced_prompt, search_results, None, response_language)
|
543
|
+
rag_response_text = rag_response.response if hasattr(rag_response, 'response') else str(rag_response)
|
463
544
|
else:
|
464
|
-
|
465
|
-
|
466
|
-
|
545
|
+
if hasattr(ai_provider, 'simple_rag_response'):
|
546
|
+
rag_response_text = ai_provider.simple_rag_response(user_message, search_results)
|
547
|
+
else:
|
548
|
+
rag_response = ai_provider.generate_rag_response(user_message, search_results, None, response_language)
|
549
|
+
rag_response_text = rag_response.response if hasattr(rag_response, 'response') else str(rag_response)
|
550
|
+
|
551
|
+
print(f"Response from watsonx: {rag_response_text}")
|
467
552
|
|
468
|
-
session['chat'].append({'text':
|
553
|
+
session['chat'].append({'text': rag_response_text, 'class': 'Assistant'})
|
469
554
|
|
470
555
|
except Exception as e:
|
471
|
-
error_message = f"Sorry, I'm experiencing technical difficulties
|
472
|
-
print(f"Error with
|
556
|
+
error_message = f"Sorry, I'm experiencing technical difficulties. Error: {str(e)}"
|
557
|
+
print(f"Error with watsonx: {str(e)}")
|
473
558
|
session['chat'].append({'text': error_message, 'class': 'Assistant'})
|
474
559
|
|
475
560
|
return render_template('index.html',
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: banko-ai-assistant
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.13
|
4
4
|
Summary: AI-powered expense analysis and RAG system with CockroachDB vector search and multi-provider AI support
|
5
5
|
Author-email: Virag Tripathi <virag.tripathi@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -1,5 +1,5 @@
|
|
1
1
|
banko_ai/__init__.py,sha256=G1InyKemqQxP9xx6yGZgolBmrmOLSpBXqGYY8LaFOeo,568
|
2
|
-
banko_ai/__main__.py,sha256=
|
2
|
+
banko_ai/__main__.py,sha256=U-KkrXtL8JNIyV25PE5v_eYhlhjR7jd6kG-txfYfs0M,709
|
3
3
|
banko_ai/cli.py,sha256=SsCsikOykmTM_uId-n0ivilKXu1hKch17XXwhMHfFnU,13760
|
4
4
|
banko_ai/ai_providers/__init__.py,sha256=JdBgw5Mji2pe9nU-aiRYUmJuZk0q8KbcMtbpMJC5Dq8,483
|
5
5
|
banko_ai/ai_providers/aws_provider.py,sha256=-tR-8tlEeSL-Fspx05tTMFguvQylkW_pz0PI2XJEByM,13074
|
@@ -7,9 +7,9 @@ banko_ai/ai_providers/base.py,sha256=zbuAgkHIfJ0YkG83LXzieJuvXBcB2-nx7NhbL-I4Pf0
|
|
7
7
|
banko_ai/ai_providers/factory.py,sha256=Bqq9HcbyTfPvaOTxsHSM9eSvkB71cJoq21cMmXo4LLc,2885
|
8
8
|
banko_ai/ai_providers/gemini_provider.py,sha256=KqzHLLl7EYnai9-zFenRmktVk0zOA8AtsYScQZIcdLU,13044
|
9
9
|
banko_ai/ai_providers/openai_provider.py,sha256=Myu2And6kTD2EgIVcWHGak5fDIq0pu1HQzf-jj72y3k,11657
|
10
|
-
banko_ai/ai_providers/watsonx_provider.py,sha256=
|
10
|
+
banko_ai/ai_providers/watsonx_provider.py,sha256=Jl33LKApIED4nl2EmhpSp3x7aFYe4d2QSqq5udtGlZg,39258
|
11
11
|
banko_ai/config/__init__.py,sha256=YObKfKjjW89kSfARiTzXnGuSPz1C92aSMKgmO3BtQb8,133
|
12
|
-
banko_ai/config/settings.py,sha256=
|
12
|
+
banko_ai/config/settings.py,sha256=6M8YoaxzyCULn6BOot4hahtDkgjsgyLmMd8BxDKVt4k,9317
|
13
13
|
banko_ai/static/Anallytics.png,sha256=fWLddd5hlB4YEUYSIzOFVNnTny6D8VExQeVn31mywTc,80242
|
14
14
|
banko_ai/static/Graph.png,sha256=eOPCPBUAs6KGeIWbDk9aDt_pJYRxBCzm4dkYQ7xdj4g,58591
|
15
15
|
banko_ai/static/Graph2.png,sha256=nVj9Qdu5hvBI3AoWAMnL7WJb-T22aaBX3ATVsDXTM2I,32462
|
@@ -146,19 +146,19 @@ banko_ai/templates/dashboard.html,sha256=-HIQ3sU86hqVn1jGeHV8_w2HlnPZ4uS2cD-BNGU
|
|
146
146
|
banko_ai/templates/index.html,sha256=SkDXWH_ttH2T_a_46_OETgy1Q3zDfvot9eEKGB0S1x0,66973
|
147
147
|
banko_ai/templates/login.html,sha256=YPMtJcvCzFlknwmUrG7VskeM691J4msAjZw-t4CcPn4,2063
|
148
148
|
banko_ai/utils/__init__.py,sha256=0n1JYzZUWwgwOzV82I6OnmfUV_TOnal1V0DoEb0E2Cs,225
|
149
|
-
banko_ai/utils/cache_manager.py,sha256=
|
149
|
+
banko_ai/utils/cache_manager.py,sha256=fFnHk6dGui3T1KrSNKTYKdonCD7Fc8VvhDBhqmL-syc,22385
|
150
150
|
banko_ai/utils/database.py,sha256=sJYAFTApkWReEJuMbbBDiz7XfgiiEd6lPSSyF6BQDpk,7754
|
151
151
|
banko_ai/utils/migration.py,sha256=j1lYUVZyYMcMvxZUOFymoK19QTPqkDZFXD-iysVCnQo,4764
|
152
152
|
banko_ai/vector_search/__init__.py,sha256=vYksnkUU4FA8XBNzYZIH4FoGjXCx9oIbrDeapSzrNuE,621
|
153
|
-
banko_ai/vector_search/enrichment.py,sha256=
|
154
|
-
banko_ai/vector_search/generator.py,sha256=
|
155
|
-
banko_ai/vector_search/search.py,sha256=
|
153
|
+
banko_ai/vector_search/enrichment.py,sha256=tgAImLehkp2kL46vI5GEHsE8B5E4gT3PweXZLqqKei4,8097
|
154
|
+
banko_ai/vector_search/generator.py,sha256=KAXp8wr7xXI9hzYRslyMhYaFHbticbWpT-a2RmJgmy4,16087
|
155
|
+
banko_ai/vector_search/search.py,sha256=I-DgxTqib_VbRKX0Ttk9QmzGcAW2hLYm0_Y9HXSaibQ,18763
|
156
156
|
banko_ai/web/__init__.py,sha256=hjWVVxYpIZhOAN1qBf4xTd36a5AUHM03Q8BF8pykhJQ,363
|
157
|
-
banko_ai/web/app.py,sha256=
|
157
|
+
banko_ai/web/app.py,sha256=Qdd9gIccFJ5NeEYFiwKS3evFNbba6t5LNs733isloWE,32127
|
158
158
|
banko_ai/web/auth.py,sha256=js6qIixSFHyLbETDm8GNLCPrDkCDcaQZPFOrqtZP1uw,2125
|
159
|
-
banko_ai_assistant-1.0.
|
160
|
-
banko_ai_assistant-1.0.
|
161
|
-
banko_ai_assistant-1.0.
|
162
|
-
banko_ai_assistant-1.0.
|
163
|
-
banko_ai_assistant-1.0.
|
164
|
-
banko_ai_assistant-1.0.
|
159
|
+
banko_ai_assistant-1.0.13.dist-info/licenses/LICENSE,sha256=skG0LkywIClj8fgSIXiG6o9vUDJ678BKBObIyJ19OMw,1075
|
160
|
+
banko_ai_assistant-1.0.13.dist-info/METADATA,sha256=HBUsepFohIoHmecHe7hQAXgbE5La1FTvti2c3Jkn0S0,13244
|
161
|
+
banko_ai_assistant-1.0.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
162
|
+
banko_ai_assistant-1.0.13.dist-info/entry_points.txt,sha256=IxPjBjMvbpCp-ikCA43bOSbYboTGPX4HYcZlvu2_vcA,47
|
163
|
+
banko_ai_assistant-1.0.13.dist-info/top_level.txt,sha256=xNMa9Z67UssefOQ2ubFObtqUYIfYmCIclfz0xdo5OPE,9
|
164
|
+
banko_ai_assistant-1.0.13.dist-info/RECORD,,
|
File without changes
|
{banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/entry_points.txt
RENAMED
File without changes
|
{banko_ai_assistant-1.0.12.dist-info → banko_ai_assistant-1.0.13.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|