banko-ai-assistant 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banko_ai/__init__.py +19 -0
- banko_ai/__main__.py +10 -0
- banko_ai/ai_providers/__init__.py +18 -0
- banko_ai/ai_providers/aws_provider.py +337 -0
- banko_ai/ai_providers/base.py +175 -0
- banko_ai/ai_providers/factory.py +84 -0
- banko_ai/ai_providers/gemini_provider.py +340 -0
- banko_ai/ai_providers/openai_provider.py +295 -0
- banko_ai/ai_providers/watsonx_provider.py +591 -0
- banko_ai/cli.py +374 -0
- banko_ai/config/__init__.py +5 -0
- banko_ai/config/settings.py +216 -0
- banko_ai/static/Anallytics.png +0 -0
- banko_ai/static/Graph.png +0 -0
- banko_ai/static/Graph2.png +0 -0
- banko_ai/static/ai-status.png +0 -0
- banko_ai/static/banko-ai-assistant-watsonx.gif +0 -0
- banko_ai/static/banko-db-ops.png +0 -0
- banko_ai/static/banko-response.png +0 -0
- banko_ai/static/cache-stats.png +0 -0
- banko_ai/static/creditcard.png +0 -0
- banko_ai/static/profilepic.jpeg +0 -0
- banko_ai/static/query_watcher.png +0 -0
- banko_ai/static/roach-logo.svg +54 -0
- banko_ai/static/watsonx-icon.svg +1 -0
- banko_ai/templates/base.html +59 -0
- banko_ai/templates/dashboard.html +569 -0
- banko_ai/templates/index.html +1499 -0
- banko_ai/templates/login.html +41 -0
- banko_ai/utils/__init__.py +8 -0
- banko_ai/utils/cache_manager.py +525 -0
- banko_ai/utils/database.py +202 -0
- banko_ai/utils/migration.py +123 -0
- banko_ai/vector_search/__init__.py +18 -0
- banko_ai/vector_search/enrichment.py +278 -0
- banko_ai/vector_search/generator.py +329 -0
- banko_ai/vector_search/search.py +463 -0
- banko_ai/web/__init__.py +13 -0
- banko_ai/web/app.py +668 -0
- banko_ai/web/auth.py +73 -0
- banko_ai_assistant-1.0.0.dist-info/METADATA +414 -0
- banko_ai_assistant-1.0.0.dist-info/RECORD +46 -0
- banko_ai_assistant-1.0.0.dist-info/WHEEL +5 -0
- banko_ai_assistant-1.0.0.dist-info/entry_points.txt +2 -0
- banko_ai_assistant-1.0.0.dist-info/licenses/LICENSE +21 -0
- banko_ai_assistant-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,463 @@
|
|
1
|
+
"""
|
2
|
+
Vector search engine for expense data.
|
3
|
+
|
4
|
+
This module provides vector similarity search functionality with user-specific
|
5
|
+
filtering and advanced indexing support.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
import json
|
10
|
+
from typing import List, Dict, Any, Optional, Tuple
|
11
|
+
from sentence_transformers import SentenceTransformer
|
12
|
+
from sqlalchemy import create_engine, text
|
13
|
+
|
14
|
+
from ..ai_providers.base import SearchResult
|
15
|
+
|
16
|
+
|
17
|
+
class VectorSearchEngine:
    """Vector search engine for expense data with user-specific filtering.

    Runs vector-similarity queries (pgvector-style ``<->`` / ``<=>``
    operators) against an ``expenses`` table, optionally scoped to one user,
    with an optional cache layer for query embeddings and result sets.
    """

    def __init__(self, database_url: Optional[str] = None, cache_manager=None):
        """Initialize the vector search engine.

        Args:
            database_url: SQLAlchemy database URL. Falls back to the
                ``DATABASE_URL`` environment variable, then to a local
                CockroachDB default.
            cache_manager: Optional cache manager providing embedding and
                vector-search result caching.
        """
        self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/banko_ai?sslmode=disable")
        self.cache_manager = cache_manager

        # Workaround: CockroachDB reports a server version string the
        # PostgreSQL dialect cannot parse; fall back to a fixed tuple.
        from sqlalchemy.dialects.postgresql.base import PGDialect
        original_get_server_version_info = PGDialect._get_server_version_info

        def patched_get_server_version_info(self, connection):
            try:
                return original_get_server_version_info(self, connection)
            except Exception:
                return (25, 3, 0)

        PGDialect._get_server_version_info = patched_get_server_version_info

        # SQLAlchemy has no "cockroachdb://" scheme registered here; reuse
        # the PostgreSQL driver.
        database_url = self.database_url.replace("cockroachdb://", "postgresql://")
        self.engine = create_engine(
            database_url,
            connect_args={
                "options": "-c default_transaction_isolation=serializable"
            },
            pool_pre_ping=True,
            pool_recycle=300
        )
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

    @staticmethod
    def _row_to_search_result(row) -> "SearchResult":
        """Convert one DB row (fixed column order: expense_id, user_id,
        description, merchant, amount, date, score, shopping_type,
        payment_method, recurring, tags) into a SearchResult."""
        return SearchResult(
            expense_id=str(row[0]),
            user_id=str(row[1]),
            description=row[2] or "",
            merchant=row[3] or "",
            amount=float(row[4]),
            date=str(row[5]),
            similarity_score=float(row[6]),
            metadata={
                "shopping_type": row[7] if len(row) > 7 else None,
                "payment_method": row[8] if len(row) > 8 else None,
                "recurring": row[9] if len(row) > 9 else None,
                "tags": row[10] if len(row) > 10 else None,
            }
        )

    def search_expenses(
        self,
        query: str,
        user_id: Optional[str] = None,
        limit: int = 10,
        threshold: float = 0.7,
        use_user_index: bool = True
    ) -> List[SearchResult]:
        """
        Search for expenses using vector similarity.

        Args:
            query: Search query text
            user_id: Optional user ID to filter results
            limit: Maximum number of results to return
            threshold: Minimum similarity score threshold
            use_user_index: Whether to use user-specific vector index

        Returns:
            List of SearchResult objects (empty list on any failure)
        """
        # NOTE(review): `threshold` is accepted but never applied — results
        # are ordered by vector distance and truncated by LIMIT only.
        # Applying it would change result sets for existing callers, so it
        # is left as-is; confirm intended semantics before wiring it in.
        try:
            print(f"\n🔍 VECTOR SEARCH (with caching):")
            print(f"1. Query: '{query}' | Limit: {limit}")

            # Use cached embedding generation if available.
            if self.cache_manager:
                raw_embedding = self.cache_manager._get_embedding_with_cache(query)

                # Check for cached vector search results.
                cached_results = self.cache_manager.get_cached_vector_search(raw_embedding, limit)
                if cached_results:
                    print(f"2. ✅ Vector search cache HIT! Found {len(cached_results)} cached results")
                    # Rehydrate SearchResult objects from the cached dicts.
                    return [
                        SearchResult(
                            expense_id=result['expense_id'],
                            user_id=result['user_id'],
                            description=result['description'],
                            merchant=result['merchant'],
                            amount=result['expense_amount'],
                            date=result['expense_date'],
                            similarity_score=result['similarity_score'],
                            metadata={
                                'shopping_type': result['shopping_type'],
                                'payment_method': result['payment_method'],
                                'recurring': result.get('recurring', False),
                                'tags': result.get('tags', [])
                            }
                        )
                        for result in cached_results[:limit]
                    ]
                print(f"2. ❌ Vector search cache MISS, querying database")
            else:
                # Fallback to direct embedding generation.
                raw_embedding = self.embedding_model.encode([query])[0]
                print(f"2. Generated fresh embedding (no cache available)")

            # Serialize to a JSON array literal, the textual form the
            # vector column comparison expects as a bind value.
            # (json is imported at module level; no local re-import needed.)
            search_embedding = json.dumps(raw_embedding.flatten().tolist())

            # Choose the query and bind only the parameters it uses; binding
            # :user_id against the general query would be an unused bind.
            params: Dict[str, Any] = {
                'search_embedding': search_embedding,
                'limit': limit
            }
            if user_id and use_user_index:
                sql = self._build_user_specific_query()
                params['user_id'] = user_id
            else:
                sql = self._build_general_query()

            with self.engine.connect() as conn:
                rows = conn.execute(text(sql), params).fetchall()

            results = [self._row_to_search_result(row) for row in rows]
            print(f"3. Database query returned {len(results)} expense records")

            # Cache the results for future use (dict format for caching).
            if self.cache_manager and results:
                search_results_dict = [
                    {
                        'expense_id': result.expense_id,
                        'user_id': result.user_id,
                        'description': result.description,
                        'merchant': result.merchant,
                        'expense_amount': result.amount,
                        'expense_date': result.date,
                        'similarity_score': result.similarity_score,
                        'shopping_type': result.metadata.get('shopping_type'),
                        'payment_method': result.metadata.get('payment_method'),
                        'recurring': result.metadata.get('recurring'),
                        'tags': result.metadata.get('tags')
                    }
                    for result in results
                ]
                self.cache_manager.cache_vector_search_results(raw_embedding, search_results_dict)
                print(f"4. ✅ Cached vector search results for future queries")

            return results

        except Exception as e:
            # Best-effort API: log and return an empty result set.
            print(f"Search failed: {e}")
            return []

    def _build_user_specific_query(self) -> str:
        """Build SQL for vector search restricted to one user.

        Uses named binds :search_embedding, :user_id, :limit. ``<->`` is a
        vector *distance*, so smaller values rank first; the column is
        labelled similarity_score for historical/caller compatibility.
        """
        return """
            SELECT
                expense_id,
                user_id,
                description,
                merchant,
                expense_amount,
                expense_date,
                embedding <-> :search_embedding as similarity_score,
                shopping_type,
                payment_method,
                recurring,
                tags
            FROM expenses
            WHERE user_id = :user_id
            ORDER BY embedding <-> :search_embedding
            LIMIT :limit
        """

    def _build_general_query(self) -> str:
        """Build SQL for vector search across all users.

        Uses named binds :search_embedding and :limit only.
        """
        return """
            SELECT
                expense_id,
                user_id,
                description,
                merchant,
                expense_amount,
                expense_date,
                embedding <-> :search_embedding as similarity_score,
                shopping_type,
                payment_method,
                recurring,
                tags
            FROM expenses
            ORDER BY embedding <-> :search_embedding
            LIMIT :limit
        """

    def search_by_category(
        self,
        category: str,
        user_id: Optional[str] = None,
        limit: int = 10
    ) -> List[SearchResult]:
        """Search expenses whose shopping_type matches *category* (ILIKE).

        Returns most-recent-first results with a fixed similarity of 1.0;
        empty list on failure.
        """
        try:
            # Fix: the original used DB-API "%s" placeholders with a list,
            # but sqlalchemy.text() only supports named ":param" binds.
            sql = """
                SELECT
                    expense_id,
                    user_id,
                    description,
                    merchant,
                    expense_amount,
                    expense_date,
                    1.0 as similarity_score,
                    shopping_type,
                    payment_method,
                    recurring,
                    tags
                FROM expenses
                WHERE shopping_type ILIKE :pattern
            """
            params: Dict[str, Any] = {'pattern': f"%{category}%", 'limit': limit}

            if user_id:
                sql += " AND user_id = :user_id"
                params['user_id'] = user_id

            sql += " ORDER BY expense_date DESC LIMIT :limit"

            with self.engine.connect() as conn:
                rows = conn.execute(text(sql), params).fetchall()

            return [self._row_to_search_result(row) for row in rows]

        except Exception as e:
            print(f"Category search failed: {e}")
            return []

    def search_by_merchant(
        self,
        merchant: str,
        user_id: Optional[str] = None,
        limit: int = 10
    ) -> List[SearchResult]:
        """Search expenses whose merchant matches *merchant* (ILIKE).

        Returns most-recent-first results with a fixed similarity of 1.0;
        empty list on failure.
        """
        try:
            # Fix: named ":param" binds instead of "%s" (see search_by_category).
            sql = """
                SELECT
                    expense_id,
                    user_id,
                    description,
                    merchant,
                    expense_amount,
                    expense_date,
                    1.0 as similarity_score,
                    shopping_type,
                    payment_method,
                    recurring,
                    tags
                FROM expenses
                WHERE merchant ILIKE :pattern
            """
            params: Dict[str, Any] = {'pattern': f"%{merchant}%", 'limit': limit}

            if user_id:
                sql += " AND user_id = :user_id"
                params['user_id'] = user_id

            sql += " ORDER BY expense_date DESC LIMIT :limit"

            with self.engine.connect() as conn:
                rows = conn.execute(text(sql), params).fetchall()

            return [self._row_to_search_result(row) for row in rows]

        except Exception as e:
            print(f"Merchant search failed: {e}")
            return []

    def get_user_spending_summary(
        self,
        user_id: str,
        days: int = 30
    ) -> Dict[str, Any]:
        """Get a per-category spending summary for one user.

        Args:
            user_id: User whose expenses are summarized.
            days: Look-back window in days from today.

        Returns:
            Dict with totals, averages and per-category breakdown; empty
            dict on failure.
        """
        try:
            # Fix: the original wrote INTERVAL ':days days' — a bind marker
            # inside a string literal is never substituted. Multiply the
            # bound integer by a one-day interval instead.
            sql = """
                SELECT
                    COUNT(*) as transaction_count,
                    SUM(expense_amount) as total_amount,
                    AVG(expense_amount) as average_amount,
                    shopping_type
                FROM expenses
                WHERE user_id = :user_id
                AND expense_date >= CURRENT_DATE - :days * INTERVAL '1 day'
                GROUP BY shopping_type
                ORDER BY total_amount DESC
            """

            with self.engine.connect() as conn:
                rows = conn.execute(text(sql), {'user_id': user_id, 'days': days}).fetchall()

            summary: Dict[str, Any] = {
                "user_id": user_id,
                "period_days": days,
                "total_transactions": sum(row[0] for row in rows),
                "total_amount": sum(row[1] for row in rows),
                "average_transaction": sum(row[2] for row in rows) / len(rows) if rows else 0,
                "categories": [
                    {
                        "category": row[3],
                        "count": row[0],
                        "total_amount": float(row[1]),
                        "average_amount": float(row[2])
                    }
                    for row in rows
                ],
            }
            return summary

        except Exception as e:
            print(f"Spending summary failed: {e}")
            return {}

    def get_similar_expenses(
        self,
        expense_id: str,
        limit: int = 5
    ) -> List[SearchResult]:
        """Find expenses similar to a given expense (same user only).

        Looks up the reference expense's stored embedding and ranks the
        user's other expenses by cosine similarity (``1 - <=>``).
        Returns an empty list if the expense is unknown or on failure.
        """
        try:
            # Fix: named ":param" binds instead of "%s" with positional lists,
            # which sqlalchemy.text() does not accept.
            expense_sql = """
                SELECT user_id, description, merchant, embedding
                FROM expenses
                WHERE expense_id = :expense_id
            """

            with self.engine.connect() as conn:
                row = conn.execute(text(expense_sql), {'expense_id': expense_id}).fetchone()
                if not row:
                    return []

                user_id, _description, _merchant, embedding = row

                similar_sql = """
                    SELECT
                        expense_id,
                        user_id,
                        description,
                        merchant,
                        expense_amount,
                        expense_date,
                        1 - (embedding <=> :embedding) as similarity_score,
                        shopping_type,
                        payment_method,
                        recurring,
                        tags
                    FROM expenses
                    WHERE expense_id != :expense_id
                    AND user_id = :user_id
                    ORDER BY similarity_score DESC
                    LIMIT :limit
                """
                rows = conn.execute(text(similar_sql), {
                    'embedding': embedding,
                    'expense_id': expense_id,
                    'user_id': user_id,
                    'limit': limit
                }).fetchall()

            return [self._row_to_search_result(row) for row in rows]

        except Exception as e:
            print(f"Similar expenses search failed: {e}")
            return []
banko_ai/web/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
"""Web application module for Banko AI Assistant."""
|
2
|
+
|
3
|
+
def create_app():
|
4
|
+
"""Create Flask application (lazy import)."""
|
5
|
+
from .app import create_app as _create_app
|
6
|
+
return _create_app()
|
7
|
+
|
8
|
+
def get_user_manager():
|
9
|
+
"""Get UserManager (lazy import)."""
|
10
|
+
from .auth import UserManager
|
11
|
+
return UserManager
|
12
|
+
|
13
|
+
__all__ = ["create_app", "get_user_manager"]
|