banko-ai-assistant 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. banko_ai/__init__.py +19 -0
  2. banko_ai/__main__.py +10 -0
  3. banko_ai/ai_providers/__init__.py +18 -0
  4. banko_ai/ai_providers/aws_provider.py +337 -0
  5. banko_ai/ai_providers/base.py +175 -0
  6. banko_ai/ai_providers/factory.py +84 -0
  7. banko_ai/ai_providers/gemini_provider.py +340 -0
  8. banko_ai/ai_providers/openai_provider.py +295 -0
  9. banko_ai/ai_providers/watsonx_provider.py +591 -0
  10. banko_ai/cli.py +374 -0
  11. banko_ai/config/__init__.py +5 -0
  12. banko_ai/config/settings.py +216 -0
  13. banko_ai/static/Anallytics.png +0 -0
  14. banko_ai/static/Graph.png +0 -0
  15. banko_ai/static/Graph2.png +0 -0
  16. banko_ai/static/ai-status.png +0 -0
  17. banko_ai/static/banko-ai-assistant-watsonx.gif +0 -0
  18. banko_ai/static/banko-db-ops.png +0 -0
  19. banko_ai/static/banko-response.png +0 -0
  20. banko_ai/static/cache-stats.png +0 -0
  21. banko_ai/static/creditcard.png +0 -0
  22. banko_ai/static/profilepic.jpeg +0 -0
  23. banko_ai/static/query_watcher.png +0 -0
  24. banko_ai/static/roach-logo.svg +54 -0
  25. banko_ai/static/watsonx-icon.svg +1 -0
  26. banko_ai/templates/base.html +59 -0
  27. banko_ai/templates/dashboard.html +569 -0
  28. banko_ai/templates/index.html +1499 -0
  29. banko_ai/templates/login.html +41 -0
  30. banko_ai/utils/__init__.py +8 -0
  31. banko_ai/utils/cache_manager.py +525 -0
  32. banko_ai/utils/database.py +202 -0
  33. banko_ai/utils/migration.py +123 -0
  34. banko_ai/vector_search/__init__.py +18 -0
  35. banko_ai/vector_search/enrichment.py +278 -0
  36. banko_ai/vector_search/generator.py +329 -0
  37. banko_ai/vector_search/search.py +463 -0
  38. banko_ai/web/__init__.py +13 -0
  39. banko_ai/web/app.py +668 -0
  40. banko_ai/web/auth.py +73 -0
  41. banko_ai_assistant-1.0.0.dist-info/METADATA +414 -0
  42. banko_ai_assistant-1.0.0.dist-info/RECORD +46 -0
  43. banko_ai_assistant-1.0.0.dist-info/WHEEL +5 -0
  44. banko_ai_assistant-1.0.0.dist-info/entry_points.txt +2 -0
  45. banko_ai_assistant-1.0.0.dist-info/licenses/LICENSE +21 -0
  46. banko_ai_assistant-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,463 @@
1
+ """
2
+ Vector search engine for expense data.
3
+
4
+ This module provides vector similarity search functionality with user-specific
5
+ filtering and advanced indexing support.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ from typing import List, Dict, Any, Optional, Tuple
11
+ from sentence_transformers import SentenceTransformer
12
+ from sqlalchemy import create_engine, text
13
+
14
+ from ..ai_providers.base import SearchResult
15
+
16
+
17
class VectorSearchEngine:
    """Vector search engine for expense data with user-specific filtering."""

    def __init__(self, database_url: Optional[str] = None, cache_manager=None):
        """
        Initialize the vector search engine.

        Args:
            database_url: SQLAlchemy-style URL; falls back to the DATABASE_URL
                environment variable, then to a local CockroachDB default.
            cache_manager: Optional cache used for query embeddings and
                vector-search results (may be None).
        """
        self.database_url = database_url or os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/banko_ai?sslmode=disable")
        self.cache_manager = cache_manager

        # CockroachDB reports a server version string the postgres dialect
        # cannot always parse; fall back to a fixed version tuple on failure.
        # Guarded so that constructing multiple engines does not stack
        # wrappers (the previous code re-wrapped the already-patched method
        # on every instantiation).
        from sqlalchemy.dialects.postgresql.base import PGDialect
        if not getattr(PGDialect._get_server_version_info, '_crdb_patched', False):
            original_get_server_version_info = PGDialect._get_server_version_info

            def patched_get_server_version_info(self, connection):
                try:
                    return original_get_server_version_info(self, connection)
                except Exception:
                    # Assumed modern CockroachDB release — TODO confirm this
                    # tuple tracks the deployed server version.
                    return (25, 3, 0)

            patched_get_server_version_info._crdb_patched = True
            PGDialect._get_server_version_info = patched_get_server_version_info

        # SQLAlchemy only recognizes the postgresql:// scheme.
        database_url = self.database_url.replace("cockroachdb://", "postgresql://")
        self.engine = create_engine(
            database_url,
            connect_args={
                "options": "-c default_transaction_isolation=serializable"
            },
            pool_pre_ping=True,   # transparently replace dead pooled connections
            pool_recycle=300      # recycle pooled connections every 5 minutes
        )
        # Compact general-purpose sentence-embedding model.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
48
+
49
+ def search_expenses(
50
+ self,
51
+ query: str,
52
+ user_id: Optional[str] = None,
53
+ limit: int = 10,
54
+ threshold: float = 0.7,
55
+ use_user_index: bool = True
56
+ ) -> List[SearchResult]:
57
+ """
58
+ Search for expenses using vector similarity.
59
+
60
+ Args:
61
+ query: Search query text
62
+ user_id: Optional user ID to filter results
63
+ limit: Maximum number of results to return
64
+ threshold: Minimum similarity score threshold
65
+ use_user_index: Whether to use user-specific vector index
66
+
67
+ Returns:
68
+ List of SearchResult objects
69
+ """
70
+ try:
71
+ print(f"\n🔍 VECTOR SEARCH (with caching):")
72
+ print(f"1. Query: '{query}' | Limit: {limit}")
73
+
74
+ # Use cached embedding generation if available
75
+ if self.cache_manager:
76
+ raw_embedding = self.cache_manager._get_embedding_with_cache(query)
77
+
78
+ # Check for cached vector search results
79
+ cached_results = self.cache_manager.get_cached_vector_search(raw_embedding, limit)
80
+ if cached_results:
81
+ print(f"2. ✅ Vector search cache HIT! Found {len(cached_results)} cached results")
82
+ # Convert cached results to SearchResult objects
83
+ search_results = []
84
+ for result in cached_results[:limit]:
85
+ search_results.append(SearchResult(
86
+ expense_id=result['expense_id'],
87
+ user_id=result['user_id'],
88
+ description=result['description'],
89
+ merchant=result['merchant'],
90
+ amount=result['expense_amount'],
91
+ date=result['expense_date'],
92
+ similarity_score=result['similarity_score'],
93
+ metadata={
94
+ 'shopping_type': result['shopping_type'],
95
+ 'payment_method': result['payment_method'],
96
+ 'recurring': result.get('recurring', False),
97
+ 'tags': result.get('tags', [])
98
+ }
99
+ ))
100
+ return search_results
101
+ print(f"2. ❌ Vector search cache MISS, querying database")
102
+ else:
103
+ # Fallback to direct embedding generation
104
+ query_embedding = self.embedding_model.encode([query])[0]
105
+ raw_embedding = query_embedding
106
+ print(f"2. Generated fresh embedding (no cache available)")
107
+
108
+ # Convert to PostgreSQL vector format (matching original implementation)
109
+ import json
110
+ search_embedding = json.dumps(raw_embedding.flatten().tolist())
111
+
112
+ # Build SQL query based on whether we're using user-specific search
113
+ if user_id and use_user_index:
114
+ sql = self._build_user_specific_query()
115
+ else:
116
+ sql = self._build_general_query()
117
+
118
+ # Prepare parameters as a dictionary
119
+ params = {
120
+ 'search_embedding': search_embedding,
121
+ 'limit': limit
122
+ }
123
+
124
+ if user_id:
125
+ params['user_id'] = user_id
126
+
127
+ # Execute query
128
+ with self.engine.connect() as conn:
129
+ result = conn.execute(text(sql), params)
130
+ rows = result.fetchall()
131
+
132
+ # Convert to SearchResult objects
133
+ results = []
134
+ for row in rows:
135
+ results.append(SearchResult(
136
+ expense_id=str(row[0]),
137
+ user_id=str(row[1]),
138
+ description=row[2] or "",
139
+ merchant=row[3] or "",
140
+ amount=float(row[4]),
141
+ date=str(row[5]),
142
+ similarity_score=float(row[6]),
143
+ metadata={
144
+ "shopping_type": row[7] if len(row) > 7 else None,
145
+ "payment_method": row[8] if len(row) > 8 else None,
146
+ "recurring": row[9] if len(row) > 9 else None,
147
+ "tags": row[10] if len(row) > 10 else None
148
+ }
149
+ ))
150
+
151
+ print(f"3. Database query returned {len(results)} expense records")
152
+
153
+ # Cache the results for future use (convert back to dict format for caching)
154
+ if self.cache_manager and results:
155
+ # Convert SearchResult objects back to dict format for caching
156
+ search_results_dict = []
157
+ for result in results:
158
+ search_results_dict.append({
159
+ 'expense_id': result.expense_id,
160
+ 'user_id': result.user_id,
161
+ 'description': result.description,
162
+ 'merchant': result.merchant,
163
+ 'expense_amount': result.amount,
164
+ 'expense_date': result.date,
165
+ 'similarity_score': result.similarity_score,
166
+ 'shopping_type': result.metadata.get('shopping_type'),
167
+ 'payment_method': result.metadata.get('payment_method'),
168
+ 'recurring': result.metadata.get('recurring'),
169
+ 'tags': result.metadata.get('tags')
170
+ })
171
+
172
+ self.cache_manager.cache_vector_search_results(raw_embedding, search_results_dict)
173
+ print(f"4. ✅ Cached vector search results for future queries")
174
+
175
+ return results
176
+
177
+ except Exception as e:
178
+ print(f"Search failed: {e}")
179
+ return []
180
+
181
+ def _build_user_specific_query(self) -> str:
182
+ """Build SQL query for user-specific vector search."""
183
+ return """
184
+ SELECT
185
+ expense_id,
186
+ user_id,
187
+ description,
188
+ merchant,
189
+ expense_amount,
190
+ expense_date,
191
+ embedding <-> :search_embedding as similarity_score,
192
+ shopping_type,
193
+ payment_method,
194
+ recurring,
195
+ tags
196
+ FROM expenses
197
+ WHERE user_id = :user_id
198
+ ORDER BY embedding <-> :search_embedding
199
+ LIMIT :limit
200
+ """
201
+
202
+ def _build_general_query(self) -> str:
203
+ """Build SQL query for general vector search."""
204
+ return """
205
+ SELECT
206
+ expense_id,
207
+ user_id,
208
+ description,
209
+ merchant,
210
+ expense_amount,
211
+ expense_date,
212
+ embedding <-> :search_embedding as similarity_score,
213
+ shopping_type,
214
+ payment_method,
215
+ recurring,
216
+ tags
217
+ FROM expenses
218
+ ORDER BY embedding <-> :search_embedding
219
+ LIMIT :limit
220
+ """
221
+
222
+ def search_by_category(
223
+ self,
224
+ category: str,
225
+ user_id: Optional[str] = None,
226
+ limit: int = 10
227
+ ) -> List[SearchResult]:
228
+ """Search expenses by category."""
229
+ try:
230
+ sql = """
231
+ SELECT
232
+ expense_id,
233
+ user_id,
234
+ description,
235
+ merchant,
236
+ expense_amount,
237
+ expense_date,
238
+ 1.0 as similarity_score,
239
+ shopping_type,
240
+ payment_method,
241
+ recurring,
242
+ tags
243
+ FROM expenses
244
+ WHERE shopping_type ILIKE %s
245
+ """
246
+
247
+ params = [f"%{category}%"]
248
+
249
+ if user_id:
250
+ sql += " AND user_id = %s"
251
+ params.append(user_id)
252
+
253
+ sql += " ORDER BY expense_date DESC LIMIT %s"
254
+ params.append(limit)
255
+
256
+ with self.engine.connect() as conn:
257
+ result = conn.execute(text(sql), params)
258
+ rows = result.fetchall()
259
+
260
+ results = []
261
+ for row in rows:
262
+ results.append(SearchResult(
263
+ expense_id=str(row[0]),
264
+ user_id=str(row[1]),
265
+ description=row[2] or "",
266
+ merchant=row[3] or "",
267
+ amount=float(row[4]),
268
+ date=str(row[5]),
269
+ similarity_score=float(row[6]),
270
+ metadata={
271
+ "shopping_type": row[7],
272
+ "payment_method": row[8],
273
+ "recurring": row[9],
274
+ "tags": row[10]
275
+ }
276
+ ))
277
+
278
+ return results
279
+
280
+ except Exception as e:
281
+ print(f"Category search failed: {e}")
282
+ return []
283
+
284
+ def search_by_merchant(
285
+ self,
286
+ merchant: str,
287
+ user_id: Optional[str] = None,
288
+ limit: int = 10
289
+ ) -> List[SearchResult]:
290
+ """Search expenses by merchant."""
291
+ try:
292
+ sql = """
293
+ SELECT
294
+ expense_id,
295
+ user_id,
296
+ description,
297
+ merchant,
298
+ expense_amount,
299
+ expense_date,
300
+ 1.0 as similarity_score,
301
+ shopping_type,
302
+ payment_method,
303
+ recurring,
304
+ tags
305
+ FROM expenses
306
+ WHERE merchant ILIKE %s
307
+ """
308
+
309
+ params = [f"%{merchant}%"]
310
+
311
+ if user_id:
312
+ sql += " AND user_id = %s"
313
+ params.append(user_id)
314
+
315
+ sql += " ORDER BY expense_date DESC LIMIT %s"
316
+ params.append(limit)
317
+
318
+ with self.engine.connect() as conn:
319
+ result = conn.execute(text(sql), params)
320
+ rows = result.fetchall()
321
+
322
+ results = []
323
+ for row in rows:
324
+ results.append(SearchResult(
325
+ expense_id=str(row[0]),
326
+ user_id=str(row[1]),
327
+ description=row[2] or "",
328
+ merchant=row[3] or "",
329
+ amount=float(row[4]),
330
+ date=str(row[5]),
331
+ similarity_score=float(row[6]),
332
+ metadata={
333
+ "shopping_type": row[7],
334
+ "payment_method": row[8],
335
+ "recurring": row[9],
336
+ "tags": row[10]
337
+ }
338
+ ))
339
+
340
+ return results
341
+
342
+ except Exception as e:
343
+ print(f"Merchant search failed: {e}")
344
+ return []
345
+
346
+ def get_user_spending_summary(
347
+ self,
348
+ user_id: str,
349
+ days: int = 30
350
+ ) -> Dict[str, Any]:
351
+ """Get spending summary for a specific user."""
352
+ try:
353
+ sql = """
354
+ SELECT
355
+ COUNT(*) as transaction_count,
356
+ SUM(expense_amount) as total_amount,
357
+ AVG(expense_amount) as average_amount,
358
+ shopping_type,
359
+ COUNT(*) as category_count
360
+ FROM expenses
361
+ WHERE user_id = :user_id
362
+ AND expense_date >= CURRENT_DATE - INTERVAL ':days days'
363
+ GROUP BY shopping_type
364
+ ORDER BY total_amount DESC
365
+ """
366
+
367
+ with self.engine.connect() as conn:
368
+ result = conn.execute(text(sql), {'user_id': user_id, 'days': days})
369
+ rows = result.fetchall()
370
+
371
+ summary = {
372
+ "user_id": user_id,
373
+ "period_days": days,
374
+ "total_transactions": sum(row[0] for row in rows),
375
+ "total_amount": sum(row[1] for row in rows),
376
+ "average_transaction": sum(row[2] for row in rows) / len(rows) if rows else 0,
377
+ "categories": []
378
+ }
379
+
380
+ for row in rows:
381
+ summary["categories"].append({
382
+ "category": row[3],
383
+ "count": row[0],
384
+ "total_amount": float(row[1]),
385
+ "average_amount": float(row[2])
386
+ })
387
+
388
+ return summary
389
+
390
+ except Exception as e:
391
+ print(f"Spending summary failed: {e}")
392
+ return {}
393
+
394
+ def get_similar_expenses(
395
+ self,
396
+ expense_id: str,
397
+ limit: int = 5
398
+ ) -> List[SearchResult]:
399
+ """Find expenses similar to a given expense."""
400
+ try:
401
+ # First get the expense details
402
+ expense_sql = """
403
+ SELECT user_id, description, merchant, embedding
404
+ FROM expenses
405
+ WHERE expense_id = %s
406
+ """
407
+
408
+ with self.engine.connect() as conn:
409
+ result = conn.execute(text(expense_sql), [expense_id])
410
+ row = result.fetchone()
411
+
412
+ if not row:
413
+ return []
414
+
415
+ user_id, description, merchant, embedding = row
416
+
417
+ # Search for similar expenses
418
+ similar_sql = """
419
+ SELECT
420
+ expense_id,
421
+ user_id,
422
+ description,
423
+ merchant,
424
+ expense_amount,
425
+ expense_date,
426
+ 1 - (embedding <=> %s) as similarity_score,
427
+ shopping_type,
428
+ payment_method,
429
+ recurring,
430
+ tags
431
+ FROM expenses
432
+ WHERE expense_id != %s
433
+ AND user_id = %s
434
+ ORDER BY similarity_score DESC
435
+ LIMIT %s
436
+ """
437
+
438
+ result = conn.execute(text(similar_sql), [embedding, expense_id, user_id, limit])
439
+ rows = result.fetchall()
440
+
441
+ results = []
442
+ for row in rows:
443
+ results.append(SearchResult(
444
+ expense_id=str(row[0]),
445
+ user_id=str(row[1]),
446
+ description=row[2] or "",
447
+ merchant=row[3] or "",
448
+ amount=float(row[4]),
449
+ date=str(row[5]),
450
+ similarity_score=float(row[6]),
451
+ metadata={
452
+ "shopping_type": row[7],
453
+ "payment_method": row[8],
454
+ "recurring": row[9],
455
+ "tags": row[10]
456
+ }
457
+ ))
458
+
459
+ return results
460
+
461
+ except Exception as e:
462
+ print(f"Similar expenses search failed: {e}")
463
+ return []
@@ -0,0 +1,13 @@
1
+ """Web application module for Banko AI Assistant."""
2
+
3
def create_app():
    """Build and return the Flask application, importing it lazily.

    The deferred import keeps heavy web dependencies out of package import
    time; they load only when the app is actually created.
    """
    from .app import create_app as _factory
    app = _factory()
    return app
7
+
8
def get_user_manager():
    """Return the UserManager class, importing it lazily.

    Note this returns the class object itself, not an instance.
    """
    from .auth import UserManager as _UserManager
    return _UserManager
12
+
13
# Public API of the web package; both entries are lazy-import wrappers.
__all__ = ["create_app", "get_user_manager"]