banko-ai-assistant 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. banko_ai/__init__.py +19 -0
  2. banko_ai/__main__.py +10 -0
  3. banko_ai/ai_providers/__init__.py +18 -0
  4. banko_ai/ai_providers/aws_provider.py +337 -0
  5. banko_ai/ai_providers/base.py +175 -0
  6. banko_ai/ai_providers/factory.py +84 -0
  7. banko_ai/ai_providers/gemini_provider.py +340 -0
  8. banko_ai/ai_providers/openai_provider.py +295 -0
  9. banko_ai/ai_providers/watsonx_provider.py +591 -0
  10. banko_ai/cli.py +374 -0
  11. banko_ai/config/__init__.py +5 -0
  12. banko_ai/config/settings.py +216 -0
  13. banko_ai/static/Anallytics.png +0 -0
  14. banko_ai/static/Graph.png +0 -0
  15. banko_ai/static/Graph2.png +0 -0
  16. banko_ai/static/ai-status.png +0 -0
  17. banko_ai/static/banko-ai-assistant-watsonx.gif +0 -0
  18. banko_ai/static/banko-db-ops.png +0 -0
  19. banko_ai/static/banko-response.png +0 -0
  20. banko_ai/static/cache-stats.png +0 -0
  21. banko_ai/static/creditcard.png +0 -0
  22. banko_ai/static/profilepic.jpeg +0 -0
  23. banko_ai/static/query_watcher.png +0 -0
  24. banko_ai/static/roach-logo.svg +54 -0
  25. banko_ai/static/watsonx-icon.svg +1 -0
  26. banko_ai/templates/base.html +59 -0
  27. banko_ai/templates/dashboard.html +569 -0
  28. banko_ai/templates/index.html +1499 -0
  29. banko_ai/templates/login.html +41 -0
  30. banko_ai/utils/__init__.py +8 -0
  31. banko_ai/utils/cache_manager.py +525 -0
  32. banko_ai/utils/database.py +202 -0
  33. banko_ai/utils/migration.py +123 -0
  34. banko_ai/vector_search/__init__.py +18 -0
  35. banko_ai/vector_search/enrichment.py +278 -0
  36. banko_ai/vector_search/generator.py +329 -0
  37. banko_ai/vector_search/search.py +463 -0
  38. banko_ai/web/__init__.py +13 -0
  39. banko_ai/web/app.py +668 -0
  40. banko_ai/web/auth.py +73 -0
  41. banko_ai_assistant-1.0.0.dist-info/METADATA +414 -0
  42. banko_ai_assistant-1.0.0.dist-info/RECORD +46 -0
  43. banko_ai_assistant-1.0.0.dist-info/WHEEL +5 -0
  44. banko_ai_assistant-1.0.0.dist-info/entry_points.txt +2 -0
  45. banko_ai_assistant-1.0.0.dist-info/licenses/LICENSE +21 -0
  46. banko_ai_assistant-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,329 @@
1
+ """
2
+ Enhanced expense data generator with data enrichment.
3
+
4
+ This module generates realistic expense data with enriched descriptions
5
+ for improved vector search accuracy.
6
+ """
7
+
8
+ import os
9
+ import uuid
10
+ import random
11
+ from datetime import datetime, timedelta
12
+ from typing import List, Dict, Any, Optional
13
+
14
+ from .enrichment import DataEnricher
15
+
16
+
17
class EnhancedExpenseGenerator:
    """Enhanced expense generator with data enrichment for better vector search.

    Produces synthetic expense records (merchant, category, amount, date,
    payment method, tags), enriches their descriptions through
    ``DataEnricher``, attaches a sentence embedding, and can persist the
    records to the ``expenses`` table.

    Heavy third-party dependencies (SQLAlchemy, sentence-transformers, pandas)
    are imported lazily inside the members that need them, so constructing the
    generator does not import them.
    """

    def __init__(self, database_url: Optional[str] = None):
        """Initialize the enhanced expense generator.

        Args:
            database_url: SQLAlchemy connection URL. When omitted (or empty),
                the ``DATABASE_URL`` environment variable is used, falling
                back to a local CockroachDB URL.
        """
        self.database_url = database_url or os.getenv(
            'DATABASE_URL',
            "cockroachdb://root@localhost:26257/banko_ai?sslmode=disable",
        )
        self._engine = None
        self.enricher = DataEnricher()
        self._embedding_model = None
        # Reference data is built on first access; see
        # _init_merchants_and_categories().
        self._merchants = None
        self._categories = None
        self._payment_methods = None
        self._user_ids = None

    @property
    def engine(self):
        """Get SQLAlchemy engine (lazy import, created once and cached)."""
        if self._engine is None:
            from sqlalchemy import create_engine
            self._engine = create_engine(self.database_url)
        return self._engine

    @property
    def embedding_model(self):
        """Get embedding model (lazy import, loaded once and cached)."""
        if self._embedding_model is None:
            from sentence_transformers import SentenceTransformer
            self._embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._embedding_model

    @property
    def merchants(self):
        """Get merchants data (lazy load): merchant group -> list of names."""
        if self._merchants is None:
            self._init_merchants_and_categories()
        return self._merchants

    @property
    def categories(self):
        """Get categories data (lazy load).

        Maps a category name to a dict with ``items``, ``merchants`` and
        ``amount_range`` (a ``(low, high)`` tuple).
        """
        if self._categories is None:
            self._init_merchants_and_categories()
        return self._categories

    @property
    def payment_methods(self):
        """Get payment methods (lazy load)."""
        if self._payment_methods is None:
            self._init_merchants_and_categories()
        return self._payment_methods

    @property
    def user_ids(self):
        """Get user IDs (lazy load): 100 random UUID strings per instance."""
        if self._user_ids is None:
            self._init_merchants_and_categories()
        return self._user_ids

    def _init_merchants_and_categories(self):
        """Initialize merchants, categories, payment methods and user IDs.

        All four tables are (re)built together, so any one of the lazy
        properties triggering this call populates the others as well.
        """
        # Enhanced merchant and category data
        merchants = {
            "grocery": [
                "Whole Foods Market", "Trader Joe's", "Kroger", "Safeway", "Publix",
                "Walmart", "Target", "Costco", "Local Market", "Food Lion"
            ],
            "retail": [
                "Amazon", "Best Buy", "Apple Store", "Home Depot", "Lowes",
                "Target", "Walmart", "Macy's", "Nordstrom", "TJ Maxx"
            ],
            "dining": [
                "Starbucks", "McDonald's", "Chipotle", "Subway", "Pizza Hut",
                "Domino's", "Panera Bread", "Dunkin' Donuts", "Taco Bell", "KFC"
            ],
            "transportation": [
                "Shell Gas Station", "Exxon", "Chevron", "Uber", "Lyft",
                "Metro", "Parking Garage", "Toll Road", "Car Wash", "Auto Repair"
            ],
            "healthcare": [
                "CVS Pharmacy", "Walgreens", "Rite Aid", "Hospital", "Clinic",
                "Dentist", "Optometrist", "Pharmacy", "Medical Center", "Urgent Care"
            ],
            "entertainment": [
                "Netflix", "Spotify", "Movie Theater", "Concert Hall", "Gaming Store",
                "Bookstore", "Museum", "Theme Park", "Sports Venue", "Theater"
            ],
            "utilities": [
                "Electric Company", "Internet Provider", "Phone Company", "Water Company",
                "Gas Company", "Cable Company", "Trash Service", "Security System", "Insurance", "Bank"
            ]
        }
        self._merchants = merchants

        # Each category shares its merchant list object with self._merchants.
        self._categories = {
            "Groceries": {
                "items": ["Fresh produce", "Dairy products", "Meat and poultry", "Pantry staples", "Organic foods", "Beverages", "Snacks"],
                "merchants": merchants["grocery"],
                "amount_range": (10, 150)
            },
            "Transportation": {
                "items": ["Gas fill-up", "Uber ride", "Metro card reload", "Parking fee", "Car maintenance", "Toll payment", "Car wash"],
                "merchants": merchants["transportation"],
                "amount_range": (5, 100)
            },
            "Dining": {
                "items": ["Coffee and pastry", "Lunch meeting", "Dinner date", "Fast food", "Food delivery", "Restaurant meal", "Catering"],
                "merchants": merchants["dining"],
                "amount_range": (8, 80)
            },
            "Entertainment": {
                "items": ["Movie tickets", "Streaming service", "Concert tickets", "Gaming", "Books", "Magazine subscription", "Music"],
                "merchants": merchants["entertainment"],
                "amount_range": (5, 200)
            },
            "Healthcare": {
                "items": ["Prescription medication", "Doctor visit", "Dental cleaning", "Vitamins", "Health insurance", "Medical test", "Therapy"],
                "merchants": merchants["healthcare"],
                "amount_range": (15, 500)
            },
            "Shopping": {
                "items": ["Clothing", "Electronics", "Home goods", "Personal care", "Gifts", "Furniture", "Appliances"],
                "merchants": merchants["retail"],
                "amount_range": (20, 1000)
            },
            "Utilities": {
                "items": ["Electric bill", "Internet service", "Phone bill", "Water bill", "Trash service", "Cable TV", "Security system"],
                "merchants": merchants["utilities"],
                "amount_range": (30, 300)
            }
        }

        self._payment_methods = ["Credit Card", "Debit Card", "Cash", "Mobile Payment", "Bank Transfer", "Check"]
        self._user_ids = [str(uuid.uuid4()) for _ in range(100)]  # Generate 100 user IDs

    def generate_expense(self, user_id: Optional[str] = None) -> Dict[str, Any]:
        """Generate a single enriched expense record.

        Args:
            user_id: Owner of the expense. When omitted, one of the
                generator's user IDs is picked at random.

        Returns:
            A dict with the keys ``expense_id``, ``user_id``, ``expense_date``,
            ``expense_amount``, ``shopping_type``, ``description``,
            ``merchant``, ``payment_method``, ``recurring``, ``tags``,
            ``embedding`` and ``searchable_text``.
        """
        # NOTE: the order of the random.* calls below is kept stable so that a
        # seeded `random` module yields the same sequence of records.

        # Select category and get associated data
        category = random.choice(list(self.categories))
        category_data = self.categories[category]

        # Select merchant from category-specific merchants
        merchant = random.choice(category_data["merchants"])

        # Generate amount within category range
        low, high = category_data["amount_range"]
        amount = round(random.uniform(low, high), 2)

        # Select item from category items and build the basic description
        item = random.choice(category_data["items"])
        basic_description = f"Bought {item.lower()}"

        # Generate date (last 90 days)
        days_ago = random.randint(0, 90)
        expense_date = (datetime.now() - timedelta(days=days_ago)).date()

        # Generate additional metadata; only utilities/entertainment can recur
        payment_method = random.choice(self.payment_methods)
        if category in ("Utilities", "Entertainment"):
            recurring = random.choice([True, False])
        else:
            recurring = False
        tags = [category.lower(), merchant.lower().replace(" ", "_")]

        # Enrich the description
        enriched_description = self.enricher.enrich_expense_description(
            description=basic_description,
            merchant=merchant,
            amount=amount,
            category=category,
            payment_method=payment_method,
            date=expense_date,
            tags=tags
        )

        # Create searchable text for embedding
        searchable_text = self.enricher.create_searchable_text(
            description=basic_description,
            merchant=merchant,
            amount=amount,
            category=category,
            payment_method=payment_method,
            tags=tags
        )

        # Generate embedding (encode takes a batch; we send a batch of one)
        embedding = self.embedding_model.encode([searchable_text])[0].tolist()

        return {
            "expense_id": str(uuid.uuid4()),
            "user_id": user_id or random.choice(self.user_ids),
            "expense_date": expense_date,
            "expense_amount": amount,
            "shopping_type": category,
            "description": enriched_description,
            "merchant": merchant,
            "payment_method": payment_method,
            "recurring": recurring,
            "tags": tags,
            "embedding": embedding,
            "searchable_text": searchable_text  # Store for debugging
        }

    def generate_expenses(self, count: int, user_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Generate multiple enriched expense records.

        Args:
            count: Number of records to generate; ``0`` or a negative value
                yields an empty list.
            user_id: Passed through to :meth:`generate_expense`.
        """
        return [self.generate_expense(user_id) for _ in range(count)]

    def save_expenses_to_database(self, expenses: List[Dict[str, Any]]) -> int:
        """Save expenses to the ``expenses`` table.

        Args:
            expenses: Records as produced by :meth:`generate_expense`. The
                ``searchable_text`` key is not persisted.

        Returns:
            The number of rows handed to the database, or ``0`` if nothing was
            supplied or an error occurred (the error is printed, not raised).
        """
        if not expenses:
            # Nothing to write: avoid importing pandas / opening a connection.
            return 0

        # Columns written to the table, in insertion order.
        columns = (
            'expense_id', 'user_id', 'expense_date', 'expense_amount',
            'shopping_type', 'description', 'merchant', 'payment_method',
            'recurring', 'tags', 'embedding',
        )
        batch_size = 100

        try:
            import pandas as pd
            with self.engine.connect() as conn:
                # Prepare data for insertion
                data_to_insert = [
                    {column: expense[column] for column in columns}
                    for expense in expenses
                ]

                # Insert in batches
                total_inserted = 0
                for start in range(0, len(data_to_insert), batch_size):
                    batch = data_to_insert[start:start + batch_size]

                    # Use pandas to insert the batch
                    df = pd.DataFrame(batch)
                    df.to_sql('expenses', conn, if_exists='append', index=False, method='multi')
                    total_inserted += len(batch)

                return total_inserted

        except Exception as e:
            print(f"Error saving expenses to database: {e}")
            return 0

    def clear_expenses(self) -> bool:
        """Clear all expenses from the database.

        Returns:
            ``True`` on success, ``False`` if an error occurred (the error is
            printed, not raised).
        """
        try:
            from sqlalchemy import text
            with self.engine.connect() as conn:
                conn.execute(text("DELETE FROM expenses"))
                conn.commit()
            return True
        except Exception as e:
            print(f"Error clearing expenses: {e}")
            return False

    def get_expense_count(self) -> int:
        """Get the current number of expenses in the database.

        Returns:
            The row count of ``expenses``, or ``0`` if the query failed (the
            error is printed, not raised).
        """
        try:
            from sqlalchemy import text
            with self.engine.connect() as conn:
                result = conn.execute(text("SELECT COUNT(*) FROM expenses"))
                return result.scalar()
        except Exception as e:
            print(f"Error getting expense count: {e}")
            return 0

    def generate_and_save(
        self,
        count: int,
        user_id: Optional[str] = None,
        clear_existing: bool = False
    ) -> int:
        """Generate and save expenses to the database.

        Args:
            count: Number of records to generate.
            user_id: Optional owner for every generated record.
            clear_existing: When true, delete all existing expenses first. A
                failure to clear is reported by :meth:`clear_expenses` but
                does not stop generation.

        Returns:
            The value returned by :meth:`save_expenses_to_database`.
        """
        if clear_existing:
            self.clear_expenses()

        expenses = self.generate_expenses(count, user_id)
        return self.save_expenses_to_database(expenses)

    def create_user_specific_indexes(self) -> bool:
        """Create user-specific vector indexes for CockroachDB.

        The main index is required; the regional index is best-effort and is
        skipped silently when the statement fails.

        Returns:
            ``True`` if the main index statement was executed and committed,
            ``False`` if an error occurred (the error is printed, not raised).
        """
        try:
            # Imported locally like the other database helpers. Without this
            # import `text` is undefined and the method could never succeed.
            from sqlalchemy import text

            with self.engine.connect() as conn:
                # Create user-specific vector index
                conn.execute(text("""
                    CREATE INDEX IF NOT EXISTS idx_expenses_user_embedding
                    ON expenses (user_id, embedding)
                    USING ivfflat (embedding vector_cosine_ops)
                    WITH (lists = 100)
                """))
                # Commit before attempting the optional index so that a
                # failure there cannot take the main index down with it.
                conn.commit()

                # Create regional index if supported
                try:
                    conn.execute(text("""
                        CREATE INDEX IF NOT EXISTS idx_expenses_user_embedding_regional
                        ON expenses (user_id, embedding)
                        LOCALITY REGIONAL BY ROW AS region
                    """))
                    conn.commit()
                except Exception:
                    # Regional indexing might not be supported in all
                    # deployments; reset the failed transaction and move on.
                    conn.rollback()

            return True
        except Exception as e:
            print(f"Error creating user-specific indexes: {e}")
            return False