banko-ai-assistant 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banko_ai/__init__.py +19 -0
- banko_ai/__main__.py +10 -0
- banko_ai/ai_providers/__init__.py +18 -0
- banko_ai/ai_providers/aws_provider.py +337 -0
- banko_ai/ai_providers/base.py +175 -0
- banko_ai/ai_providers/factory.py +84 -0
- banko_ai/ai_providers/gemini_provider.py +340 -0
- banko_ai/ai_providers/openai_provider.py +295 -0
- banko_ai/ai_providers/watsonx_provider.py +591 -0
- banko_ai/cli.py +374 -0
- banko_ai/config/__init__.py +5 -0
- banko_ai/config/settings.py +216 -0
- banko_ai/static/Anallytics.png +0 -0
- banko_ai/static/Graph.png +0 -0
- banko_ai/static/Graph2.png +0 -0
- banko_ai/static/ai-status.png +0 -0
- banko_ai/static/banko-ai-assistant-watsonx.gif +0 -0
- banko_ai/static/banko-db-ops.png +0 -0
- banko_ai/static/banko-response.png +0 -0
- banko_ai/static/cache-stats.png +0 -0
- banko_ai/static/creditcard.png +0 -0
- banko_ai/static/profilepic.jpeg +0 -0
- banko_ai/static/query_watcher.png +0 -0
- banko_ai/static/roach-logo.svg +54 -0
- banko_ai/static/watsonx-icon.svg +1 -0
- banko_ai/templates/base.html +59 -0
- banko_ai/templates/dashboard.html +569 -0
- banko_ai/templates/index.html +1499 -0
- banko_ai/templates/login.html +41 -0
- banko_ai/utils/__init__.py +8 -0
- banko_ai/utils/cache_manager.py +525 -0
- banko_ai/utils/database.py +202 -0
- banko_ai/utils/migration.py +123 -0
- banko_ai/vector_search/__init__.py +18 -0
- banko_ai/vector_search/enrichment.py +278 -0
- banko_ai/vector_search/generator.py +329 -0
- banko_ai/vector_search/search.py +463 -0
- banko_ai/web/__init__.py +13 -0
- banko_ai/web/app.py +668 -0
- banko_ai/web/auth.py +73 -0
- banko_ai_assistant-1.0.0.dist-info/METADATA +414 -0
- banko_ai_assistant-1.0.0.dist-info/RECORD +46 -0
- banko_ai_assistant-1.0.0.dist-info/WHEEL +5 -0
- banko_ai_assistant-1.0.0.dist-info/entry_points.txt +2 -0
- banko_ai_assistant-1.0.0.dist-info/licenses/LICENSE +21 -0
- banko_ai_assistant-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,329 @@
|
|
1
|
+
"""
|
2
|
+
Enhanced expense data generator with data enrichment.
|
3
|
+
|
4
|
+
This module generates realistic expense data with enriched descriptions
|
5
|
+
for improved vector search accuracy.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
import uuid
|
10
|
+
import random
|
11
|
+
from datetime import datetime, timedelta
|
12
|
+
from typing import List, Dict, Any, Optional
|
13
|
+
|
14
|
+
from .enrichment import DataEnricher
|
15
|
+
|
16
|
+
|
17
|
+
class EnhancedExpenseGenerator:
    """Generate synthetic expense records enriched for vector search.

    Each record combines a randomly chosen category, merchant, item, amount,
    date and payment method with a description and searchable text built by
    ``DataEnricher`` and an embedding of that searchable text. Records can
    then be written to the ``expenses`` table of the configured database.

    Heavy dependencies (SQLAlchemy, sentence-transformers, pandas) are
    imported lazily inside the members that need them.
    """

    # Tunables. The defaults are the values that used to be hard-coded, so
    # behavior is unchanged unless a subclass or caller overrides them.
    DEFAULT_DATABASE_URL = "cockroachdb://root@localhost:26257/banko_ai?sslmode=disable"
    EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
    USER_POOL_SIZE = 100      # number of random user IDs to draw from
    INSERT_BATCH_SIZE = 100   # rows per DataFrame.to_sql call
    MAX_DAYS_BACK = 90        # expense dates fall within the last N days

    def __init__(self, database_url: Optional[str] = None):
        """Initialize the generator.

        Args:
            database_url: SQLAlchemy URL. When falsy, the ``DATABASE_URL``
                environment variable is used, then ``DEFAULT_DATABASE_URL``.
        """
        self.database_url = database_url or os.getenv('DATABASE_URL', self.DEFAULT_DATABASE_URL)
        self._engine = None
        self.enricher = DataEnricher()
        self._embedding_model = None
        # Reference data is populated on first access through the properties.
        self._merchants = None
        self._categories = None
        self._payment_methods = None
        self._user_ids = None

    @property
    def engine(self):
        """SQLAlchemy engine for ``database_url``, created on first access."""
        if self._engine is None:
            from sqlalchemy import create_engine
            self._engine = create_engine(self.database_url)
        return self._engine

    @property
    def embedding_model(self):
        """SentenceTransformer model, loaded on first access."""
        if self._embedding_model is None:
            from sentence_transformers import SentenceTransformer
            self._embedding_model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)
        return self._embedding_model

    @property
    def merchants(self):
        """Mapping of merchant group name to merchant names (lazy load)."""
        if self._merchants is None:
            self._init_merchants_and_categories()
        return self._merchants

    @property
    def categories(self):
        """Mapping of category name to its items, merchants and amount range (lazy load)."""
        if self._categories is None:
            self._init_merchants_and_categories()
        return self._categories

    @property
    def payment_methods(self):
        """List of payment method names (lazy load)."""
        if self._payment_methods is None:
            self._init_merchants_and_categories()
        return self._payment_methods

    @property
    def user_ids(self):
        """Pool of randomly generated user ID strings (lazy load)."""
        if self._user_ids is None:
            self._init_merchants_and_categories()
        return self._user_ids

    def _init_merchants_and_categories(self):
        """Populate merchants, categories, payment methods and the user ID pool.

        All four attributes are (re)built together, so a call triggered by any
        one of the lazy properties fills in the others as well.
        """
        merchants = {
            "grocery": [
                "Whole Foods Market", "Trader Joe's", "Kroger", "Safeway", "Publix",
                "Walmart", "Target", "Costco", "Local Market", "Food Lion"
            ],
            "retail": [
                "Amazon", "Best Buy", "Apple Store", "Home Depot", "Lowes",
                "Target", "Walmart", "Macy's", "Nordstrom", "TJ Maxx"
            ],
            "dining": [
                "Starbucks", "McDonald's", "Chipotle", "Subway", "Pizza Hut",
                "Domino's", "Panera Bread", "Dunkin' Donuts", "Taco Bell", "KFC"
            ],
            "transportation": [
                "Shell Gas Station", "Exxon", "Chevron", "Uber", "Lyft",
                "Metro", "Parking Garage", "Toll Road", "Car Wash", "Auto Repair"
            ],
            "healthcare": [
                "CVS Pharmacy", "Walgreens", "Rite Aid", "Hospital", "Clinic",
                "Dentist", "Optometrist", "Pharmacy", "Medical Center", "Urgent Care"
            ],
            "entertainment": [
                "Netflix", "Spotify", "Movie Theater", "Concert Hall", "Gaming Store",
                "Bookstore", "Museum", "Theme Park", "Sports Venue", "Theater"
            ],
            "utilities": [
                "Electric Company", "Internet Provider", "Phone Company", "Water Company",
                "Gas Company", "Cable Company", "Trash Service", "Security System", "Insurance", "Bank"
            ]
        }
        self._merchants = merchants

        # Each category shares the merchant list object of its merchant group.
        # Insertion order matters: generate_expense picks from list(keys()).
        self._categories = {
            "Groceries": {
                "items": ["Fresh produce", "Dairy products", "Meat and poultry", "Pantry staples", "Organic foods", "Beverages", "Snacks"],
                "merchants": merchants["grocery"],
                "amount_range": (10, 150)
            },
            "Transportation": {
                "items": ["Gas fill-up", "Uber ride", "Metro card reload", "Parking fee", "Car maintenance", "Toll payment", "Car wash"],
                "merchants": merchants["transportation"],
                "amount_range": (5, 100)
            },
            "Dining": {
                "items": ["Coffee and pastry", "Lunch meeting", "Dinner date", "Fast food", "Food delivery", "Restaurant meal", "Catering"],
                "merchants": merchants["dining"],
                "amount_range": (8, 80)
            },
            "Entertainment": {
                "items": ["Movie tickets", "Streaming service", "Concert tickets", "Gaming", "Books", "Magazine subscription", "Music"],
                "merchants": merchants["entertainment"],
                "amount_range": (5, 200)
            },
            "Healthcare": {
                "items": ["Prescription medication", "Doctor visit", "Dental cleaning", "Vitamins", "Health insurance", "Medical test", "Therapy"],
                "merchants": merchants["healthcare"],
                "amount_range": (15, 500)
            },
            "Shopping": {
                "items": ["Clothing", "Electronics", "Home goods", "Personal care", "Gifts", "Furniture", "Appliances"],
                "merchants": merchants["retail"],
                "amount_range": (20, 1000)
            },
            "Utilities": {
                "items": ["Electric bill", "Internet service", "Phone bill", "Water bill", "Trash service", "Cable TV", "Security system"],
                "merchants": merchants["utilities"],
                "amount_range": (30, 300)
            }
        }

        self._payment_methods = ["Credit Card", "Debit Card", "Cash", "Mobile Payment", "Bank Transfer", "Check"]
        self._user_ids = [str(uuid.uuid4()) for _ in range(self.USER_POOL_SIZE)]

    def generate_expense(self, user_id: Optional[str] = None) -> Dict[str, Any]:
        """Generate a single enriched expense record.

        Args:
            user_id: Owner of the expense. When falsy, a random ID from the
                internal user pool is used.

        Returns:
            A dict with the keys ``expense_id``, ``user_id``, ``expense_date``,
            ``expense_amount``, ``shopping_type``, ``description``,
            ``merchant``, ``payment_method``, ``recurring``, ``tags``,
            ``embedding`` and ``searchable_text``.
        """
        # The order of the random draws below is kept stable so that a seeded
        # ``random`` module keeps producing the same field values.
        category = random.choice(list(self.categories.keys()))
        category_data = self.categories[category]

        merchant = random.choice(category_data["merchants"])
        amount = round(random.uniform(*category_data["amount_range"]), 2)
        item = random.choice(category_data["items"])
        basic_description = f"Bought {item.lower()}"

        days_ago = random.randint(0, self.MAX_DAYS_BACK)
        expense_date = (datetime.now() - timedelta(days=days_ago)).date()

        payment_method = random.choice(self.payment_methods)
        # Only utilities and entertainment expenses may be marked recurring.
        recurring = random.choice([True, False]) if category in ["Utilities", "Entertainment"] else False
        tags = [category.lower(), merchant.lower().replace(" ", "_")]

        enriched_description = self.enricher.enrich_expense_description(
            description=basic_description,
            merchant=merchant,
            amount=amount,
            category=category,
            payment_method=payment_method,
            date=expense_date,
            tags=tags
        )

        # The searchable text (not the enriched description) is what gets embedded.
        searchable_text = self.enricher.create_searchable_text(
            description=basic_description,
            merchant=merchant,
            amount=amount,
            category=category,
            payment_method=payment_method,
            tags=tags
        )

        embedding = self.embedding_model.encode([searchable_text])[0].tolist()

        return {
            "expense_id": str(uuid.uuid4()),
            "user_id": user_id or random.choice(self.user_ids),
            "expense_date": expense_date,
            "expense_amount": amount,
            "shopping_type": category,
            "description": enriched_description,
            "merchant": merchant,
            "payment_method": payment_method,
            "recurring": recurring,
            "tags": tags,
            "embedding": embedding,
            "searchable_text": searchable_text  # Store for debugging
        }

    def generate_expenses(self, count: int, user_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Generate ``count`` enriched expense records.

        Args:
            count: Number of records to create; zero or less yields ``[]``.
            user_id: Passed through to ``generate_expense`` for every record.

        Returns:
            The generated records, in creation order.
        """
        return [self.generate_expense(user_id) for _ in range(count)]

    def save_expenses_to_database(self, expenses: List[Dict[str, Any]]) -> int:
        """Append expenses to the ``expenses`` table in batches.

        Only the persisted columns are written; ``searchable_text`` is dropped.

        Args:
            expenses: Records shaped like the output of ``generate_expense``.

        Returns:
            Number of rows handed to the database, or 0 when ``expenses`` is
            empty or any error occurs (the error is printed; earlier batches
            may already have been written).
        """
        if not expenses:
            # Nothing to write: skip the pandas import and the DB round trip.
            return 0

        columns = (
            'expense_id', 'user_id', 'expense_date', 'expense_amount',
            'shopping_type', 'description', 'merchant', 'payment_method',
            'recurring', 'tags', 'embedding',
        )

        try:
            import pandas as pd

            rows = [{column: expense[column] for column in columns} for expense in expenses]
            total_inserted = 0

            with self.engine.connect() as conn:
                for start in range(0, len(rows), self.INSERT_BATCH_SIZE):
                    batch = rows[start:start + self.INSERT_BATCH_SIZE]
                    pd.DataFrame(batch).to_sql('expenses', conn, if_exists='append', index=False, method='multi')
                    total_inserted += len(batch)

            return total_inserted

        except Exception as e:
            print(f"Error saving expenses to database: {e}")
            return 0

    def clear_expenses(self) -> bool:
        """Delete every row from the ``expenses`` table.

        Returns:
            True on success, False if any error occurred (the error is printed).
        """
        try:
            from sqlalchemy import text
            with self.engine.connect() as conn:
                conn.execute(text("DELETE FROM expenses"))
                conn.commit()
            return True
        except Exception as e:
            print(f"Error clearing expenses: {e}")
            return False

    def get_expense_count(self) -> int:
        """Return the number of rows in ``expenses``, or 0 on error (the error is printed)."""
        try:
            from sqlalchemy import text
            with self.engine.connect() as conn:
                result = conn.execute(text("SELECT COUNT(*) FROM expenses"))
                return result.scalar()
        except Exception as e:
            print(f"Error getting expense count: {e}")
            return 0

    def generate_and_save(
        self,
        count: int,
        user_id: Optional[str] = None,
        clear_existing: bool = False
    ) -> int:
        """Generate ``count`` expenses and write them to the database.

        Args:
            count: Number of records to generate.
            user_id: Optional owner applied to every generated record.
            clear_existing: When True, existing rows are deleted first; a
                failed delete does not stop generation.

        Returns:
            The value returned by ``save_expenses_to_database``.
        """
        if clear_existing:
            self.clear_expenses()

        expenses = self.generate_expenses(count, user_id)
        return self.save_expenses_to_database(expenses)

    def create_user_specific_indexes(self) -> bool:
        """Create the per-user vector index and, if possible, a regional one.

        Returns:
            True when the main index statement succeeded (the regional index
            is best-effort), False on any other error (the error is printed).
        """
        try:
            # Imported locally like the other DB helpers. Without this import
            # the ``text(...)`` calls below raise NameError, which the broad
            # handler turns into a False return before any SQL runs.
            from sqlalchemy import text

            with self.engine.connect() as conn:
                # NOTE(review): ivfflat / vector_cosine_ops looks like pgvector
                # syntax - confirm the target CockroachDB version accepts it.
                conn.execute(text("""
                    CREATE INDEX IF NOT EXISTS idx_expenses_user_embedding
                    ON expenses (user_id, embedding)
                    USING ivfflat (embedding vector_cosine_ops)
                    WITH (lists = 100)
                """))
                # Commit now so a failure of the optional statement below
                # cannot abort the transaction holding the main index.
                conn.commit()

                # Create regional index if supported
                try:
                    conn.execute(text("""
                        CREATE INDEX IF NOT EXISTS idx_expenses_user_embedding_regional
                        ON expenses (user_id, embedding)
                        LOCALITY REGIONAL BY ROW AS region
                    """))
                    conn.commit()
                except Exception:
                    # Regional indexing might not be supported in all
                    # deployments; discard the failed statement and carry on.
                    conn.rollback()

            return True
        except Exception as e:
            print(f"Error creating user-specific indexes: {e}")
            return False
|