banko-ai-assistant 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banko_ai/__init__.py +19 -0
- banko_ai/__main__.py +10 -0
- banko_ai/ai_providers/__init__.py +18 -0
- banko_ai/ai_providers/aws_provider.py +337 -0
- banko_ai/ai_providers/base.py +175 -0
- banko_ai/ai_providers/factory.py +84 -0
- banko_ai/ai_providers/gemini_provider.py +340 -0
- banko_ai/ai_providers/openai_provider.py +295 -0
- banko_ai/ai_providers/watsonx_provider.py +591 -0
- banko_ai/cli.py +374 -0
- banko_ai/config/__init__.py +5 -0
- banko_ai/config/settings.py +216 -0
- banko_ai/static/Anallytics.png +0 -0
- banko_ai/static/Graph.png +0 -0
- banko_ai/static/Graph2.png +0 -0
- banko_ai/static/ai-status.png +0 -0
- banko_ai/static/banko-ai-assistant-watsonx.gif +0 -0
- banko_ai/static/banko-db-ops.png +0 -0
- banko_ai/static/banko-response.png +0 -0
- banko_ai/static/cache-stats.png +0 -0
- banko_ai/static/creditcard.png +0 -0
- banko_ai/static/profilepic.jpeg +0 -0
- banko_ai/static/query_watcher.png +0 -0
- banko_ai/static/roach-logo.svg +54 -0
- banko_ai/static/watsonx-icon.svg +1 -0
- banko_ai/templates/base.html +59 -0
- banko_ai/templates/dashboard.html +569 -0
- banko_ai/templates/index.html +1499 -0
- banko_ai/templates/login.html +41 -0
- banko_ai/utils/__init__.py +8 -0
- banko_ai/utils/cache_manager.py +525 -0
- banko_ai/utils/database.py +202 -0
- banko_ai/utils/migration.py +123 -0
- banko_ai/vector_search/__init__.py +18 -0
- banko_ai/vector_search/enrichment.py +278 -0
- banko_ai/vector_search/generator.py +329 -0
- banko_ai/vector_search/search.py +463 -0
- banko_ai/web/__init__.py +13 -0
- banko_ai/web/app.py +668 -0
- banko_ai/web/auth.py +73 -0
- banko_ai_assistant-1.0.0.dist-info/METADATA +414 -0
- banko_ai_assistant-1.0.0.dist-info/RECORD +46 -0
- banko_ai_assistant-1.0.0.dist-info/WHEEL +5 -0
- banko_ai_assistant-1.0.0.dist-info/entry_points.txt +2 -0
- banko_ai_assistant-1.0.0.dist-info/licenses/LICENSE +21 -0
- banko_ai_assistant-1.0.0.dist-info/top_level.txt +1 -0
banko_ai/templates/login.html
@@ -0,0 +1,41 @@
{% extends "base.html" %}

{% block title %}Login - Banko AI Assistant{% endblock %}

{% block content %}
<div class="min-h-screen flex items-center justify-center bg-gray-50 py-12 px-4 sm:px-6 lg:px-8">
    <div class="max-w-md w-full space-y-8">
        <div>
            <div class="mx-auto h-12 w-12 flex items-center justify-center">
                <img src="{{ url_for('static', filename='roach-logo.svg') }}" alt="CockroachDB" class="h-12 w-12">
            </div>
            <h2 class="mt-6 text-center text-3xl font-extrabold text-gray-900">
                Sign in to Banko AI
            </h2>
            <p class="mt-2 text-center text-sm text-gray-600">
                AI-powered expense analysis and RAG system
            </p>
        </div>
        <form class="mt-8 space-y-6" method="POST">
            <div class="rounded-md shadow-sm -space-y-px">
                <div>
                    <label for="username" class="sr-only">Username</label>
                    <input id="username" name="username" type="text" required
                           class="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-t-md focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 focus:z-10 sm:text-sm"
                           placeholder="Enter your username">
                </div>
            </div>

            <div>
                <button type="submit"
                        class="group relative w-full flex justify-center py-2 px-4 border border-transparent text-sm font-medium rounded-md text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500">
                    <span class="absolute left-0 inset-y-0 flex items-center pl-3">
                        <i class="fas fa-sign-in-alt text-indigo-500 group-hover:text-indigo-400"></i>
                    </span>
                    Sign in
                </button>
            </div>
        </form>
    </div>
</div>
{% endblock %}
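The form above posts a single `username` field back to the same URL, with no password input. A minimal sketch of the kind of Flask view that could serve this template; the route name and session handling are assumptions for illustration, not the package's actual `banko_ai/web/auth.py`:

```python
from flask import Flask, redirect, render_template, request, session

app = Flask(__name__)
app.secret_key = "change-me"  # assumption: the real app would load this from config

@app.route("/login", methods=["GET", "POST"])
def login():
    if request.method == "POST":
        username = request.form.get("username", "").strip()
        if username:
            # The template sends no password, so this is demo-style auth only.
            session["username"] = username
            return redirect("/")
    return render_template("login.html")
```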
banko_ai/utils/cache_manager.py
@@ -0,0 +1,525 @@
"""
Banko AI Token Optimization & Caching System

This module implements a multi-layer caching strategy to reduce token usage:
1. Query Similarity Cache - Cache responses for semantically similar queries
2. Embedding Cache - Cache embeddings to avoid regeneration
3. Response Fragment Cache - Cache financial insights and recommendations
4. Vector Search Cache - Cache vector search results

Uses CockroachDB for persistent caching with TTL support.
"""

import json
import hashlib
import time
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any
import numpy as np
import decimal
import uuid
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine, text, MetaData, Table, Column, String, Integer, DateTime, Float, Boolean
from sqlalchemy import Text as TextColumn
from sqlalchemy.dialects.postgresql import JSONB
import os

# Database configuration
DB_URI = os.getenv('DATABASE_URL', "cockroachdb://root@localhost:26257/banko_ai?sslmode=disable")

# Apply CockroachDB version parsing workaround
from sqlalchemy.dialects.postgresql.base import PGDialect
original_get_server_version_info = PGDialect._get_server_version_info

def patched_get_server_version_info(self, connection):
    try:
        return original_get_server_version_info(self, connection)
    except Exception:
        return (25, 3, 0)

PGDialect._get_server_version_info = patched_get_server_version_info

# Convert cockroachdb:// to postgresql:// for SQLAlchemy compatibility
database_url = DB_URI.replace("cockroachdb://", "postgresql://")
engine = create_engine(database_url)
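The monkey-patch above works around SQLAlchemy's PostgreSQL dialect failing to parse CockroachDB's version string, falling back to a hard-coded version tuple. A sketch of an alternative, assuming the third-party `sqlalchemy-cockroachdb` adapter is installed; it registers a dedicated dialect, so neither the patch nor the URL rewrite would be needed:

```python
# Sketch only: requires `pip install sqlalchemy-cockroachdb psycopg2-binary`.
from sqlalchemy import create_engine

# The adapter registers the cockroachdb:// scheme with SQLAlchemy directly.
engine = create_engine("cockroachdb://root@localhost:26257/banko_ai?sslmode=disable")
```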

class CustomJSONEncoder(json.JSONEncoder):
    """Custom JSON encoder to handle Decimal and UUID objects"""
    def default(self, obj):
        if isinstance(obj, decimal.Decimal):
            return float(obj)
        elif isinstance(obj, uuid.UUID):
            return str(obj)
        return super().default(obj)

def safe_json_dumps(obj, **kwargs):
    """Safe JSON dumps that handles Decimal and UUID objects"""
    return json.dumps(obj, cls=CustomJSONEncoder, **kwargs)
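For illustration, `safe_json_dumps` handles the two non-JSON-native types that routinely come back from database rows and would make plain `json.dumps` raise `TypeError`:

```python
import decimal
import uuid

row = {"expense_id": uuid.uuid4(), "amount": decimal.Decimal("42.50")}
print(safe_json_dumps(row))
# e.g. {"expense_id": "1f0e...", "amount": 42.5}
```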

class BankoCacheManager:
    """
    Intelligent caching system for Banko AI to optimize token usage.
    """

    def __init__(self, similarity_threshold=0.85, cache_ttl_hours=24):
        """
        Initialize the cache manager.

        Args:
            similarity_threshold: Minimum similarity score to consider queries equivalent
            cache_ttl_hours: Time-to-live for cached responses in hours
        """
        self.similarity_threshold = similarity_threshold
        self.cache_ttl_hours = cache_ttl_hours
        self.model = None  # Lazy load the model
        self._ensure_cache_tables()

    def _get_model(self):
        """Lazy load the SentenceTransformer model."""
        if self.model is None:
            try:
                self.model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                print(f"Warning: Could not load SentenceTransformer model: {e}")
                print("Cache functionality will be limited.")
                return None
        return self.model

    def _ensure_cache_tables(self):
        """Create cache tables if they don't exist."""
        create_tables_sql = text("""
            -- Query cache for similar questions and responses
            CREATE TABLE IF NOT EXISTS query_cache (
                cache_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                query_hash STRING UNIQUE NOT NULL,
                query_text STRING NOT NULL,
                query_embedding VECTOR(384),
                response_text TEXT NOT NULL,
                response_tokens INTEGER DEFAULT 0,
                prompt_tokens INTEGER DEFAULT 0,
                ai_service STRING NOT NULL,
                expense_data_hash STRING,
                created_at TIMESTAMP DEFAULT now(),
                expires_at TIMESTAMP,
                hit_count INTEGER DEFAULT 0,
                last_accessed TIMESTAMP DEFAULT now(),
                INDEX idx_query_hash (query_hash),
                INDEX idx_expires_at (expires_at)
            );

            -- Embedding cache to avoid regenerating embeddings
            CREATE TABLE IF NOT EXISTS embedding_cache (
                embedding_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                text_hash STRING UNIQUE NOT NULL,
                text_content STRING NOT NULL,
                embedding VECTOR(384) NOT NULL,
                model_name STRING NOT NULL DEFAULT 'all-MiniLM-L6-v2',
                created_at TIMESTAMP DEFAULT now(),
                access_count INTEGER DEFAULT 0,
                INDEX idx_text_hash (text_hash)
            );

            -- Financial insights cache for expense data combinations
            CREATE TABLE IF NOT EXISTS insights_cache (
                insight_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                expense_data_hash STRING UNIQUE NOT NULL,
                total_amount DECIMAL(12,2),
                num_transactions INTEGER,
                avg_transaction DECIMAL(10,2),
                top_categories JSONB,
                insights_json JSONB NOT NULL,
                created_at TIMESTAMP DEFAULT now(),
                expires_at TIMESTAMP,
                INDEX idx_expense_hash (expense_data_hash)
            );

            -- Vector search results cache
            CREATE TABLE IF NOT EXISTS vector_search_cache (
                search_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                query_embedding_hash STRING UNIQUE NOT NULL,
                search_results JSONB NOT NULL,
                result_count INTEGER,
                similarity_threshold FLOAT,
                created_at TIMESTAMP DEFAULT now(),
                expires_at TIMESTAMP,
                access_count INTEGER DEFAULT 0,
                INDEX idx_embedding_hash (query_embedding_hash),
                INDEX idx_expires_at (expires_at)
            );

            -- Cache statistics for monitoring
            CREATE TABLE IF NOT EXISTS cache_stats (
                stat_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                cache_type STRING NOT NULL,
                operation STRING NOT NULL, -- 'hit', 'miss', 'write'
                tokens_saved INTEGER DEFAULT 0,
                timestamp TIMESTAMP DEFAULT now(),
                details JSONB
            );
        """)

        try:
            with engine.connect() as conn:
                conn.execute(create_tables_sql)
                conn.commit()
                print("✅ Cache tables initialized successfully")
        except Exception as e:
            print(f"⚠️ Error creating cache tables: {e}")

    def _generate_hash(self, content: str) -> str:
        """Generate a consistent hash for content."""
        return hashlib.md5(content.encode('utf-8')).hexdigest()
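MD5 here is a cache key, not a security measure. Because callers hash the output of `safe_json_dumps(..., sort_keys=True)`, logically identical payloads produce identical keys regardless of dict ordering; a standalone check mirroring that pattern:

```python
import hashlib
import json

def key_of(payload):
    # Mirrors _generate_hash(safe_json_dumps(payload, sort_keys=True))
    return hashlib.md5(json.dumps(payload, sort_keys=True).encode("utf-8")).hexdigest()

assert key_of({"amount": 10, "merchant": "Acme"}) == key_of({"merchant": "Acme", "amount": 10})
```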

    def _get_embedding_with_cache(self, input_text: str) -> np.ndarray:
        """Get embedding for text, using cache when possible."""
        text_hash = self._generate_hash(input_text)

        # Try to get from cache first
        cache_query = text("""
            SELECT embedding, access_count
            FROM embedding_cache
            WHERE text_hash = :text_hash
        """)

        try:
            with engine.connect() as conn:
                result = conn.execute(cache_query, {'text_hash': text_hash})
                row = result.fetchone()

                if row:
                    # Cache hit - update access count
                    update_query = text("""
                        UPDATE embedding_cache
                        SET access_count = access_count + 1
                        WHERE text_hash = :text_hash
                    """)
                    conn.execute(update_query, {'text_hash': text_hash})
                    conn.commit()

                    self._log_cache_stat('embedding', 'hit', tokens_saved=10)
                    return np.array(json.loads(row.embedding))

        except Exception as e:
            print(f"⚠️ Error reading embedding cache: {e}")

        # Cache miss - generate embedding and store
        model = self._get_model()
        if model is None:
            return None
        embedding = model.encode(input_text)
        embedding_json = json.dumps(embedding.tolist())

        try:
            with engine.connect() as conn:
                insert_query = text("""
                    INSERT INTO embedding_cache (text_hash, text_content, embedding, access_count)
                    VALUES (:text_hash, :text_content, :embedding, 1)
                    ON CONFLICT (text_hash) DO UPDATE SET access_count = embedding_cache.access_count + 1
                """)
                conn.execute(insert_query, {
                    'text_hash': text_hash,
                    'text_content': input_text[:500],  # Truncate for storage
                    'embedding': embedding_json
                })
                conn.commit()
                self._log_cache_stat('embedding', 'miss')

        except Exception as e:
            print(f"⚠️ Error caching embedding: {e}")

        return embedding
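A usage sketch of the embedding cache, assuming a reachable database and a successful model download; the first call pays for `model.encode`, the second is served from `embedding_cache`:

```python
mgr = BankoCacheManager()
v1 = mgr._get_embedding_with_cache("coffee shops last month")  # encode + cache write
v2 = mgr._get_embedding_with_cache("coffee shops last month")  # cache hit
# Both are 384-dimensional numpy vectors for all-MiniLM-L6-v2.
```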

    def get_cached_response(self, query: str, expense_data: List[Dict], ai_service: str) -> Optional[str]:
        """
        Check if we have a cached response for a similar query.

        Args:
            query: User query text
            expense_data: Current expense data context
            ai_service: AI service being used (watsonx, bedrock)

        Returns:
            Cached response text if found, None otherwise
        """
        query_embedding = self._get_embedding_with_cache(query)
        if query_embedding is None:  # model unavailable - treat as a cache miss
            self._log_cache_stat('query', 'miss')
            return None
        expense_hash = self._generate_hash(safe_json_dumps(expense_data, sort_keys=True))

        # Find similar cached queries. <=> is cosine distance, so 1 - distance
        # is cosine similarity and lines up with similarity_threshold.
        similarity_query = text("""
            SELECT cache_id, query_text, response_text, response_tokens, prompt_tokens,
                   query_embedding <=> :query_embedding as similarity_score,
                   hit_count, expires_at
            FROM query_cache
            WHERE ai_service = :ai_service
              AND (expense_data_hash = :expense_hash OR expense_data_hash IS NULL)
              AND expires_at > now()
            ORDER BY query_embedding <=> :query_embedding
            LIMIT 5
        """)

        try:
            with engine.connect() as conn:
                result = conn.execute(similarity_query, {
                    'query_embedding': json.dumps(query_embedding.tolist()),
                    'ai_service': ai_service,
                    'expense_hash': expense_hash
                })

                for row in result:
                    similarity_score = 1 - row.similarity_score  # Convert cosine distance to similarity

                    if similarity_score >= self.similarity_threshold:
                        # Cache hit! Update statistics
                        update_query = text("""
                            UPDATE query_cache
                            SET hit_count = hit_count + 1, last_accessed = now()
                            WHERE cache_id = :cache_id
                        """)
                        conn.execute(update_query, {'cache_id': row.cache_id})
                        conn.commit()

                        tokens_saved = (row.response_tokens or 500) + (row.prompt_tokens or 400)
                        self._log_cache_stat('query', 'hit', tokens_saved=tokens_saved, details={
                            'original_query': row.query_text,
                            'new_query': query,
                            'similarity_score': similarity_score
                        })

                        print(f"🎯 Cache HIT! Similarity: {similarity_score:.3f} | Tokens saved: {tokens_saved}")
                        print(f"   Original: '{row.query_text[:50]}...'")
                        print(f"   Current: '{query[:50]}...'")

                        return row.response_text

        except Exception as e:
            print(f"⚠️ Error checking query cache: {e}")

        self._log_cache_stat('query', 'miss')
        return None
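A quick standalone check of the distance-to-similarity conversion used above, with illustrative vectors:

```python
import numpy as np

def cosine_distance(a, b):
    # Matches the semantics of the <=> operator used in the query above.
    return 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

a = np.array([1.0, 0.0, 0.0])
b = np.array([0.9, 0.1, 0.0])
print(1 - cosine_distance(a, b) >= 0.85)  # True: close enough to share a cache entry
```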

    def cache_response(self, query: str, response: str, expense_data: List[Dict],
                       ai_service: str, prompt_tokens: int = 0, response_tokens: int = 0):
        """
        Cache a query response for future use.

        Args:
            query: Original user query
            response: AI response
            expense_data: Expense data context
            ai_service: AI service used
            prompt_tokens: Number of prompt tokens used
            response_tokens: Number of response tokens generated
        """
        query_hash = self._generate_hash(query)
        query_embedding = self._get_embedding_with_cache(query)
        if query_embedding is None:  # model unavailable - nothing to index the entry by
            return
        expense_hash = self._generate_hash(safe_json_dumps(expense_data, sort_keys=True))
        expires_at = datetime.utcnow() + timedelta(hours=self.cache_ttl_hours)

        try:
            with engine.connect() as conn:
                insert_query = text("""
                    INSERT INTO query_cache (
                        query_hash, query_text, query_embedding, response_text,
                        response_tokens, prompt_tokens, ai_service, expense_data_hash,
                        expires_at
                    ) VALUES (
                        :query_hash, :query_text, :query_embedding, :response_text,
                        :response_tokens, :prompt_tokens, :ai_service, :expense_hash,
                        :expires_at
                    )
                    ON CONFLICT (query_hash) DO UPDATE SET
                        response_text = EXCLUDED.response_text,
                        response_tokens = EXCLUDED.response_tokens,
                        prompt_tokens = EXCLUDED.prompt_tokens,
                        expires_at = EXCLUDED.expires_at,
                        hit_count = 0,
                        last_accessed = now()
                """)

                conn.execute(insert_query, {
                    'query_hash': query_hash,
                    'query_text': query,
                    'query_embedding': json.dumps(query_embedding.tolist()),
                    'response_text': response,
                    'response_tokens': response_tokens,
                    'prompt_tokens': prompt_tokens,
                    'ai_service': ai_service,
                    'expense_hash': expense_hash,
                    'expires_at': expires_at
                })
                conn.commit()

                self._log_cache_stat('query', 'write', details={
                    'query_length': len(query),
                    'response_length': len(response)
                })

        except Exception as e:
            print(f"⚠️ Error caching response: {e}")

    def get_cached_vector_search(self, query_embedding: np.ndarray, limit: int = 5) -> Optional[List[Dict]]:
        """Get cached vector search results."""
        embedding_hash = self._generate_hash(json.dumps(query_embedding.tolist()))

        cache_query = text("""
            SELECT search_results, access_count
            FROM vector_search_cache
            WHERE query_embedding_hash = :embedding_hash
              AND expires_at > now()
              AND result_count >= :limit
            ORDER BY created_at DESC
            LIMIT 1
        """)

        try:
            with engine.connect() as conn:
                result = conn.execute(cache_query, {
                    'embedding_hash': embedding_hash,
                    'limit': limit
                })
                row = result.fetchone()

                if row:
                    # Update access count
                    update_query = text("""
                        UPDATE vector_search_cache
                        SET access_count = access_count + 1
                        WHERE query_embedding_hash = :embedding_hash
                    """)
                    conn.execute(update_query, {'embedding_hash': embedding_hash})
                    conn.commit()

                    self._log_cache_stat('vector_search', 'hit', tokens_saved=50)
                    # search_results is already a list from JSONB, no need to parse
                    if isinstance(row.search_results, list):
                        return row.search_results[:limit]
                    else:
                        # Fallback: try to parse if it's a string
                        return json.loads(row.search_results)[:limit]

        except Exception as e:
            print(f"⚠️ Error reading vector search cache: {e}")

        self._log_cache_stat('vector_search', 'miss')
        return None

    def cache_vector_search_results(self, query_embedding: np.ndarray, results: List[Dict]):
        """Cache vector search results."""
        embedding_hash = self._generate_hash(json.dumps(query_embedding.tolist()))
        expires_at = datetime.utcnow() + timedelta(hours=self.cache_ttl_hours)

        try:
            with engine.connect() as conn:
                insert_query = text("""
                    INSERT INTO vector_search_cache (
                        query_embedding_hash, search_results, result_count,
                        similarity_threshold, expires_at
                    ) VALUES (
                        :embedding_hash, :results, :count, :threshold, :expires_at
                    )
                    ON CONFLICT (query_embedding_hash) DO UPDATE SET
                        search_results = EXCLUDED.search_results,
                        result_count = EXCLUDED.result_count,
                        expires_at = EXCLUDED.expires_at,
                        access_count = 0
                """)

                conn.execute(insert_query, {
                    'embedding_hash': embedding_hash,
                    'results': safe_json_dumps(results),
                    'count': len(results),
                    'threshold': self.similarity_threshold,
                    'expires_at': expires_at
                })
                conn.commit()

                self._log_cache_stat('vector_search', 'write')

        except Exception as e:
            print(f"⚠️ Error caching vector search: {e}")

    def _log_cache_stat(self, cache_type: str, operation: str, tokens_saved: int = 0, details: Dict = None):
        """Log cache statistics for monitoring."""
        try:
            with engine.connect() as conn:
                insert_query = text("""
                    INSERT INTO cache_stats (cache_type, operation, tokens_saved, details)
                    VALUES (:cache_type, :operation, :tokens_saved, :details)
                """)
                conn.execute(insert_query, {
                    'cache_type': cache_type,
                    'operation': operation,
                    'tokens_saved': tokens_saved,
                    'details': json.dumps(details) if details else None
                })
                conn.commit()
        except Exception as e:
            print(f"⚠️ Error logging cache stats: {e}")

    def get_cache_stats(self, hours: int = 24) -> Dict:
        """Get cache performance statistics."""
        # Bind :hours outside the string literal; INTERVAL ':hours hours' would
        # not be parameterized reliably.
        stats_query = text("""
            WITH cache_summary AS (
                SELECT
                    cache_type,
                    operation,
                    COUNT(*) as count,
                    SUM(tokens_saved) as total_tokens_saved
                FROM cache_stats
                WHERE timestamp >= now() - :hours * INTERVAL '1 hour'
                GROUP BY cache_type, operation
            )
            SELECT
                cache_type,
                SUM(CASE WHEN operation = 'hit' THEN count ELSE 0 END) as hits,
                SUM(CASE WHEN operation = 'miss' THEN count ELSE 0 END) as misses,
                SUM(CASE WHEN operation = 'write' THEN count ELSE 0 END) as writes,
                SUM(total_tokens_saved) as tokens_saved
            FROM cache_summary
            GROUP BY cache_type
            ORDER BY cache_type
        """)

        try:
            with engine.connect() as conn:
                result = conn.execute(stats_query, {'hours': hours})
                stats = {}
                total_tokens_saved = 0

                for row in result:
                    hit_rate = row.hits / (row.hits + row.misses) if (row.hits + row.misses) > 0 else 0
                    stats[row.cache_type] = {
                        'hits': row.hits,
                        'misses': row.misses,
                        'writes': row.writes,
                        'hit_rate': hit_rate,
                        'tokens_saved': row.tokens_saved
                    }
                    total_tokens_saved += float(row.tokens_saved or 0)

                stats['total_tokens_saved'] = total_tokens_saved
                return stats

        except Exception as e:
            print(f"⚠️ Error getting cache stats: {e}")
            return {}
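The returned dict nests per-cache counters plus a `total_tokens_saved` roll-up; for illustration, using the module-level instance defined below:

```python
stats = cache_manager.get_cache_stats(hours=24)
for cache_type, s in stats.items():
    if cache_type == "total_tokens_saved":
        continue
    print(f"{cache_type}: {s['hit_rate']:.0%} hit rate, {s['tokens_saved']} tokens saved")
print(f"total tokens saved: {stats['total_tokens_saved']}")
```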

    def cleanup_expired_cache(self):
        """Remove expired cache entries."""
        cleanup_queries = [
            "DELETE FROM query_cache WHERE expires_at < now()",
            "DELETE FROM insights_cache WHERE expires_at < now()",
            "DELETE FROM vector_search_cache WHERE expires_at < now()"
        ]

        try:
            with engine.connect() as conn:
                for query in cleanup_queries:
                    result = conn.execute(text(query))
                    print(f"🧹 Cleaned up {result.rowcount} expired cache entries")
                conn.commit()
        except Exception as e:
            print(f"⚠️ Error cleaning up cache: {e}")

# Global cache manager instance
cache_manager = BankoCacheManager()
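Nothing in the module schedules `cleanup_expired_cache` on its own; a minimal sketch of one way a host application might run it periodically (the interval and the use of `threading.Timer` are assumptions, not the package's behavior):

```python
import threading

def schedule_cache_cleanup(interval_seconds=3600):
    cache_manager.cleanup_expired_cache()
    # Re-arm; daemon=True so the timer never blocks interpreter shutdown.
    timer = threading.Timer(interval_seconds, schedule_cache_cleanup, args=(interval_seconds,))
    timer.daemon = True
    timer.start()

schedule_cache_cleanup()
```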