marqeta-diva-mcp 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marqeta_diva_mcp/__init__.py +3 -0
- marqeta_diva_mcp/__main__.py +6 -0
- marqeta_diva_mcp/client.py +471 -0
- marqeta_diva_mcp/embeddings.py +131 -0
- marqeta_diva_mcp/local_storage.py +348 -0
- marqeta_diva_mcp/rag_tools.py +366 -0
- marqeta_diva_mcp/server.py +940 -0
- marqeta_diva_mcp/vector_store.py +274 -0
- marqeta_diva_mcp-0.2.0.dist-info/METADATA +515 -0
- marqeta_diva_mcp-0.2.0.dist-info/RECORD +13 -0
- marqeta_diva_mcp-0.2.0.dist-info/WHEEL +4 -0
- marqeta_diva_mcp-0.2.0.dist-info/entry_points.txt +2 -0
- marqeta_diva_mcp-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""RAG tool implementations for semantic search and transaction analysis."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
|
5
|
+
|
|
6
|
+
from .client import DiVAClient
|
|
7
|
+
from .embeddings import get_embedder
|
|
8
|
+
from .vector_store import get_vector_store
|
|
9
|
+
from .local_storage import get_storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def sync_transactions(
    diva_client: DiVAClient,
    view_name: str = "authorizations",
    aggregation: str = "detail",
    filters: Optional[Dict[str, Any]] = None,
    max_records: Optional[int] = None,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Fetch transactions from DiVA and store in BOTH SQLite (full data) and ChromaDB (embeddings).
    This is the main function to populate local storage.

    Args:
        diva_client: DiVA API client
        view_name: DiVA view to query (e.g., 'authorizations')
        aggregation: Aggregation level
        filters: Transaction filters. For date filtering, use the actual date field name
                 with operators. Example: {"transaction_timestamp": ">=2023-10-20"}
        max_records: Maximum number of records to sync (up to 10,000)
        **kwargs: Additional DiVA query parameters

    Returns:
        Sync statistics (counts plus SQLite/ChromaDB stats), or a
        success=False payload when no transactions matched.
    """
    print(f"[Sync] Starting transaction sync from {view_name}...", file=sys.stderr)

    # Get all components
    embedder = get_embedder()
    vector_store = get_vector_store()
    storage = get_storage()

    # Build query params, dropping None values so server-side defaults apply.
    query_params = {
        k: v for k, v in {"filters": filters, **kwargs}.items() if v is not None
    }

    # DiVA JSON responses are capped at 10,000 records per request.
    query_params["count"] = min(max_records, 10000) if max_records else 10000

    print(f"[Sync] Fetching transactions (count={query_params['count']})...", file=sys.stderr)
    result = diva_client.get_view(view_name, aggregation, **query_params)

    all_transactions = list(result.get("records") or [])

    # Truncate if the API returned more than the caller asked for.
    if max_records and len(all_transactions) > max_records:
        all_transactions = all_transactions[:max_records]

    # Warn if there are more records available (no offset pagination in DiVA).
    if result.get("is_more", False):
        print("[Sync] Warning: More records available but DiVA API does not support offset pagination.", file=sys.stderr)
        print(f"[Sync] Retrieved {len(all_transactions)} records. To get more data, use narrower date ranges or filters.", file=sys.stderr)

    if not all_transactions:
        return {
            "success": False,
            "message": "No transactions found matching the criteria",
            "synced_count": 0
        }

    print(f"[Sync] Fetched {len(all_transactions)} transactions.", file=sys.stderr)

    # 1. Store full data in SQLite (source of truth for enrichment later).
    print("[Sync] Storing full data in SQLite...", file=sys.stderr)
    storage_count = storage.add_transactions(all_transactions, view_name, aggregation)

    # 2. Generate embeddings and store in ChromaDB for semantic search.
    print("[Sync] Generating embeddings...", file=sys.stderr)
    embeddings = embedder.embed_transactions_batch(all_transactions)

    print("[Sync] Storing embeddings in ChromaDB...", file=sys.stderr)
    vector_count = vector_store.add_transactions(all_transactions, embeddings)

    # Report post-sync state of both stores.
    storage_stats = storage.get_stats()
    vector_stats = vector_store.get_stats()

    return {
        "success": True,
        "message": f"Successfully synced {storage_count} transactions",
        "synced_count": storage_count,
        "storage": {
            "total_transactions": storage_stats["total_transactions"],
            "database_size_mb": storage_stats["database_size_mb"]
        },
        "vector_store": {
            "total_indexed": vector_stats["count"]
        },
        "view_name": view_name,
        "aggregation": aggregation
    }
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Keep old name for backward compatibility, but point to new function
|
|
118
|
+
def index_transactions(*args, **kwargs):
    """Backward-compatible alias that delegates directly to sync_transactions."""
    return sync_transactions(*args, **kwargs)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def semantic_search_transactions(
    diva_client: DiVAClient,
    query: str,
    n_results: int = 10,
    filters: Optional[Dict[str, Any]] = None,
    enrich: bool = True
) -> Dict[str, Any]:
    """
    Natural-language semantic search over locally indexed transactions.

    Full transaction payloads come from the local SQLite store, so results
    are not subject to API token limits.

    Args:
        diva_client: DiVA API client (unused when enrich=True; kept for compatibility)
        query: Natural language search query (e.g., "coffee shop purchases")
        n_results: Number of results to return
        filters: Metadata filters (e.g., {"transaction_amount": {"$gt": 10}})
        enrich: If True, attach full transaction details from local SQLite storage

    Returns:
        Search results with similarity scores and full transaction data
    """
    print(f"[Search] Semantic search: '{query}'", file=sys.stderr)

    embedder = get_embedder()
    store = get_vector_store()
    local_db = get_storage()

    # Embed the query once, then look up nearest neighbours in ChromaDB.
    hits = store.search(
        query_embedding=embedder.embed_query(query),
        n_results=n_results,
        where=filters
    )

    if hits["count"] == 0:
        return {
            "success": True,
            "query": query,
            "count": 0,
            "transactions": [],
            "message": "No matching transactions found. Try syncing more transactions first."
        }

    # Attach full records from local SQLite — no DiVA API round-trips.
    if enrich and hits["transactions"]:
        tokens = [hit["transaction_token"] for hit in hits["transactions"]]

        print(f"[Search] Fetching full data for {len(tokens)} transactions from local storage...", file=sys.stderr)

        by_token = {
            record["transaction_token"]: record
            for record in local_db.get_transactions(tokens)
        }

        for hit in hits["transactions"]:
            token = hit["transaction_token"]
            full = by_token.get(token)
            if full is not None:
                hit["full_transaction"] = full
            else:
                print(f"[Search] Warning: Transaction {token} not found in local storage", file=sys.stderr)

    return {
        "success": True,
        "query": query,
        "count": hits["count"],
        "transactions": hits["transactions"],
        "note": "Full transaction data retrieved from local storage (no API calls)"
    }
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def find_similar_transactions(
    diva_client: DiVAClient,
    transaction_token: str,
    n_results: int = 10,
    filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Find transactions similar to a given, already-indexed transaction.

    Args:
        diva_client: DiVA API client
        transaction_token: Token of the reference transaction
        n_results: Number of similar transactions to return
        filters: Additional metadata filters

    Returns:
        Similar transactions with similarity scores, or a success=False
        payload when the reference is not in the vector store.
    """
    print(f"[RAG] Finding similar transactions to {transaction_token}", file=sys.stderr)

    store = get_vector_store()

    # The reference must already be indexed so we can reuse its embedding.
    reference = store.get_by_id(transaction_token)
    if not reference or not reference.get("embedding"):
        return {
            "success": False,
            "message": f"Transaction {transaction_token} not found in vector store. Index it first.",
            "transaction_token": transaction_token
        }

    # Ask for one extra hit: the reference matches itself and is dropped below.
    hits = store.search(
        query_embedding=reference["embedding"],
        n_results=n_results + 1,
        where=filters
    )

    neighbours = [
        hit for hit in hits["transactions"]
        if hit["transaction_token"] != transaction_token
    ][:n_results]

    return {
        "success": True,
        "reference_transaction": transaction_token,
        "reference_metadata": reference.get("metadata", {}),
        "count": len(neighbours),
        "similar_transactions": neighbours
    }
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def query_local_transactions(
    filters: Optional[Dict[str, Any]] = None,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "created_time DESC"
) -> Dict[str, Any]:
    """
    Query transactions directly from local SQLite storage.
    No API calls, no token limits, full transaction data.

    Args:
        filters: Dictionary of filters (e.g., {"merchant_name": "Starbucks", "transaction_amount": {">": 10}})
        limit: Maximum number of results (default: 100)
        offset: Offset for pagination (default: 0)
        order_by: SQL ORDER BY clause (default: "created_time DESC")

    Returns:
        Query results with full transaction data
    """
    print(f"[Query] Querying local storage with filters: {filters}", file=sys.stderr)

    # NOTE(review): order_by is forwarded as a raw SQL fragment — presumably
    # only trusted callers supply it; verify query_transactions sanitizes it.
    rows = get_storage().query_transactions(
        filters=filters,
        limit=limit,
        offset=offset,
        order_by=order_by
    )

    return {
        "success": True,
        **rows,
        "note": "Data retrieved from local SQLite storage (no API calls, no token limits)"
    }
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def get_storage_stats() -> Dict[str, Any]:
    """
    Report combined statistics for the local stores (SQLite + ChromaDB)
    plus the embedding model in use.

    Returns:
        Storage statistics including counts, sizes, and status
    """
    # Acquire components in the same order the rest of the module uses.
    local_db = get_storage()
    store = get_vector_store()
    model = get_embedder()

    return {
        "success": True,
        "sqlite_storage": local_db.get_stats(),
        "vector_store": store.get_stats(),
        "embedding_model": {
            "name": model.model_name,
            "dimension": model.embedding_dim
        },
        "note": "Local storage eliminates token limits and API dependency"
    }
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# Keep old name for backward compatibility
|
|
321
|
+
def get_index_stats():
    """Backward-compatible alias that delegates directly to get_storage_stats."""
    return get_storage_stats()
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def clear_local_storage(clear_sqlite: bool = True, clear_vector_store: bool = True) -> Dict[str, Any]:
    """
    Clear data from local storage (SQLite and/or ChromaDB).

    Args:
        clear_sqlite: If True, clear SQLite database
        clear_vector_store: If True, clear ChromaDB vector store

    Returns:
        Confirmation of clearing
    """
    print("[Clear] Clearing local storage...", file=sys.stderr)

    outcome: Dict[str, Any] = {
        "success": True,
        "message": "Local storage cleared successfully"
    }

    if clear_sqlite:
        deleted = get_storage().clear()
        outcome["sqlite"] = {
            "cleared": True,
            "transactions_deleted": deleted
        }

    if clear_vector_store:
        get_vector_store().clear()
        outcome["vector_store"] = {"cleared": True}

    return outcome
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# Keep old name for backward compatibility
|
|
364
|
+
def clear_index():
    """Backward-compatible alias: clears both SQLite and the vector store."""
    return clear_local_storage(clear_sqlite=True, clear_vector_store=True)
|