marqeta-diva-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,366 @@
1
+ """RAG tool implementations for semantic search and transaction analysis."""
2
+
3
+ import sys
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from .client import DiVAClient
7
+ from .embeddings import get_embedder
8
+ from .vector_store import get_vector_store
9
+ from .local_storage import get_storage
10
+
11
+
12
def sync_transactions(
    diva_client: DiVAClient,
    view_name: str = "authorizations",
    aggregation: str = "detail",
    filters: Optional[Dict[str, Any]] = None,
    max_records: Optional[int] = None,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Fetch transactions from DiVA and store in BOTH SQLite (full data) and ChromaDB (embeddings).

    This is the main function to populate local storage.

    Args:
        diva_client: DiVA API client
        view_name: DiVA view to query (e.g., 'authorizations')
        aggregation: Aggregation level
        filters: Transaction filters. For date filtering, use the actual date field name
            with operators. Example: {"transaction_timestamp": ">=2023-10-20"}
        max_records: Maximum number of records to sync (up to 10,000)
        **kwargs: Additional DiVA query parameters

    Returns:
        Sync statistics: success flag, message, synced count, and SQLite /
        vector-store stats. On an empty fetch, returns success=False with
        synced_count=0.
    """
    print(f"[Sync] Starting transaction sync from {view_name}...", file=sys.stderr)

    # Get all components
    embedder = get_embedder()
    vector_store = get_vector_store()
    storage = get_storage()

    # Build DiVA query parameters, dropping unset (None) values
    query_params = {
        k: v
        for k, v in {"filters": filters, **kwargs}.items()
        if v is not None
    }

    # Set count limit (DiVA API limit: 10,000 for JSON responses).
    # Use `is not None` so an explicit max_records=0 is honored rather than
    # being truthiness-coerced into "no limit".
    if max_records is not None:
        query_params["count"] = min(max_records, 10000)
    else:
        query_params["count"] = 10000  # Max per DiVA API

    print(f"[Sync] Fetching transactions (count={query_params['count']})...", file=sys.stderr)
    result = diva_client.get_view(view_name, aggregation, **query_params)

    all_transactions = list(result.get("records", []))

    # Truncate if we got more than max_records
    if max_records is not None and len(all_transactions) > max_records:
        all_transactions = all_transactions[:max_records]

    # Warn if there are more records available
    if result.get("is_more", False):
        print("[Sync] Warning: More records available but DiVA API does not support offset pagination.", file=sys.stderr)
        print(f"[Sync] Retrieved {len(all_transactions)} records. To get more data, use narrower date ranges or filters.", file=sys.stderr)

    if not all_transactions:
        return {
            "success": False,
            "message": "No transactions found matching the criteria",
            "synced_count": 0
        }

    print(f"[Sync] Fetched {len(all_transactions)} transactions.", file=sys.stderr)

    # 1. Store full data in SQLite
    print("[Sync] Storing full data in SQLite...", file=sys.stderr)
    storage_count = storage.add_transactions(all_transactions, view_name, aggregation)

    # 2. Generate embeddings and store in ChromaDB
    print("[Sync] Generating embeddings...", file=sys.stderr)
    embeddings = embedder.embed_transactions_batch(all_transactions)

    print("[Sync] Storing embeddings in ChromaDB...", file=sys.stderr)
    vector_count = vector_store.add_transactions(all_transactions, embeddings)

    # Get stats
    storage_stats = storage.get_stats()
    vector_stats = vector_store.get_stats()

    return {
        "success": True,
        "message": f"Successfully synced {storage_count} transactions",
        "synced_count": storage_count,
        "storage": {
            "total_transactions": storage_stats["total_transactions"],
            "database_size_mb": storage_stats["database_size_mb"]
        },
        "vector_store": {
            "total_indexed": vector_stats["count"],
            # Was computed but dropped before; surfaced as a backward-compatible extra key.
            "added_this_sync": vector_count
        },
        "view_name": view_name,
        "aggregation": aggregation
    }
115
+
116
+
117
# Deprecated name retained so existing callers keep working.
def index_transactions(*args, **kwargs):
    """Backward-compatible alias that delegates to sync_transactions."""
    result = sync_transactions(*args, **kwargs)
    return result
121
+
122
+
123
def semantic_search_transactions(
    diva_client: DiVAClient,
    query: str,
    n_results: int = 10,
    filters: Optional[Dict[str, Any]] = None,
    enrich: bool = True
) -> Dict[str, Any]:
    """
    Search for transactions using natural language semantic search.

    Full transaction data is pulled from local SQLite storage, so there are
    no token limits and no extra API round-trips.

    Args:
        diva_client: DiVA API client (not used if enrich=True, kept for compatibility)
        query: Natural language search query (e.g., "coffee shop purchases")
        n_results: Number of results to return
        filters: Metadata filters (e.g., {"transaction_amount": {"$gt": 10}})
        enrich: If True, fetch full transaction details from local SQLite storage

    Returns:
        Search results with similarity scores and full transaction data
    """
    print(f"[Search] Semantic search: '{query}'", file=sys.stderr)

    embedder = get_embedder()
    vector_store = get_vector_store()
    storage = get_storage()

    # Embed the query, then run the vector search; hits carry transaction
    # tokens plus similarity scores.
    hits = vector_store.search(
        query_embedding=embedder.embed_query(query),
        n_results=n_results,
        where=filters
    )

    if hits["count"] == 0:
        return {
            "success": True,
            "query": query,
            "count": 0,
            "transactions": [],
            "message": "No matching transactions found. Try syncing more transactions first."
        }

    # Attach full transaction data from LOCAL storage (never the DiVA API).
    if enrich and hits["transactions"]:
        tokens = [hit["transaction_token"] for hit in hits["transactions"]]

        print(f"[Search] Fetching full data for {len(tokens)} transactions from local storage...", file=sys.stderr)

        # SQLite lookup: fast, no API calls, no token limits.
        by_token = {
            row["transaction_token"]: row
            for row in storage.get_transactions(tokens)
        }

        for hit in hits["transactions"]:
            token = hit["transaction_token"]
            full = by_token.get(token)
            if full is not None:
                hit["full_transaction"] = full
            else:
                print(f"[Search] Warning: Transaction {token} not found in local storage", file=sys.stderr)

    return {
        "success": True,
        "query": query,
        "count": hits["count"],
        "transactions": hits["transactions"],
        "note": "Full transaction data retrieved from local storage (no API calls)"
    }
200
+
201
+
202
def find_similar_transactions(
    diva_client: DiVAClient,
    transaction_token: str,
    n_results: int = 10,
    filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Find transactions similar to a given transaction.

    Args:
        diva_client: DiVA API client
        transaction_token: Token of the reference transaction
        n_results: Number of similar transactions to return
        filters: Additional metadata filters

    Returns:
        Similar transactions with similarity scores
    """
    print(f"[RAG] Finding similar transactions to {transaction_token}", file=sys.stderr)

    vector_store = get_vector_store()

    # Look up the stored embedding for the reference transaction.
    reference = vector_store.get_by_id(transaction_token)

    if not reference or not reference.get("embedding"):
        return {
            "success": False,
            "message": f"Transaction {transaction_token} not found in vector store. Index it first.",
            "transaction_token": transaction_token
        }

    # Over-fetch by one: the reference transaction matches itself perfectly
    # and will appear in its own results.
    search_results = vector_store.search(
        query_embedding=reference["embedding"],
        n_results=n_results + 1,
        where=filters
    )

    # Drop the reference itself, keep at most n_results neighbors.
    neighbors = []
    for candidate in search_results["transactions"]:
        if candidate["transaction_token"] == transaction_token:
            continue
        neighbors.append(candidate)
    neighbors = neighbors[:n_results]

    return {
        "success": True,
        "reference_transaction": transaction_token,
        "reference_metadata": reference.get("metadata", {}),
        "count": len(neighbors),
        "similar_transactions": neighbors
    }
255
+
256
+
257
def query_local_transactions(
    filters: Optional[Dict[str, Any]] = None,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "created_time DESC"
) -> Dict[str, Any]:
    """
    Query transactions directly from local SQLite storage.

    No API calls, no token limits, full transaction data.

    Args:
        filters: Dictionary of filters (e.g., {"merchant_name": "Starbucks", "transaction_amount": {">": 10}})
        limit: Maximum number of results (default: 100)
        offset: Offset for pagination (default: 0)
        order_by: SQL ORDER BY clause (default: "created_time DESC")

    Returns:
        Query results with full transaction data
    """
    print(f"[Query] Querying local storage with filters: {filters}", file=sys.stderr)

    # NOTE(review): order_by is forwarded as a raw SQL fragment — confirm that
    # storage.query_transactions validates/whitelists it before interpolating
    # into a query (potential SQL-injection surface otherwise).
    rows = get_storage().query_transactions(
        filters=filters,
        limit=limit,
        offset=offset,
        order_by=order_by
    )

    return {
        "success": True,
        **rows,
        "note": "Data retrieved from local SQLite storage (no API calls, no token limits)"
    }
292
+
293
+
294
def get_storage_stats() -> Dict[str, Any]:
    """
    Get comprehensive statistics about local storage (SQLite + ChromaDB).

    Returns:
        Storage statistics including counts, sizes, and status
    """
    # Acquire singletons in the same order as elsewhere in this module.
    storage = get_storage()
    vector_store = get_vector_store()
    embedder = get_embedder()

    return {
        "success": True,
        "sqlite_storage": storage.get_stats(),
        "vector_store": vector_store.get_stats(),
        "embedding_model": {
            "name": embedder.model_name,
            "dimension": embedder.embedding_dim
        },
        "note": "Local storage eliminates token limits and API dependency"
    }
318
+
319
+
320
# Deprecated name retained so existing callers keep working.
def get_index_stats():
    """Backward-compatible alias that delegates to get_storage_stats."""
    stats = get_storage_stats()
    return stats
324
+
325
+
326
def clear_local_storage(clear_sqlite: bool = True, clear_vector_store: bool = True) -> Dict[str, Any]:
    """
    Clear data from local storage (SQLite and/or ChromaDB).

    Args:
        clear_sqlite: If True, clear SQLite database
        clear_vector_store: If True, clear ChromaDB vector store

    Returns:
        Confirmation of clearing
    """
    print("[Clear] Clearing local storage...", file=sys.stderr)

    outcome: Dict[str, Any] = {}

    if clear_sqlite:
        deleted = get_storage().clear()
        outcome["sqlite"] = {
            "cleared": True,
            "transactions_deleted": deleted
        }

    if clear_vector_store:
        get_vector_store().clear()
        outcome["vector_store"] = {
            "cleared": True
        }

    return {
        "success": True,
        "message": "Local storage cleared successfully",
        **outcome
    }
361
+
362
+
363
# Deprecated name retained so existing callers keep working.
def clear_index():
    """Backward-compatible alias that wipes both SQLite and ChromaDB."""
    return clear_local_storage(clear_sqlite=True, clear_vector_store=True)