ragfallback 2.1.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {ragfallback-2.1.0/ragfallback.egg-info → ragfallback-2.2.0}/PKG-INFO +1 -1
  2. {ragfallback-2.1.0 → ragfallback-2.2.0}/pyproject.toml +1 -1
  3. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/__init__.py +3 -1
  4. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/core/adaptive_retriever.py +281 -4
  5. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/golden_runner.py +90 -15
  6. ragfallback-2.2.0/ragfallback/tracking/__init__.py +16 -0
  7. ragfallback-2.2.0/ragfallback/tracking/cache_monitor.py +245 -0
  8. {ragfallback-2.1.0 → ragfallback-2.2.0/ragfallback.egg-info}/PKG-INFO +1 -1
  9. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback.egg-info/SOURCES.txt +3 -0
  10. {ragfallback-2.1.0 → ragfallback-2.2.0}/requirements-dev.txt +1 -0
  11. ragfallback-2.2.0/tests/unit/test_async_retriever.py +160 -0
  12. ragfallback-2.2.0/tests/unit/test_cache_monitor.py +189 -0
  13. ragfallback-2.1.0/ragfallback/tracking/__init__.py +0 -15
  14. {ragfallback-2.1.0 → ragfallback-2.2.0}/INSTALL_AND_RUN.md +0 -0
  15. {ragfallback-2.1.0 → ragfallback-2.2.0}/LICENSE +0 -0
  16. {ragfallback-2.1.0 → ragfallback-2.2.0}/MANIFEST.in +0 -0
  17. {ragfallback-2.1.0 → ragfallback-2.2.0}/README.md +0 -0
  18. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/_kb_common.py +0 -0
  19. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/build_golden_dataset.py +0 -0
  20. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/chroma_real_kb_demo.py +0 -0
  21. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/ci_regression_gate.py +0 -0
  22. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/financial_risk_analysis.py +0 -0
  23. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/legal_document_analysis.py +0 -0
  24. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/medical_research_synthesis.py +0 -0
  25. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/mlops_demo.py +0 -0
  26. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/production_reliability_example.py +0 -0
  27. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/qdrant_local_demo.py +0 -0
  28. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/real_data_demo.py +0 -0
  29. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc10_metadata_sanitizer.py +0 -0
  30. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc1_retrieval_health.py +0 -0
  31. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc2_embedding_guard.py +0 -0
  32. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc3_chunk_quality.py +0 -0
  33. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc4_context_window.py +0 -0
  34. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc5_hybrid_failover.py +0 -0
  35. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc6_adaptive_rag.py +0 -0
  36. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc6_multi_hop_demo.py +0 -0
  37. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc7_rag_evaluator.py +0 -0
  38. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc8_context_stitcher.py +0 -0
  39. {ragfallback-2.1.0 → ragfallback-2.2.0}/examples/uc9_embedding_probe.py +0 -0
  40. {ragfallback-2.1.0 → ragfallback-2.2.0}/pytest.ini +0 -0
  41. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/core/__init__.py +0 -0
  42. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/__init__.py +0 -0
  43. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/chunking.py +0 -0
  44. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/context_stitcher.py +0 -0
  45. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/context_window.py +0 -0
  46. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/embedding_guard.py +0 -0
  47. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/embedding_probe.py +0 -0
  48. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/embedding_validator.py +0 -0
  49. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/retrieval_health.py +0 -0
  50. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/schema_sanitizer.py +0 -0
  51. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/diagnostics/stale_index.py +0 -0
  52. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/evaluation/__init__.py +0 -0
  53. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/evaluation/rag_evaluator.py +0 -0
  54. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/exceptions.py +0 -0
  55. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/__init__.py +0 -0
  56. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/baseline_registry.py +0 -0
  57. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/locust_template.py +0 -0
  58. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/mlflow_logger.py +0 -0
  59. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/query_simulator.py +0 -0
  60. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/mlops/ragas_hook.py +0 -0
  61. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/py.typed +0 -0
  62. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/retrieval/__init__.py +0 -0
  63. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/retrieval/failover.py +0 -0
  64. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/retrieval/rerank_guard.py +0 -0
  65. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/retrieval/smart_hybrid.py +0 -0
  66. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/retrieval/wrappers.py +0 -0
  67. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/strategies/__init__.py +0 -0
  68. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/strategies/base.py +0 -0
  69. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/strategies/multi_hop.py +0 -0
  70. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/strategies/query_variations.py +0 -0
  71. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/tracking/cost_tracker.py +0 -0
  72. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/tracking/metrics.py +0 -0
  73. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/__init__.py +0 -0
  74. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/confidence_scorer.py +0 -0
  75. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/embedding_factory.py +0 -0
  76. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/env.py +0 -0
  77. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/llm_factory.py +0 -0
  78. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback/utils/vector_store_factory.py +0 -0
  79. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback.egg-info/dependency_links.txt +0 -0
  80. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback.egg-info/requires.txt +0 -0
  81. {ragfallback-2.1.0 → ragfallback-2.2.0}/ragfallback.egg-info/top_level.txt +0 -0
  82. {ragfallback-2.1.0 → ragfallback-2.2.0}/setup.cfg +0 -0
  83. {ragfallback-2.1.0 → ragfallback-2.2.0}/setup.py +0 -0
  84. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/__init__.py +0 -0
  85. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/conftest.py +0 -0
  86. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/integration/__init__.py +0 -0
  87. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/integration/test_adaptive_workflow.py +0 -0
  88. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/integration/test_chroma_pipeline.py +0 -0
  89. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/__init__.py +0 -0
  90. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_adaptive_multi_hop_bridge.py +0 -0
  91. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_confidence_scorer.py +0 -0
  92. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_cost_tracker.py +0 -0
  93. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_diagnostics.py +0 -0
  94. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_hybrid_retrieval.py +0 -0
  95. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_metrics.py +0 -0
  96. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_multi_hop.py +0 -0
  97. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_query_variations.py +0 -0
  98. {ragfallback-2.1.0 → ragfallback-2.2.0}/tests/unit/test_retrieval.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragfallback
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Summary: Prevents silent RAG failures — chunk quality, retrieval fallback, adaptive querying, and answer evaluation in one library.
5
5
  Home-page: https://github.com/irfanalidv/ragfallback
6
6
  Author: Irfan Ali
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ragfallback"
7
- version = "2.1.0"
7
+ version = "2.2.0"
8
8
  description = "Prevents silent RAG failures — chunk quality, retrieval fallback, adaptive querying, and answer evaluation in one library."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -9,16 +9,18 @@ This module exposes a small curated shortcut only (see ``__all__``).
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- __version__ = "2.1.0"
12
+ __version__ = "2.2.0"
13
13
  __author__ = "Irfan Ali"
14
14
 
15
15
  from ragfallback.core.adaptive_retriever import AdaptiveRAGRetriever, QueryResult
16
+ from ragfallback.tracking.cache_monitor import CacheMonitor
16
17
  from ragfallback.tracking.cost_tracker import CostTracker
17
18
  from ragfallback.tracking.metrics import MetricsCollector
18
19
 
19
20
  __all__ = [
20
21
  "AdaptiveRAGRetriever",
21
22
  "QueryResult",
23
+ "CacheMonitor",
22
24
  "CostTracker",
23
25
  "MetricsCollector",
24
26
  ]
@@ -1,11 +1,15 @@
1
1
  """Retriever wrapper that retries failed or low-confidence queries using pluggable strategies."""
2
2
 
3
- from typing import List, Optional, Dict, Any, Tuple
4
- from dataclasses import dataclass
5
- import logging
6
- import time
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import functools
7
7
  import json
8
+ import logging
8
9
  import re
10
+ import time
11
+ from dataclasses import dataclass
12
+ from typing import Any, Dict, List, Optional, Tuple
9
13
 
10
14
  from langchain_core.vectorstores import VectorStore
11
15
  from langchain_core.language_models import BaseLanguageModel
@@ -270,6 +274,279 @@ Return your answer in JSON format: {"answer": "...", "source": "..."}"""
270
274
  intermediate_steps=intermediate_steps if return_intermediate_steps else None
271
275
  )
272
276
 
277
+ async def aquery_with_fallback(
278
+ self,
279
+ question: str,
280
+ context: Optional[Dict[str, Any]] = None,
281
+ return_intermediate_steps: bool = False,
282
+ enforce_budget: bool = False,
283
+ ) -> QueryResult:
284
+ """Async mirror of :meth:`query_with_fallback` using LangChain ``ainvoke``.
285
+
286
+ Uses ``ainvoke`` on the retriever and LLM so concurrent callers can overlap
287
+ I/O-bound work. Falls back transparently to a thread-pool for objects that
288
+ do not implement ``ainvoke``.
289
+
290
+ Args:
291
+ question: The question to answer.
292
+ context: Optional metadata filters / context dict.
293
+ return_intermediate_steps: Include all attempt dicts in the result.
294
+ enforce_budget: Stop early if the cost budget is exceeded.
295
+
296
+ Returns:
297
+ :class:`QueryResult` with the same fields as the sync version.
298
+ """
299
+ context = context or {}
300
+ intermediate_steps: List[Dict[str, Any]] = []
301
+ total_cost = 0.0
302
+ loop = asyncio.get_event_loop()
303
+ start = loop.time()
304
+
305
+ for strategy_idx, strategy in enumerate(self.strategies):
306
+ if strategy_idx >= self.max_attempts:
307
+ break
308
+
309
+ if enforce_budget and self.cost_tracker.budget_exceeded():
310
+ if self.logger:
311
+ self.logger.warning("Budget exceeded, stopping fallback attempts")
312
+ break
313
+
314
+ attempt_num = strategy_idx + 1
315
+
316
+ # Delegate multi-hop strategies to thread pool (they have no async API)
317
+ if callable(getattr(strategy, "run", None)):
318
+ if self.logger:
319
+ self.logger.debug(
320
+ "strategy %s has run() — delegating to thread pool",
321
+ strategy.__class__.__name__,
322
+ )
323
+ retriever = self.vector_store.as_retriever()
324
+ hop_result = await loop.run_in_executor(
325
+ None,
326
+ functools.partial(
327
+ strategy.run,
328
+ question=question,
329
+ retriever=retriever,
330
+ llm=self.llm,
331
+ ),
332
+ )
333
+ step_data: Dict[str, Any] = {
334
+ "attempt": attempt_num,
335
+ "query": question,
336
+ "strategy": "multi_hop",
337
+ "hops": hop_result.total_hops,
338
+ "answer": hop_result.final_answer,
339
+ "confidence": 0.85 if hop_result.success else 0.0,
340
+ "cost": 0.0,
341
+ }
342
+ intermediate_steps.append(step_data)
343
+ if hop_result.success and hop_result.final_answer:
344
+ latency_ms = (loop.time() - start) * 1000
345
+ self.metrics_collector.record_success(
346
+ attempts=attempt_num,
347
+ confidence=0.85,
348
+ cost=total_cost,
349
+ latency_ms=latency_ms,
350
+ strategy_used="multi_hop",
351
+ )
352
+ return QueryResult(
353
+ answer=hop_result.final_answer,
354
+ source="multi_hop",
355
+ confidence=0.85,
356
+ attempts=attempt_num,
357
+ cost=total_cost,
358
+ intermediate_steps=intermediate_steps if return_intermediate_steps else None,
359
+ )
360
+ continue
361
+
362
+ queries = strategy.generate_queries(
363
+ original_query=question,
364
+ context=context,
365
+ attempt=strategy_idx + 1,
366
+ llm=self.llm,
367
+ )
368
+
369
+ for query_idx, query in enumerate(queries):
370
+ attempt_num = strategy_idx * len(queries) + query_idx + 1
371
+
372
+ if attempt_num > self.max_attempts:
373
+ break
374
+
375
+ if self.logger:
376
+ self.logger.info(
377
+ "async attempt %d/%d: %s", attempt_num, self.max_attempts, query[:100]
378
+ )
379
+
380
+ docs = await self._aretrieve_documents(query, context)
381
+
382
+ if not docs:
383
+ if self.logger:
384
+ self.logger.warning("no documents found for query: %s", query)
385
+ intermediate_steps.append(
386
+ {"attempt": attempt_num, "query": query,
387
+ "documents": 0, "confidence": 0.0, "cost": 0.0}
388
+ )
389
+ continue
390
+
391
+ answer, source, confidence, cost = await self._agenerate_answer(
392
+ question=question,
393
+ query=query,
394
+ documents=docs,
395
+ context=context,
396
+ )
397
+
398
+ total_cost += cost
399
+ latency_ms = (loop.time() - start) * 1000
400
+
401
+ step_data = {
402
+ "attempt": attempt_num,
403
+ "query": query,
404
+ "documents": len(docs),
405
+ "answer": answer,
406
+ "source": source,
407
+ "confidence": confidence,
408
+ "cost": cost,
409
+ }
410
+ intermediate_steps.append(step_data)
411
+
412
+ if confidence >= self.min_confidence and answer.lower() not in [
413
+ "x", "not found", "n/a", "unknown"
414
+ ]:
415
+ if self.logger:
416
+ self.logger.debug(
417
+ "async attempt %d succeeded (confidence %.2f)",
418
+ attempt_num,
419
+ confidence,
420
+ )
421
+ self.metrics_collector.record_success(
422
+ attempts=attempt_num,
423
+ confidence=confidence,
424
+ cost=total_cost,
425
+ latency_ms=latency_ms,
426
+ strategy_used=strategy.get_name(),
427
+ )
428
+ return QueryResult(
429
+ answer=answer,
430
+ source=source,
431
+ confidence=confidence,
432
+ attempts=attempt_num,
433
+ cost=total_cost,
434
+ intermediate_steps=intermediate_steps if return_intermediate_steps else None,
435
+ )
436
+
437
+ latency_ms = (loop.time() - start) * 1000
438
+
439
+ if self.logger:
440
+ self.logger.warning(
441
+ "all %d async attempts exhausted without meeting confidence threshold",
442
+ len(intermediate_steps),
443
+ )
444
+
445
+ if intermediate_steps:
446
+ best_attempt = max(intermediate_steps, key=lambda x: x.get("confidence", 0.0))
447
+ best_answer = best_attempt.get("answer", "No answer found")
448
+ best_source = best_attempt.get("source", "")
449
+ best_confidence = best_attempt.get("confidence", 0.0)
450
+ else:
451
+ best_answer = "No answer found"
452
+ best_source = ""
453
+ best_confidence = 0.0
454
+
455
+ self.metrics_collector.record_failure(
456
+ attempts=len(intermediate_steps),
457
+ cost=total_cost,
458
+ latency_ms=latency_ms,
459
+ strategy_used=self.strategies[0].get_name() if self.strategies else "unknown",
460
+ )
461
+
462
+ return QueryResult(
463
+ answer=best_answer,
464
+ source=best_source,
465
+ confidence=best_confidence,
466
+ attempts=len(intermediate_steps) or 1,
467
+ cost=total_cost,
468
+ intermediate_steps=intermediate_steps if return_intermediate_steps else None,
469
+ )
470
+
471
+ async def _aretrieve_documents(
472
+ self, query: str, context: Dict[str, Any]
473
+ ) -> List[Any]:
474
+ """Async document retrieval; falls back to thread pool if ``ainvoke`` absent."""
475
+ try:
476
+ search_kwargs = self._build_search_kwargs(context)
477
+ retriever = self.vector_store.as_retriever(search_kwargs=search_kwargs)
478
+ ainvoke = getattr(retriever, "ainvoke", None)
479
+ if ainvoke is not None:
480
+ result = await ainvoke(query)
481
+ return list(result or [])
482
+ # Fall back: run sync invoke in executor
483
+ loop = asyncio.get_event_loop()
484
+ invoke = getattr(retriever, "invoke", retriever.get_relevant_documents)
485
+ return list(
486
+ await loop.run_in_executor(None, functools.partial(invoke, query)) or []
487
+ )
488
+ except Exception as exc:
489
+ if self.logger:
490
+ self.logger.error("async retrieve error: %s", exc)
491
+ return []
492
+
493
+ async def _agenerate_answer(
494
+ self,
495
+ question: str,
496
+ query: str,
497
+ documents: List[Any],
498
+ context: Dict[str, Any],
499
+ ) -> Tuple[str, str, float, float]:
500
+ """Async answer generation; falls back to thread pool if ``ainvoke`` absent."""
501
+ docs_text = self._format_documents(documents)
502
+ prompt = self._build_answer_prompt(question, docs_text, context)
503
+ loop = asyncio.get_event_loop()
504
+
505
+ with self.cost_tracker.track(operation="answer_generation"):
506
+ messages = [
507
+ SystemMessage(content=self.answer_prompt_template),
508
+ HumanMessage(content=prompt),
509
+ ]
510
+ try:
511
+ ainvoke = getattr(self.llm, "ainvoke", None)
512
+ if ainvoke is not None:
513
+ response = await ainvoke(messages)
514
+ else:
515
+ response = await loop.run_in_executor(
516
+ None, functools.partial(self.llm.invoke, messages)
517
+ )
518
+ except AttributeError:
519
+ response = await loop.run_in_executor(
520
+ None, functools.partial(self.llm.invoke, messages)
521
+ )
522
+
523
+ answer_text = response.content if hasattr(response, "content") else str(response)
524
+
525
+ if hasattr(response, "response_metadata"):
526
+ metadata = response.response_metadata
527
+ if "token_usage" in metadata:
528
+ usage = metadata["token_usage"]
529
+ self.cost_tracker.record_tokens(
530
+ input_tokens=usage.get("prompt_tokens", 0),
531
+ output_tokens=usage.get("completion_tokens", 0),
532
+ model=getattr(self.llm, "model_name", "gpt-4"),
533
+ )
534
+
535
+ answer, source = self._parse_answer(answer_text)
536
+ scorer = ConfidenceScorer(llm=self.llm)
537
+ confidence = await loop.run_in_executor(
538
+ None,
539
+ functools.partial(
540
+ scorer.score,
541
+ question=question,
542
+ answer=answer,
543
+ documents=documents,
544
+ context=context,
545
+ ),
546
+ )
547
+ cost = self.cost_tracker.get_last_cost()
548
+ return answer, source, confidence, cost
549
+
273
550
  def _retrieve_documents(
274
551
  self,
275
552
  query: str,
@@ -3,12 +3,14 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
+ import functools
6
7
  import json
8
+ import logging
7
9
  import time
8
- from dataclasses import dataclass
10
+ from dataclasses import dataclass, field
9
11
  from datetime import datetime
10
12
  from pathlib import Path
11
- from typing import Any, Dict, List, Sequence, Set, Union
13
+ from typing import Any, Dict, List, Optional, Sequence, Set, Union
12
14
 
13
15
  import numpy as np
14
16
 
@@ -16,6 +18,8 @@ from ragfallback.core.adaptive_retriever import AdaptiveRAGRetriever, QueryResul
16
18
  from ragfallback.evaluation import recall_at_k
17
19
  from ragfallback.mlops.ragas_hook import RagasHook, RagasReport
18
20
 
21
+ logger = logging.getLogger(__name__)
22
+
19
23
 
20
24
  @dataclass
21
25
  class GoldenReport:
@@ -30,6 +34,7 @@ class GoldenReport:
30
34
  n_samples: int
31
35
  timestamp: datetime
32
36
  per_sample: List[Dict[str, Any]]
37
+ cache_stats: Optional[Dict[str, Any]] = field(default=None)
33
38
 
34
39
 
35
40
  class GoldenRunner:
@@ -40,10 +45,22 @@ class GoldenRunner:
40
45
  retriever: AdaptiveRAGRetriever,
41
46
  ragas_hook: RagasHook,
42
47
  dataset: Union[str, List[Dict[str, Any]]],
48
+ cache_monitor: Optional[Any] = None,
43
49
  ) -> None:
44
- """Load JSON path or use in-memory list of ``query`` / ``ground_truth`` / optional ids."""
50
+ """Load JSON path or use in-memory list; optionally wrap retriever with CacheMonitor.
51
+
52
+ Args:
53
+ retriever: :class:`~ragfallback.core.adaptive_retriever.AdaptiveRAGRetriever`
54
+ instance to query.
55
+ ragas_hook: :class:`~ragfallback.mlops.ragas_hook.RagasHook` for scoring.
56
+ dataset: JSON file path or list of ``{"query", "ground_truth", ...}`` dicts.
57
+ cache_monitor: Optional :class:`~ragfallback.tracking.cache_monitor.CacheMonitor`
58
+ instance. When provided, the retriever's vector store retriever is wrapped
59
+ to track cache hits/misses. Stats appear in ``GoldenReport.cache_stats``.
60
+ """
45
61
  self.retriever = retriever
46
62
  self.ragas_hook = ragas_hook
63
+ self._cache_monitor = cache_monitor
47
64
  if isinstance(dataset, str):
48
65
  raw = Path(dataset).read_text(encoding="utf-8")
49
66
  self._dataset = json.loads(raw)
@@ -57,11 +74,27 @@ class GoldenRunner:
57
74
  def _retrieve_docs(self, query: str, k: int = 5) -> List[Any]:
58
75
  """Fetch top-``k`` documents for context and id extraction."""
59
76
  r = self.retriever.vector_store.as_retriever(search_kwargs={"k": k})
77
+ if self._cache_monitor is not None:
78
+ r = self._cache_monitor.wrap_retriever(r, k=k)
60
79
  invoke = getattr(r, "invoke", None)
61
80
  if invoke is not None:
62
81
  return list(invoke(query) or [])
63
82
  return list(r.get_relevant_documents(query))
64
83
 
84
+ async def _aretrieve_docs(self, query: str, k: int = 5) -> List[Any]:
85
+ """Async fetch of top-``k`` documents."""
86
+ r = self.retriever.vector_store.as_retriever(search_kwargs={"k": k})
87
+ if self._cache_monitor is not None:
88
+ r = self._cache_monitor.wrap_retriever(r, k=k)
89
+ ainvoke = getattr(r, "ainvoke", None)
90
+ if ainvoke is not None:
91
+ return list(await ainvoke(query) or [])
92
+ loop = asyncio.get_event_loop()
93
+ invoke = getattr(r, "invoke", None)
94
+ if invoke is not None:
95
+ return list(await loop.run_in_executor(None, functools.partial(invoke, query)) or [])
96
+ return []
97
+
65
98
  def _doc_ids(self, docs: Sequence[Any]) -> List[str]:
66
99
  """Stable string ids from document metadata or content hash."""
67
100
  out: List[str] = []
@@ -77,9 +110,7 @@ class GoldenRunner:
77
110
 
78
111
  def _contexts_from_docs(self, docs: Sequence[Any]) -> List[str]:
79
112
  """Plain-text contexts for evaluation."""
80
- return [
81
- (getattr(d, "page_content", str(d)) or "") for d in docs
82
- ]
113
+ return [(getattr(d, "page_content", str(d)) or "") for d in docs]
83
114
 
84
115
  def _ids_from_intermediate(self, result: QueryResult) -> List[str]:
85
116
  """Best-effort doc id list from intermediate steps (often empty)."""
@@ -92,7 +123,7 @@ class GoldenRunner:
92
123
  return []
93
124
 
94
125
  def _run_single(self, item: Dict[str, Any]) -> Dict[str, Any]:
95
- """Run one golden row and return diagnostics plus ragas-oriented fields."""
126
+ """Run one golden row synchronously and return diagnostics."""
96
127
  query = item["query"]
97
128
  gt = item.get("ground_truth", "")
98
129
  rel_ids: Set[str] = set(str(x) for x in item.get("relevant_doc_ids", []))
@@ -111,7 +142,6 @@ class GoldenRunner:
111
142
 
112
143
  r3 = recall_at_k(retrieved_ids, rel_ids, 3)
113
144
  r5 = recall_at_k(retrieved_ids, rel_ids, 5)
114
- fallback_triggered = result.attempts > 1
115
145
 
116
146
  return {
117
147
  "question": query,
@@ -121,7 +151,50 @@ class GoldenRunner:
121
151
  "contexts": contexts,
122
152
  "latency_ms": latency_ms,
123
153
  "retrieved_ids": retrieved_ids,
124
- "fallback_triggered": fallback_triggered,
154
+ "fallback_triggered": result.attempts > 1,
155
+ "recall_at_3": r3,
156
+ "recall_at_5": r5,
157
+ }
158
+
159
+ async def _arun_single(self, item: Dict[str, Any]) -> Dict[str, Any]:
160
+ """Run one golden row natively async using :meth:`aquery_with_fallback`."""
161
+ query = item["query"]
162
+ gt = item.get("ground_truth", "")
163
+ rel_ids: Set[str] = set(str(x) for x in item.get("relevant_doc_ids", []))
164
+
165
+ try:
166
+ t0 = time.perf_counter()
167
+ result = await self.retriever.aquery_with_fallback(
168
+ query, return_intermediate_steps=True
169
+ )
170
+ latency_ms = (time.perf_counter() - t0) * 1000.0
171
+ except AttributeError:
172
+ logger.warning(
173
+ "retriever does not support aquery_with_fallback — "
174
+ "falling back to thread pool for query: %s",
175
+ query[:80],
176
+ )
177
+ loop = asyncio.get_event_loop()
178
+ return await loop.run_in_executor(None, self._run_single, item)
179
+
180
+ docs = await self._aretrieve_docs(query, k=5)
181
+ retrieved_ids = self._ids_from_intermediate(result)
182
+ if not retrieved_ids:
183
+ retrieved_ids = self._doc_ids(docs)
184
+ contexts = self._contexts_from_docs(docs)
185
+
186
+ r3 = recall_at_k(retrieved_ids, rel_ids, 3)
187
+ r5 = recall_at_k(retrieved_ids, rel_ids, 5)
188
+
189
+ return {
190
+ "question": query,
191
+ "ground_truth": gt,
192
+ "answer": result.answer,
193
+ "confidence": result.confidence,
194
+ "contexts": contexts,
195
+ "latency_ms": latency_ms,
196
+ "retrieved_ids": retrieved_ids,
197
+ "fallback_triggered": result.attempts > 1,
125
198
  "recall_at_3": r3,
126
199
  "recall_at_5": r5,
127
200
  }
@@ -139,6 +212,11 @@ class GoldenRunner:
139
212
  fb = sum(1 for s in per_sample if s.get("fallback_triggered")) / n
140
213
  p95 = float(np.percentile(latencies, 95)) if latencies else 0.0
141
214
  mean_lat = sum(latencies) / len(latencies) if latencies else 0.0
215
+ cache_stats = (
216
+ self._cache_monitor.get_stats().as_dict()
217
+ if self._cache_monitor is not None
218
+ else None
219
+ )
142
220
  return GoldenReport(
143
221
  ragas=ragas_report,
144
222
  recall_at_3=mean_r3,
@@ -149,6 +227,7 @@ class GoldenRunner:
149
227
  n_samples=len(per_sample),
150
228
  timestamp=datetime.utcnow(),
151
229
  per_sample=per_sample,
230
+ cache_stats=cache_stats,
152
231
  )
153
232
 
154
233
  def run(self) -> GoldenReport:
@@ -169,12 +248,8 @@ class GoldenRunner:
169
248
  return self._build_report(per_sample, ragas_rep)
170
249
 
171
250
  async def run_async(self) -> GoldenReport:
172
- """Evaluate golden rows concurrently (thread pool per row), then Ragas async."""
173
- loop = asyncio.get_event_loop()
174
- tasks = [
175
- loop.run_in_executor(None, self._run_single, item)
176
- for item in self._dataset
177
- ]
251
+ """Evaluate golden rows concurrently using native async, then Ragas async."""
252
+ tasks = [self._arun_single(item) for item in self._dataset]
178
253
  per_sample = list(await asyncio.gather(*tasks))
179
254
  ragas_samples = [
180
255
  {
@@ -0,0 +1,16 @@
1
+ """Cost tracking, metrics collection, and cache monitoring."""
2
+
3
+ from ragfallback.tracking.cache_monitor import CacheMonitor, CacheStats
4
+ from ragfallback.tracking.cost_tracker import CostTracker, ModelPricing
5
+ from ragfallback.tracking.metrics import MetricsCollector
6
+
7
+ __all__ = ["CacheMonitor", "CacheStats", "CostTracker", "ModelPricing", "MetricsCollector"]
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+