local-deep-research 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/defaults/default_settings.json +35 -35
  118. local_deep_research/metrics/__init__.py +13 -0
  119. local_deep_research/metrics/database.py +58 -0
  120. local_deep_research/metrics/db_models.py +115 -0
  121. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  122. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  123. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  124. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  125. local_deep_research/metrics/models.py +61 -0
  126. local_deep_research/metrics/pricing/__init__.py +12 -0
  127. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  128. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  129. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  130. local_deep_research/metrics/query_utils.py +51 -0
  131. local_deep_research/metrics/search_tracker.py +380 -0
  132. local_deep_research/metrics/token_counter.py +1078 -0
  133. local_deep_research/migrate_db.py +3 -1
  134. local_deep_research/report_generator.py +22 -8
  135. local_deep_research/search_system.py +390 -9
  136. local_deep_research/test_migration.py +15 -5
  137. local_deep_research/utilities/db_utils.py +7 -4
  138. local_deep_research/utilities/es_utils.py +115 -104
  139. local_deep_research/utilities/llm_utils.py +15 -5
  140. local_deep_research/utilities/log_utils.py +151 -0
  141. local_deep_research/utilities/search_cache.py +387 -0
  142. local_deep_research/utilities/search_utilities.py +14 -6
  143. local_deep_research/utilities/threading_utils.py +92 -0
  144. local_deep_research/utilities/url_utils.py +6 -0
  145. local_deep_research/web/api.py +347 -0
  146. local_deep_research/web/app.py +13 -17
  147. local_deep_research/web/app_factory.py +71 -66
  148. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  149. local_deep_research/web/database/migrations.py +5 -3
  150. local_deep_research/web/database/models.py +51 -2
  151. local_deep_research/web/database/schema_upgrade.py +49 -29
  152. local_deep_research/web/models/database.py +51 -61
  153. local_deep_research/web/routes/api_routes.py +56 -22
  154. local_deep_research/web/routes/benchmark_routes.py +4 -1
  155. local_deep_research/web/routes/globals.py +22 -0
  156. local_deep_research/web/routes/history_routes.py +71 -46
  157. local_deep_research/web/routes/metrics_routes.py +1155 -0
  158. local_deep_research/web/routes/research_routes.py +227 -41
  159. local_deep_research/web/routes/settings_routes.py +156 -55
  160. local_deep_research/web/services/research_service.py +310 -103
  161. local_deep_research/web/services/resource_service.py +36 -11
  162. local_deep_research/web/services/settings_manager.py +58 -18
  163. local_deep_research/web/services/settings_service.py +12 -4
  164. local_deep_research/web/services/socket_service.py +295 -188
  165. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  166. local_deep_research/web/static/css/styles.css +39 -1
  167. local_deep_research/web/static/js/components/detail.js +633 -267
  168. local_deep_research/web/static/js/components/details.js +751 -0
  169. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  170. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  171. local_deep_research/web/static/js/components/history.js +76 -76
  172. local_deep_research/web/static/js/components/logpanel.js +61 -13
  173. local_deep_research/web/static/js/components/progress.js +13 -2
  174. local_deep_research/web/static/js/components/research.js +99 -12
  175. local_deep_research/web/static/js/components/results.js +239 -106
  176. local_deep_research/web/static/js/components/settings.js +70 -47
  177. local_deep_research/web/static/js/main.js +40 -40
  178. local_deep_research/web/static/js/services/audio.js +1 -1
  179. local_deep_research/web/static/js/services/formatting.js +11 -11
  180. local_deep_research/web/static/js/services/keyboard.js +157 -0
  181. local_deep_research/web/static/js/services/pdf.js +80 -80
  182. local_deep_research/web/static/sounds/README.md +1 -1
  183. local_deep_research/web/templates/base.html +1 -0
  184. local_deep_research/web/templates/components/log_panel.html +7 -1
  185. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  186. local_deep_research/web/templates/components/sidebar.html +3 -0
  187. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  188. local_deep_research/web/templates/pages/details.html +325 -24
  189. local_deep_research/web/templates/pages/history.html +1 -1
  190. local_deep_research/web/templates/pages/metrics.html +1929 -0
  191. local_deep_research/web/templates/pages/progress.html +2 -2
  192. local_deep_research/web/templates/pages/research.html +53 -17
  193. local_deep_research/web/templates/pages/results.html +12 -1
  194. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  195. local_deep_research/web/utils/formatters.py +9 -3
  196. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  197. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  198. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  199. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  200. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  201. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  202. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  203. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  204. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  205. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  206. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  207. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  208. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  209. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  210. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  211. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  212. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  213. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  214. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  215. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  216. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  217. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +8 -2
  218. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  219. local_deep_research-0.4.3.dist-info/RECORD +0 -177
  220. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  221. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  222. {local_deep_research-0.4.3.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,240 @@
1
+ """
2
+ LLM Pricing Data Fetcher
3
+
4
+ Fetches real-time pricing data from various LLM providers.
5
+ Supports multiple providers and fallback to static pricing.
6
+ """
7
+
8
+ from typing import Any, Dict, Optional
9
+
10
+ import aiohttp
11
+ from loguru import logger
12
+
13
+
14
class PricingFetcher:
    """Look up per-token LLM pricing, falling back to a built-in static table.

    The provider-specific ``fetch_*`` coroutines are placeholders: none of
    them currently returns live data, so every successful lookup is served
    from ``self.static_pricing`` (USD per 1K tokens).

    The instance can be used as an async context manager, which opens an
    ``aiohttp.ClientSession`` on entry and closes it on exit. Only
    ``fetch_huggingface_pricing`` uses that session.
    """

    # Providers that run models locally; they are always priced at zero.
    _LOCAL_PROVIDERS = ("ollama", "vllm", "lmstudio", "llamacpp")

    # Hosted provider -> prefix shared by that provider's keys in the
    # static pricing table.
    _PROVIDER_MODEL_PREFIXES = {
        "openai": "gpt",
        "anthropic": "claude",
        "google": "gemini",
    }

    def __init__(self):
        """Create a fetcher with no HTTP session and the static price table."""
        self.session = None
        self.static_pricing = self._load_static_pricing()

    async def __aenter__(self):
        """Open the HTTP session and return this fetcher."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session, if one was opened."""
        if self.session:
            await self.session.close()
            # Drop the reference so a later call cannot reuse a closed session.
            self.session = None

    def _load_static_pricing(self) -> Dict[str, Dict[str, float]]:
        """Load static pricing as fallback (per 1K tokens in USD)."""
        return {
            # OpenAI Models
            "gpt-4": {"prompt": 0.03, "completion": 0.06},
            "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
            "gpt-4o": {"prompt": 0.005, "completion": 0.015},
            "gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
            "gpt-3.5-turbo": {"prompt": 0.001, "completion": 0.002},
            # Anthropic Models
            "claude-3-opus": {"prompt": 0.015, "completion": 0.075},
            "claude-3-sonnet": {"prompt": 0.003, "completion": 0.015},
            "claude-3-haiku": {"prompt": 0.00025, "completion": 0.00125},
            "claude-3-5-sonnet": {"prompt": 0.003, "completion": 0.015},
            # Google Models
            "gemini-pro": {"prompt": 0.0005, "completion": 0.0015},
            "gemini-pro-vision": {"prompt": 0.0005, "completion": 0.0015},
            "gemini-1.5-pro": {"prompt": 0.0035, "completion": 0.0105},
            "gemini-1.5-flash": {"prompt": 0.00035, "completion": 0.00105},
            # Local/Open Source (free)
            "ollama": {"prompt": 0.0, "completion": 0.0},
            "llama": {"prompt": 0.0, "completion": 0.0},
            "mistral": {"prompt": 0.0, "completion": 0.0},
            "gemma": {"prompt": 0.0, "completion": 0.0},
            "qwen": {"prompt": 0.0, "completion": 0.0},
            "codellama": {"prompt": 0.0, "completion": 0.0},
            "vicuna": {"prompt": 0.0, "completion": 0.0},
            "alpaca": {"prompt": 0.0, "completion": 0.0},
            "vllm": {"prompt": 0.0, "completion": 0.0},
            "lmstudio": {"prompt": 0.0, "completion": 0.0},
            "llamacpp": {"prompt": 0.0, "completion": 0.0},
        }

    async def fetch_openai_pricing(self) -> Optional[Dict[str, Any]]:
        """Placeholder for live OpenAI pricing; currently always returns None."""
        try:
            # Note: OpenAI doesn't have a public pricing API
            # This would need to be web scraping or manual updates
            logger.info("Using static OpenAI pricing (no public API available)")
            return None
        except Exception as e:
            logger.warning(f"Failed to fetch OpenAI pricing: {e}")
            return None

    async def fetch_anthropic_pricing(self) -> Optional[Dict[str, Any]]:
        """Placeholder for live Anthropic pricing; currently always returns None."""
        try:
            # Note: Anthropic doesn't have a public pricing API
            # This would need to be web scraping or manual updates
            logger.info(
                "Using static Anthropic pricing (no public API available)"
            )
            return None
        except Exception as e:
            logger.warning(f"Failed to fetch Anthropic pricing: {e}")
            return None

    async def fetch_google_pricing(self) -> Optional[Dict[str, Any]]:
        """Placeholder for live Google/Gemini pricing; currently always returns None."""
        try:
            # Note: Google doesn't have a dedicated pricing API for individual models
            # This would need to be web scraping or manual updates
            logger.info("Using static Google pricing (no public API available)")
            return None
        except Exception as e:
            logger.warning(f"Failed to fetch Google pricing: {e}")
            return None

    async def fetch_huggingface_pricing(self) -> Optional[Dict[str, Any]]:
        """Request the HuggingFace pricing page; currently always returns None.

        Does nothing when no session is open (i.e. outside ``async with``).
        The response body is not parsed, so no pricing is extracted yet.
        """
        try:
            if not self.session:
                return None

            # HuggingFace has some pricing info but not a structured API
            # This is more for hosted inference endpoints
            url = "https://huggingface.co/pricing"
            async with self.session.get(url) as response:
                if response.status == 200:
                    # Would need to parse HTML for pricing info
                    logger.info(
                        "HuggingFace pricing would require HTML parsing"
                    )
                    return None
        except Exception as e:
            logger.warning(f"Failed to fetch HuggingFace pricing: {e}")
            return None

    async def get_model_pricing(
        self, model_name: str, provider: Optional[str] = None
    ) -> Optional[Dict[str, float]]:
        """Get pricing for a specific model and provider.

        Args:
            model_name: Model identifier, optionally prefixed with
                ``"<provider>/"``. Matching is case-insensitive and exact.
            provider: Optional provider name (case-insensitive).

        Returns:
            A ``{"prompt": ..., "completion": ...}`` dict in USD per 1K
            tokens, or ``None`` when the model is not in the pricing table.
        """
        # Normalize inputs
        model_name = model_name.lower() if model_name else ""
        provider = provider.lower() if provider else ""

        # Provider-first approach: local providers are free regardless of model.
        if provider in self._LOCAL_PROVIDERS:
            logger.debug(
                f"Local provider '{provider}' detected - returning zero cost"
            )
            return {"prompt": 0.0, "completion": 0.0}

        # Give the matching live fetcher a chance to run. The fetchers are
        # placeholders that return None, so their result is not used yet.
        if (
            provider == "openai"
            or "gpt" in model_name
            or "openai" in model_name
        ):
            await self.fetch_openai_pricing()
        elif (
            provider == "anthropic"
            or "claude" in model_name
            or "anthropic" in model_name
        ):
            await self.fetch_anthropic_pricing()
        elif (
            provider == "google"
            or "gemini" in model_name
            or "google" in model_name
        ):
            await self.fetch_google_pricing()

        # Names to try, in order: the full name, then the name with any
        # "<provider>/" prefix removed (e.g. "openai/gpt-4o-mini" -> "gpt-4o-mini").
        candidates = [model_name]
        if "/" in model_name:
            candidates.append(model_name.split("/")[-1])

        # Tables to search, in order: the provider's own models (when a
        # provider was given), then the whole static table.
        tables = []
        if provider:
            tables.append(self._get_models_by_provider(provider))
        tables.append(self.static_pricing)

        for table in tables:
            for candidate in candidates:
                if candidate in table:
                    return table[candidate]

        # No pricing found - return None instead of default pricing
        logger.warning(
            f"No pricing found for model: {model_name}, provider: {provider}"
        )
        return None

    def _get_models_by_provider(
        self, provider: str
    ) -> Dict[str, Dict[str, float]]:
        """Return the subset of the static table belonging to ``provider``.

        Hosted providers are matched by model-name prefix; local providers
        get every zero-priced entry. Unknown providers yield an empty dict.
        """
        provider = provider.lower()

        prefix = self._PROVIDER_MODEL_PREFIXES.get(provider)
        if prefix is not None:
            return {
                name: price
                for name, price in self.static_pricing.items()
                if name.startswith(prefix)
            }

        if provider in self._LOCAL_PROVIDERS:
            # All local models are free
            return {
                name: price
                for name, price in self.static_pricing.items()
                if price["prompt"] == 0.0 and price["completion"] == 0.0
            }

        return {}

    async def get_all_pricing(self) -> Dict[str, Dict[str, float]]:
        """Get pricing for all known models (a shallow copy of the static table)."""
        # In the future, this could aggregate from multiple live sources
        return self.static_pricing.copy()

    def get_provider_from_model(self, model_name: str) -> str:
        """Determine the provider from a model name by substring matching.

        Returns one of ``"openai"``, ``"anthropic"``, ``"google"``,
        ``"ollama"``, ``"meta"``, ``"mistral"`` or ``"unknown"``. A missing
        or empty name yields ``"unknown"``.
        """
        name = model_name.lower() if model_name else ""

        if "gpt" in name or "openai" in name:
            return "openai"
        if "claude" in name or "anthropic" in name:
            return "anthropic"
        if "gemini" in name or "google" in name:
            return "google"
        # "ollama" contains "llama", so it must be tested before the
        # "llama" check or this branch can never be reached.
        if "ollama" in name:
            return "ollama"
        if "llama" in name or "meta" in name:
            return "meta"
        if "mistral" in name:
            return "mistral"
        return "unknown"
@@ -0,0 +1,51 @@
1
+ """Common query utilities for metrics module."""
2
+
3
+ from datetime import datetime, timedelta
4
+ from typing import Any
5
+
6
+ from sqlalchemy import Column
7
+
8
+
9
def get_time_filter_condition(period: str, timestamp_column: Column) -> Any:
    """Build a "newer than cutoff" condition for a timestamp column.

    Args:
        period: Time period ('7d', '30d', '3m', '1y', 'all')
        timestamp_column: SQLAlchemy timestamp column to filter on

    Returns:
        The result of ``timestamp_column >= cutoff``, or None for 'all'
    """
    if period == "all":
        return None

    # Length of each supported look-back window, in days.
    window_days = {"7d": 7, "30d": 30, "3m": 90, "1y": 365}

    # Default to 30 days for unknown periods
    days_back = window_days.get(period, 30)
    cutoff = datetime.now() - timedelta(days=days_back)
    return timestamp_column >= cutoff
34
+
35
+
36
def get_research_mode_condition(research_mode: str, mode_column: Column) -> Any:
    """Build an equality condition restricting results to one research mode.

    Args:
        research_mode: Research mode ('quick', 'detailed', 'all')
        mode_column: SQLAlchemy column to filter on

    Returns:
        The result of ``mode_column == research_mode`` for 'quick' or
        'detailed'; None for 'all' and for any unrecognised mode
    """
    # Only the two concrete modes produce a filter; everything else
    # (including 'all') means "do not filter".
    if research_mode in ("quick", "detailed"):
        return mode_column == research_mode
    return None
@@ -0,0 +1,380 @@
1
+ """
2
+ Search call tracking system for metrics collection.
3
+ Similar to token_counter.py but tracks search engine usage.
4
+ """
5
+
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from loguru import logger
9
+ from sqlalchemy import case, func
10
+
11
+ from .database import MetricsDatabase
12
+ from .db_models import SearchCall
13
+ from .query_utils import get_research_mode_condition, get_time_filter_condition
14
+
15
+
16
class SearchTracker:
    """Track search engine calls and performance metrics.

    Each search is written to the metrics database as a ``SearchCall`` row,
    tagged with the current research context. The ``get_*`` methods read those
    rows back as plain dictionaries/lists. All database errors are logged and
    converted into an empty fallback result rather than raised.
    """

    def __init__(self, db: Optional[MetricsDatabase] = None):
        """Initialize the search tracker.

        Args:
            db: Metrics database to use. A new ``MetricsDatabase`` is created
                when none is supplied.
        """
        self.db = db or MetricsDatabase()
        self.research_context = {}

    def set_research_context(self, context: Dict[str, Any]) -> None:
        """Set the current research context for search tracking.

        Args:
            context: Mapping read by ``record_search`` (keys ``research_id``,
                ``research_query``, ``research_mode``, ``research_phase``,
                ``search_iteration``). A falsy value resets it to ``{}``.
        """
        self.research_context = context or {}
        logger.debug(f"Search tracker context updated: {self.research_context}")

    @staticmethod
    def _apply_filters(query, *conditions):
        """Return ``query`` with every non-None condition applied via filter()."""
        for condition in conditions:
            if condition is not None:
                query = query.filter(condition)
        return query

    @staticmethod
    def _success_rate(success_count, call_count):
        """Return successes as a percentage of calls, or 0 when there are no calls."""
        if not call_count:
            return 0
        # A missing aggregate is treated as zero successes.
        return (success_count or 0) / call_count * 100

    @staticmethod
    def _truncate(text: Optional[str], limit: int) -> Optional[str]:
        """Shorten ``text`` to ``limit`` characters plus '...' when it is longer."""
        if text and len(text) > limit:
            return text[:limit] + "..."
        return text

    def record_search(
        self,
        engine_name: str,
        query: str,
        results_count: int = 0,
        response_time_ms: int = 0,
        success: bool = True,
        error_message: Optional[str] = None,
    ) -> None:
        """Record a completed search operation directly to database.

        Args:
            engine_name: Name of the search engine that was called.
            query: Query text sent to the engine.
            results_count: Number of results returned.
            response_time_ms: Duration of the call in milliseconds.
            success: Whether the call succeeded; stored as "success"/"error".
            error_message: Optional error text. If an ``Exception`` instance is
                passed instead, its class name is stored as the error type;
                any other truthy value is stored as "unknown_error".

        Failures while writing are logged and swallowed so that metrics
        collection never interrupts the search itself.
        """
        # Extract research context
        context = self.research_context
        research_id = context.get("research_id")
        research_query = context.get("research_query")
        research_mode = context.get("research_mode", "unknown")
        research_phase = context.get("research_phase", "search")
        search_iteration = context.get("search_iteration", 0)

        # Determine success status
        success_status = "success" if success else "error"
        error_type = None
        if error_message:
            error_type = (
                type(error_message).__name__
                if isinstance(error_message, Exception)
                else "unknown_error"
            )

        # Record search call in database
        try:
            with self.db.get_session() as session:
                session.add(
                    SearchCall(
                        research_id=research_id,
                        research_query=research_query,
                        research_mode=research_mode,
                        research_phase=research_phase,
                        search_iteration=search_iteration,
                        search_engine=engine_name,
                        query=query,
                        results_count=results_count,
                        response_time_ms=response_time_ms,
                        success_status=success_status,
                        error_type=error_type,
                        error_message=(
                            str(error_message) if error_message else None
                        ),
                    )
                )
                session.commit()

                logger.debug(
                    f"Search call recorded: {engine_name} - "
                    f"{results_count} results in {response_time_ms}ms"
                )

        except Exception as e:
            logger.error(f"Failed to record search call: {e}")

    def get_search_metrics(
        self, period: str = "30d", research_mode: str = "all"
    ) -> Dict[str, Any]:
        """Get search engine usage metrics.

        Args:
            period: Time period to filter by ('7d', '30d', '3m', '1y', 'all').
            research_mode: Research mode to filter by ('quick', 'detailed',
                'all').

        Returns:
            Dict with ``search_engine_stats`` (one aggregate entry per engine,
            most-called first) and ``recent_calls`` (the 20 newest calls, with
            queries shortened to 100 characters). Both lists are empty if the
            database cannot be read.
        """
        # The try wraps session acquisition too, so a failure to open the
        # session yields the empty fallback just like a failed query does.
        try:
            with self.db.get_session() as session:
                time_condition = get_time_filter_condition(
                    period, SearchCall.timestamp
                )
                mode_condition = get_research_mode_condition(
                    research_mode, SearchCall.research_mode
                )

                # Get search engine statistics using ORM aggregation
                stats_query = session.query(
                    SearchCall.search_engine,
                    func.count().label("call_count"),
                    func.avg(SearchCall.response_time_ms).label(
                        "avg_response_time"
                    ),
                    func.sum(SearchCall.results_count).label("total_results"),
                    func.avg(SearchCall.results_count).label(
                        "avg_results_per_call"
                    ),
                    func.sum(
                        case(
                            (SearchCall.success_status == "success", 1), else_=0
                        )
                    ).label("success_count"),
                    func.sum(
                        case((SearchCall.success_status == "error", 1), else_=0)
                    ).label("error_count"),
                ).filter(SearchCall.search_engine.isnot(None))
                stats_query = self._apply_filters(
                    stats_query, time_condition, mode_condition
                )
                search_stats = (
                    stats_query.group_by(SearchCall.search_engine)
                    .order_by(func.count().desc())
                    .all()
                )

                # Get recent search calls
                recent_calls = (
                    self._apply_filters(
                        session.query(SearchCall),
                        time_condition,
                        mode_condition,
                    )
                    .order_by(SearchCall.timestamp.desc())
                    .limit(20)
                    .all()
                )

                return {
                    "search_engine_stats": [
                        {
                            "engine": stat.search_engine,
                            "call_count": stat.call_count,
                            "avg_response_time": stat.avg_response_time or 0,
                            "total_results": stat.total_results or 0,
                            "avg_results_per_call": stat.avg_results_per_call
                            or 0,
                            "success_rate": self._success_rate(
                                stat.success_count, stat.call_count
                            ),
                            "error_count": stat.error_count or 0,
                        }
                        for stat in search_stats
                    ],
                    "recent_calls": [
                        {
                            "engine": call.search_engine,
                            "query": self._truncate(call.query, 100),
                            "results_count": call.results_count,
                            "response_time_ms": call.response_time_ms,
                            "success_status": call.success_status,
                            "timestamp": str(call.timestamp),
                        }
                        for call in recent_calls
                    ],
                }

        except Exception as e:
            logger.exception(f"Error getting search metrics: {e}")
            return {"search_engine_stats": [], "recent_calls": []}

    def get_research_search_metrics(self, research_id: int) -> Dict[str, Any]:
        """Get search metrics for a specific research session.

        Args:
            research_id: Identifier of the research whose calls are reported.

        Returns:
            Dict with overall totals (``total_searches``, ``total_results``,
            ``avg_response_time``, ``success_rate``), the individual
            ``search_calls`` in chronological order, and per-engine
            ``engine_stats``. Zeroed totals and empty lists are returned if
            the database cannot be read.
        """
        # The try wraps session acquisition too, so a failure to open the
        # session yields the zeroed fallback just like a failed query does.
        try:
            with self.db.get_session() as session:
                # Get all search calls for this research
                search_calls = (
                    session.query(SearchCall)
                    .filter(SearchCall.research_id == research_id)
                    .order_by(SearchCall.timestamp.asc())
                    .all()
                )

                # Get search engine stats for this research
                engine_stats = (
                    session.query(
                        SearchCall.search_engine,
                        func.count().label("call_count"),
                        func.avg(SearchCall.response_time_ms).label(
                            "avg_response_time"
                        ),
                        func.sum(SearchCall.results_count).label(
                            "total_results"
                        ),
                        func.sum(
                            case(
                                (SearchCall.success_status == "success", 1),
                                else_=0,
                            )
                        ).label("success_count"),
                    )
                    .filter(SearchCall.research_id == research_id)
                    .group_by(SearchCall.search_engine)
                    .order_by(func.count().desc())
                    .all()
                )

                # Calculate totals
                total_searches = len(search_calls)
                total_results = sum(
                    call.results_count or 0 for call in search_calls
                )
                avg_response_time = (
                    sum(call.response_time_ms or 0 for call in search_calls)
                    / total_searches
                    if total_searches > 0
                    else 0
                )
                successful_searches = sum(
                    1
                    for call in search_calls
                    if call.success_status == "success"
                )
                success_rate = self._success_rate(
                    successful_searches, total_searches
                )

                return {
                    "total_searches": total_searches,
                    "total_results": total_results,
                    "avg_response_time": round(avg_response_time),
                    "success_rate": round(success_rate, 1),
                    "search_calls": [
                        {
                            "engine": call.search_engine,
                            "query": call.query,
                            "results_count": call.results_count,
                            "response_time_ms": call.response_time_ms,
                            "success_status": call.success_status,
                            "timestamp": str(call.timestamp),
                        }
                        for call in search_calls
                    ],
                    "engine_stats": [
                        {
                            "engine": stat.search_engine,
                            "call_count": stat.call_count,
                            "avg_response_time": stat.avg_response_time or 0,
                            "total_results": stat.total_results or 0,
                            "success_rate": self._success_rate(
                                stat.success_count, stat.call_count
                            ),
                        }
                        for stat in engine_stats
                    ],
                }

        except Exception as e:
            logger.exception(f"Error getting research search metrics: {e}")
            return {
                "total_searches": 0,
                "total_results": 0,
                "avg_response_time": 0,
                "success_rate": 0,
                "search_calls": [],
                "engine_stats": [],
            }

    def get_search_time_series(
        self, period: str = "30d", research_mode: str = "all"
    ) -> List[Dict[str, Any]]:
        """Get search activity time series data for charting.

        Args:
            period: Time period to filter by ('7d', '30d', '3m', '1y', 'all')
            research_mode: Research mode to filter by ('quick', 'detailed', 'all')

        Returns:
            List of time series data points with search engine activity,
            oldest first, with queries shortened to 50 characters. An empty
            list is returned if the database cannot be read.
        """
        # The try wraps session acquisition too, so a failure to open the
        # session yields the empty fallback just like a failed query does.
        try:
            with self.db.get_session() as session:
                # Only rows that have both an engine and a timestamp can be charted.
                query = session.query(SearchCall).filter(
                    SearchCall.search_engine.isnot(None),
                    SearchCall.timestamp.isnot(None),
                )
                query = self._apply_filters(
                    query,
                    get_time_filter_condition(period, SearchCall.timestamp),
                    get_research_mode_condition(
                        research_mode, SearchCall.research_mode
                    ),
                )

                # Get all search calls ordered by time
                search_calls = query.order_by(SearchCall.timestamp.asc()).all()

                return [
                    {
                        "timestamp": (
                            str(call.timestamp) if call.timestamp else None
                        ),
                        "search_engine": call.search_engine,
                        "results_count": call.results_count or 0,
                        "response_time_ms": call.response_time_ms or 0,
                        "success_status": call.success_status,
                        "query": self._truncate(call.query, 50),
                    }
                    for call in search_calls
                ]

        except Exception as e:
            logger.exception(f"Error getting search time series: {e}")
            return []
+
364
+
365
# Process-wide SearchTracker, created on first use by get_search_tracker().
_search_tracker = None


def get_search_tracker() -> SearchTracker:
    """Return the shared SearchTracker, creating it on the first call."""
    global _search_tracker
    if _search_tracker is not None:
        return _search_tracker

    # First access: build the tracker with its default database and cache it.
    _search_tracker = SearchTracker()
    return _search_tracker
375
+
376
+
377
def set_search_context(context: Dict[str, Any]) -> None:
    """Apply ``context`` as the research context of the global search tracker."""
    get_search_tracker().set_research_context(context)