local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -2,10 +2,30 @@
2
2
  Utilities for logging.
3
3
  """
4
4
 
5
+ # Needed for loguru annotations
6
+ from __future__ import annotations
7
+
5
8
  import inspect
6
9
  import logging
10
+ import sys
11
+ from functools import wraps
12
+ from pathlib import Path
13
+ from typing import Any, Callable
7
14
 
15
+ import loguru
16
+ from flask import g, has_app_context
8
17
  from loguru import logger
18
+ from sqlalchemy.exc import OperationalError
19
+
20
+ from ..web.database.models import ResearchLog
21
+ from ..web.services.socket_service import SocketIOService
22
+ from .db_utils import get_db_session
23
+
24
# Default log directory to use.
_LOG_DIR = Path(__file__).parents[2] / "data" / "logs"
# `parents=True` so that a missing intermediate "data" directory is created
# too; without it, importing this module raises FileNotFoundError whenever
# "data" does not exist yet.
_LOG_DIR.mkdir(parents=True, exist_ok=True)
9
29
 
10
30
 
11
31
  class InterceptHandler(logging.Handler):
@@ -34,3 +54,134 @@ class InterceptHandler(logging.Handler):
34
54
  logger.opt(depth=depth, exception=record.exc_info).log(
35
55
  level, record.getMessage()
36
56
  )
57
+
58
+
59
def log_for_research(
    to_wrap: Callable[..., Any],
) -> Callable[..., Any]:
    """
    Decorator for a function that's part of the research process. It expects
    the function to take the research ID as the first parameter, and
    configures all log messages made during this request to include the
    research ID.

    The research ID is stored on Flask's `g` object for the duration of the
    call and removed again afterwards, even when the wrapped function raises.

    Args:
        to_wrap: The function to wrap. Should take the research ID as the
            first parameter.

    Returns:
        The wrapped function. It returns whatever `to_wrap` returns.

    """

    @wraps(to_wrap)
    def wrapped(research_id: int, *args: Any, **kwargs: Any) -> Any:
        g.research_id = research_id
        try:
            # Propagate the result so decorating a function does not turn
            # its return value into None.
            return to_wrap(research_id, *args, **kwargs)
        finally:
            # Always clear the ID so a failed run does not leave a stale
            # research ID behind; the default avoids a KeyError if the key
            # has already been removed.
            g.pop("research_id", None)

    return wrapped
82
+
83
+
84
def _get_research_id() -> int | None:
    """
    Looks up the research ID stored on Flask's `g` object.

    Returns:
        The value stored under "research_id", or None when there is no
        application context or no such value has been set.

    """
    if not has_app_context():
        # `g` is only consulted when an application context is active.
        return None
    return g.get("research_id")
96
+
97
+
98
def database_sink(message: loguru.Message) -> None:
    """
    Loguru sink that stores each log message as a `ResearchLog` row.

    Args:
        message: The log message to save. Its formatted text and selected
            fields of its record are copied into the row.

    """
    current_research_id = _get_research_id()
    rec = message.record

    # Build the row from the log record.
    entry = ResearchLog(
        timestamp=rec["time"],
        message=str(message),
        module=rec["name"],
        function=rec["function"],
        line_no=int(rec["line"]),
        level=rec["level"].name,
        research_id=current_research_id,
    )

    # Persist it; a log line is not worth failing over.
    session = get_db_session()
    try:
        session.add(entry)
        session.commit()
    except OperationalError:
        # Something else is probably using the DB and we can't write to it
        # right now. Drop this message and leave the session usable.
        session.rollback()
130
+
131
+
132
def frontend_progress_sink(message: loguru.Message) -> None:
    """
    Loguru sink that forwards log messages to the frontend over Socket.IO.

    Messages are only forwarded while a research ID is available; otherwise
    nothing is emitted.

    Args:
        message: The log message to send.

    """
    current_research_id = _get_research_id()
    if current_research_id is not None:
        rec = message.record
        payload = {
            "log_entry": {
                "message": rec["message"],
                "type": rec["level"].name,
                "time": rec["time"].isoformat(),
            },
        }
        # `enable_logging=False` is passed through to the socket service.
        # NOTE(review): presumably this prevents the emit itself from
        # producing log messages that would re-enter this sink - confirm.
        SocketIOService().emit_to_subscribers(
            "progress",
            current_research_id,
            payload,
            enable_logging=False,
        )
156
+
157
+
158
def config_logger(name: str) -> None:
    """
    Sets up the loguru logger for the application.

    All existing handlers are removed and replaced by four sinks: stderr at
    INFO level, a DEBUG-level log file in the default log directory, the
    database sink, and the frontend progress sink. A custom "milestone"
    level is registered as well.

    Args:
        name: The name to use for the log file (without the ".log" suffix).

    """
    logger.enable("local_deep_research")
    logger.remove()

    # Console only gets the more important messages.
    logger.add(sys.stderr, level="INFO")

    # Everything goes to the log file.
    file_sink_options = dict(
        level="DEBUG",
        enqueue=True,
        rotation="00:00",
        retention="30 days",
        compression="zip",
    )
    logger.add(_LOG_DIR / f"{name}.log", **file_sink_options)

    for sink in (database_sink, frontend_progress_sink):
        logger.add(sink)

    # Register the special log level used for milestones.
    try:
        logger.level("milestone", no=26, color="<magenta><bold>")
    except ValueError:
        # Level already exists, that's fine
        pass
@@ -0,0 +1,387 @@
1
+ """
2
+ Search Cache Utility
3
+ Provides intelligent caching for search results to avoid repeated queries.
4
+ Includes TTL, LRU eviction, and query normalization.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ import os
10
+ import sqlite3
11
+ import time
12
+ from functools import lru_cache
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ from loguru import logger
16
+
17
+
18
class SearchCache:
    """
    Persistent cache for search results with TTL and LRU eviction.

    Results are stored in a SQLite database for persistence across sessions.
    A bounded in-memory dictionary sits in front of the database and is
    consulted first on lookups.
    """

    # Upper bound on how many entries beyond the overflow a single memory
    # eviction pass removes, so eviction does not run on every insert.
    _EVICTION_BATCH = 100

    def __init__(
        self,
        cache_dir: Optional[str] = None,
        max_memory_items: int = 1000,
        default_ttl: int = 3600,
    ):
        """
        Initialize search cache.

        Args:
            cache_dir: Directory for cache database. Defaults to
                data/__CACHE_DIR__/search_cache under the current working
                directory.
            max_memory_items: Maximum items in memory cache
            default_ttl: Default time-to-live in seconds (1 hour default)
        """
        self.max_memory_items = max_memory_items
        self.default_ttl = default_ttl

        # Setup cache directory
        if cache_dir is None:
            cache_dir = os.path.join(
                os.getcwd(), "data", "__CACHE_DIR__", "search_cache"
            )

        os.makedirs(cache_dir, exist_ok=True)
        self.db_path = os.path.join(cache_dir, "search_cache.db")

        # In-memory cache for frequently accessed items
        self._memory_cache = {}
        self._access_times = {}

        # Initialize database, then drop rows that expired in earlier
        # sessions; nothing else removes them.
        self._init_db()
        self._cleanup_expired()

    def _connect(self):
        """
        Open a database connection that is closed when the `with` block ends.

        `sqlite3.Connection` used directly as a context manager only commits
        or rolls back; it does not close the connection. Wrapping it in
        `contextlib.closing` releases the connection after every operation.
        Callers commit explicitly.
        """
        from contextlib import closing

        return closing(sqlite3.connect(self.db_path))

    def _init_db(self):
        """Initialize SQLite database for persistent cache."""
        try:
            with self._connect() as conn:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS search_cache (
                        query_hash TEXT PRIMARY KEY,
                        query_text TEXT NOT NULL,
                        results TEXT NOT NULL,
                        created_at INTEGER NOT NULL,
                        expires_at INTEGER NOT NULL,
                        access_count INTEGER DEFAULT 1,
                        last_accessed INTEGER NOT NULL
                    )
                """
                )
                conn.execute(
                    """
                    CREATE INDEX IF NOT EXISTS idx_expires_at ON search_cache(expires_at)
                """
                )
                conn.execute(
                    """
                    CREATE INDEX IF NOT EXISTS idx_last_accessed ON search_cache(last_accessed)
                """
                )
                conn.commit()
        except Exception as e:
            logger.error(f"Failed to initialize search cache database: {e}")

    def _normalize_query(self, query: str) -> str:
        """Normalize query for consistent caching."""
        # Convert to lowercase and remove extra whitespace
        normalized = " ".join(query.lower().strip().split())

        # Remove common punctuation that doesn't affect search
        normalized = normalized.replace('"', "").replace("'", "")

        return normalized

    def _get_query_hash(
        self, query: str, search_engine: str = "default"
    ) -> str:
        """Generate hash for query + search engine combination."""
        normalized_query = self._normalize_query(query)
        cache_key = f"{search_engine}:{normalized_query}"
        # MD5 is used purely as a cache key here, not for security.
        return hashlib.md5(cache_key.encode()).hexdigest()

    def _cleanup_expired(self):
        """Remove expired entries from database."""
        try:
            current_time = int(time.time())
            with self._connect() as conn:
                cursor = conn.cursor()
                cursor.execute(
                    "DELETE FROM search_cache WHERE expires_at < ?",
                    (current_time,),
                )
                deleted = cursor.rowcount
                conn.commit()
                if deleted > 0:
                    logger.debug(f"Cleaned up {deleted} expired cache entries")
        except Exception as e:
            logger.error(f"Failed to cleanup expired cache entries: {e}")

    def _evict_lru_memory(self):
        """
        Evict least recently used items from memory cache.

        Does nothing while the cache holds at most `max_memory_items`
        entries. Otherwise the overflow plus a small batch of additional
        entries is removed, oldest access time first.
        """
        overflow = len(self._memory_cache) - self.max_memory_items
        if overflow <= 0:
            return

        # Remove extra for efficiency, but scale the batch with the cache
        # size: a fixed batch of 100 would wipe a small cache completely,
        # including the entry that was just added.
        extra = max(0, min(self._EVICTION_BATCH, self.max_memory_items // 10))

        # Sort by access time and remove oldest
        oldest_first = sorted(
            self._access_times.items(), key=lambda item: item[1]
        )
        for query_hash, _ in oldest_first[: overflow + extra]:
            self._memory_cache.pop(query_hash, None)
            self._access_times.pop(query_hash, None)

    def get(
        self, query: str, search_engine: str = "default"
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get cached search results for a query.

        The memory cache is checked first, then the database. A database hit
        updates the row's access statistics and is copied into the memory
        cache. Database errors are logged and treated as a miss.

        Args:
            query: Search query
            search_engine: Search engine identifier for cache partitioning

        Returns:
            Cached results or None if not found/expired
        """
        query_hash = self._get_query_hash(query, search_engine)
        current_time = int(time.time())

        # Check memory cache first
        if query_hash in self._memory_cache:
            entry = self._memory_cache[query_hash]
            if entry["expires_at"] > current_time:
                self._access_times[query_hash] = current_time
                logger.debug(f"Cache hit (memory) for query: {query[:50]}...")
                return entry["results"]
            else:
                # Expired, remove from memory
                self._memory_cache.pop(query_hash, None)
                self._access_times.pop(query_hash, None)

        # Check database cache
        try:
            with self._connect() as conn:
                cursor = conn.cursor()
                cursor.execute(
                    """
                    SELECT results, expires_at FROM search_cache
                    WHERE query_hash = ? AND expires_at > ?
                """,
                    (query_hash, current_time),
                )

                row = cursor.fetchone()
                if row:
                    results_json, expires_at = row
                    results = json.loads(results_json)

                    # Update access statistics
                    cursor.execute(
                        """
                        UPDATE search_cache
                        SET access_count = access_count + 1, last_accessed = ?
                        WHERE query_hash = ?
                    """,
                        (current_time, query_hash),
                    )
                    conn.commit()

                    # Add to memory cache
                    self._memory_cache[query_hash] = {
                        "results": results,
                        "expires_at": expires_at,
                    }
                    self._access_times[query_hash] = current_time
                    self._evict_lru_memory()

                    logger.debug(
                        f"Cache hit (database) for query: {query[:50]}..."
                    )
                    return results

        except Exception as e:
            logger.error(f"Failed to retrieve from search cache: {e}")

        logger.debug(f"Cache miss for query: {query[:50]}...")
        return None

    def put(
        self,
        query: str,
        results: List[Dict[str, Any]],
        search_engine: str = "default",
        ttl: Optional[int] = None,
    ) -> bool:
        """
        Store search results in cache.

        The results are written to the database first and only then placed
        in the memory cache, so a failed write leaves the memory cache
        untouched.

        Args:
            query: Search query
            results: Search results to cache. Must be JSON-serializable.
            search_engine: Search engine identifier
            ttl: Time-to-live in seconds (the default is used if None or 0)

        Returns:
            True if successfully cached, False for empty results or when
            storing failed
        """
        if not results:  # Don't cache empty results
            return False

        query_hash = self._get_query_hash(query, search_engine)
        current_time = int(time.time())
        expires_at = current_time + (ttl or self.default_ttl)

        try:
            results_json = json.dumps(results)

            # Store in database
            with self._connect() as conn:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO search_cache
                    (query_hash, query_text, results, created_at, expires_at, access_count, last_accessed)
                    VALUES (?, ?, ?, ?, ?, 1, ?)
                """,
                    (
                        query_hash,
                        self._normalize_query(query),
                        results_json,
                        current_time,
                        expires_at,
                        current_time,
                    ),
                )
                conn.commit()

            # Store in memory cache
            self._memory_cache[query_hash] = {
                "results": results,
                "expires_at": expires_at,
            }
            self._access_times[query_hash] = current_time
            self._evict_lru_memory()

            logger.debug(f"Cached results for query: {query[:50]}...")
            return True

        except Exception as e:
            logger.error(f"Failed to store in search cache: {e}")
            return False

    def invalidate(self, query: str, search_engine: str = "default") -> bool:
        """
        Invalidate cached results for a specific query.

        Returns:
            True if a database row was deleted, False if there was none or
            the deletion failed.
        """
        query_hash = self._get_query_hash(query, search_engine)

        try:
            # Remove from memory
            self._memory_cache.pop(query_hash, None)
            self._access_times.pop(query_hash, None)

            # Remove from database
            with self._connect() as conn:
                cursor = conn.cursor()
                cursor.execute(
                    "DELETE FROM search_cache WHERE query_hash = ?",
                    (query_hash,),
                )
                deleted = cursor.rowcount
                conn.commit()

            logger.debug(f"Invalidated cache for query: {query[:50]}...")
            return deleted > 0

        except Exception as e:
            logger.error(f"Failed to invalidate cache: {e}")
            return False

    def clear_all(self) -> bool:
        """
        Clear all cached results from memory and from the database.

        Returns:
            True on success, False if clearing the database failed.
        """
        try:
            self._memory_cache.clear()
            self._access_times.clear()

            with self._connect() as conn:
                conn.execute("DELETE FROM search_cache")
                conn.commit()

            logger.info("Cleared all search cache")
            return True

        except Exception as e:
            logger.error(f"Failed to clear search cache: {e}")
            return False

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            A dictionary with the number of valid and expired database
            entries, the memory cache size, the average access count of
            valid entries and a "cache_hit_potential" string; or
            {"error": <message>} if the database could not be queried.
        """
        try:
            current_time = int(time.time())
            with self._connect() as conn:
                cursor = conn.cursor()

                # Total entries
                cursor.execute(
                    "SELECT COUNT(*) FROM search_cache WHERE expires_at > ?",
                    (current_time,),
                )
                total_entries = cursor.fetchone()[0]

                # Total expired entries
                cursor.execute(
                    "SELECT COUNT(*) FROM search_cache WHERE expires_at <= ?",
                    (current_time,),
                )
                expired_entries = cursor.fetchone()[0]

                # Average access count (AVG yields NULL on an empty table)
                cursor.execute(
                    "SELECT AVG(access_count) FROM search_cache WHERE expires_at > ?",
                    (current_time,),
                )
                avg_access = cursor.fetchone()[0] or 0

            return {
                "total_valid_entries": total_entries,
                "expired_entries": expired_entries,
                "memory_cache_size": len(self._memory_cache),
                "average_access_count": round(avg_access, 2),
                "cache_hit_potential": (
                    f"{(total_entries / (total_entries + 1)) * 100:.1f}%"
                    if total_entries > 0
                    else "0%"
                ),
            }

        except Exception as e:
            logger.error(f"Failed to get cache stats: {e}")
            return {"error": str(e)}
362
+
363
+
364
+ # Global cache instance
365
+ _global_cache = None
366
+
367
+
368
def get_search_cache() -> SearchCache:
    """
    Return the shared module-level `SearchCache`.

    The instance is created with default arguments on the first call and
    reused by every later call.
    """
    global _global_cache
    if _global_cache is not None:
        return _global_cache

    _global_cache = SearchCache()
    return _global_cache
374
+
375
+
376
@lru_cache(maxsize=100)
def normalize_entity_query(entity: str, constraint: str) -> str:
    """
    Build a canonical "entity constraint" string for consistent caching.

    Both parts are lowercased, stripped, and have internal whitespace runs
    collapsed to single spaces; they are then joined by one space. Results
    are memoized for up to 100 distinct argument pairs.
    """

    def _canonical(text: str) -> str:
        # Lowercase, trim, and collapse whitespace.
        return " ".join(text.strip().lower().split())

    return f"{_canonical(entity)} {_canonical(constraint)}"
@@ -47,7 +47,6 @@ def format_links_to_markdown(all_links: List[Dict]) -> str:
47
47
  logger.info(f"Formatting {len(all_links)} links to markdown...")
48
48
 
49
49
  if all_links:
50
-
51
50
  # Group links by URL and collect all their indices
52
51
  url_to_indices = {}
53
52
  for link in all_links:
@@ -57,7 +56,6 @@ def format_links_to_markdown(all_links: List[Dict]) -> str:
57
56
  index = link.get("index", "")
58
57
  # logger.info(f"URL \n {str(url)} ")
59
58
  if url:
60
-
61
59
  if url not in url_to_indices:
62
60
  url_to_indices[url] = []
63
61
  url_to_indices[url].append(index)
@@ -139,7 +137,9 @@ def format_findings(
139
137
  logger.info(f"Formatting {len(findings_list)} detailed finding items.")
140
138
 
141
139
  for idx, finding in enumerate(findings_list):
142
- logger.debug(f"Formatting finding item {idx}. Keys: {list(finding.keys())}")
140
+ logger.debug(
141
+ f"Formatting finding item {idx}. Keys: {list(finding.keys())}"
142
+ )
143
143
  # Use .get() for safety
144
144
  phase = finding.get("phase", "Unknown Phase")
145
145
  content = finding.get("content", "No content available.")
@@ -201,8 +201,14 @@ def format_findings(
201
201
  )
202
202
 
203
203
  # If the question is in the finding itself, display it
204
- if not question_displayed and "question" in finding and finding["question"]:
205
- formatted_text += f"### SEARCH QUESTION:\n{finding['question']}\n\n"
204
+ if (
205
+ not question_displayed
206
+ and "question" in finding
207
+ and finding["question"]
208
+ ):
209
+ formatted_text += (
210
+ f"### SEARCH QUESTION:\n{finding['question']}\n\n"
211
+ )
206
212
 
207
213
  # Content
208
214
  formatted_text += f"\n\n{content}\n\n"
@@ -213,7 +219,9 @@ def format_findings(
213
219
  links = extract_links_from_search_results(search_results)
214
220
  if links:
215
221
  formatted_text += "### SOURCES USED IN THIS SECTION:\n"
216
- formatted_text += format_links_to_markdown(links) + "\n\n"
222
+ formatted_text += (
223
+ format_links_to_markdown(links) + "\n\n"
224
+ )
217
225
  except Exception:
218
226
  logger.exception(
219
227
  f"Error processing search results/links for finding {idx}"
@@ -0,0 +1,92 @@
1
+ import threading
2
+ from functools import wraps
3
+ from typing import Any, Callable, Tuple
4
+ from loguru import logger
5
+
6
+ from cachetools import cached, keys
7
+ from flask import current_app, g
8
+ from flask.ctx import AppContext
9
+
10
+
11
def thread_specific_cache(*args: Any, **kwargs: Any) -> Callable:
    """
    A variant of `cached()` whose entries are partitioned by thread.

    The calling thread's identifier is prepended to the regular `hashkey`
    of the call arguments, so a lookup only matches entries stored under the
    same thread identifier.

    NOTE: `threading.get_ident()` values may be recycled once a thread has
    exited, so a later thread that reuses an identifier can still see entries
    left behind by the earlier one.

    Args:
        *args: Will be forwarded to `cached()`.
        **kwargs: Will be forwarded to `cached()`. Must not include `key`,
            because this function supplies its own key function.

    Returns:
        The wrapped function.

    """

    def _thread_key(*call_args: Any, **call_kwargs: Any) -> Tuple[Any, ...]:
        argument_key = keys.hashkey(*call_args, **call_kwargs)
        thread_id = threading.get_ident()
        return (thread_id,) + argument_key

    return cached(*args, key=_thread_key, **kwargs)
30
+
31
+
32
def thread_with_app_context(to_wrap: Callable) -> Callable:
    """
    Decorate a thread entry point so that it runs inside a supplied Flask
    app context. This is useful when several threads cooperate on a single
    request.

    The returned function takes the app context as an extra leading
    positional argument, so `current_app.app_context()` (or `None`) should be
    passed first when the thread is initialized.

    Args:
        to_wrap: The function to wrap.

    Returns:
        The wrapped function. Called as `wrapped(app_context, *args,
        **kwargs)`, it returns whatever `to_wrap(*args, **kwargs)` returns.

    """

    @wraps(to_wrap)
    def _entry_point(
        app_context: AppContext | None, *args: Any, **kwargs: Any
    ) -> Any:
        if app_context is not None:
            # Keep the context entered for the whole duration of the call.
            with app_context:
                return to_wrap(*args, **kwargs)
        else:
            # No context was supplied; call straight through.
            return to_wrap(*args, **kwargs)

    return _entry_point
61
+
62
+
63
def thread_context() -> AppContext | None:
    """
    Creates a new app context for a thread that is being spawned to handle
    the current request, pre-populated with a copy of everything currently
    stored on `g`.

    The new context is entered only long enough to set the copied values on
    its `g`; it has been exited again by the time it is returned.

    Returns:
        The new context, or None if no context is active.

    """
    # Snapshot the current request's globals.
    snapshot = {}
    try:
        for name in g:
            snapshot[name] = g.get(name)
    except TypeError:
        # Context is not initialized. Leave the snapshot as it is.
        pass

    try:
        new_context = current_app.app_context()
    except RuntimeError:
        # Context is not initialized.
        logger.debug("No current app context, not passing to thread.")
        return None
    else:
        # Replay the snapshot onto the `g` belonging to the new context.
        with new_context:
            for name, value in snapshot.items():
                setattr(g, name, value)

        return new_context