local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -8,14 +8,96 @@ from loguru import logger
8
8
 
9
9
  from ...config.llm_config import get_llm
10
10
  from ...config.search_config import get_search
11
+ from ...metrics.search_tracker import set_search_context
11
12
  from ...report_generator import IntegratedReportGenerator
12
13
  from ...search_system import AdvancedSearchSystem
14
+ from ...utilities.log_utils import log_for_research
13
15
  from ...utilities.search_utilities import extract_links_from_search_results
14
- from ..models.database import add_log_to_db, calculate_duration, get_db_connection
15
- from .socket_service import emit_to_subscribers
16
+ from ...utilities.db_utils import get_db_session
17
+ from ...utilities.threading_utils import thread_context, thread_with_app_context
18
+ from ..database.models import ResearchStrategy
19
+ from ..models.database import calculate_duration, get_db_connection
20
+ from .socket_service import SocketIOService
16
21
 
17
22
  # Output directory for research results
18
- OUTPUT_DIR = Path("research_outputs")
23
+ _PROJECT_ROOT = Path(__file__).parents[4]
24
+ OUTPUT_DIR = _PROJECT_ROOT / "research_outputs"
25
+
26
+
27
+ def save_research_strategy(research_id, strategy_name):
28
+ """
29
+ Save the strategy used for a research to the database.
30
+
31
+ Args:
32
+ research_id: The ID of the research
33
+ strategy_name: The name of the strategy used
34
+ """
35
+ try:
36
+ logger.debug(
37
+ f"save_research_strategy called with research_id={research_id}, strategy_name={strategy_name}"
38
+ )
39
+ session = get_db_session()
40
+
41
+ try:
42
+ # Check if a strategy already exists for this research
43
+ existing_strategy = (
44
+ session.query(ResearchStrategy)
45
+ .filter_by(research_id=research_id)
46
+ .first()
47
+ )
48
+
49
+ if existing_strategy:
50
+ # Update existing strategy
51
+ existing_strategy.strategy_name = strategy_name
52
+ logger.debug(
53
+ f"Updating existing strategy for research {research_id}"
54
+ )
55
+ else:
56
+ # Create new strategy record
57
+ new_strategy = ResearchStrategy(
58
+ research_id=research_id, strategy_name=strategy_name
59
+ )
60
+ session.add(new_strategy)
61
+ logger.debug(
62
+ f"Creating new strategy record for research {research_id}"
63
+ )
64
+
65
+ session.commit()
66
+ logger.info(
67
+ f"Saved strategy '{strategy_name}' for research {research_id}"
68
+ )
69
+ finally:
70
+ session.close()
71
+ except Exception:
72
+ logger.exception("Error saving research strategy")
73
+
74
+
75
+ def get_research_strategy(research_id):
76
+ """
77
+ Get the strategy used for a research.
78
+
79
+ Args:
80
+ research_id: The ID of the research
81
+
82
+ Returns:
83
+ str: The strategy name or None if not found
84
+ """
85
+ try:
86
+ session = get_db_session()
87
+
88
+ try:
89
+ strategy = (
90
+ session.query(ResearchStrategy)
91
+ .filter_by(research_id=research_id)
92
+ .first()
93
+ )
94
+
95
+ return strategy.strategy_name if strategy else None
96
+ finally:
97
+ session.close()
98
+ except Exception:
99
+ logger.exception("Error getting research strategy")
100
+ return None
19
101
 
20
102
 
21
103
  def start_research_process(
@@ -42,10 +124,20 @@ def start_research_process(
42
124
  Returns:
43
125
  threading.Thread: The thread running the research
44
126
  """
127
+ # Pass the app context to the thread.
128
+ run_research_callback = thread_with_app_context(run_research_callback)
129
+
45
130
  # Start research process in a background thread
46
131
  thread = threading.Thread(
47
132
  target=run_research_callback,
48
- args=(research_id, query, mode, active_research, termination_flags),
133
+ args=(
134
+ thread_context(),
135
+ research_id,
136
+ query,
137
+ mode,
138
+ active_research,
139
+ termination_flags,
140
+ ),
49
141
  kwargs=kwargs,
50
142
  )
51
143
  thread.daemon = True
@@ -55,13 +147,7 @@ def start_research_process(
55
147
  "thread": thread,
56
148
  "progress": 0,
57
149
  "status": "in_progress",
58
- "log": [
59
- {
60
- "time": datetime.utcnow().isoformat(),
61
- "message": "Research started",
62
- "progress": 0,
63
- }
64
- ],
150
+ "log": [],
65
151
  "settings": kwargs, # Store settings for reference
66
152
  }
67
153
 
@@ -87,6 +173,7 @@ def _generate_report_path(query: str) -> Path:
87
173
  )
88
174
 
89
175
 
176
+ @log_for_research
90
177
  def run_research_process(
91
178
  research_id, query, mode, active_research, termination_flags, **kwargs
92
179
  ):
@@ -104,8 +191,12 @@ def run_research_process(
104
191
  try:
105
192
  # Check if this research has been terminated before we even start
106
193
  if research_id in termination_flags and termination_flags[research_id]:
107
- logger.info(f"Research {research_id} was terminated before starting")
108
- cleanup_research_resources(research_id, active_research, termination_flags)
194
+ logger.info(
195
+ f"Research {research_id} was terminated before starting"
196
+ )
197
+ cleanup_research_resources(
198
+ research_id, active_research, termination_flags
199
+ )
109
200
  return
110
201
 
111
202
  logger.info(
@@ -121,12 +212,22 @@ def run_research_process(
121
212
  time_period = kwargs.get("time_period")
122
213
  iterations = kwargs.get("iterations")
123
214
  questions_per_iteration = kwargs.get("questions_per_iteration")
215
+ strategy = kwargs.get(
216
+ "strategy", "source-based"
217
+ ) # Default to source-based
218
+
219
+ # Save the strategy to the database
220
+ logger.debug(
221
+ f"About to call save_research_strategy with research_id={research_id}, strategy={strategy}"
222
+ )
223
+ save_research_strategy(research_id, strategy)
224
+ logger.debug("save_research_strategy call completed")
124
225
 
125
226
  # Log all parameters for debugging
126
227
  logger.info(
127
228
  "Research parameters: provider=%s, model=%s, search_engine=%s, "
128
229
  "max_results=%s, time_period=%s, iterations=%s, "
129
- "questions_per_iteration=%s, custom_endpoint=%s",
230
+ "questions_per_iteration=%s, custom_endpoint=%s, strategy=%s",
130
231
  model_provider,
131
232
  model,
132
233
  search_engine,
@@ -135,41 +236,86 @@ def run_research_process(
135
236
  iterations,
136
237
  questions_per_iteration,
137
238
  custom_endpoint,
239
+ strategy,
138
240
  )
139
241
 
140
242
  # Set up the AI Context Manager
141
243
  output_dir = OUTPUT_DIR / f"research_{research_id}"
142
244
  output_dir.mkdir(parents=True, exist_ok=True)
143
245
 
246
+ # Create shared research context that can be updated during research
247
+ shared_research_context = {
248
+ "research_id": research_id,
249
+ "research_query": query,
250
+ "research_mode": mode,
251
+ "research_phase": "init",
252
+ "search_iteration": 0,
253
+ "search_engines_planned": None,
254
+ "search_engine_selected": search_engine,
255
+ }
256
+
257
+ # Set search context for search tracking
258
+ set_search_context(shared_research_context)
259
+
144
260
  # Set up progress callback
145
261
  def progress_callback(message, progress_percent, metadata):
146
262
  # Frequent termination check
147
- if research_id in termination_flags and termination_flags[research_id]:
148
- handle_termination(research_id, active_research, termination_flags)
263
+ if (
264
+ research_id in termination_flags
265
+ and termination_flags[research_id]
266
+ ):
267
+ handle_termination(
268
+ research_id, active_research, termination_flags
269
+ )
149
270
  raise Exception("Research was terminated by user")
271
+
272
+ logger.log("milestone", message)
273
+
150
274
  if "SEARCH_PLAN:" in message:
151
275
  engines = message.split("SEARCH_PLAN:")[1].strip()
152
276
  metadata["planned_engines"] = engines
153
277
  metadata["phase"] = "search_planning" # Use existing phase
278
+ # Update shared context for token tracking
279
+ shared_research_context["search_engines_planned"] = engines
280
+ shared_research_context["research_phase"] = "search_planning"
154
281
 
155
282
  if "ENGINE_SELECTED:" in message:
156
283
  engine = message.split("ENGINE_SELECTED:")[1].strip()
157
284
  metadata["selected_engine"] = engine
158
285
  metadata["phase"] = "search" # Use existing 'search' phase
286
+ # Update shared context for token tracking
287
+ shared_research_context["search_engine_selected"] = engine
288
+ shared_research_context["research_phase"] = "search"
289
+
290
+ # Capture other research phases for better context tracking
291
+ if metadata.get("phase"):
292
+ shared_research_context["research_phase"] = metadata["phase"]
159
293
 
160
- timestamp = datetime.utcnow().isoformat()
294
+ # Update search iteration if available
295
+ if "iteration" in metadata:
296
+ shared_research_context["search_iteration"] = metadata[
297
+ "iteration"
298
+ ]
161
299
 
162
300
  # Adjust progress based on research mode
163
301
  adjusted_progress = progress_percent
164
- if mode == "detailed" and metadata.get("phase") == "output_generation":
302
+ if (
303
+ mode == "detailed"
304
+ and metadata.get("phase") == "output_generation"
305
+ ):
165
306
  # For detailed mode, adjust the progress range for output generation
166
307
  adjusted_progress = min(80, progress_percent)
167
- elif mode == "detailed" and metadata.get("phase") == "report_generation":
308
+ elif (
309
+ mode == "detailed"
310
+ and metadata.get("phase") == "report_generation"
311
+ ):
168
312
  # Scale the progress from 80% to 95% for the report generation phase
169
313
  if progress_percent is not None:
170
314
  normalized = progress_percent / 100
171
315
  adjusted_progress = 80 + (normalized * 15)
172
- elif mode == "quick" and metadata.get("phase") == "output_generation":
316
+ elif (
317
+ mode == "quick" and metadata.get("phase") == "output_generation"
318
+ ):
173
319
  # For quick mode, ensure we're at least at 85% during output generation
174
320
  adjusted_progress = max(85, progress_percent)
175
321
  # Map any further progress within output_generation to 85-95% range
@@ -179,40 +325,16 @@ def run_research_process(
179
325
 
180
326
  # Don't let progress go backwards
181
327
  if research_id in active_research and adjusted_progress is not None:
182
- current_progress = active_research[research_id].get("progress", 0)
328
+ current_progress = active_research[research_id].get(
329
+ "progress", 0
330
+ )
183
331
  adjusted_progress = max(current_progress, adjusted_progress)
184
332
 
185
- log_entry = {
186
- "time": timestamp,
187
- "message": message,
188
- "progress": adjusted_progress,
189
- "metadata": metadata,
190
- }
191
-
192
333
  # Update active research record
193
334
  if research_id in active_research:
194
- active_research[research_id]["log"].append(log_entry)
195
335
  if adjusted_progress is not None:
196
336
  active_research[research_id]["progress"] = adjusted_progress
197
337
 
198
- # Determine log type for database storage
199
- log_type = "info"
200
- if metadata and metadata.get("phase"):
201
- phase = metadata.get("phase")
202
- if phase in ["complete", "iteration_complete"]:
203
- log_type = "milestone"
204
- elif phase == "error" or "error" in message.lower():
205
- log_type = "error"
206
-
207
- # Save logs to the database
208
- add_log_to_db(
209
- research_id,
210
- message,
211
- log_type=log_type,
212
- progress=adjusted_progress,
213
- metadata=metadata,
214
- )
215
-
216
338
  # Update progress in the research_history table (for backward compatibility)
217
339
  conn = get_db_connection()
218
340
  cursor = conn.cursor()
@@ -237,7 +359,6 @@ def run_research_process(
237
359
  except Exception:
238
360
  current_log = []
239
361
 
240
- current_log.append(log_entry)
241
362
  cursor.execute(
242
363
  "UPDATE research_history SET progress_log = ? WHERE id = ?",
243
364
  (json.dumps(current_log), research_id),
@@ -249,20 +370,23 @@ def run_research_process(
249
370
  # Emit a socket event
250
371
  try:
251
372
  # Basic event data
252
- event_data = {"message": message, "progress": adjusted_progress}
253
-
254
- # Add log entry in full format for detailed logging on client
255
- if metadata:
256
- event_data["log_entry"] = log_entry
373
+ event_data = {"progress": adjusted_progress}
257
374
 
258
- emit_to_subscribers("research_progress", research_id, event_data)
375
+ SocketIOService().emit_to_subscribers(
376
+ "progress", research_id, event_data
377
+ )
259
378
  except Exception:
260
379
  logger.exception("Socket emit error (non-critical)")
261
380
 
262
381
  # Function to check termination during long-running operations
263
382
  def check_termination():
264
- if research_id in termination_flags and termination_flags[research_id]:
265
- handle_termination(research_id, active_research, termination_flags)
383
+ if (
384
+ research_id in termination_flags
385
+ and termination_flags[research_id]
386
+ ):
387
+ handle_termination(
388
+ research_id, active_research, termination_flags
389
+ )
266
390
  raise Exception(
267
391
  "Research was terminated by user during long-running operation"
268
392
  )
@@ -279,12 +403,22 @@ def run_research_process(
279
403
  # Override LLM if model or model_provider specified
280
404
  if model or model_provider:
281
405
  try:
406
+ # Phase 1 Enhancement: Build research context for token tracking
407
+ research_context = {
408
+ "research_query": query,
409
+ "research_mode": mode,
410
+ "research_phase": "init",
411
+ "search_iteration": 0,
412
+ }
413
+
282
414
  # Get LLM with the overridden settings
283
415
  # Explicitly create the model with parameters to avoid fallback issues
284
416
  use_llm = get_llm(
285
417
  model_name=model,
286
418
  provider=model_provider,
287
419
  openai_endpoint_url=custom_endpoint,
420
+ research_id=research_id,
421
+ research_context=research_context,
288
422
  )
289
423
 
290
424
  logger.info(
@@ -300,7 +434,7 @@ def run_research_process(
300
434
  )
301
435
 
302
436
  # Set the progress callback in the system
303
- system = AdvancedSearchSystem(llm=use_llm)
437
+ system = AdvancedSearchSystem(llm=use_llm, strategy_name=strategy)
304
438
  system.set_progress_callback(progress_callback)
305
439
 
306
440
  # Override search engine if specified
@@ -309,16 +443,22 @@ def run_research_process(
309
443
  if iterations:
310
444
  system.max_iterations = int(iterations)
311
445
  if questions_per_iteration:
312
- system.questions_per_iteration = int(questions_per_iteration)
446
+ system.questions_per_iteration = int(
447
+ questions_per_iteration
448
+ )
313
449
 
314
450
  # Create a new search object with these settings
315
451
  system.search = get_search(
316
452
  search_tool=search_engine, llm_instance=system.model
317
453
  )
318
454
 
319
- logger.info("Successfully set search engine to: %s", search_engine)
455
+ logger.info(
456
+ "Successfully set search engine to: %s", search_engine
457
+ )
320
458
  except Exception:
321
- logger.exception("Error setting search engine to %s", search_engine)
459
+ logger.exception(
460
+ "Error setting search engine to %s", search_engine
461
+ )
322
462
 
323
463
  # Run the search
324
464
  progress_callback("Starting research process", 5, {"phase": "init"})
@@ -385,21 +525,32 @@ def run_research_process(
385
525
  ):
386
526
  error_type = "token_limit"
387
527
  # Log specific error type
388
- logger.warning("Detected token limit error in synthesis")
528
+ logger.warning(
529
+ "Detected token limit error in synthesis"
530
+ )
389
531
 
390
532
  # Update progress with specific error type
391
533
  progress_callback(
392
534
  "Synthesis hit token limits. Attempting fallback...",
393
535
  87,
394
- {"phase": "synthesis_error", "error_type": error_type},
536
+ {
537
+ "phase": "synthesis_error",
538
+ "error_type": error_type,
539
+ },
395
540
  )
396
- elif "timeout" in error_message or "timed out" in error_message:
541
+ elif (
542
+ "timeout" in error_message
543
+ or "timed out" in error_message
544
+ ):
397
545
  error_type = "timeout"
398
546
  logger.warning("Detected timeout error in synthesis")
399
547
  progress_callback(
400
548
  "Synthesis timed out. Attempting fallback...",
401
549
  87,
402
- {"phase": "synthesis_error", "error_type": error_type},
550
+ {
551
+ "phase": "synthesis_error",
552
+ "error_type": error_type,
553
+ },
403
554
  )
404
555
  elif "rate limit" in error_message:
405
556
  error_type = "rate_limit"
@@ -407,26 +558,40 @@ def run_research_process(
407
558
  progress_callback(
408
559
  "LLM rate limit reached. Attempting fallback...",
409
560
  87,
410
- {"phase": "synthesis_error", "error_type": error_type},
561
+ {
562
+ "phase": "synthesis_error",
563
+ "error_type": error_type,
564
+ },
411
565
  )
412
- elif "connection" in error_message or "network" in error_message:
566
+ elif (
567
+ "connection" in error_message
568
+ or "network" in error_message
569
+ ):
413
570
  error_type = "connection"
414
571
  logger.warning("Detected connection error in synthesis")
415
572
  progress_callback(
416
573
  "Connection issue with LLM. Attempting fallback...",
417
574
  87,
418
- {"phase": "synthesis_error", "error_type": error_type},
575
+ {
576
+ "phase": "synthesis_error",
577
+ "error_type": error_type,
578
+ },
419
579
  )
420
580
  elif (
421
581
  "llm error" in error_message
422
582
  or "final answer synthesis fail" in error_message
423
583
  ):
424
584
  error_type = "llm_error"
425
- logger.warning("Detected general LLM error in synthesis")
585
+ logger.warning(
586
+ "Detected general LLM error in synthesis"
587
+ )
426
588
  progress_callback(
427
589
  "LLM error during synthesis. Attempting fallback...",
428
590
  87,
429
- {"phase": "synthesis_error", "error_type": error_type},
591
+ {
592
+ "phase": "synthesis_error",
593
+ "error_type": error_type,
594
+ },
430
595
  )
431
596
  else:
432
597
  # Generic error
@@ -434,7 +599,10 @@ def run_research_process(
434
599
  progress_callback(
435
600
  "Error during synthesis. Attempting fallback...",
436
601
  87,
437
- {"phase": "synthesis_error", "error_type": "unknown"},
602
+ {
603
+ "phase": "synthesis_error",
604
+ "error_type": "unknown",
605
+ },
438
606
  )
439
607
 
440
608
  # Extract synthesized content from findings if available
@@ -445,11 +613,13 @@ def run_research_process(
445
613
  break
446
614
 
447
615
  # Use synthesized content as fallback
448
- if synthesized_content and not synthesized_content.startswith(
449
- "Error:"
616
+ if (
617
+ synthesized_content
618
+ and not synthesized_content.startswith("Error:")
450
619
  ):
451
-
452
- logger.info("Using existing synthesized content as fallback")
620
+ logger.info(
621
+ "Using existing synthesized content as fallback"
622
+ )
453
623
  raw_formatted_findings = synthesized_content
454
624
 
455
625
  # Or use current_knowledge as another fallback
@@ -465,17 +635,19 @@ def run_research_process(
465
635
  f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
466
636
  for finding in results.get("findings", [])
467
637
  if finding.get("content")
468
- and not finding.get("content", "").startswith("Error:")
638
+ and not finding.get("content", "").startswith(
639
+ "Error:"
640
+ )
469
641
  ]
470
642
 
471
643
  if valid_findings:
472
644
  raw_formatted_findings = (
473
645
  "# Research Results (Fallback Mode)\n\n"
474
646
  )
475
- raw_formatted_findings += "\n\n".join(valid_findings)
476
- raw_formatted_findings += (
477
- f"\n\n## Error Information\n{raw_formatted_findings}"
647
+ raw_formatted_findings += "\n\n".join(
648
+ valid_findings
478
649
  )
650
+ raw_formatted_findings += f"\n\n## Error Information\n{raw_formatted_findings}"
479
651
  else:
480
652
  # Last resort: use everything including errors
481
653
  raw_formatted_findings = (
@@ -491,7 +663,10 @@ def run_research_process(
491
663
  progress_callback(
492
664
  f"Using fallback synthesis due to {error_type} error",
493
665
  88,
494
- {"phase": "synthesis_fallback", "error_type": error_type},
666
+ {
667
+ "phase": "synthesis_fallback",
668
+ "error_type": error_type,
669
+ },
495
670
  )
496
671
 
497
672
  logger.info(
@@ -531,8 +706,9 @@ def run_research_process(
531
706
  )
532
707
 
533
708
  # Save as markdown file
534
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
535
709
  report_path = _generate_report_path(query)
710
+ output_dir = report_path.parent
711
+ output_dir.mkdir(parents=True, exist_ok=True)
536
712
 
537
713
  # Send progress update for writing to file
538
714
  progress_callback(
@@ -547,8 +723,12 @@ def run_research_process(
547
723
  f.write(f"Query: {query}\n\n")
548
724
  f.write(clean_markdown)
549
725
  f.write("\n\n## Research Metrics\n")
550
- f.write(f"- Search Iterations: {results['iterations']}\n")
551
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
726
+ f.write(
727
+ f"- Search Iterations: {results['iterations']}\n"
728
+ )
729
+ f.write(
730
+ f"- Generated at: {datetime.utcnow().isoformat()}\n"
731
+ )
552
732
 
553
733
  # Update database
554
734
  metadata = {
@@ -560,7 +740,9 @@ def run_research_process(
560
740
  now = datetime.utcnow()
561
741
  completed_at = now.isoformat()
562
742
 
563
- logger.info("Updating database for research_id: %s", research_id)
743
+ logger.info(
744
+ "Updating database for research_id: %s", research_id
745
+ )
564
746
  # Get the start time from the database
565
747
  conn = get_db_connection()
566
748
  cursor = conn.cursor()
@@ -605,11 +787,15 @@ def run_research_process(
605
787
  cleanup_research_resources(
606
788
  research_id, active_research, termination_flags
607
789
  )
608
- logger.info("Resources cleaned up for research_id: %s", research_id)
790
+ logger.info(
791
+ "Resources cleaned up for research_id: %s", research_id
792
+ )
609
793
 
610
794
  except Exception as inner_e:
611
795
  logger.exception("Error during quick summary generation")
612
- raise Exception(f"Error generating quick summary: {str(inner_e)}")
796
+ raise Exception(
797
+ f"Error generating quick summary: {str(inner_e)}"
798
+ )
613
799
  else:
614
800
  raise Exception(
615
801
  "No research findings were generated. Please try again."
@@ -617,14 +803,18 @@ def run_research_process(
617
803
  else:
618
804
  # Full Report
619
805
  progress_callback(
620
- "Generating detailed report...", 85, {"phase": "report_generation"}
806
+ "Generating detailed report...",
807
+ 85,
808
+ {"phase": "report_generation"},
621
809
  )
622
810
 
623
811
  # Extract the search system from the results if available
624
812
  search_system = results.get("search_system", None)
625
813
 
626
814
  # Pass the existing search system to maintain citation indices
627
- report_generator = IntegratedReportGenerator(search_system=search_system)
815
+ report_generator = IntegratedReportGenerator(
816
+ search_system=search_system
817
+ )
628
818
  final_report = report_generator.generate_report(results, query)
629
819
 
630
820
  progress_callback(
@@ -632,8 +822,9 @@ def run_research_process(
632
822
  )
633
823
 
634
824
  # Save as markdown file
635
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
636
825
  report_path = _generate_report_path(query)
826
+ output_dir = report_path.parent
827
+ output_dir.mkdir(parents=True, exist_ok=True)
637
828
 
638
829
  with report_path.open("w", encoding="utf-8") as f:
639
830
  f.write(final_report["content"])
@@ -650,7 +841,8 @@ def run_research_process(
650
841
  conn = get_db_connection()
651
842
  cursor = conn.cursor()
652
843
  cursor.execute(
653
- "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
844
+ "SELECT created_at FROM research_history WHERE id = ?",
845
+ (research_id,),
654
846
  )
655
847
  result = cursor.fetchone()
656
848
 
@@ -678,7 +870,9 @@ def run_research_process(
678
870
  )
679
871
 
680
872
  # Clean up resources
681
- cleanup_research_resources(research_id, active_research, termination_flags)
873
+ cleanup_research_resources(
874
+ research_id, active_research, termination_flags
875
+ )
682
876
 
683
877
  except Exception as e:
684
878
  # Handle error
@@ -696,9 +890,7 @@ def run_research_process(
696
890
  "solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."
697
891
  }
698
892
  elif "Error type: model_not_found" in user_friendly_error:
699
- user_friendly_error = (
700
- "Required Ollama model not found. Please pull the model first."
701
- )
893
+ user_friendly_error = "Required Ollama model not found. Please pull the model first."
702
894
  error_context = {
703
895
  "solution": "Run 'ollama pull mistral' to download the required model."
704
896
  }
@@ -709,7 +901,9 @@ def run_research_process(
709
901
  }
710
902
  elif "Error type: api_error" in user_friendly_error:
711
903
  # Keep the original error message as it's already improved
712
- error_context = {"solution": "Check API configuration and credentials."}
904
+ error_context = {
905
+ "solution": "Check API configuration and credentials."
906
+ }
713
907
 
714
908
  # Update metadata with more context about the error
715
909
  metadata = {"phase": "error", "error": user_friendly_error}
@@ -726,7 +920,10 @@ def run_research_process(
726
920
  # If termination was requested, mark as suspended instead of failed
727
921
  status = (
728
922
  "suspended"
729
- if (research_id in termination_flags and termination_flags[research_id])
923
+ if (
924
+ research_id in termination_flags
925
+ and termination_flags[research_id]
926
+ )
730
927
  else "failed"
731
928
  )
732
929
  message = (
@@ -742,7 +939,8 @@ def run_research_process(
742
939
  # Get the start time from the database
743
940
  duration_seconds = None
744
941
  cursor.execute(
745
- "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
942
+ "SELECT created_at FROM research_history WHERE id = ?",
943
+ (research_id,),
746
944
  )
747
945
  result = cursor.fetchone()
748
946
 
@@ -764,7 +962,7 @@ def run_research_process(
764
962
  conn.close()
765
963
 
766
964
  try:
767
- emit_to_subscribers(
965
+ SocketIOService().emit_to_subscribers(
768
966
  "research_progress",
769
967
  research_id,
770
968
  {"status": status, "error": message},
@@ -776,7 +974,9 @@ def run_research_process(
776
974
  logger.exception("Error in error handler")
777
975
 
778
976
  # Clean up resources
779
- cleanup_research_resources(research_id, active_research, termination_flags)
977
+ cleanup_research_resources(
978
+ research_id, active_research, termination_flags
979
+ )
780
980
 
781
981
 
782
982
  def cleanup_research_resources(research_id, active_research, termination_flags):
@@ -816,13 +1016,16 @@ def cleanup_research_resources(research_id, active_research, termination_flags):
816
1016
  # Send a final message to subscribers
817
1017
  try:
818
1018
  # Import here to avoid circular imports
819
- from ..routes.research_routes import get_globals
1019
+ from ..routes.globals import get_globals
820
1020
 
821
1021
  globals_dict = get_globals()
822
1022
  socket_subscriptions = globals_dict.get("socket_subscriptions", {})
823
1023
 
824
1024
  # Send a final message to any remaining subscribers with explicit status
825
- if research_id in socket_subscriptions and socket_subscriptions[research_id]:
1025
+ if (
1026
+ research_id in socket_subscriptions
1027
+ and socket_subscriptions[research_id]
1028
+ ):
826
1029
  # Use the proper status message based on database status
827
1030
  if current_status == "suspended" or current_status == "failed":
828
1031
  final_message = {
@@ -843,7 +1046,9 @@ def cleanup_research_resources(research_id, active_research, termination_flags):
843
1046
  research_id,
844
1047
  )
845
1048
 
846
- emit_to_subscribers("research_progress", research_id, final_message)
1049
+ SocketIOService().emit_to_subscribers(
1050
+ "research_progress", research_id, final_message
1051
+ )
847
1052
 
848
1053
  except Exception:
849
1054
  logger.error("Error sending final cleanup message")
@@ -874,7 +1079,9 @@ def handle_termination(research_id, active_research, termination_flags):
874
1079
  result = cursor.fetchone()
875
1080
 
876
1081
  # Calculate the duration
877
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
1082
+ duration_seconds = (
1083
+ calculate_duration(result[0]) if result and result[0] else None
1084
+ )
878
1085
 
879
1086
  # Update the database with suspended status
880
1087
  cursor.execute(
@@ -899,7 +1106,7 @@ def cancel_research(research_id):
899
1106
  bool: True if the research was found and cancelled, False otherwise
900
1107
  """
901
1108
  # Import globals from research routes
902
- from ..routes.research_routes import get_globals
1109
+ from ..routes.globals import get_globals
903
1110
 
904
1111
  globals_dict = get_globals()
905
1112
  active_research = globals_dict["active_research"]