local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,4 @@
1
1
  import hashlib
2
- import json
3
2
  import threading
4
3
  from datetime import datetime
5
4
  from pathlib import Path
@@ -8,14 +7,96 @@ from loguru import logger
8
7
 
9
8
  from ...config.llm_config import get_llm
10
9
  from ...config.search_config import get_search
10
+ from ...metrics.search_tracker import set_search_context
11
11
  from ...report_generator import IntegratedReportGenerator
12
12
  from ...search_system import AdvancedSearchSystem
13
+ from ...utilities.log_utils import log_for_research
13
14
  from ...utilities.search_utilities import extract_links_from_search_results
14
- from ..models.database import add_log_to_db, calculate_duration, get_db_connection
15
- from .socket_service import emit_to_subscribers
15
+ from ...utilities.db_utils import get_db_session
16
+ from ...utilities.threading_utils import thread_context, thread_with_app_context
17
+ from ..database.models import ResearchStrategy, ResearchHistory
18
+ from ..models.database import calculate_duration
19
+ from .socket_service import SocketIOService
16
20
 
17
21
  # Output directory for research results
18
- OUTPUT_DIR = Path("research_outputs")
22
+ _PROJECT_ROOT = Path(__file__).parents[4]
23
+ OUTPUT_DIR = _PROJECT_ROOT / "research_outputs"
24
+
25
+
26
+ def save_research_strategy(research_id, strategy_name):
27
+ """
28
+ Save the strategy used for a research to the database.
29
+
30
+ Args:
31
+ research_id: The ID of the research
32
+ strategy_name: The name of the strategy used
33
+ """
34
+ try:
35
+ logger.debug(
36
+ f"save_research_strategy called with research_id={research_id}, strategy_name={strategy_name}"
37
+ )
38
+ session = get_db_session()
39
+
40
+ try:
41
+ # Check if a strategy already exists for this research
42
+ existing_strategy = (
43
+ session.query(ResearchStrategy)
44
+ .filter_by(research_id=research_id)
45
+ .first()
46
+ )
47
+
48
+ if existing_strategy:
49
+ # Update existing strategy
50
+ existing_strategy.strategy_name = strategy_name
51
+ logger.debug(
52
+ f"Updating existing strategy for research {research_id}"
53
+ )
54
+ else:
55
+ # Create new strategy record
56
+ new_strategy = ResearchStrategy(
57
+ research_id=research_id, strategy_name=strategy_name
58
+ )
59
+ session.add(new_strategy)
60
+ logger.debug(
61
+ f"Creating new strategy record for research {research_id}"
62
+ )
63
+
64
+ session.commit()
65
+ logger.info(
66
+ f"Saved strategy '{strategy_name}' for research {research_id}"
67
+ )
68
+ finally:
69
+ session.close()
70
+ except Exception:
71
+ logger.exception("Error saving research strategy")
72
+
73
+
74
+ def get_research_strategy(research_id):
75
+ """
76
+ Get the strategy used for a research.
77
+
78
+ Args:
79
+ research_id: The ID of the research
80
+
81
+ Returns:
82
+ str: The strategy name or None if not found
83
+ """
84
+ try:
85
+ session = get_db_session()
86
+
87
+ try:
88
+ strategy = (
89
+ session.query(ResearchStrategy)
90
+ .filter_by(research_id=research_id)
91
+ .first()
92
+ )
93
+
94
+ return strategy.strategy_name if strategy else None
95
+ finally:
96
+ session.close()
97
+ except Exception:
98
+ logger.exception("Error getting research strategy")
99
+ return None
19
100
 
20
101
 
21
102
  def start_research_process(
@@ -42,10 +123,20 @@ def start_research_process(
42
123
  Returns:
43
124
  threading.Thread: The thread running the research
44
125
  """
126
+ # Pass the app context to the thread.
127
+ run_research_callback = thread_with_app_context(run_research_callback)
128
+
45
129
  # Start research process in a background thread
46
130
  thread = threading.Thread(
47
131
  target=run_research_callback,
48
- args=(research_id, query, mode, active_research, termination_flags),
132
+ args=(
133
+ thread_context(),
134
+ research_id,
135
+ query,
136
+ mode,
137
+ active_research,
138
+ termination_flags,
139
+ ),
49
140
  kwargs=kwargs,
50
141
  )
51
142
  thread.daemon = True
@@ -55,13 +146,7 @@ def start_research_process(
55
146
  "thread": thread,
56
147
  "progress": 0,
57
148
  "status": "in_progress",
58
- "log": [
59
- {
60
- "time": datetime.utcnow().isoformat(),
61
- "message": "Research started",
62
- "progress": 0,
63
- }
64
- ],
149
+ "log": [],
65
150
  "settings": kwargs, # Store settings for reference
66
151
  }
67
152
 
@@ -87,6 +172,7 @@ def _generate_report_path(query: str) -> Path:
87
172
  )
88
173
 
89
174
 
175
+ @log_for_research
90
176
  def run_research_process(
91
177
  research_id, query, mode, active_research, termination_flags, **kwargs
92
178
  ):
@@ -104,8 +190,12 @@ def run_research_process(
104
190
  try:
105
191
  # Check if this research has been terminated before we even start
106
192
  if research_id in termination_flags and termination_flags[research_id]:
107
- logger.info(f"Research {research_id} was terminated before starting")
108
- cleanup_research_resources(research_id, active_research, termination_flags)
193
+ logger.info(
194
+ f"Research {research_id} was terminated before starting"
195
+ )
196
+ cleanup_research_resources(
197
+ research_id, active_research, termination_flags
198
+ )
109
199
  return
110
200
 
111
201
  logger.info(
@@ -121,12 +211,22 @@ def run_research_process(
121
211
  time_period = kwargs.get("time_period")
122
212
  iterations = kwargs.get("iterations")
123
213
  questions_per_iteration = kwargs.get("questions_per_iteration")
214
+ strategy = kwargs.get(
215
+ "strategy", "source-based"
216
+ ) # Default to source-based
217
+
218
+ # Save the strategy to the database
219
+ logger.debug(
220
+ f"About to call save_research_strategy with research_id={research_id}, strategy={strategy}"
221
+ )
222
+ save_research_strategy(research_id, strategy)
223
+ logger.debug("save_research_strategy call completed")
124
224
 
125
225
  # Log all parameters for debugging
126
226
  logger.info(
127
227
  "Research parameters: provider=%s, model=%s, search_engine=%s, "
128
228
  "max_results=%s, time_period=%s, iterations=%s, "
129
- "questions_per_iteration=%s, custom_endpoint=%s",
229
+ "questions_per_iteration=%s, custom_endpoint=%s, strategy=%s",
130
230
  model_provider,
131
231
  model,
132
232
  search_engine,
@@ -135,41 +235,86 @@ def run_research_process(
135
235
  iterations,
136
236
  questions_per_iteration,
137
237
  custom_endpoint,
238
+ strategy,
138
239
  )
139
240
 
140
241
  # Set up the AI Context Manager
141
242
  output_dir = OUTPUT_DIR / f"research_{research_id}"
142
243
  output_dir.mkdir(parents=True, exist_ok=True)
143
244
 
245
+ # Create shared research context that can be updated during research
246
+ shared_research_context = {
247
+ "research_id": research_id,
248
+ "research_query": query,
249
+ "research_mode": mode,
250
+ "research_phase": "init",
251
+ "search_iteration": 0,
252
+ "search_engines_planned": None,
253
+ "search_engine_selected": search_engine,
254
+ }
255
+
256
+ # Set search context for search tracking
257
+ set_search_context(shared_research_context)
258
+
144
259
  # Set up progress callback
145
260
  def progress_callback(message, progress_percent, metadata):
146
261
  # Frequent termination check
147
- if research_id in termination_flags and termination_flags[research_id]:
148
- handle_termination(research_id, active_research, termination_flags)
262
+ if (
263
+ research_id in termination_flags
264
+ and termination_flags[research_id]
265
+ ):
266
+ handle_termination(
267
+ research_id, active_research, termination_flags
268
+ )
149
269
  raise Exception("Research was terminated by user")
270
+
271
+ logger.log("milestone", message)
272
+
150
273
  if "SEARCH_PLAN:" in message:
151
274
  engines = message.split("SEARCH_PLAN:")[1].strip()
152
275
  metadata["planned_engines"] = engines
153
276
  metadata["phase"] = "search_planning" # Use existing phase
277
+ # Update shared context for token tracking
278
+ shared_research_context["search_engines_planned"] = engines
279
+ shared_research_context["research_phase"] = "search_planning"
154
280
 
155
281
  if "ENGINE_SELECTED:" in message:
156
282
  engine = message.split("ENGINE_SELECTED:")[1].strip()
157
283
  metadata["selected_engine"] = engine
158
284
  metadata["phase"] = "search" # Use existing 'search' phase
285
+ # Update shared context for token tracking
286
+ shared_research_context["search_engine_selected"] = engine
287
+ shared_research_context["research_phase"] = "search"
159
288
 
160
- timestamp = datetime.utcnow().isoformat()
289
+ # Capture other research phases for better context tracking
290
+ if metadata.get("phase"):
291
+ shared_research_context["research_phase"] = metadata["phase"]
292
+
293
+ # Update search iteration if available
294
+ if "iteration" in metadata:
295
+ shared_research_context["search_iteration"] = metadata[
296
+ "iteration"
297
+ ]
161
298
 
162
299
  # Adjust progress based on research mode
163
300
  adjusted_progress = progress_percent
164
- if mode == "detailed" and metadata.get("phase") == "output_generation":
301
+ if (
302
+ mode == "detailed"
303
+ and metadata.get("phase") == "output_generation"
304
+ ):
165
305
  # For detailed mode, adjust the progress range for output generation
166
306
  adjusted_progress = min(80, progress_percent)
167
- elif mode == "detailed" and metadata.get("phase") == "report_generation":
307
+ elif (
308
+ mode == "detailed"
309
+ and metadata.get("phase") == "report_generation"
310
+ ):
168
311
  # Scale the progress from 80% to 95% for the report generation phase
169
312
  if progress_percent is not None:
170
313
  normalized = progress_percent / 100
171
314
  adjusted_progress = 80 + (normalized * 15)
172
- elif mode == "quick" and metadata.get("phase") == "output_generation":
315
+ elif (
316
+ mode == "quick" and metadata.get("phase") == "output_generation"
317
+ ):
173
318
  # For quick mode, ensure we're at least at 85% during output generation
174
319
  adjusted_progress = max(85, progress_percent)
175
320
  # Map any further progress within output_generation to 85-95% range
@@ -179,90 +324,51 @@ def run_research_process(
179
324
 
180
325
  # Don't let progress go backwards
181
326
  if research_id in active_research and adjusted_progress is not None:
182
- current_progress = active_research[research_id].get("progress", 0)
327
+ current_progress = active_research[research_id].get(
328
+ "progress", 0
329
+ )
183
330
  adjusted_progress = max(current_progress, adjusted_progress)
184
331
 
185
- log_entry = {
186
- "time": timestamp,
187
- "message": message,
188
- "progress": adjusted_progress,
189
- "metadata": metadata,
190
- }
191
-
192
332
  # Update active research record
193
333
  if research_id in active_research:
194
- active_research[research_id]["log"].append(log_entry)
195
334
  if adjusted_progress is not None:
196
335
  active_research[research_id]["progress"] = adjusted_progress
197
336
 
198
- # Determine log type for database storage
199
- log_type = "info"
200
- if metadata and metadata.get("phase"):
201
- phase = metadata.get("phase")
202
- if phase in ["complete", "iteration_complete"]:
203
- log_type = "milestone"
204
- elif phase == "error" or "error" in message.lower():
205
- log_type = "error"
206
-
207
- # Save logs to the database
208
- add_log_to_db(
209
- research_id,
210
- message,
211
- log_type=log_type,
212
- progress=adjusted_progress,
213
- metadata=metadata,
214
- )
215
-
216
337
  # Update progress in the research_history table (for backward compatibility)
217
- conn = get_db_connection()
218
- cursor = conn.cursor()
338
+ db_session = get_db_session()
219
339
 
220
340
  # Update the progress and log separately to avoid race conditions
221
- if adjusted_progress is not None:
222
- cursor.execute(
223
- "UPDATE research_history SET progress = ? WHERE id = ?",
224
- (adjusted_progress, research_id),
225
- )
226
-
227
- # Add the log entry to the progress_log
228
- cursor.execute(
229
- "SELECT progress_log FROM research_history WHERE id = ?",
230
- (research_id,),
231
- )
232
- log_result = cursor.fetchone()
233
-
234
- if log_result:
235
- try:
236
- current_log = json.loads(log_result[0])
237
- except Exception:
238
- current_log = []
239
-
240
- current_log.append(log_entry)
241
- cursor.execute(
242
- "UPDATE research_history SET progress_log = ? WHERE id = ?",
243
- (json.dumps(current_log), research_id),
244
- )
245
-
246
- conn.commit()
247
- conn.close()
341
+ with db_session:
342
+ if adjusted_progress is not None:
343
+ research = (
344
+ db_session.query(ResearchHistory)
345
+ .filter(ResearchHistory.id == research_id)
346
+ .first()
347
+ )
348
+ if research:
349
+ research.progress = adjusted_progress
350
+ db_session.commit()
248
351
 
249
352
  # Emit a socket event
250
353
  try:
251
354
  # Basic event data
252
- event_data = {"message": message, "progress": adjusted_progress}
253
-
254
- # Add log entry in full format for detailed logging on client
255
- if metadata:
256
- event_data["log_entry"] = log_entry
355
+ event_data = {"progress": adjusted_progress}
257
356
 
258
- emit_to_subscribers("research_progress", research_id, event_data)
357
+ SocketIOService().emit_to_subscribers(
358
+ "progress", research_id, event_data
359
+ )
259
360
  except Exception:
260
361
  logger.exception("Socket emit error (non-critical)")
261
362
 
262
363
  # Function to check termination during long-running operations
263
364
  def check_termination():
264
- if research_id in termination_flags and termination_flags[research_id]:
265
- handle_termination(research_id, active_research, termination_flags)
365
+ if (
366
+ research_id in termination_flags
367
+ and termination_flags[research_id]
368
+ ):
369
+ handle_termination(
370
+ research_id, active_research, termination_flags
371
+ )
266
372
  raise Exception(
267
373
  "Research was terminated by user during long-running operation"
268
374
  )
@@ -279,12 +385,22 @@ def run_research_process(
279
385
  # Override LLM if model or model_provider specified
280
386
  if model or model_provider:
281
387
  try:
388
+ # Phase 1 Enhancement: Build research context for token tracking
389
+ research_context = {
390
+ "research_query": query,
391
+ "research_mode": mode,
392
+ "research_phase": "init",
393
+ "search_iteration": 0,
394
+ }
395
+
282
396
  # Get LLM with the overridden settings
283
397
  # Explicitly create the model with parameters to avoid fallback issues
284
398
  use_llm = get_llm(
285
399
  model_name=model,
286
400
  provider=model_provider,
287
401
  openai_endpoint_url=custom_endpoint,
402
+ research_id=research_id,
403
+ research_context=research_context,
288
404
  )
289
405
 
290
406
  logger.info(
@@ -300,7 +416,7 @@ def run_research_process(
300
416
  )
301
417
 
302
418
  # Set the progress callback in the system
303
- system = AdvancedSearchSystem(llm=use_llm)
419
+ system = AdvancedSearchSystem(llm=use_llm, strategy_name=strategy)
304
420
  system.set_progress_callback(progress_callback)
305
421
 
306
422
  # Override search engine if specified
@@ -309,16 +425,22 @@ def run_research_process(
309
425
  if iterations:
310
426
  system.max_iterations = int(iterations)
311
427
  if questions_per_iteration:
312
- system.questions_per_iteration = int(questions_per_iteration)
428
+ system.questions_per_iteration = int(
429
+ questions_per_iteration
430
+ )
313
431
 
314
432
  # Create a new search object with these settings
315
433
  system.search = get_search(
316
434
  search_tool=search_engine, llm_instance=system.model
317
435
  )
318
436
 
319
- logger.info("Successfully set search engine to: %s", search_engine)
437
+ logger.info(
438
+ "Successfully set search engine to: %s", search_engine
439
+ )
320
440
  except Exception:
321
- logger.exception("Error setting search engine to %s", search_engine)
441
+ logger.exception(
442
+ "Error setting search engine to %s", search_engine
443
+ )
322
444
 
323
445
  # Run the search
324
446
  progress_callback("Starting research process", 5, {"phase": "init"})
@@ -385,21 +507,32 @@ def run_research_process(
385
507
  ):
386
508
  error_type = "token_limit"
387
509
  # Log specific error type
388
- logger.warning("Detected token limit error in synthesis")
510
+ logger.warning(
511
+ "Detected token limit error in synthesis"
512
+ )
389
513
 
390
514
  # Update progress with specific error type
391
515
  progress_callback(
392
516
  "Synthesis hit token limits. Attempting fallback...",
393
517
  87,
394
- {"phase": "synthesis_error", "error_type": error_type},
518
+ {
519
+ "phase": "synthesis_error",
520
+ "error_type": error_type,
521
+ },
395
522
  )
396
- elif "timeout" in error_message or "timed out" in error_message:
523
+ elif (
524
+ "timeout" in error_message
525
+ or "timed out" in error_message
526
+ ):
397
527
  error_type = "timeout"
398
528
  logger.warning("Detected timeout error in synthesis")
399
529
  progress_callback(
400
530
  "Synthesis timed out. Attempting fallback...",
401
531
  87,
402
- {"phase": "synthesis_error", "error_type": error_type},
532
+ {
533
+ "phase": "synthesis_error",
534
+ "error_type": error_type,
535
+ },
403
536
  )
404
537
  elif "rate limit" in error_message:
405
538
  error_type = "rate_limit"
@@ -407,26 +540,40 @@ def run_research_process(
407
540
  progress_callback(
408
541
  "LLM rate limit reached. Attempting fallback...",
409
542
  87,
410
- {"phase": "synthesis_error", "error_type": error_type},
543
+ {
544
+ "phase": "synthesis_error",
545
+ "error_type": error_type,
546
+ },
411
547
  )
412
- elif "connection" in error_message or "network" in error_message:
548
+ elif (
549
+ "connection" in error_message
550
+ or "network" in error_message
551
+ ):
413
552
  error_type = "connection"
414
553
  logger.warning("Detected connection error in synthesis")
415
554
  progress_callback(
416
555
  "Connection issue with LLM. Attempting fallback...",
417
556
  87,
418
- {"phase": "synthesis_error", "error_type": error_type},
557
+ {
558
+ "phase": "synthesis_error",
559
+ "error_type": error_type,
560
+ },
419
561
  )
420
562
  elif (
421
563
  "llm error" in error_message
422
564
  or "final answer synthesis fail" in error_message
423
565
  ):
424
566
  error_type = "llm_error"
425
- logger.warning("Detected general LLM error in synthesis")
567
+ logger.warning(
568
+ "Detected general LLM error in synthesis"
569
+ )
426
570
  progress_callback(
427
571
  "LLM error during synthesis. Attempting fallback...",
428
572
  87,
429
- {"phase": "synthesis_error", "error_type": error_type},
573
+ {
574
+ "phase": "synthesis_error",
575
+ "error_type": error_type,
576
+ },
430
577
  )
431
578
  else:
432
579
  # Generic error
@@ -434,7 +581,10 @@ def run_research_process(
434
581
  progress_callback(
435
582
  "Error during synthesis. Attempting fallback...",
436
583
  87,
437
- {"phase": "synthesis_error", "error_type": "unknown"},
584
+ {
585
+ "phase": "synthesis_error",
586
+ "error_type": "unknown",
587
+ },
438
588
  )
439
589
 
440
590
  # Extract synthesized content from findings if available
@@ -445,11 +595,13 @@ def run_research_process(
445
595
  break
446
596
 
447
597
  # Use synthesized content as fallback
448
- if synthesized_content and not synthesized_content.startswith(
449
- "Error:"
598
+ if (
599
+ synthesized_content
600
+ and not synthesized_content.startswith("Error:")
450
601
  ):
451
-
452
- logger.info("Using existing synthesized content as fallback")
602
+ logger.info(
603
+ "Using existing synthesized content as fallback"
604
+ )
453
605
  raw_formatted_findings = synthesized_content
454
606
 
455
607
  # Or use current_knowledge as another fallback
@@ -465,17 +617,19 @@ def run_research_process(
465
617
  f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
466
618
  for finding in results.get("findings", [])
467
619
  if finding.get("content")
468
- and not finding.get("content", "").startswith("Error:")
620
+ and not finding.get("content", "").startswith(
621
+ "Error:"
622
+ )
469
623
  ]
470
624
 
471
625
  if valid_findings:
472
626
  raw_formatted_findings = (
473
627
  "# Research Results (Fallback Mode)\n\n"
474
628
  )
475
- raw_formatted_findings += "\n\n".join(valid_findings)
476
- raw_formatted_findings += (
477
- f"\n\n## Error Information\n{raw_formatted_findings}"
629
+ raw_formatted_findings += "\n\n".join(
630
+ valid_findings
478
631
  )
632
+ raw_formatted_findings += f"\n\n## Error Information\n{raw_formatted_findings}"
479
633
  else:
480
634
  # Last resort: use everything including errors
481
635
  raw_formatted_findings = (
@@ -491,7 +645,10 @@ def run_research_process(
491
645
  progress_callback(
492
646
  f"Using fallback synthesis due to {error_type} error",
493
647
  88,
494
- {"phase": "synthesis_fallback", "error_type": error_type},
648
+ {
649
+ "phase": "synthesis_fallback",
650
+ "error_type": error_type,
651
+ },
495
652
  )
496
653
 
497
654
  logger.info(
@@ -531,8 +688,9 @@ def run_research_process(
531
688
  )
532
689
 
533
690
  # Save as markdown file
534
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
535
691
  report_path = _generate_report_path(query)
692
+ output_dir = report_path.parent
693
+ output_dir.mkdir(parents=True, exist_ok=True)
536
694
 
537
695
  # Send progress update for writing to file
538
696
  progress_callback(
@@ -547,8 +705,12 @@ def run_research_process(
547
705
  f.write(f"Query: {query}\n\n")
548
706
  f.write(clean_markdown)
549
707
  f.write("\n\n## Research Metrics\n")
550
- f.write(f"- Search Iterations: {results['iterations']}\n")
551
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
708
+ f.write(
709
+ f"- Search Iterations: {results['iterations']}\n"
710
+ )
711
+ f.write(
712
+ f"- Generated at: {datetime.utcnow().isoformat()}\n"
713
+ )
552
714
 
553
715
  # Update database
554
716
  metadata = {
@@ -560,33 +722,31 @@ def run_research_process(
560
722
  now = datetime.utcnow()
561
723
  completed_at = now.isoformat()
562
724
 
563
- logger.info("Updating database for research_id: %s", research_id)
564
- # Get the start time from the database
565
- conn = get_db_connection()
566
- cursor = conn.cursor()
567
- cursor.execute(
568
- "SELECT created_at FROM research_history WHERE id = ?",
569
- (research_id,),
570
- )
571
- result = cursor.fetchone()
572
-
573
- # Use the helper function for consistent duration calculation
574
- duration_seconds = calculate_duration(result[0])
575
-
576
- # Update the record
577
- cursor.execute(
578
- "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
579
- (
580
- "completed",
581
- completed_at,
582
- duration_seconds,
583
- str(report_path),
584
- json.dumps(metadata),
585
- research_id,
586
- ),
725
+ logger.info(
726
+ "Updating database for research_id: %s", research_id
587
727
  )
588
- conn.commit()
589
- conn.close()
728
+
729
+ db_session = get_db_session()
730
+ with db_session:
731
+ research = (
732
+ db_session.query(ResearchHistory)
733
+ .filter_by(id=research_id)
734
+ .first()
735
+ )
736
+
737
+ # Use the helper function for consistent duration calculation
738
+ duration_seconds = calculate_duration(
739
+ research.created_at, research.completed_at
740
+ )
741
+
742
+ research.status = "completed"
743
+ research.completed_at = completed_at
744
+ research.duration_seconds = duration_seconds
745
+ research.report_path = str(report_path)
746
+ research.research_meta = metadata
747
+
748
+ db_session.commit()
749
+
590
750
  logger.info(
591
751
  f"Database updated successfully for research_id: {research_id}"
592
752
  )
@@ -605,11 +765,15 @@ def run_research_process(
605
765
  cleanup_research_resources(
606
766
  research_id, active_research, termination_flags
607
767
  )
608
- logger.info("Resources cleaned up for research_id: %s", research_id)
768
+ logger.info(
769
+ "Resources cleaned up for research_id: %s", research_id
770
+ )
609
771
 
610
772
  except Exception as inner_e:
611
773
  logger.exception("Error during quick summary generation")
612
- raise Exception(f"Error generating quick summary: {str(inner_e)}")
774
+ raise Exception(
775
+ f"Error generating quick summary: {str(inner_e)}"
776
+ )
613
777
  else:
614
778
  raise Exception(
615
779
  "No research findings were generated. Please try again."
@@ -617,14 +781,18 @@ def run_research_process(
617
781
  else:
618
782
  # Full Report
619
783
  progress_callback(
620
- "Generating detailed report...", 85, {"phase": "report_generation"}
784
+ "Generating detailed report...",
785
+ 85,
786
+ {"phase": "report_generation"},
621
787
  )
622
788
 
623
789
  # Extract the search system from the results if available
624
790
  search_system = results.get("search_system", None)
625
791
 
626
792
  # Pass the existing search system to maintain citation indices
627
- report_generator = IntegratedReportGenerator(search_system=search_system)
793
+ report_generator = IntegratedReportGenerator(
794
+ search_system=search_system
795
+ )
628
796
  final_report = report_generator.generate_report(results, query)
629
797
 
630
798
  progress_callback(
@@ -632,8 +800,9 @@ def run_research_process(
632
800
  )
633
801
 
634
802
  # Save as markdown file
635
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
636
803
  report_path = _generate_report_path(query)
804
+ output_dir = report_path.parent
805
+ output_dir.mkdir(parents=True, exist_ok=True)
637
806
 
638
807
  with report_path.open("w", encoding="utf-8") as f:
639
808
  f.write(final_report["content"])
@@ -646,30 +815,26 @@ def run_research_process(
646
815
  now = datetime.utcnow()
647
816
  completed_at = now.isoformat()
648
817
 
649
- # Get the start time from the database
650
- conn = get_db_connection()
651
- cursor = conn.cursor()
652
- cursor.execute(
653
- "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
654
- )
655
- result = cursor.fetchone()
656
-
657
- # Use the helper function for consistent duration calculation
658
- duration_seconds = calculate_duration(result[0])
659
-
660
- cursor.execute(
661
- "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
662
- (
663
- "completed",
664
- completed_at,
665
- duration_seconds,
666
- str(report_path),
667
- json.dumps(metadata),
668
- research_id,
669
- ),
670
- )
671
- conn.commit()
672
- conn.close()
818
+ db_session = get_db_session()
819
+ with db_session:
820
+ research = (
821
+ db_session.query(ResearchHistory)
822
+ .filter_by(id=research_id)
823
+ .first()
824
+ )
825
+
826
+ # Use the helper function for consistent duration calculation
827
+ duration_seconds = calculate_duration(
828
+ research.created_at, research.completed_at
829
+ )
830
+
831
+ research.status = "completed"
832
+ research.completed_at = completed_at
833
+ research.duration_seconds = duration_seconds
834
+ research.report_path = str(report_path)
835
+ research.research_meta = metadata
836
+
837
+ db_session.commit()
673
838
 
674
839
  progress_callback(
675
840
  "Research completed successfully",
@@ -678,7 +843,9 @@ def run_research_process(
678
843
  )
679
844
 
680
845
  # Clean up resources
681
- cleanup_research_resources(research_id, active_research, termination_flags)
846
+ cleanup_research_resources(
847
+ research_id, active_research, termination_flags
848
+ )
682
849
 
683
850
  except Exception as e:
684
851
  # Handle error
@@ -696,9 +863,7 @@ def run_research_process(
696
863
  "solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."
697
864
  }
698
865
  elif "Error type: model_not_found" in user_friendly_error:
699
- user_friendly_error = (
700
- "Required Ollama model not found. Please pull the model first."
701
- )
866
+ user_friendly_error = "Required Ollama model not found. Please pull the model first."
702
867
  error_context = {
703
868
  "solution": "Run 'ollama pull mistral' to download the required model."
704
869
  }
@@ -709,7 +874,9 @@ def run_research_process(
709
874
  }
710
875
  elif "Error type: api_error" in user_friendly_error:
711
876
  # Keep the original error message as it's already improved
712
- error_context = {"solution": "Check API configuration and credentials."}
877
+ error_context = {
878
+ "solution": "Check API configuration and credentials."
879
+ }
713
880
 
714
881
  # Update metadata with more context about the error
715
882
  metadata = {"phase": "error", "error": user_friendly_error}
@@ -720,13 +887,13 @@ def run_research_process(
720
887
  if research_id in active_research:
721
888
  progress_callback(user_friendly_error, None, metadata)
722
889
 
723
- conn = get_db_connection()
724
- cursor = conn.cursor()
725
-
726
890
  # If termination was requested, mark as suspended instead of failed
727
891
  status = (
728
892
  "suspended"
729
- if (research_id in termination_flags and termination_flags[research_id])
893
+ if (
894
+ research_id in termination_flags
895
+ and termination_flags[research_id]
896
+ )
730
897
  else "failed"
731
898
  )
732
899
  message = (
@@ -741,30 +908,36 @@ def run_research_process(
741
908
 
742
909
  # Get the start time from the database
743
910
  duration_seconds = None
744
- cursor.execute(
745
- "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
746
- )
747
- result = cursor.fetchone()
748
-
749
- # Use the helper function for consistent duration calculation
750
- if result and result[0]:
751
- duration_seconds = calculate_duration(result[0])
752
-
753
- cursor.execute(
754
- "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?",
755
- (
756
- status,
757
- completed_at,
758
- duration_seconds,
759
- json.dumps(metadata),
760
- research_id,
761
- ),
762
- )
763
- conn.commit()
764
- conn.close()
911
+ db_session = get_db_session()
912
+ with db_session:
913
+ research = (
914
+ db_session.query(ResearchHistory)
915
+ .filter_by(id=research_id)
916
+ .first()
917
+ )
918
+ assert research is not None, "Research not in database"
919
+
920
+ duration_seconds = calculate_duration(research.created_at)
921
+
922
+ db_session = get_db_session()
923
+ with db_session:
924
+ research = (
925
+ db_session.query(ResearchHistory)
926
+ .filter_by(id=research_id)
927
+ .first()
928
+ )
929
+ assert research is not None, "Research not in database"
930
+
931
+ # Update the ResearchHistory object with the new status and completion time
932
+ research.status = status
933
+ research.completed_at = completed_at
934
+ research.duration_seconds = duration_seconds
935
+ research.metadata = metadata
936
+
937
+ db_session.commit()
765
938
 
766
939
  try:
767
- emit_to_subscribers(
940
+ SocketIOService().emit_to_subscribers(
768
941
  "research_progress",
769
942
  research_id,
770
943
  {"status": status, "error": message},
@@ -776,7 +949,9 @@ def run_research_process(
776
949
  logger.exception("Error in error handler")
777
950
 
778
951
  # Clean up resources
779
- cleanup_research_resources(research_id, active_research, termination_flags)
952
+ cleanup_research_resources(
953
+ research_id, active_research, termination_flags
954
+ )
780
955
 
781
956
 
782
957
  def cleanup_research_resources(research_id, active_research, termination_flags):
@@ -793,15 +968,17 @@ def cleanup_research_resources(research_id, active_research, termination_flags):
793
968
  # Get the current status from the database to determine the final status message
794
969
  current_status = "completed" # Default
795
970
  try:
796
- conn = get_db_connection()
797
- cursor = conn.cursor()
798
- cursor.execute(
799
- "SELECT status FROM research_history WHERE id = ?", (research_id,)
800
- )
801
- result = cursor.fetchone()
802
- if result and result[0]:
803
- current_status = result[0]
804
- conn.close()
971
+ db_session = get_db_session()
972
+ with db_session:
973
+ research = (
974
+ db_session.query(ResearchHistory)
975
+ .filter(ResearchHistory.id == research_id)
976
+ .first()
977
+ )
978
+ if research:
979
+ current_status = research.status
980
+ else:
981
+ logger.error("Research with ID %s not found", research_id)
805
982
  except Exception:
806
983
  logger.exception("Error retrieving research status during cleanup")
807
984
 
@@ -816,13 +993,16 @@ def cleanup_research_resources(research_id, active_research, termination_flags):
816
993
  # Send a final message to subscribers
817
994
  try:
818
995
  # Import here to avoid circular imports
819
- from ..routes.research_routes import get_globals
996
+ from ..routes.globals import get_globals
820
997
 
821
998
  globals_dict = get_globals()
822
999
  socket_subscriptions = globals_dict.get("socket_subscriptions", {})
823
1000
 
824
1001
  # Send a final message to any remaining subscribers with explicit status
825
- if research_id in socket_subscriptions and socket_subscriptions[research_id]:
1002
+ if (
1003
+ research_id in socket_subscriptions
1004
+ and socket_subscriptions[research_id]
1005
+ ):
826
1006
  # Use the proper status message based on database status
827
1007
  if current_status == "suspended" or current_status == "failed":
828
1008
  final_message = {
@@ -843,7 +1023,9 @@ def cleanup_research_resources(research_id, active_research, termination_flags):
843
1023
  research_id,
844
1024
  )
845
1025
 
846
- emit_to_subscribers("research_progress", research_id, final_message)
1026
+ SocketIOService().emit_to_subscribers(
1027
+ "research_progress", research_id, final_message
1028
+ )
847
1029
 
848
1030
  except Exception:
849
1031
  logger.error("Error sending final cleanup message")
@@ -858,31 +1040,23 @@ def handle_termination(research_id, active_research, termination_flags):
858
1040
  active_research: Dictionary of active research processes
859
1041
  termination_flags: Dictionary of termination flags
860
1042
  """
861
- # Explicitly set the status to suspended in the database
862
- conn = get_db_connection()
863
- cursor = conn.cursor()
864
-
865
- # Calculate duration up to termination point - using UTC consistently
866
1043
  now = datetime.utcnow()
867
1044
  completed_at = now.isoformat()
868
1045
 
869
- # Get the start time from the database
870
- cursor.execute(
871
- "SELECT created_at FROM research_history WHERE id = ?",
872
- (research_id,),
873
- )
874
- result = cursor.fetchone()
1046
+ # Fetch the start time from the database using the ORM
1047
+ session = get_db_session()
1048
+ research = session.query(ResearchHistory).filter_by(id=research_id).first()
875
1049
 
876
- # Calculate the duration
877
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
1050
+ if research:
1051
+ duration_seconds = calculate_duration(research.created_at)
878
1052
 
879
- # Update the database with suspended status
880
- cursor.execute(
881
- "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?",
882
- ("suspended", completed_at, duration_seconds, research_id),
883
- )
884
- conn.commit()
885
- conn.close()
1053
+ # Update the database with suspended status using the ORM
1054
+ research.status = "suspended"
1055
+ research.completed_at = completed_at
1056
+ research.duration_seconds = duration_seconds
1057
+ session.commit()
1058
+ else:
1059
+ logger.error(f"Research with ID {research_id} not found.")
886
1060
 
887
1061
  # Clean up resources
888
1062
  cleanup_research_resources(research_id, active_research, termination_flags)
@@ -890,7 +1064,7 @@ def handle_termination(research_id, active_research, termination_flags):
890
1064
 
891
1065
  def cancel_research(research_id):
892
1066
  """
893
- Cancel/terminate a research process
1067
+ Cancel/terminate a research process using ORM.
894
1068
 
895
1069
  Args:
896
1070
  research_id: The ID of the research to cancel
@@ -899,7 +1073,7 @@ def cancel_research(research_id):
899
1073
  bool: True if the research was found and cancelled, False otherwise
900
1074
  """
901
1075
  # Import globals from research routes
902
- from ..routes.research_routes import get_globals
1076
+ from ..routes.globals import get_globals
903
1077
 
904
1078
  globals_dict = get_globals()
905
1079
  active_research = globals_dict["active_research"]
@@ -915,27 +1089,14 @@ def cancel_research(research_id):
915
1089
  return True
916
1090
  else:
917
1091
  # Update database directly if not found in active_research
918
- from ..models.database import get_db_connection
919
-
920
- conn = get_db_connection()
921
- cursor = conn.cursor()
922
-
923
- # First check if the research exists
924
- cursor.execute(
925
- "SELECT status FROM research_history WHERE id = ?", (research_id,)
1092
+ session = get_db_session()
1093
+ research = (
1094
+ session.query(ResearchHistory).filter_by(id=research_id).first()
926
1095
  )
927
- result = cursor.fetchone()
928
-
929
- if not result:
930
- conn.close()
1096
+ if not research:
931
1097
  return False
932
1098
 
933
1099
  # If it exists but isn't in active_research, still update status
934
- cursor.execute(
935
- "UPDATE research_history SET status = ? WHERE id = ?",
936
- ("suspended", research_id),
937
- )
938
- conn.commit()
939
- conn.close()
940
-
1100
+ research.status = "suspended"
1101
+ session.commit()
941
1102
  return True