local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,947 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import threading
5
+ import traceback
6
+ from datetime import datetime
7
+
8
+ from ...config.llm_config import get_llm
9
+ from ...config.search_config import get_search
10
+ from ...report_generator import IntegratedReportGenerator
11
+ from ...search_system import AdvancedSearchSystem
12
+ from ...utilities.search_utilities import extract_links_from_search_results
13
+ from ..models.database import add_log_to_db, calculate_duration, get_db_connection
14
+ from .socket_service import emit_to_subscribers
15
+
16
+ # Initialize logger
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Output directory for research results
20
+ OUTPUT_DIR = "research_outputs"
21
+
22
+
23
def start_research_process(
    research_id,
    query,
    mode,
    active_research,
    termination_flags,
    run_research_callback,
    **kwargs,
):
    """
    Start a research process in a background thread.

    The tracking record is registered in ``active_research`` *before* the
    worker thread is started: the worker's progress callback only records
    progress when ``research_id in active_research``, so starting the thread
    first would race the registration and could silently drop the earliest
    progress/log updates.

    Args:
        research_id: The ID of the research
        query: The research query
        mode: The research mode (quick/detailed)
        active_research: Dictionary of active research processes, keyed by
            research_id; this function inserts the tracking record for the
            new research
        termination_flags: Dictionary of termination flags checked by the
            worker to support cooperative cancellation
        run_research_callback: The callback function to run the research;
            called as ``callback(research_id, query, mode, active_research,
            termination_flags, **kwargs)``
        **kwargs: Additional parameters to pass to the research process
            (model, search_engine, etc.)

    Returns:
        threading.Thread: The (daemon) thread running the research
    """
    # Prepare the background worker but do not start it yet.
    thread = threading.Thread(
        target=run_research_callback,
        args=(research_id, query, mode, active_research, termination_flags),
        kwargs=kwargs,
    )
    # Daemon so a lingering research job never blocks interpreter shutdown.
    thread.daemon = True

    # Register the tracking record FIRST so the worker always finds it.
    active_research[research_id] = {
        "thread": thread,
        "progress": 0,
        "status": "in_progress",
        "log": [
            {
                "time": datetime.utcnow().isoformat(),
                "message": "Research started",
                "progress": 0,
            }
        ],
        "settings": kwargs,  # Store settings for reference
    }

    # Only now launch the worker; it can safely update active_research.
    thread.start()

    return thread
71
+
72
+
73
+ def run_research_process(
74
+ research_id, query, mode, active_research, termination_flags, **kwargs
75
+ ):
76
+ """
77
+ Run the research process in the background for a given research ID.
78
+
79
+ Args:
80
+ research_id: The ID of the research
81
+ query: The research query
82
+ mode: The research mode (quick/detailed)
83
+ active_research: Dictionary of active research processes
84
+ termination_flags: Dictionary of termination flags
85
+ **kwargs: Additional parameters for the research (model_provider, model, search_engine, etc.)
86
+ """
87
+ try:
88
+ # Check if this research has been terminated before we even start
89
+ if research_id in termination_flags and termination_flags[research_id]:
90
+ logger.info(f"Research {research_id} was terminated before starting")
91
+ cleanup_research_resources(research_id, active_research, termination_flags)
92
+ return
93
+
94
+ logger.info(
95
+ "Starting research process for ID %s, query: %s", research_id, query
96
+ )
97
+
98
+ # Extract key parameters
99
+ model_provider = kwargs.get("model_provider")
100
+ model = kwargs.get("model")
101
+ custom_endpoint = kwargs.get("custom_endpoint")
102
+ search_engine = kwargs.get("search_engine")
103
+ max_results = kwargs.get("max_results")
104
+ time_period = kwargs.get("time_period")
105
+ iterations = kwargs.get("iterations")
106
+ questions_per_iteration = kwargs.get("questions_per_iteration")
107
+
108
+ # Log all parameters for debugging
109
+ logger.info(
110
+ "Research parameters: provider=%s, model=%s, search_engine=%s, "
111
+ "max_results=%s, time_period=%s, iterations=%s, "
112
+ "questions_per_iteration=%s, custom_endpoint=%s",
113
+ model_provider,
114
+ model,
115
+ search_engine,
116
+ max_results,
117
+ time_period,
118
+ iterations,
119
+ questions_per_iteration,
120
+ custom_endpoint,
121
+ )
122
+
123
+ # Set up the AI Context Manager
124
+ output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
125
+ os.makedirs(output_dir, exist_ok=True)
126
+
127
+ # Set up progress callback
128
+ def progress_callback(message, progress_percent, metadata):
129
+ # Frequent termination check
130
+ if research_id in termination_flags and termination_flags[research_id]:
131
+ handle_termination(research_id, active_research, termination_flags)
132
+ raise Exception("Research was terminated by user")
133
+ if "SEARCH_PLAN:" in message:
134
+ engines = message.split("SEARCH_PLAN:")[1].strip()
135
+ metadata["planned_engines"] = engines
136
+ metadata["phase"] = "search_planning" # Use existing phase
137
+
138
+ if "ENGINE_SELECTED:" in message:
139
+ engine = message.split("ENGINE_SELECTED:")[1].strip()
140
+ metadata["selected_engine"] = engine
141
+ metadata["phase"] = "search" # Use existing 'search' phase
142
+
143
+ timestamp = datetime.utcnow().isoformat()
144
+
145
+ # Adjust progress based on research mode
146
+ adjusted_progress = progress_percent
147
+ if mode == "detailed" and metadata.get("phase") == "output_generation":
148
+ # For detailed mode, adjust the progress range for output generation
149
+ adjusted_progress = min(80, progress_percent)
150
+ elif mode == "detailed" and metadata.get("phase") == "report_generation":
151
+ # Scale the progress from 80% to 95% for the report generation phase
152
+ if progress_percent is not None:
153
+ normalized = progress_percent / 100
154
+ adjusted_progress = 80 + (normalized * 15)
155
+ elif mode == "quick" and metadata.get("phase") == "output_generation":
156
+ # For quick mode, ensure we're at least at 85% during output generation
157
+ adjusted_progress = max(85, progress_percent)
158
+ # Map any further progress within output_generation to 85-95% range
159
+ if progress_percent is not None and progress_percent > 0:
160
+ normalized = progress_percent / 100
161
+ adjusted_progress = 85 + (normalized * 10)
162
+
163
+ # Don't let progress go backwards
164
+ if research_id in active_research and adjusted_progress is not None:
165
+ current_progress = active_research[research_id].get("progress", 0)
166
+ adjusted_progress = max(current_progress, adjusted_progress)
167
+
168
+ log_entry = {
169
+ "time": timestamp,
170
+ "message": message,
171
+ "progress": adjusted_progress,
172
+ "metadata": metadata,
173
+ }
174
+
175
+ # Update active research record
176
+ if research_id in active_research:
177
+ active_research[research_id]["log"].append(log_entry)
178
+ if adjusted_progress is not None:
179
+ active_research[research_id]["progress"] = adjusted_progress
180
+
181
+ # Determine log type for database storage
182
+ log_type = "info"
183
+ if metadata and metadata.get("phase"):
184
+ phase = metadata.get("phase")
185
+ if phase in ["complete", "iteration_complete"]:
186
+ log_type = "milestone"
187
+ elif phase == "error" or "error" in message.lower():
188
+ log_type = "error"
189
+
190
+ # Save logs to the database
191
+ add_log_to_db(
192
+ research_id,
193
+ message,
194
+ log_type=log_type,
195
+ progress=adjusted_progress,
196
+ metadata=metadata,
197
+ )
198
+
199
+ # Update progress in the research_history table (for backward compatibility)
200
+ conn = get_db_connection()
201
+ cursor = conn.cursor()
202
+
203
+ # Update the progress and log separately to avoid race conditions
204
+ if adjusted_progress is not None:
205
+ cursor.execute(
206
+ "UPDATE research_history SET progress = ? WHERE id = ?",
207
+ (adjusted_progress, research_id),
208
+ )
209
+
210
+ # Add the log entry to the progress_log
211
+ cursor.execute(
212
+ "SELECT progress_log FROM research_history WHERE id = ?",
213
+ (research_id,),
214
+ )
215
+ log_result = cursor.fetchone()
216
+
217
+ if log_result:
218
+ try:
219
+ current_log = json.loads(log_result[0])
220
+ except Exception:
221
+ current_log = []
222
+
223
+ current_log.append(log_entry)
224
+ cursor.execute(
225
+ "UPDATE research_history SET progress_log = ? WHERE id = ?",
226
+ (json.dumps(current_log), research_id),
227
+ )
228
+
229
+ conn.commit()
230
+ conn.close()
231
+
232
+ # Emit a socket event
233
+ try:
234
+ # Basic event data
235
+ event_data = {"message": message, "progress": adjusted_progress}
236
+
237
+ # Add log entry in full format for detailed logging on client
238
+ if metadata:
239
+ event_data["log_entry"] = log_entry
240
+
241
+ emit_to_subscribers("research_progress", research_id, event_data)
242
+ except Exception as e:
243
+ logger.error(f"Socket emit error (non-critical): {str(e)}")
244
+
245
+ # Function to check termination during long-running operations
246
+ def check_termination():
247
+ if research_id in termination_flags and termination_flags[research_id]:
248
+ handle_termination(research_id, active_research, termination_flags)
249
+ raise Exception(
250
+ "Research was terminated by user during long-running operation"
251
+ )
252
+ return False # Not terminated
253
+
254
+ # Configure the system with the specified parameters
255
+ use_llm = None
256
+ if model or search_engine or model_provider:
257
+ # Log that we're overriding system settings
258
+ logger.info(
259
+ f"Overriding system settings with: provider={model_provider}, model={model}, search_engine={search_engine}"
260
+ )
261
+
262
+ # Override LLM if model or model_provider specified
263
+ if model or model_provider:
264
+ try:
265
+ # Get LLM with the overridden settings
266
+ # Explicitly create the model with parameters to avoid fallback issues
267
+ use_llm = get_llm(
268
+ model_name=model,
269
+ provider=model_provider,
270
+ openai_endpoint_url=custom_endpoint,
271
+ )
272
+
273
+ logger.info(
274
+ "Successfully set LLM to: provider=%s, model=%s",
275
+ model_provider,
276
+ model,
277
+ )
278
+ except Exception as e:
279
+ logger.error(
280
+ "Error setting LLM provider=%s, model=%s: %s",
281
+ model_provider,
282
+ model,
283
+ str(e),
284
+ )
285
+ logger.error(traceback.format_exc())
286
+
287
+ # Set the progress callback in the system
288
+ system = AdvancedSearchSystem(llm=use_llm)
289
+ system.set_progress_callback(progress_callback)
290
+
291
+ # Override search engine if specified
292
+ if search_engine:
293
+ try:
294
+ if iterations:
295
+ system.max_iterations = int(iterations)
296
+ if questions_per_iteration:
297
+ system.questions_per_iteration = int(questions_per_iteration)
298
+
299
+ # Create a new search object with these settings
300
+ system.search = get_search(
301
+ search_tool=search_engine, llm_instance=system.model
302
+ )
303
+
304
+ logger.info("Successfully set search engine to: %s", search_engine)
305
+ except Exception as e:
306
+ logger.error(
307
+ "Error setting search engine to %s: %s", search_engine, str(e)
308
+ )
309
+
310
+ # Run the search
311
+ progress_callback("Starting research process", 5, {"phase": "init"})
312
+
313
+ try:
314
+ results = system.analyze_topic(query)
315
+ if mode == "quick":
316
+ progress_callback(
317
+ "Search complete, preparing to generate summary...",
318
+ 85,
319
+ {"phase": "output_generation"},
320
+ )
321
+ else:
322
+ progress_callback(
323
+ "Search complete, generating output",
324
+ 80,
325
+ {"phase": "output_generation"},
326
+ )
327
+ except Exception as search_error:
328
+ # Better handling of specific search errors
329
+ error_message = str(search_error)
330
+ error_type = "unknown"
331
+
332
+ # Extract error details for common issues
333
+ if "status code: 503" in error_message:
334
+ error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
335
+ error_type = "ollama_unavailable"
336
+ elif "status code: 404" in error_message:
337
+ error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
338
+ error_type = "model_not_found"
339
+ elif "status code:" in error_message:
340
+ # Extract the status code for other HTTP errors
341
+ status_code = error_message.split("status code:")[1].strip()
342
+ error_message = f"API request failed with status code {status_code}. Please check your configuration."
343
+ error_type = "api_error"
344
+ elif "connection" in error_message.lower():
345
+ error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
346
+ error_type = "connection_error"
347
+
348
+ # Raise with improved error message
349
+ raise Exception(f"{error_message} (Error type: {error_type})")
350
+
351
+ # Generate output based on mode
352
+ if mode == "quick":
353
+ # Quick Summary
354
+ if results.get("findings") or results.get("formatted_findings"):
355
+ raw_formatted_findings = results["formatted_findings"]
356
+
357
+ # Check if formatted_findings contains an error message
358
+ if isinstance(
359
+ raw_formatted_findings, str
360
+ ) and raw_formatted_findings.startswith("Error:"):
361
+ import traceback
362
+
363
+ logger.warning(
364
+ f"Detected error in formatted findings: {raw_formatted_findings[:100]}... stack trace: {traceback.format_exc()}"
365
+ )
366
+
367
+ # Determine error type for better user feedback
368
+ error_type = "unknown"
369
+ error_message = raw_formatted_findings.lower()
370
+
371
+ if (
372
+ "token limit" in error_message
373
+ or "context length" in error_message
374
+ ):
375
+ error_type = "token_limit"
376
+ # Log specific error type
377
+ logger.warning("Detected token limit error in synthesis")
378
+
379
+ # Update progress with specific error type
380
+ progress_callback(
381
+ "Synthesis hit token limits. Attempting fallback...",
382
+ 87,
383
+ {"phase": "synthesis_error", "error_type": error_type},
384
+ )
385
+ elif "timeout" in error_message or "timed out" in error_message:
386
+ error_type = "timeout"
387
+ logger.warning("Detected timeout error in synthesis")
388
+ progress_callback(
389
+ "Synthesis timed out. Attempting fallback...",
390
+ 87,
391
+ {"phase": "synthesis_error", "error_type": error_type},
392
+ )
393
+ elif "rate limit" in error_message:
394
+ error_type = "rate_limit"
395
+ logger.warning("Detected rate limit error in synthesis")
396
+ progress_callback(
397
+ "LLM rate limit reached. Attempting fallback...",
398
+ 87,
399
+ {"phase": "synthesis_error", "error_type": error_type},
400
+ )
401
+ elif "connection" in error_message or "network" in error_message:
402
+ error_type = "connection"
403
+ logger.warning("Detected connection error in synthesis")
404
+ progress_callback(
405
+ "Connection issue with LLM. Attempting fallback...",
406
+ 87,
407
+ {"phase": "synthesis_error", "error_type": error_type},
408
+ )
409
+ elif (
410
+ "llm error" in error_message
411
+ or "final answer synthesis fail" in error_message
412
+ ):
413
+ error_type = "llm_error"
414
+ logger.warning("Detected general LLM error in synthesis")
415
+ progress_callback(
416
+ "LLM error during synthesis. Attempting fallback...",
417
+ 87,
418
+ {"phase": "synthesis_error", "error_type": error_type},
419
+ )
420
+ else:
421
+ # Generic error
422
+ logger.warning("Detected unknown error in synthesis")
423
+ progress_callback(
424
+ "Error during synthesis. Attempting fallback...",
425
+ 87,
426
+ {"phase": "synthesis_error", "error_type": "unknown"},
427
+ )
428
+
429
+ # Extract synthesized content from findings if available
430
+ synthesized_content = ""
431
+ for finding in results.get("findings", []):
432
+ if finding.get("phase") == "Final synthesis":
433
+ synthesized_content = finding.get("content", "")
434
+ break
435
+
436
+ # Use synthesized content as fallback
437
+ if synthesized_content and not synthesized_content.startswith(
438
+ "Error:"
439
+ ):
440
+
441
+ logger.info("Using existing synthesized content as fallback")
442
+ raw_formatted_findings = synthesized_content
443
+
444
+ # Or use current_knowledge as another fallback
445
+ elif results.get("current_knowledge"):
446
+ logger.info("Using current_knowledge as fallback")
447
+ raw_formatted_findings = results["current_knowledge"]
448
+
449
+ # Or combine all finding contents as last resort
450
+ elif results.get("findings"):
451
+ logger.info("Combining all findings as fallback")
452
+ # First try to use any findings that are not errors
453
+ valid_findings = [
454
+ f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
455
+ for finding in results.get("findings", [])
456
+ if finding.get("content")
457
+ and not finding.get("content", "").startswith("Error:")
458
+ ]
459
+
460
+ if valid_findings:
461
+ raw_formatted_findings = (
462
+ "# Research Results (Fallback Mode)\n\n"
463
+ )
464
+ raw_formatted_findings += "\n\n".join(valid_findings)
465
+ raw_formatted_findings += (
466
+ f"\n\n## Error Information\n{raw_formatted_findings}"
467
+ )
468
+ else:
469
+ # Last resort: use everything including errors
470
+ raw_formatted_findings = (
471
+ "# Research Results (Emergency Fallback)\n\n"
472
+ )
473
+ raw_formatted_findings += "The system encountered errors during final synthesis.\n\n"
474
+ raw_formatted_findings += "\n\n".join(
475
+ f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
476
+ for finding in results.get("findings", [])
477
+ if finding.get("content")
478
+ )
479
+
480
+ progress_callback(
481
+ f"Using fallback synthesis due to {error_type} error",
482
+ 88,
483
+ {"phase": "synthesis_fallback", "error_type": error_type},
484
+ )
485
+
486
+ logger.info(
487
+ "Found formatted_findings of length: %s",
488
+ len(str(raw_formatted_findings)),
489
+ )
490
+
491
+ try:
492
+ # Get the synthesized content from the LLM directly
493
+ clean_markdown = raw_formatted_findings
494
+
495
+ # Extract all sources from findings to add them to the summary
496
+ all_links = []
497
+ for finding in results.get("findings", []):
498
+ search_results = finding.get("search_results", [])
499
+ if search_results:
500
+ try:
501
+ links = extract_links_from_search_results(
502
+ search_results
503
+ )
504
+ all_links.extend(links)
505
+ except Exception as link_err:
506
+ logger.error(
507
+ f"Error processing search results/links: {link_err}"
508
+ )
509
+
510
+ logger.info(
511
+ "Successfully converted to clean markdown of length: %s",
512
+ len(clean_markdown),
513
+ )
514
+
515
+ # First send a progress update for generating the summary
516
+ progress_callback(
517
+ "Generating clean summary from research data...",
518
+ 90,
519
+ {"phase": "output_generation"},
520
+ )
521
+
522
+ # Save as markdown file
523
+ if not os.path.exists(OUTPUT_DIR):
524
+ os.makedirs(OUTPUT_DIR)
525
+
526
+ safe_query = "".join(
527
+ x for x in query if x.isalnum() or x in [" ", "-", "_"]
528
+ )[:50]
529
+ safe_query = safe_query.replace(" ", "_").lower()
530
+ report_path = os.path.join(
531
+ OUTPUT_DIR, f"quick_summary_{safe_query}.md"
532
+ )
533
+
534
+ # Send progress update for writing to file
535
+ progress_callback(
536
+ "Writing research report to file...",
537
+ 95,
538
+ {"phase": "report_complete"},
539
+ )
540
+
541
+ logger.info("Writing report to: %s", report_path)
542
+ with open(report_path, "w", encoding="utf-8") as f:
543
+ f.write("# Quick Research Summary\n\n")
544
+ f.write(f"Query: {query}\n\n")
545
+ f.write(clean_markdown)
546
+ f.write("\n\n## Research Metrics\n")
547
+ f.write(f"- Search Iterations: {results['iterations']}\n")
548
+ f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
549
+
550
+ # Update database
551
+ metadata = {
552
+ "iterations": results["iterations"],
553
+ "generated_at": datetime.utcnow().isoformat(),
554
+ }
555
+
556
+ # Calculate duration in seconds - using UTC consistently
557
+ now = datetime.utcnow()
558
+ completed_at = now.isoformat()
559
+
560
+ logger.info("Updating database for research_id: %s", research_id)
561
+ # Get the start time from the database
562
+ conn = get_db_connection()
563
+ cursor = conn.cursor()
564
+ cursor.execute(
565
+ "SELECT created_at FROM research_history WHERE id = ?",
566
+ (research_id,),
567
+ )
568
+ result = cursor.fetchone()
569
+
570
+ # Use the helper function for consistent duration calculation
571
+ duration_seconds = calculate_duration(result[0])
572
+
573
+ # Update the record
574
+ cursor.execute(
575
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
576
+ (
577
+ "completed",
578
+ completed_at,
579
+ duration_seconds,
580
+ report_path,
581
+ json.dumps(metadata),
582
+ research_id,
583
+ ),
584
+ )
585
+ conn.commit()
586
+ conn.close()
587
+ logger.info(
588
+ f"Database updated successfully for research_id: {research_id}"
589
+ )
590
+
591
+ # Send the final completion message
592
+ progress_callback(
593
+ "Research completed successfully",
594
+ 100,
595
+ {"phase": "complete", "report_path": report_path},
596
+ )
597
+
598
+ # Clean up resources
599
+ logger.info(
600
+ "Cleaning up resources for research_id: %s", research_id
601
+ )
602
+ cleanup_research_resources(
603
+ research_id, active_research, termination_flags
604
+ )
605
+ logger.info("Resources cleaned up for research_id: %s", research_id)
606
+
607
+ except Exception as inner_e:
608
+ logger.error(
609
+ "Error during quick summary generation: %s", str(inner_e)
610
+ )
611
+ logger.error(traceback.format_exc())
612
+ raise Exception(f"Error generating quick summary: {str(inner_e)}")
613
+ else:
614
+ raise Exception(
615
+ "No research findings were generated. Please try again."
616
+ )
617
+ else:
618
+ # Full Report
619
+ progress_callback(
620
+ "Generating detailed report...", 85, {"phase": "report_generation"}
621
+ )
622
+
623
+ # Extract the search system from the results if available
624
+ search_system = results.get("search_system", None)
625
+
626
+ # Pass the existing search system to maintain citation indices
627
+ report_generator = IntegratedReportGenerator(search_system=search_system)
628
+ final_report = report_generator.generate_report(results, query)
629
+
630
+ progress_callback(
631
+ "Report generation complete", 95, {"phase": "report_complete"}
632
+ )
633
+
634
+ # Save as markdown file
635
+ if not os.path.exists(OUTPUT_DIR):
636
+ os.makedirs(OUTPUT_DIR)
637
+
638
+ safe_query = "".join(
639
+ x for x in query if x.isalnum() or x in [" ", "-", "_"]
640
+ )[:50]
641
+ safe_query = safe_query.replace(" ", "_").lower()
642
+ report_path = os.path.join(OUTPUT_DIR, f"detailed_report_{safe_query}.md")
643
+
644
+ with open(report_path, "w", encoding="utf-8") as f:
645
+ f.write(final_report["content"])
646
+
647
+ # Update database
648
+ metadata = final_report["metadata"]
649
+ metadata["iterations"] = results["iterations"]
650
+
651
+ # Calculate duration in seconds - using UTC consistently
652
+ now = datetime.utcnow()
653
+ completed_at = now.isoformat()
654
+
655
+ # Get the start time from the database
656
+ conn = get_db_connection()
657
+ cursor = conn.cursor()
658
+ cursor.execute(
659
+ "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
660
+ )
661
+ result = cursor.fetchone()
662
+
663
+ # Use the helper function for consistent duration calculation
664
+ duration_seconds = calculate_duration(result[0])
665
+
666
+ cursor.execute(
667
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
668
+ (
669
+ "completed",
670
+ completed_at,
671
+ duration_seconds,
672
+ report_path,
673
+ json.dumps(metadata),
674
+ research_id,
675
+ ),
676
+ )
677
+ conn.commit()
678
+ conn.close()
679
+
680
+ progress_callback(
681
+ "Research completed successfully",
682
+ 100,
683
+ {"phase": "complete", "report_path": report_path},
684
+ )
685
+
686
+ # Clean up resources
687
+ cleanup_research_resources(research_id, active_research, termination_flags)
688
+
689
+ except Exception as e:
690
+ # Handle error
691
+ error_message = f"Research failed: {str(e)}"
692
+ logger.error(error_message)
693
+ try:
694
+ # Check for common Ollama error patterns in the exception and provide more user-friendly errors
695
+ user_friendly_error = str(e)
696
+ error_context = {}
697
+
698
+ if "Error type: ollama_unavailable" in user_friendly_error:
699
+ user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
700
+ error_context = {
701
+ "solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."
702
+ }
703
+ elif "Error type: model_not_found" in user_friendly_error:
704
+ user_friendly_error = (
705
+ "Required Ollama model not found. Please pull the model first."
706
+ )
707
+ error_context = {
708
+ "solution": "Run 'ollama pull mistral' to download the required model."
709
+ }
710
+ elif "Error type: connection_error" in user_friendly_error:
711
+ user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
712
+ error_context = {
713
+ "solution": "Ensure Ollama or your API service is running and accessible."
714
+ }
715
+ elif "Error type: api_error" in user_friendly_error:
716
+ # Keep the original error message as it's already improved
717
+ error_context = {"solution": "Check API configuration and credentials."}
718
+
719
+ # Update metadata with more context about the error
720
+ metadata = {"phase": "error", "error": user_friendly_error}
721
+ if error_context:
722
+ metadata.update(error_context)
723
+
724
+ # If we still have an active research record, update its log
725
+ if research_id in active_research:
726
+ progress_callback(user_friendly_error, None, metadata)
727
+
728
+ conn = get_db_connection()
729
+ cursor = conn.cursor()
730
+
731
+ # If termination was requested, mark as suspended instead of failed
732
+ status = (
733
+ "suspended"
734
+ if (research_id in termination_flags and termination_flags[research_id])
735
+ else "failed"
736
+ )
737
+ message = (
738
+ "Research was terminated by user"
739
+ if status == "suspended"
740
+ else user_friendly_error
741
+ )
742
+
743
+ # Calculate duration up to termination point - using UTC consistently
744
+ now = datetime.utcnow()
745
+ completed_at = now.isoformat()
746
+
747
+ # Get the start time from the database
748
+ duration_seconds = None
749
+ cursor.execute(
750
+ "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
751
+ )
752
+ result = cursor.fetchone()
753
+
754
+ # Use the helper function for consistent duration calculation
755
+ if result and result[0]:
756
+ duration_seconds = calculate_duration(result[0])
757
+
758
+ cursor.execute(
759
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?",
760
+ (
761
+ status,
762
+ completed_at,
763
+ duration_seconds,
764
+ json.dumps(metadata),
765
+ research_id,
766
+ ),
767
+ )
768
+ conn.commit()
769
+ conn.close()
770
+
771
+ try:
772
+ emit_to_subscribers(
773
+ "research_progress",
774
+ research_id,
775
+ {"status": status, "error": message},
776
+ )
777
+ except Exception as socket_error:
778
+ logger.error(f"Failed to emit error via socket: {str(socket_error)}")
779
+
780
+ except Exception as inner_e:
781
+ logger.error(f"Error in error handler: {str(inner_e)}")
782
+ logger.error(traceback.format_exc())
783
+
784
+ # Clean up resources
785
+ cleanup_research_resources(research_id, active_research, termination_flags)
786
+
787
+
788
def cleanup_research_resources(research_id, active_research, termination_flags):
    """
    Clean up resources for a completed research.

    Looks up the research's final status in the database, removes the
    in-memory bookkeeping entries, and sends a final status message to
    any remaining socket subscribers.

    Args:
        research_id: The ID of the research
        active_research: Dictionary of active research processes
        termination_flags: Dictionary of termination flags
    """
    logger.info("Cleaning up resources for research %s", research_id)

    # Get the current status from the database to determine the final status message.
    # Default to "completed" so subscribers still get a final message if the lookup fails.
    current_status = "completed"
    conn = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()
        cursor.execute(
            "SELECT status FROM research_history WHERE id = ?", (research_id,)
        )
        result = cursor.fetchone()
        if result and result[0]:
            current_status = result[0]
    except Exception as e:
        logger.error("Error retrieving research status during cleanup: %s", e)
    finally:
        # Always release the connection, even when the query raised
        # (the original only closed it on the success path, leaking on error).
        if conn is not None:
            conn.close()

    # Remove in-memory bookkeeping for this research; pop() tolerates
    # the key already being gone.
    active_research.pop(research_id, None)
    termination_flags.pop(research_id, None)

    # Send a final message to subscribers
    try:
        # Import here to avoid circular imports
        from ..routes.research_routes import get_globals

        globals_dict = get_globals()
        socket_subscriptions = globals_dict.get("socket_subscriptions", {})

        # Send a final message to any remaining subscribers with explicit status
        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
            # Use the proper status message based on database status
            if current_status in ("suspended", "failed"):
                final_message = {
                    "status": current_status,
                    "message": f"Research was {current_status}",
                    "progress": 0,  # For suspended research, show 0% not 100%
                }
            else:
                final_message = {
                    "status": "completed",
                    "message": "Research process has ended and resources have been cleaned up",
                    "progress": 100,
                }

            logger.info(
                "Sending final %s socket message for research %s",
                current_status,
                research_id,
            )

            emit_to_subscribers("research_progress", research_id, final_message)

    except Exception as e:
        logger.error("Error sending final cleanup message: %s", e)
+
857
+
858
def handle_termination(research_id, active_research, termination_flags):
    """
    Handle the termination of a research process.

    Marks the research as "suspended" in the database (recording the
    completion timestamp and elapsed duration), then cleans up in-memory
    resources via cleanup_research_resources().

    Args:
        research_id: The ID of the research
        active_research: Dictionary of active research processes
        termination_flags: Dictionary of termination flags
    """
    # Explicitly set the status to suspended in the database
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # Calculate duration up to termination point - using UTC consistently
        completed_at = datetime.utcnow().isoformat()

        # Get the start time from the database
        cursor.execute(
            "SELECT created_at FROM research_history WHERE id = ?",
            (research_id,),
        )
        result = cursor.fetchone()

        # Calculate the duration; None when the record has no start time
        duration_seconds = (
            calculate_duration(result[0]) if result and result[0] else None
        )

        # Update the database with suspended status
        cursor.execute(
            "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?",
            ("suspended", completed_at, duration_seconds, research_id),
        )
        conn.commit()
    finally:
        # Release the connection even if a query fails
        # (the original leaked it on any exception).
        conn.close()

    # Clean up resources
    cleanup_research_resources(research_id, active_research, termination_flags)
+
896
+
897
def cancel_research(research_id):
    """
    Cancel/terminate a research process

    Sets the termination flag so a running process can stop itself; if the
    research is actively running, delegates to handle_termination() to
    update the database and clean up. Otherwise updates the database
    record directly.

    Args:
        research_id: The ID of the research to cancel

    Returns:
        bool: True if the research was found and cancelled, False otherwise
    """
    # Import globals from research routes
    from ..routes.research_routes import get_globals

    globals_dict = get_globals()
    active_research = globals_dict["active_research"]
    termination_flags = globals_dict["termination_flags"]

    # Set termination flag
    termination_flags[research_id] = True

    # Check if the research is active
    if research_id in active_research:
        # Call handle_termination to update database
        handle_termination(research_id, active_research, termination_flags)
        return True

    # Update database directly if not found in active_research.
    # NOTE: the redundant local import of get_db_connection was removed;
    # the module-level name is already used by sibling functions.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # First check if the research exists
        cursor.execute(
            "SELECT status FROM research_history WHERE id = ?", (research_id,)
        )
        if cursor.fetchone() is None:
            return False

        # If it exists but isn't in active_research, still update status
        cursor.execute(
            "UPDATE research_history SET status = ? WHERE id = ?",
            ("suspended", research_id),
        )
        conn.commit()
        return True
    finally:
        # Connection is closed on every path: missing record, success, or error
        # (the original leaked it if a query raised).
        conn.close()