local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,452 @@
1
+ """
2
+ Findings repository for managing research findings.
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, List, Union
7
+
8
+ from langchain_core.documents import Document
9
+ from langchain_core.language_models import BaseLLM
10
+
11
+ from ...utilities.search_utilities import format_findings
12
+ from .base_findings import BaseFindingsRepository
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def format_links(links: List[Dict]) -> str:
    """Render link dictionaries as a numbered, human-readable list.

    Args:
        links: List of dictionaries, each with 'title' and 'url' keys

    Returns:
        str: One numbered entry per link, joined with newlines
    """
    rendered = []
    for number, link in enumerate(links, start=1):
        rendered.append(f"{number}. {link['title']}\n URL: {link['url']}")
    return "\n".join(rendered)
30
+
31
+
32
class FindingsRepository(BaseFindingsRepository):
    """Repository for managing research findings.

    Stores findings keyed by query, collects source documents, tracks the
    questions asked in each research iteration, and synthesizes the
    accumulated knowledge into a final answer using the configured LLM.
    """

    def __init__(self, model: BaseLLM):
        """Initialize the repository.

        Args:
            model: The LLM model to use for synthesis
        """
        super().__init__(model)
        # Findings per query; a "<query>_synthesis" key additionally mirrors
        # the raw final synthesis content for that query (see add_finding).
        self.findings: Dict[str, List[Dict]] = {}
        self.documents: List[Document] = []
        self.questions_by_iteration: Dict[int, List[str]] = {}

    def add_finding(self, query: str, finding: Dict | str) -> None:
        """Add a finding for a query.

        String findings are wrapped into the standard finding dictionary with
        phase "Synthesis"; dictionaries are stored as-is. A dictionary finding
        whose phase is "Final synthesis" is additionally mirrored under the
        "<query>_synthesis" key so the raw synthesized content can be
        retrieved later.

        Args:
            query: The query the finding belongs to
            finding: Finding dictionary, or bare content string
        """
        self.findings.setdefault(query, [])

        # Convert to dictionary if it's a string
        if isinstance(finding, str):
            finding_dict = {
                "phase": "Synthesis",
                "content": finding,
                "question": query,
                "search_results": [],
                "documents": [],
            }
            self.findings[query].append(finding_dict)
        else:
            # It's already a dictionary
            self.findings[query].append(finding)

        # Store raw synthesized content if it's the final synthesis.
        # Only dictionaries carry a "phase" marker.
        if isinstance(finding, dict) and finding.get("phase") == "Final synthesis":
            self.findings[query + "_synthesis"] = [
                {
                    "phase": "Synthesis",
                    "content": finding.get("content", ""),
                    "question": query,
                    "search_results": [],
                    "documents": [],
                }
            ]

        logger.info(
            f"Added finding for query: {query}. Total findings: {len(self.findings[query])}"
        )

    def get_findings(self, query: str) -> List[Dict]:
        """Get findings for a query.

        Args:
            query: The query to get findings for

        Returns:
            List of findings for the query (empty list if none recorded)
        """
        return self.findings.get(query, [])

    def clear_findings(self, query: str) -> None:
        """Clear findings for a query.

        Args:
            query: The query to clear findings for
        """
        if query in self.findings:
            del self.findings[query]
            logger.info(f"Cleared findings for query: {query}")

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to the repository.

        Args:
            documents: List of documents to add
        """
        self.documents.extend(documents)
        logger.info(f"Added {len(documents)} documents to repository")

    def set_questions_by_iteration(
        self, questions_by_iteration: Dict[int, List[str]]
    ) -> None:
        """Set the questions by iteration.

        Args:
            questions_by_iteration: Dictionary mapping iteration numbers to lists of questions
        """
        # Copy defensively so later caller-side mutation does not leak in.
        self.questions_by_iteration = questions_by_iteration.copy()
        logger.info(f"Set questions for {len(questions_by_iteration)} iterations")

    def format_findings_to_text(
        self, findings_list: List[Dict], synthesized_content: str
    ) -> str:
        """Format findings into a detailed text output using the utility function.

        Args:
            findings_list: List of finding dictionaries from the strategy execution.
            synthesized_content: The final synthesized content generated by the LLM.

        Returns:
            str: Formatted text output, or a fallback containing the raw
                synthesized content if formatting fails.
        """
        logger.info(
            f"Formatting final report. Number of detailed findings: {len(findings_list)}. Synthesized content length: {len(synthesized_content)}. Number of question iterations: {len(self.questions_by_iteration)}"
        )
        # Log details about the inputs
        logger.debug(
            f"Detailed findings list structure (first item type if exists): {type(findings_list[0]) if findings_list else 'Empty'}"
        )
        logger.debug(
            f"Questions by iteration keys: {list(self.questions_by_iteration.keys())}"
        )
        if findings_list:
            logger.debug(f"First finding item keys: {list(findings_list[0].keys())}")

        try:
            # Pass the detailed findings list, the synthesized content (as current_knowledge), and the stored questions
            formatted_report = format_findings(
                findings_list,
                synthesized_content,  # This goes to the 'current_knowledge' param in format_findings
                self.questions_by_iteration,
            )
            logger.info("Successfully formatted final report.")
            return formatted_report
        except Exception as e:
            logger.error(
                f"Error occurred during final report formatting: {str(e)}",
                exc_info=True,
            )
            # Fallback: return just the synthesized content if formatting fails
            return f"Error during final formatting. Raw Synthesized Content:\n\n{synthesized_content}"

    def synthesize_findings(
        self,
        query: str,
        sub_queries: List[str],
        findings: List[Union[Dict, str]],
        accumulated_knowledge: str | None = None,
        old_formatting: bool = False,
    ) -> str:
        """
        Synthesize accumulated knowledge into a final answer.

        Args:
            query: The original query
            sub_queries: List of sub-queries (for context)
            findings: List of findings strings or dictionaries from previous steps
            accumulated_knowledge: Optional pre-existing knowledge to incorporate;
                when None it is built by joining the findings' content
            old_formatting: Whether to use the old formatting approach

        Returns:
            str: Synthesized final answer content, or an "Error: ..." message
                describing the failure (timeout, token limit, etc.).
        """
        logger.info(f"synthesize_findings called with query: '{query}'")
        logger.info(
            f"sub_queries type: {type(sub_queries)}, length: {len(sub_queries)}"
        )
        logger.info(f"findings type: {type(findings)}, length: {len(findings)}")

        # Use provided accumulated_knowledge or join findings if it's None
        if accumulated_knowledge is None:
            # Convert findings to text if they are dictionaries
            finding_texts = []
            for item in findings:
                if isinstance(item, dict) and "content" in item:
                    finding_texts.append(item["content"])
                elif isinstance(item, str):
                    finding_texts.append(item)
            accumulated_knowledge = "\n\n".join(finding_texts)

        if findings:
            logger.info(f"first finding type: {type(findings[0])}")
            if isinstance(findings[0], dict):
                logger.info(
                    f"first finding keys: {list(findings[0].keys()) if hasattr(findings[0], 'keys') else 'No keys'}"
                )
                if "content" in findings[0]:
                    logger.info(
                        f"first finding content type: {type(findings[0]['content'])}"
                    )
            elif isinstance(findings[0], str):
                logger.info(f"first finding string length: {len(findings[0])}")
                logger.info(f"first finding string preview: {findings[0][:100]}...")

        if old_formatting:
            # Convert findings list if it contains strings instead of dictionaries
            findings_list = []
            for i, item in enumerate(findings):
                if isinstance(item, str):
                    findings_list.append({"phase": f"Finding {i + 1}", "content": item})
                elif isinstance(item, dict):
                    findings_list.append(item)

            # Fix: call positionally, matching the working call site in
            # format_findings_to_text (the second parameter of format_findings
            # is 'current_knowledge'); the previous keyword names
            # (synthesized_content=...) did not match and raised TypeError.
            return format_findings(
                findings_list,
                accumulated_knowledge,
                self.questions_by_iteration,
            )
        try:
            # Fix: honor a caller-supplied accumulated_knowledge instead of
            # silently rebuilding it from findings (the parameter was ignored
            # on this path); when it was None it has already been built above.
            current_knowledge = accumulated_knowledge

            # Check if knowledge exceeds a reasonable token limit (rough estimate based on characters)
            # 1 token ≈ 4 characters in English
            estimated_tokens = len(current_knowledge) / 4
            max_safe_tokens = 12000  # Adjust based on your model's context window

            if estimated_tokens > max_safe_tokens:
                logger.warning(
                    f"Knowledge size may exceed model's capacity: ~{int(estimated_tokens)} tokens"
                )
                # Truncate if needed (keeping the beginning and end which are often most important)
                # This is a simple approach - a more sophisticated chunking might be better
                if len(current_knowledge) > 24000:  # ~6000 tokens
                    first_part = current_knowledge[:12000]  # ~3000 tokens from start
                    last_part = current_knowledge[-12000:]  # ~3000 tokens from end
                    current_knowledge = f"{first_part}\n\n[...content truncated due to length...]\n\n{last_part}"
                    logger.info("Knowledge truncated to fit within token limits")

            prompt = f"""Use IEEE style citations [1], [2], etc. Never make up your own citations. Synthesize the following accumulated knowledge into a comprehensive answer for the original query.
Format the response with clear sections, citations, and a concise summary.

Original Query: {query}

Accumulated Knowledge:
{current_knowledge}

Sub-questions asked (for context):
{chr(10).join(f"- {sq}" for sq in sub_queries)}

Generate a well-structured, concise answer that:
1. Starts with a clear explanation of the most important points
2. Organizes information into logical sections with headers if needed
3. Maintains logical flow and prioritizes important information over minor details
4. Avoids repetition and unnecessary detail

Use IEEE style citations [1], [2], etc. Never make up your own citations.
"""

            logger.info(
                f"Synthesizing final answer. Query: '{query}'. Knowledge length: {len(current_knowledge)}. Prompt length: {len(prompt)}"
            )
            # Log first 500 chars of prompt for debugging context length issues
            logger.debug(f"Synthesis prompt (first 500 chars): {prompt[:500]}...")

            try:
                # Add timeout handling
                import platform
                import signal
                import threading
                from contextlib import contextmanager

                # Check if we're on Windows
                if platform.system() == "Windows":
                    # Windows has no SIGALRM; emulate a timeout by running the
                    # LLM call in a daemon thread and joining with a deadline.
                    # Uses the builtin TimeoutError (the previous local
                    # subclass shadowed it to no effect and is removed, along
                    # with an unused timer helper).
                    def invoke_with_timeout(timeout_seconds, func, *args, **kwargs):
                        """
                        Function for implementing timeouts on Windows
                        """
                        result = None
                        exception = None
                        completed = False

                        def target():
                            nonlocal result, exception, completed
                            try:
                                result = func(*args, **kwargs)
                                completed = True
                            except Exception as e:
                                exception = e

                        thread = threading.Thread(target=target)
                        thread.daemon = True

                        thread.start()
                        thread.join(timeout_seconds)
                        if not completed and thread.is_alive():
                            raise TimeoutError(
                                f"Operation timed out after {timeout_seconds} seconds"
                            )
                        if exception:
                            raise exception
                        return result

                    # Use Windows-compatible timeout
                    try:
                        logger.info(
                            "Using Windows-compatible timeout for LLM invocation"
                        )
                        response = invoke_with_timeout(120, self.model.invoke, prompt)

                        # Handle different response types (string or object with content attribute)
                        if hasattr(response, "content"):
                            synthesized_content = response.content
                        else:
                            # Handle string responses
                            synthesized_content = str(response)

                        logger.info(
                            f"Successfully synthesized final answer for query: '{query}'"
                        )
                        # Return only the synthesized content from the LLM
                        return synthesized_content
                    except TimeoutError as timeout_error:
                        logger.error(
                            f"LLM invocation timed out during synthesis for query '{query}': {timeout_error}",
                            exc_info=True,
                        )
                        # Return more specific error about timeout
                        return "Error: Final answer synthesis failed due to LLM timeout. Please check your LLM service or try with a smaller query scope."

                else:
                    # Unix-compatible timeout using SIGALRM
                    @contextmanager
                    def timeout(seconds, message="Operation timed out"):
                        def signal_handler(signum, frame):
                            raise TimeoutError(message)

                        signal.signal(signal.SIGALRM, signal_handler)
                        signal.alarm(seconds)
                        try:
                            yield
                        finally:
                            # Always cancel the pending alarm, success or not.
                            signal.alarm(0)

                    # Try with a timeout (adjust seconds as needed)
                    try:
                        with timeout(120, "LLM invocation timed out after 120 seconds"):
                            response = self.model.invoke(prompt)

                        # Handle different response types (string or object with content attribute)
                        if hasattr(response, "content"):
                            synthesized_content = response.content
                        else:
                            # Handle string responses
                            synthesized_content = str(response)

                        logger.info(
                            f"Successfully synthesized final answer for query: '{query}'"
                        )
                        # Return only the synthesized content from the LLM
                        return synthesized_content
                    except TimeoutError as timeout_error:
                        logger.error(
                            f"LLM invocation timed out during synthesis for query '{query}': {timeout_error}",
                            exc_info=True,
                        )
                        # Return more specific error about timeout
                        return "Error: Final answer synthesis failed due to LLM timeout. Please check your LLM service or try with a smaller query scope."

            except Exception as invoke_error:
                logger.error(
                    f"LLM invocation failed during synthesis for query '{query}': {invoke_error}",
                    exc_info=True,
                )

                # Attempt to determine the type of error
                error_message = str(invoke_error).lower()
                error_type = "unknown"

                if "timeout" in error_message or "timed out" in error_message:
                    error_type = "timeout"
                elif (
                    "too many tokens" in error_message
                    or "context length" in error_message
                    or "token limit" in error_message
                ):
                    error_type = "token_limit"
                elif "rate limit" in error_message or "rate_limit" in error_message:
                    error_type = "rate_limit"
                elif "connection" in error_message or "network" in error_message:
                    error_type = "connection"
                elif "api key" in error_message or "authentication" in error_message:
                    error_type = "authentication"

                # Return more detailed error message based on type
                if error_type == "timeout":
                    return "Error: Failed to synthesize final answer due to LLM timeout. Please check your connection or try again later."
                elif error_type == "token_limit":
                    return "Error: Failed to synthesize final answer due to token limit exceeded. Try reducing the scope of your query."
                elif error_type == "rate_limit":
                    return "Error: Failed to synthesize final answer due to LLM rate limit. Please try again in a few minutes."
                elif error_type == "connection":
                    return "Error: Failed to synthesize final answer due to connection issues. Please check your internet connection and LLM service status."
                elif error_type == "authentication":
                    return "Error: Failed to synthesize final answer due to authentication issues. Please check your API keys."
                else:
                    # Generic error with details
                    return f"Error: Failed to synthesize final answer. LLM error: {str(invoke_error)}"

        except Exception as e:
            # Catch potential errors during prompt construction or logging itself
            logger.error(
                f"Error preparing or executing synthesis for query '{query}': {str(e)}",
                exc_info=True,
            )
            # Return a specific error message for synthesis failure
            return f"Error: Failed to synthesize final answer from knowledge. Details: {str(e)}"
@@ -0,0 +1 @@
1
+ # Search System Knowledge Package
@@ -0,0 +1,151 @@
1
+ """
2
+ Base class for knowledge extraction and generation.
3
+ """
4
+
5
+ import logging
6
+ from abc import ABC, abstractmethod
7
+ from typing import List
8
+
9
+ from langchain_core.language_models.chat_models import BaseChatModel
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class BaseKnowledgeGenerator(ABC):
    """Base class for generating knowledge from text.

    Concrete subclasses implement the abstract methods below to generate,
    compress, and cite knowledge using the provided language model.
    """

    def __init__(self, model: BaseChatModel):
        """
        Initialize the knowledge generator.

        Args:
            model: The language model to use
        """
        self.model = model

    @abstractmethod
    def generate(self, query: str, context: str) -> str:
        """
        Generate knowledge from the given query and context.

        Args:
            query: The query to generate knowledge for
            context: Additional context for knowledge generation

        Returns:
            str: Generated knowledge
        """
        pass

    @abstractmethod
    def generate_knowledge(
        self,
        query: str,
        context: str = "",
        current_knowledge: str = "",
        questions: List[str] | None = None,
    ) -> str:
        """
        Generate knowledge based on query and context.

        Args:
            query: The query to generate knowledge for
            context: Additional context for knowledge generation
            current_knowledge: Current accumulated knowledge
            questions: List of questions to address

        Returns:
            str: Generated knowledge
        """
        pass

    @abstractmethod
    def generate_sub_knowledge(self, sub_query: str, context: str = "") -> str:
        """
        Generate knowledge for a sub-question.

        Args:
            sub_query: The sub-question to generate knowledge for
            context: Additional context for knowledge generation

        Returns:
            str: Generated knowledge for the sub-question
        """
        pass

    @abstractmethod
    def compress_knowledge(
        self, current_knowledge: str, query: str, section_links: List[str], **kwargs
    ) -> str:
        """
        Compress and summarize accumulated knowledge.

        Args:
            current_knowledge: The accumulated knowledge to compress
            query: The original research query
            section_links: List of source links
            **kwargs: Additional arguments

        Returns:
            str: Compressed knowledge
        """
        pass

    @abstractmethod
    def format_citations(self, links: List[str]) -> str:
        """
        Format source links into citations.

        Args:
            links: List of source links

        Returns:
            str: Formatted citations
        """
        pass

    def _validate_knowledge(self, knowledge: str) -> bool:
        """
        Validate the knowledge input.

        Args:
            knowledge: The knowledge to validate

        Returns:
            bool: True if knowledge is valid, False otherwise
        """
        # Reject empty strings and non-string values alike.
        if not knowledge or not isinstance(knowledge, str):
            logger.error("Invalid knowledge provided")
            return False
        return True

    def _validate_links(self, links: List[str]) -> bool:
        """
        Validate the source links.

        Args:
            links: List of source links to validate

        Returns:
            bool: True if links are valid, False otherwise
        """
        if not isinstance(links, list):
            logger.error("Invalid links format")
            return False
        # Every entry must itself be a string URL/reference.
        if not all(isinstance(link, str) for link in links):
            logger.error("Invalid link type in links list")
            return False
        return True

    def _extract_key_points(self, knowledge: str) -> List[str]:
        """
        Extract key points from knowledge.

        Args:
            knowledge: The knowledge to analyze

        Returns:
            List[str]: List of key points
        """
        # This is a placeholder implementation
        # Specific implementations should override this method
        return knowledge.split("\n")