local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -3,89 +3,115 @@ API module for Local Deep Research.
3
3
  Provides programmatic access to search and research capabilities.
4
4
  """
5
5
 
6
- from typing import Dict, List, Optional, Union, Any, Callable
7
6
  import logging
8
7
  import os
9
- import traceback
8
+ from typing import Any, Callable, Dict, Optional
9
+
10
10
  import toml
11
- from ..search_system import AdvancedSearchSystem
11
+
12
+ from ..config.llm_config import get_llm
13
+ from ..config.search_config import get_search
12
14
  from ..report_generator import IntegratedReportGenerator
13
- from ..config import get_llm, get_search, settings
14
- from ..utilties.search_utilities import remove_think_tags
15
+ from ..search_system import AdvancedSearchSystem
16
+ from ..utilities.search_utilities import remove_think_tags
15
17
 
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
- def quick_summary(
19
- query: str,
20
+
21
+ def _init_search_system(
22
+ model_name: str | None = None,
23
+ temperature: float = 0.7,
24
+ provider: str | None = None,
25
+ openai_endpoint_url: str | None = None,
26
+ progress_callback: Callable[[str, int, dict], None] | None = None,
20
27
  search_tool: Optional[str] = None,
21
28
  iterations: int = 1,
22
29
  questions_per_iteration: int = 1,
23
- max_results: int = 20,
24
- max_filtered_results: int = 5,
25
- region: str = "us",
26
- time_period: str = "y",
27
- safe_search: bool = True,
28
- temperature: float = 0.7,
29
- progress_callback: Optional[Callable] = None,
30
- ) -> Dict[str, Any]:
30
+ ) -> AdvancedSearchSystem:
31
31
  """
32
- Generate a quick research summary for a given query.
33
-
32
+ Initializes the advanced search system with specified parameters. This function sets up
33
+ and returns an instance of the AdvancedSearchSystem using the provided configuration
34
+ options such as model name, temperature for randomness in responses, provider service
35
+ details, endpoint URL, and an optional search tool.
36
+
34
37
  Args:
35
- query: The research query to analyze
38
+ model_name: Name of the model to use (if None, uses database setting)
39
+ temperature: LLM temperature for generation
40
+ provider: Provider to use (if None, uses database setting)
41
+ openai_endpoint_url: Custom endpoint URL to use (if None, uses database
42
+ setting)
43
+ progress_callback: Optional callback function to receive progress updates
36
44
  search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
37
45
  iterations: Number of research cycles to perform
38
46
  questions_per_iteration: Number of questions to generate per cycle
39
- max_results: Maximum number of search results to consider
40
- max_filtered_results: Maximum results after relevance filtering
41
- region: Search region/locale
42
- time_period: Time period for search results (d=day, w=week, m=month, y=year)
43
- safe_search: Whether to enable safe search
44
- temperature: LLM temperature for generation
45
- progress_callback: Optional callback function to receive progress updates
46
-
47
+
47
48
  Returns:
48
- Dictionary containing the research results with keys:
49
- - 'summary': The generated summary text
50
- - 'findings': List of detailed findings from each search
51
- - 'iterations': Number of iterations performed
52
- - 'questions': Questions generated during research
53
- """
54
- logger.info(f"Generating quick summary for query: {query}")
55
-
49
+ AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
56
50
 
51
+ """
57
52
  # Get language model with custom temperature
58
- llm = get_llm(temperature=temperature)
59
-
53
+ llm = get_llm(
54
+ temperature=temperature,
55
+ openai_endpoint_url=openai_endpoint_url,
56
+ model_name=model_name,
57
+ provider=provider,
58
+ )
59
+
60
+ # Set the search engine if specified
61
+ search_engine = None
62
+ if search_tool:
63
+ search_engine = get_search(search_tool, llm_instance=llm)
64
+ if search_engine is None:
65
+ logger.warning(
66
+ f"Could not create search engine '{search_tool}', using default."
67
+ )
68
+
60
69
  # Create search system with custom parameters
61
- system = AdvancedSearchSystem()
62
-
70
+ system = AdvancedSearchSystem(llm=llm, search=search_engine)
71
+
63
72
  # Override default settings with user-provided values
64
- system.max_iterations = iterations
73
+ system.max_iterations = iterations
65
74
  system.questions_per_iteration = questions_per_iteration
66
- system.model = llm # Ensure the model is directly attached to the system
67
-
68
- # Set the search engine if specified
69
- if search_tool:
70
- search_engine = get_search(search_tool)
71
- if search_engine:
72
- system.search = search_engine
73
- else:
74
- logger.warning(f"Could not create search engine '{search_tool}', using default.")
75
-
75
+
76
76
  # Set progress callback if provided
77
77
  if progress_callback:
78
78
  system.set_progress_callback(progress_callback)
79
-
79
+
80
+ return system
81
+
82
+
83
+ def quick_summary(
84
+ query: str,
85
+ **kwargs: Any,
86
+ ) -> Dict[str, Any]:
87
+ """
88
+ Generate a quick research summary for a given query.
89
+
90
+ Args:
91
+ query: The research query to analyze
92
+ **kwargs: Configuration for the search system. Will be forwarded to
93
+ `_init_search_system()`.
94
+
95
+ Returns:
96
+ Dictionary containing the research results with keys:
97
+ - 'summary': The generated summary text
98
+ - 'findings': List of detailed findings from each search
99
+ - 'iterations': Number of iterations performed
100
+ - 'questions': Questions generated during research
101
+ """
102
+ logger.info("Generating quick summary for query: %s", query)
103
+
104
+ system = _init_search_system(**kwargs)
105
+
80
106
  # Perform the search and analysis
81
107
  results = system.analyze_topic(query)
82
-
108
+
83
109
  # Extract the summary from the current knowledge
84
110
  if results and "current_knowledge" in results:
85
111
  summary = results["current_knowledge"]
86
112
  else:
87
113
  summary = "Unable to generate summary for the query."
88
-
114
+
89
115
  # Prepare the return value
90
116
  return {
91
117
  "summary": summary,
@@ -93,100 +119,60 @@ def quick_summary(
93
119
  "iterations": results.get("iterations", 0),
94
120
  "questions": results.get("questions", {}),
95
121
  "formatted_findings": results.get("formatted_findings", ""),
96
- "sources": results.get("all_links_of_system", [])
122
+ "sources": results.get("all_links_of_system", []),
97
123
  }
98
124
 
99
125
 
100
126
  def generate_report(
101
127
  query: str,
102
- search_tool: Optional[str] = None,
103
- iterations: int = 2,
104
- questions_per_iteration: int = 2,
105
- searches_per_section: int = 2,
106
- max_results: int = 50,
107
- max_filtered_results: int = 5,
108
- region: str = "us",
109
- time_period: str = "y",
110
- safe_search: bool = True,
111
- temperature: float = 0.7,
112
128
  output_file: Optional[str] = None,
113
129
  progress_callback: Optional[Callable] = None,
130
+ searches_per_section: int = 2,
131
+ **kwargs: Any,
114
132
  ) -> Dict[str, Any]:
115
133
  """
116
134
  Generate a comprehensive, structured research report for a given query.
117
-
135
+
118
136
  Args:
119
137
  query: The research query to analyze
120
- search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
121
- iterations: Number of research cycles to perform
122
- questions_per_iteration: Number of questions to generate per cycle
123
- searches_per_section: Number of searches to perform per report section
124
- max_results: Maximum number of search results to consider
125
- max_filtered_results: Maximum results after relevance filtering
126
- region: Search region/locale
127
- time_period: Time period for search results (d=day, w=week, m=month, y=year)
128
- safe_search: Whether to enable safe search
129
- temperature: LLM temperature for generation
130
138
  output_file: Optional path to save report markdown file
131
139
  progress_callback: Optional callback function to receive progress updates
132
-
140
+ searches_per_section: The number of searches to perform for each
141
+ section in the report.
142
+
133
143
  Returns:
134
144
  Dictionary containing the research report with keys:
135
145
  - 'content': The full report content in markdown format
136
146
  - 'metadata': Report metadata including generated timestamp and query
137
147
  """
138
- logger.info(f"Generating comprehensive research report for query: {query}")
139
-
148
+ logger.info("Generating comprehensive research report for query: %s", query)
149
+
150
+ system = _init_search_system(**kwargs)
140
151
 
141
- # Get language model with custom temperature
142
- llm = get_llm(temperature=temperature)
143
-
144
- # Create search system with custom parameters
145
- system = AdvancedSearchSystem()
146
-
147
- # Override default settings with user-provided values
148
- system.max_iterations = iterations
149
- system.questions_per_iteration = questions_per_iteration
150
- system.model = llm # Ensure the model is directly attached to the system
151
-
152
- # Set the search engine if specified
153
- if search_tool:
154
- search_engine = get_search(
155
- search_tool,
156
- llm_instance=llm,
157
- max_results=max_results,
158
- max_filtered_results=max_filtered_results,
159
- region=region,
160
- time_period=time_period,
161
- safe_search=safe_search
162
- )
163
- if search_engine:
164
- system.search = search_engine
165
- else:
166
- logger.warning(f"Could not create search engine '{search_tool}', using default.")
167
-
168
152
  # Set progress callback if provided
169
153
  if progress_callback:
170
154
  system.set_progress_callback(progress_callback)
171
-
155
+
172
156
  # Perform the initial research
173
157
  initial_findings = system.analyze_topic(query)
174
-
158
+
175
159
  # Generate the structured report
176
- report_generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
177
- report_generator.model = llm # Ensure the model is set on the report generator too
160
+ report_generator = IntegratedReportGenerator(
161
+ search_system=system,
162
+ llm=system.model,
163
+ searches_per_section=searches_per_section,
164
+ )
178
165
  report = report_generator.generate_report(initial_findings, query)
179
-
166
+
180
167
  # Save report to file if path is provided
181
168
  if output_file and report and "content" in report:
182
- with open(output_file, "w", encoding="utf-8") as f:
183
- f.write(report["content"])
184
- logger.info(f"Report saved to {output_file}")
185
- report["file_path"] = output_file
169
+ with open(output_file, "w", encoding="utf-8") as f:
170
+ f.write(report["content"])
171
+ logger.info(f"Report saved to {output_file}")
172
+ report["file_path"] = output_file
186
173
  return report
187
174
 
188
175
 
189
-
190
176
  def analyze_documents(
191
177
  query: str,
192
178
  collection_name: str,
@@ -197,7 +183,7 @@ def analyze_documents(
197
183
  ) -> Dict[str, Any]:
198
184
  """
199
185
  Search and analyze documents in a specific local collection.
200
-
186
+
201
187
  Args:
202
188
  query: The search query
203
189
  collection_name: Name of the local document collection to search
@@ -205,58 +191,63 @@ def analyze_documents(
205
191
  temperature: LLM temperature for summary generation
206
192
  force_reindex: Whether to force reindexing the collection
207
193
  output_file: Optional path to save analysis results to a file
208
-
194
+
209
195
  Returns:
210
196
  Dictionary containing:
211
197
  - 'summary': Summary of the findings
212
198
  - 'documents': List of matching documents with content and metadata
213
199
  """
214
- logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")
215
-
200
+ logger.info(
201
+ f"Analyzing documents in collection '{collection_name}' for query: {query}"
202
+ )
216
203
 
217
204
  # Get language model with custom temperature
218
205
  llm = get_llm(temperature=temperature)
219
-
206
+
220
207
  # Get search engine for the specified collection
221
208
  search = get_search(collection_name, llm_instance=llm)
222
-
209
+
223
210
  if not search:
224
211
  return {
225
212
  "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
226
- "documents": []
213
+ "documents": [],
227
214
  }
228
-
215
+
229
216
  # Set max results
230
217
  search.max_results = max_results
231
-
218
+
232
219
  # Force reindex if requested
233
- if force_reindex and hasattr(search, 'embedding_manager'):
234
- for folder_path in search.folder_paths:
235
- search.embedding_manager.index_folder(folder_path, force_reindex=True)
220
+ if force_reindex and hasattr(search, "embedding_manager"):
221
+ for folder_path in search.folder_paths:
222
+ search.embedding_manager.index_folder(folder_path, force_reindex=True)
236
223
 
237
224
  # Perform the search
238
225
  results = search.run(query)
239
-
226
+
240
227
  if not results:
241
228
  return {
242
229
  "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
243
- "documents": []
230
+ "documents": [],
244
231
  }
245
-
232
+
246
233
  # Get LLM to generate a summary of the results
247
234
 
248
- docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
249
- for i, doc in enumerate(results[:5])]) # Limit to first 5 docs and 1000 chars each
250
-
235
+ docs_text = "\n\n".join(
236
+ [
237
+ f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
238
+ for i, doc in enumerate(results[:5])
239
+ ]
240
+ ) # Limit to first 5 docs and 1000 chars each
241
+
251
242
  summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
252
-
243
+
253
244
  {docs_text}
254
-
245
+
255
246
  Provide a concise summary of the key information found in these documents related to the query.
256
247
  """
257
-
248
+
258
249
  summary_response = llm.invoke(summary_prompt)
259
- if hasattr(summary_response, 'content'):
250
+ if hasattr(summary_response, "content"):
260
251
  summary = remove_think_tags(summary_response.content)
261
252
  else:
262
253
  summary = str(summary_response)
@@ -266,38 +257,42 @@ def analyze_documents(
266
257
  "summary": summary,
267
258
  "documents": results,
268
259
  "collection": collection_name,
269
- "document_count": len(results)
260
+ "document_count": len(results),
270
261
  }
271
-
262
+
272
263
  # Save to file if requested
273
264
  if output_file:
274
- with open(output_file, "w", encoding="utf-8") as f:
275
- f.write(f"# Document Analysis: {query}\n\n")
276
- f.write(f"## Summary\n\n{summary}\n\n")
277
- f.write(f"## Documents Found: {len(results)}\n\n")
278
-
279
- for i, doc in enumerate(results):
280
- f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
281
- f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
282
- f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
283
- f.write("---\n\n")
284
-
285
- analysis_result["file_path"] = output_file
286
- logger.info(f"Analysis saved to {output_file}")
265
+ with open(output_file, "w", encoding="utf-8") as f:
266
+ f.write(f"# Document Analysis: {query}\n\n")
267
+ f.write(f"## Summary\n\n{summary}\n\n")
268
+ f.write(f"## Documents Found: {len(results)}\n\n")
269
+
270
+ for i, doc in enumerate(results):
271
+ f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
272
+ f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
273
+ f.write(
274
+ f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
275
+ )
276
+ f.write("---\n\n")
277
+
278
+ analysis_result["file_path"] = output_file
279
+ logger.info(f"Analysis saved to {output_file}")
287
280
 
288
281
  return analysis_result
289
282
 
283
+
290
284
  def get_available_search_engines() -> Dict[str, str]:
291
285
  """
292
286
  Get a dictionary of available search engines.
293
-
287
+
294
288
  Returns:
295
289
  Dictionary mapping engine names to descriptions
296
290
  """
297
291
 
298
292
  from ..web_search_engines.search_engine_factory import get_available_engines
293
+
299
294
  engines = get_available_engines()
300
-
295
+
301
296
  # Add some descriptions for common engines
302
297
  descriptions = {
303
298
  "auto": "Automatic selection based on query type",
@@ -306,25 +301,24 @@ def get_available_search_engines() -> Dict[str, str]:
306
301
  "pubmed": "Medical and biomedical literature",
307
302
  "semantic_scholar": "Academic papers across all fields",
308
303
  "github": "Code repositories and technical documentation",
309
- "local_all": "All local document collections"
304
+ "local_all": "All local document collections",
310
305
  }
311
-
306
+
312
307
  return {engine: descriptions.get(engine, "Search engine") for engine in engines}
313
308
 
314
309
 
315
310
  def get_available_collections() -> Dict[str, Dict[str, Any]]:
316
311
  """
317
312
  Get a dictionary of available local document collections.
318
-
313
+
319
314
  Returns:
320
315
  Dictionary mapping collection names to their configuration
321
316
  """
322
317
 
318
+ from ..config.config_files import LOCAL_COLLECTIONS_FILE
323
319
 
324
- from ..config import LOCAL_COLLECTIONS_FILE
325
-
326
320
  if os.path.exists(LOCAL_COLLECTIONS_FILE):
327
- collections = toml.load(LOCAL_COLLECTIONS_FILE)
328
- return collections
321
+ collections = toml.load(LOCAL_COLLECTIONS_FILE)
322
+ return collections
329
323
 
330
324
  return {}
@@ -0,0 +1,8 @@
1
+ """
2
+ Main entry point for the Local Deep Research application.
3
+ """
4
+
5
+ from src.local_deep_research.web.app import main
6
+
7
+ if __name__ == "__main__":
8
+ main()
@@ -1,10 +1,12 @@
1
1
  # citation_handler.py
2
2
 
3
+ from typing import Any, Dict, List, Union
4
+
3
5
  from langchain_core.documents import Document
4
- from typing import Dict, List, Union, Any
5
- import re
6
- from .utilties.search_utilities import remove_think_tags
7
- from .config import settings
6
+
7
+ from .config.config_files import settings
8
+ from .utilities.db_utils import get_db_setting
9
+
8
10
 
9
11
  class CitationHandler:
10
12
  def __init__(self, llm):
@@ -13,7 +15,10 @@ class CitationHandler:
13
15
  def _create_documents(
14
16
  self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
15
17
  ) -> List[Document]:
16
- """Convert search results to LangChain documents format and add index to original search results."""
18
+ """
19
+ Convert search results to LangChain documents format and add index
20
+ to original search results.
21
+ """
17
22
  documents = []
18
23
  if isinstance(search_results, str):
19
24
  return documents
@@ -22,14 +27,14 @@ class CitationHandler:
22
27
  if isinstance(result, dict):
23
28
  # Add index to the original search result dictionary
24
29
  result["index"] = str(i + nr_of_links + 1)
25
-
30
+
26
31
  content = result.get("full_content", result.get("snippet", ""))
27
32
  documents.append(
28
33
  Document(
29
34
  page_content=content,
30
35
  metadata={
31
- "source": result.get("link", f"source_{i+1}"),
32
- "title": result.get("title", f"Source {i+1}"),
36
+ "source": result.get("link", f"source_{i + 1}"),
37
+ "title": result.get("title", f"Source {i + 1}"),
33
38
  "index": i + nr_of_links + 1,
34
39
  },
35
40
  )
@@ -57,19 +62,20 @@ Question: {query}
57
62
  Sources:
58
63
  {formatted_sources}
59
64
 
60
- Provide a detailed analysis with citations and always keep URLS. Never make up sources. Example format: "According to the research [1], ..."
65
+ Provide a detailed analysis with citations. Do not create the bibliography, it will be provided automatically. Never make up sources. Never write or create urls. Only write text relevant to the question. Example format: "According to the research [1], ..."
61
66
  """
62
67
 
63
68
  response = self.llm.invoke(prompt)
64
-
65
- return {"content": remove_think_tags(response.content), "documents": documents}
69
+ if not isinstance(response, str):
70
+ response = response.content
71
+ return {"content": response, "documents": documents}
66
72
 
67
73
  def analyze_followup(
68
74
  self,
69
75
  question: str,
70
76
  search_results: Union[str, List[Dict]],
71
77
  previous_knowledge: str,
72
- nr_of_links : int
78
+ nr_of_links: int,
73
79
  ) -> Dict[str, Any]:
74
80
  """Process follow-up analysis with citations."""
75
81
  documents = self._create_documents(search_results, nr_of_links=nr_of_links)
@@ -80,7 +86,7 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
80
86
  2. Identify and flag any contradictions
81
87
  3. Verify basic facts (dates, company names, ownership)
82
88
  4. Note when sources disagree
83
-
89
+
84
90
  Previous Knowledge:
85
91
  {previous_knowledge}
86
92
 
@@ -88,8 +94,11 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
88
94
  {formatted_sources}
89
95
 
90
96
  Return any inconsistencies or conflicts found."""
91
- if settings.GENERAL.ENABLE_FACT_CHECKING:
92
- fact_check_response = remove_think_tags(self.llm.invoke(fact_check_prompt).content)
97
+ if get_db_setting(
98
+ "general.enable_fact_checking", settings.general.enable_fact_checking
99
+ ):
100
+ fact_check_response = self.llm.invoke(fact_check_prompt).content
101
+
93
102
  else:
94
103
  fact_check_response = ""
95
104
 
@@ -108,4 +117,4 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
108
117
 
109
118
  response = self.llm.invoke(prompt)
110
119
 
111
- return {"content": remove_think_tags(response.content), "documents": documents}
120
+ return {"content": response.content, "documents": documents}