local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -3,18 +3,21 @@ API module for Local Deep Research.
|
|
3
3
|
Provides programmatic access to search and research capabilities.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import Dict, List, Optional, Union, Any, Callable
|
7
6
|
import logging
|
8
7
|
import os
|
9
|
-
import
|
8
|
+
from typing import Any, Callable, Dict, Optional
|
9
|
+
|
10
10
|
import toml
|
11
|
+
|
12
|
+
from .. import get_report_generator # Use the lazy import function
|
13
|
+
from ..config.llm_config import get_llm
|
14
|
+
from ..config.search_config import get_search
|
11
15
|
from ..search_system import AdvancedSearchSystem
|
12
|
-
from ..
|
13
|
-
from ..config import get_llm, get_search, settings
|
14
|
-
from ..utilties.search_utilities import remove_think_tags
|
16
|
+
from ..utilities.search_utilities import remove_think_tags
|
15
17
|
|
16
18
|
logger = logging.getLogger(__name__)
|
17
19
|
|
20
|
+
|
18
21
|
def quick_summary(
|
19
22
|
query: str,
|
20
23
|
search_tool: Optional[str] = None,
|
@@ -30,7 +33,7 @@ def quick_summary(
|
|
30
33
|
) -> Dict[str, Any]:
|
31
34
|
"""
|
32
35
|
Generate a quick research summary for a given query.
|
33
|
-
|
36
|
+
|
34
37
|
Args:
|
35
38
|
query: The research query to analyze
|
36
39
|
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
@@ -43,7 +46,7 @@ def quick_summary(
|
|
43
46
|
safe_search: Whether to enable safe search
|
44
47
|
temperature: LLM temperature for generation
|
45
48
|
progress_callback: Optional callback function to receive progress updates
|
46
|
-
|
49
|
+
|
47
50
|
Returns:
|
48
51
|
Dictionary containing the research results with keys:
|
49
52
|
- 'summary': The generated summary text
|
@@ -51,41 +54,42 @@ def quick_summary(
|
|
51
54
|
- 'iterations': Number of iterations performed
|
52
55
|
- 'questions': Questions generated during research
|
53
56
|
"""
|
54
|
-
logger.info(
|
55
|
-
|
57
|
+
logger.info("Generating quick summary for query: %s", query)
|
56
58
|
|
57
59
|
# Get language model with custom temperature
|
58
60
|
llm = get_llm(temperature=temperature)
|
59
|
-
|
61
|
+
|
60
62
|
# Create search system with custom parameters
|
61
63
|
system = AdvancedSearchSystem()
|
62
|
-
|
64
|
+
|
63
65
|
# Override default settings with user-provided values
|
64
|
-
system.max_iterations = iterations
|
66
|
+
system.max_iterations = iterations
|
65
67
|
system.questions_per_iteration = questions_per_iteration
|
66
68
|
system.model = llm # Ensure the model is directly attached to the system
|
67
|
-
|
69
|
+
|
68
70
|
# Set the search engine if specified
|
69
71
|
if search_tool:
|
70
72
|
search_engine = get_search(search_tool)
|
71
73
|
if search_engine:
|
72
74
|
system.search = search_engine
|
73
75
|
else:
|
74
|
-
logger.warning(
|
75
|
-
|
76
|
+
logger.warning(
|
77
|
+
f"Could not create search engine '{search_tool}', using default."
|
78
|
+
)
|
79
|
+
|
76
80
|
# Set progress callback if provided
|
77
81
|
if progress_callback:
|
78
82
|
system.set_progress_callback(progress_callback)
|
79
|
-
|
83
|
+
|
80
84
|
# Perform the search and analysis
|
81
85
|
results = system.analyze_topic(query)
|
82
|
-
|
86
|
+
|
83
87
|
# Extract the summary from the current knowledge
|
84
88
|
if results and "current_knowledge" in results:
|
85
89
|
summary = results["current_knowledge"]
|
86
90
|
else:
|
87
91
|
summary = "Unable to generate summary for the query."
|
88
|
-
|
92
|
+
|
89
93
|
# Prepare the return value
|
90
94
|
return {
|
91
95
|
"summary": summary,
|
@@ -93,7 +97,7 @@ def quick_summary(
|
|
93
97
|
"iterations": results.get("iterations", 0),
|
94
98
|
"questions": results.get("questions", {}),
|
95
99
|
"formatted_findings": results.get("formatted_findings", ""),
|
96
|
-
"sources": results.get("all_links_of_system", [])
|
100
|
+
"sources": results.get("all_links_of_system", []),
|
97
101
|
}
|
98
102
|
|
99
103
|
|
@@ -114,7 +118,7 @@ def generate_report(
|
|
114
118
|
) -> Dict[str, Any]:
|
115
119
|
"""
|
116
120
|
Generate a comprehensive, structured research report for a given query.
|
117
|
-
|
121
|
+
|
118
122
|
Args:
|
119
123
|
query: The research query to analyze
|
120
124
|
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
@@ -129,26 +133,25 @@ def generate_report(
|
|
129
133
|
temperature: LLM temperature for generation
|
130
134
|
output_file: Optional path to save report markdown file
|
131
135
|
progress_callback: Optional callback function to receive progress updates
|
132
|
-
|
136
|
+
|
133
137
|
Returns:
|
134
138
|
Dictionary containing the research report with keys:
|
135
139
|
- 'content': The full report content in markdown format
|
136
140
|
- 'metadata': Report metadata including generated timestamp and query
|
137
141
|
"""
|
138
|
-
logger.info(
|
139
|
-
|
142
|
+
logger.info("Generating comprehensive research report for query: %s", query)
|
140
143
|
|
141
144
|
# Get language model with custom temperature
|
142
145
|
llm = get_llm(temperature=temperature)
|
143
|
-
|
146
|
+
|
144
147
|
# Create search system with custom parameters
|
145
148
|
system = AdvancedSearchSystem()
|
146
|
-
|
149
|
+
|
147
150
|
# Override default settings with user-provided values
|
148
151
|
system.max_iterations = iterations
|
149
152
|
system.questions_per_iteration = questions_per_iteration
|
150
153
|
system.model = llm # Ensure the model is directly attached to the system
|
151
|
-
|
154
|
+
|
152
155
|
# Set the search engine if specified
|
153
156
|
if search_tool:
|
154
157
|
search_engine = get_search(
|
@@ -158,35 +161,36 @@ def generate_report(
|
|
158
161
|
max_filtered_results=max_filtered_results,
|
159
162
|
region=region,
|
160
163
|
time_period=time_period,
|
161
|
-
safe_search=safe_search
|
164
|
+
safe_search=safe_search,
|
162
165
|
)
|
163
166
|
if search_engine:
|
164
167
|
system.search = search_engine
|
165
168
|
else:
|
166
|
-
logger.warning(
|
167
|
-
|
169
|
+
logger.warning(
|
170
|
+
f"Could not create search engine '{search_tool}', using default."
|
171
|
+
)
|
172
|
+
|
168
173
|
# Set progress callback if provided
|
169
174
|
if progress_callback:
|
170
175
|
system.set_progress_callback(progress_callback)
|
171
|
-
|
176
|
+
|
172
177
|
# Perform the initial research
|
173
178
|
initial_findings = system.analyze_topic(query)
|
174
|
-
|
179
|
+
|
175
180
|
# Generate the structured report
|
176
|
-
report_generator =
|
181
|
+
report_generator = get_report_generator(searches_per_section=searches_per_section)
|
177
182
|
report_generator.model = llm # Ensure the model is set on the report generator too
|
178
183
|
report = report_generator.generate_report(initial_findings, query)
|
179
|
-
|
184
|
+
|
180
185
|
# Save report to file if path is provided
|
181
186
|
if output_file and report and "content" in report:
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
187
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
188
|
+
f.write(report["content"])
|
189
|
+
logger.info(f"Report saved to {output_file}")
|
190
|
+
report["file_path"] = output_file
|
186
191
|
return report
|
187
192
|
|
188
193
|
|
189
|
-
|
190
194
|
def analyze_documents(
|
191
195
|
query: str,
|
192
196
|
collection_name: str,
|
@@ -197,7 +201,7 @@ def analyze_documents(
|
|
197
201
|
) -> Dict[str, Any]:
|
198
202
|
"""
|
199
203
|
Search and analyze documents in a specific local collection.
|
200
|
-
|
204
|
+
|
201
205
|
Args:
|
202
206
|
query: The search query
|
203
207
|
collection_name: Name of the local document collection to search
|
@@ -205,58 +209,63 @@ def analyze_documents(
|
|
205
209
|
temperature: LLM temperature for summary generation
|
206
210
|
force_reindex: Whether to force reindexing the collection
|
207
211
|
output_file: Optional path to save analysis results to a file
|
208
|
-
|
212
|
+
|
209
213
|
Returns:
|
210
214
|
Dictionary containing:
|
211
215
|
- 'summary': Summary of the findings
|
212
216
|
- 'documents': List of matching documents with content and metadata
|
213
217
|
"""
|
214
|
-
logger.info(
|
215
|
-
|
218
|
+
logger.info(
|
219
|
+
f"Analyzing documents in collection '{collection_name}' for query: {query}"
|
220
|
+
)
|
216
221
|
|
217
222
|
# Get language model with custom temperature
|
218
223
|
llm = get_llm(temperature=temperature)
|
219
|
-
|
224
|
+
|
220
225
|
# Get search engine for the specified collection
|
221
226
|
search = get_search(collection_name, llm_instance=llm)
|
222
|
-
|
227
|
+
|
223
228
|
if not search:
|
224
229
|
return {
|
225
230
|
"summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
|
226
|
-
"documents": []
|
231
|
+
"documents": [],
|
227
232
|
}
|
228
|
-
|
233
|
+
|
229
234
|
# Set max results
|
230
235
|
search.max_results = max_results
|
231
|
-
|
236
|
+
|
232
237
|
# Force reindex if requested
|
233
|
-
if force_reindex and hasattr(search,
|
234
|
-
|
235
|
-
|
238
|
+
if force_reindex and hasattr(search, "embedding_manager"):
|
239
|
+
for folder_path in search.folder_paths:
|
240
|
+
search.embedding_manager.index_folder(folder_path, force_reindex=True)
|
236
241
|
|
237
242
|
# Perform the search
|
238
243
|
results = search.run(query)
|
239
|
-
|
244
|
+
|
240
245
|
if not results:
|
241
246
|
return {
|
242
247
|
"summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
|
243
|
-
"documents": []
|
248
|
+
"documents": [],
|
244
249
|
}
|
245
|
-
|
250
|
+
|
246
251
|
# Get LLM to generate a summary of the results
|
247
252
|
|
248
|
-
docs_text = "\n\n".join(
|
249
|
-
|
250
|
-
|
253
|
+
docs_text = "\n\n".join(
|
254
|
+
[
|
255
|
+
f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
|
256
|
+
for i, doc in enumerate(results[:5])
|
257
|
+
]
|
258
|
+
) # Limit to first 5 docs and 1000 chars each
|
259
|
+
|
251
260
|
summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
|
252
|
-
|
261
|
+
|
253
262
|
{docs_text}
|
254
|
-
|
263
|
+
|
255
264
|
Provide a concise summary of the key information found in these documents related to the query.
|
256
265
|
"""
|
257
|
-
|
266
|
+
|
258
267
|
summary_response = llm.invoke(summary_prompt)
|
259
|
-
if hasattr(summary_response,
|
268
|
+
if hasattr(summary_response, "content"):
|
260
269
|
summary = remove_think_tags(summary_response.content)
|
261
270
|
else:
|
262
271
|
summary = str(summary_response)
|
@@ -266,38 +275,42 @@ def analyze_documents(
|
|
266
275
|
"summary": summary,
|
267
276
|
"documents": results,
|
268
277
|
"collection": collection_name,
|
269
|
-
"document_count": len(results)
|
278
|
+
"document_count": len(results),
|
270
279
|
}
|
271
|
-
|
280
|
+
|
272
281
|
# Save to file if requested
|
273
282
|
if output_file:
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
f.
|
284
|
-
|
285
|
-
|
286
|
-
|
283
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
284
|
+
f.write(f"# Document Analysis: {query}\n\n")
|
285
|
+
f.write(f"## Summary\n\n{summary}\n\n")
|
286
|
+
f.write(f"## Documents Found: {len(results)}\n\n")
|
287
|
+
|
288
|
+
for i, doc in enumerate(results):
|
289
|
+
f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
|
290
|
+
f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
|
291
|
+
f.write(
|
292
|
+
f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
|
293
|
+
)
|
294
|
+
f.write("---\n\n")
|
295
|
+
|
296
|
+
analysis_result["file_path"] = output_file
|
297
|
+
logger.info(f"Analysis saved to {output_file}")
|
287
298
|
|
288
299
|
return analysis_result
|
289
300
|
|
301
|
+
|
290
302
|
def get_available_search_engines() -> Dict[str, str]:
|
291
303
|
"""
|
292
304
|
Get a dictionary of available search engines.
|
293
|
-
|
305
|
+
|
294
306
|
Returns:
|
295
307
|
Dictionary mapping engine names to descriptions
|
296
308
|
"""
|
297
309
|
|
298
310
|
from ..web_search_engines.search_engine_factory import get_available_engines
|
311
|
+
|
299
312
|
engines = get_available_engines()
|
300
|
-
|
313
|
+
|
301
314
|
# Add some descriptions for common engines
|
302
315
|
descriptions = {
|
303
316
|
"auto": "Automatic selection based on query type",
|
@@ -306,25 +319,24 @@ def get_available_search_engines() -> Dict[str, str]:
|
|
306
319
|
"pubmed": "Medical and biomedical literature",
|
307
320
|
"semantic_scholar": "Academic papers across all fields",
|
308
321
|
"github": "Code repositories and technical documentation",
|
309
|
-
"local_all": "All local document collections"
|
322
|
+
"local_all": "All local document collections",
|
310
323
|
}
|
311
|
-
|
324
|
+
|
312
325
|
return {engine: descriptions.get(engine, "Search engine") for engine in engines}
|
313
326
|
|
314
327
|
|
315
328
|
def get_available_collections() -> Dict[str, Dict[str, Any]]:
|
316
329
|
"""
|
317
330
|
Get a dictionary of available local document collections.
|
318
|
-
|
331
|
+
|
319
332
|
Returns:
|
320
333
|
Dictionary mapping collection names to their configuration
|
321
334
|
"""
|
322
335
|
|
336
|
+
from ..config.config_files import LOCAL_COLLECTIONS_FILE
|
323
337
|
|
324
|
-
from ..config import LOCAL_COLLECTIONS_FILE
|
325
|
-
|
326
338
|
if os.path.exists(LOCAL_COLLECTIONS_FILE):
|
327
|
-
|
328
|
-
|
339
|
+
collections = toml.load(LOCAL_COLLECTIONS_FILE)
|
340
|
+
return collections
|
329
341
|
|
330
342
|
return {}
|
@@ -1,10 +1,12 @@
|
|
1
1
|
# citation_handler.py
|
2
2
|
|
3
|
+
from typing import Any, Dict, List, Union
|
4
|
+
|
3
5
|
from langchain_core.documents import Document
|
4
|
-
|
5
|
-
import
|
6
|
-
from .
|
7
|
-
|
6
|
+
|
7
|
+
from .config.config_files import settings
|
8
|
+
from .utilities.db_utils import get_db_setting
|
9
|
+
|
8
10
|
|
9
11
|
class CitationHandler:
|
10
12
|
def __init__(self, llm):
|
@@ -13,7 +15,10 @@ class CitationHandler:
|
|
13
15
|
def _create_documents(
|
14
16
|
self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
|
15
17
|
) -> List[Document]:
|
16
|
-
"""
|
18
|
+
"""
|
19
|
+
Convert search results to LangChain documents format and add index
|
20
|
+
to original search results.
|
21
|
+
"""
|
17
22
|
documents = []
|
18
23
|
if isinstance(search_results, str):
|
19
24
|
return documents
|
@@ -22,14 +27,14 @@ class CitationHandler:
|
|
22
27
|
if isinstance(result, dict):
|
23
28
|
# Add index to the original search result dictionary
|
24
29
|
result["index"] = str(i + nr_of_links + 1)
|
25
|
-
|
30
|
+
|
26
31
|
content = result.get("full_content", result.get("snippet", ""))
|
27
32
|
documents.append(
|
28
33
|
Document(
|
29
34
|
page_content=content,
|
30
35
|
metadata={
|
31
|
-
"source": result.get("link", f"source_{i+1}"),
|
32
|
-
"title": result.get("title", f"Source {i+1}"),
|
36
|
+
"source": result.get("link", f"source_{i + 1}"),
|
37
|
+
"title": result.get("title", f"Source {i + 1}"),
|
33
38
|
"index": i + nr_of_links + 1,
|
34
39
|
},
|
35
40
|
)
|
@@ -57,19 +62,20 @@ Question: {query}
|
|
57
62
|
Sources:
|
58
63
|
{formatted_sources}
|
59
64
|
|
60
|
-
Provide a detailed analysis with citations
|
65
|
+
Provide a detailed analysis with citations. Do not create the bibliography, it will be provided automatically. Never make up sources. Never write or create urls. Only write text relevant to the question. Example format: "According to the research [1], ..."
|
61
66
|
"""
|
62
67
|
|
63
68
|
response = self.llm.invoke(prompt)
|
64
|
-
|
65
|
-
|
69
|
+
if not isinstance(response, str):
|
70
|
+
response = response.content
|
71
|
+
return {"content": response, "documents": documents}
|
66
72
|
|
67
73
|
def analyze_followup(
|
68
74
|
self,
|
69
75
|
question: str,
|
70
76
|
search_results: Union[str, List[Dict]],
|
71
77
|
previous_knowledge: str,
|
72
|
-
nr_of_links
|
78
|
+
nr_of_links: int,
|
73
79
|
) -> Dict[str, Any]:
|
74
80
|
"""Process follow-up analysis with citations."""
|
75
81
|
documents = self._create_documents(search_results, nr_of_links=nr_of_links)
|
@@ -80,7 +86,7 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
80
86
|
2. Identify and flag any contradictions
|
81
87
|
3. Verify basic facts (dates, company names, ownership)
|
82
88
|
4. Note when sources disagree
|
83
|
-
|
89
|
+
|
84
90
|
Previous Knowledge:
|
85
91
|
{previous_knowledge}
|
86
92
|
|
@@ -88,8 +94,11 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
88
94
|
{formatted_sources}
|
89
95
|
|
90
96
|
Return any inconsistencies or conflicts found."""
|
91
|
-
if
|
92
|
-
|
97
|
+
if get_db_setting(
|
98
|
+
"general.enable_fact_checking", settings.general.enable_fact_checking
|
99
|
+
):
|
100
|
+
fact_check_response = self.llm.invoke(fact_check_prompt).content
|
101
|
+
|
93
102
|
else:
|
94
103
|
fact_check_response = ""
|
95
104
|
|
@@ -108,4 +117,4 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
108
117
|
|
109
118
|
response = self.llm.invoke(prompt)
|
110
119
|
|
111
|
-
return {"content":
|
120
|
+
return {"content": response.content, "documents": documents}
|