local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -3,89 +3,115 @@ API module for Local Deep Research.
|
|
3
3
|
Provides programmatic access to search and research capabilities.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from typing import Dict, List, Optional, Union, Any, Callable
|
7
6
|
import logging
|
8
7
|
import os
|
9
|
-
import
|
8
|
+
from typing import Any, Callable, Dict, Optional
|
9
|
+
|
10
10
|
import toml
|
11
|
-
|
11
|
+
|
12
|
+
from ..config.llm_config import get_llm
|
13
|
+
from ..config.search_config import get_search
|
12
14
|
from ..report_generator import IntegratedReportGenerator
|
13
|
-
from ..
|
14
|
-
from ..
|
15
|
+
from ..search_system import AdvancedSearchSystem
|
16
|
+
from ..utilities.search_utilities import remove_think_tags
|
15
17
|
|
16
18
|
logger = logging.getLogger(__name__)
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
+
|
21
|
+
def _init_search_system(
|
22
|
+
model_name: str | None = None,
|
23
|
+
temperature: float = 0.7,
|
24
|
+
provider: str | None = None,
|
25
|
+
openai_endpoint_url: str | None = None,
|
26
|
+
progress_callback: Callable[[str, int, dict], None] | None = None,
|
20
27
|
search_tool: Optional[str] = None,
|
21
28
|
iterations: int = 1,
|
22
29
|
questions_per_iteration: int = 1,
|
23
|
-
|
24
|
-
max_filtered_results: int = 5,
|
25
|
-
region: str = "us",
|
26
|
-
time_period: str = "y",
|
27
|
-
safe_search: bool = True,
|
28
|
-
temperature: float = 0.7,
|
29
|
-
progress_callback: Optional[Callable] = None,
|
30
|
-
) -> Dict[str, Any]:
|
30
|
+
) -> AdvancedSearchSystem:
|
31
31
|
"""
|
32
|
-
|
33
|
-
|
32
|
+
Initializes the advanced search system with specified parameters. This function sets up
|
33
|
+
and returns an instance of the AdvancedSearchSystem using the provided configuration
|
34
|
+
options such as model name, temperature for randomness in responses, provider service
|
35
|
+
details, endpoint URL, and an optional search tool.
|
36
|
+
|
34
37
|
Args:
|
35
|
-
|
38
|
+
model_name: Name of the model to use (if None, uses database setting)
|
39
|
+
temperature: LLM temperature for generation
|
40
|
+
provider: Provider to use (if None, uses database setting)
|
41
|
+
openai_endpoint_url: Custom endpoint URL to use (if None, uses database
|
42
|
+
setting)
|
43
|
+
progress_callback: Optional callback function to receive progress updates
|
36
44
|
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
37
45
|
iterations: Number of research cycles to perform
|
38
46
|
questions_per_iteration: Number of questions to generate per cycle
|
39
|
-
|
40
|
-
max_filtered_results: Maximum results after relevance filtering
|
41
|
-
region: Search region/locale
|
42
|
-
time_period: Time period for search results (d=day, w=week, m=month, y=year)
|
43
|
-
safe_search: Whether to enable safe search
|
44
|
-
temperature: LLM temperature for generation
|
45
|
-
progress_callback: Optional callback function to receive progress updates
|
46
|
-
|
47
|
+
|
47
48
|
Returns:
|
48
|
-
|
49
|
-
- 'summary': The generated summary text
|
50
|
-
- 'findings': List of detailed findings from each search
|
51
|
-
- 'iterations': Number of iterations performed
|
52
|
-
- 'questions': Questions generated during research
|
53
|
-
"""
|
54
|
-
logger.info(f"Generating quick summary for query: {query}")
|
55
|
-
|
49
|
+
AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
|
56
50
|
|
51
|
+
"""
|
57
52
|
# Get language model with custom temperature
|
58
|
-
llm = get_llm(
|
59
|
-
|
53
|
+
llm = get_llm(
|
54
|
+
temperature=temperature,
|
55
|
+
openai_endpoint_url=openai_endpoint_url,
|
56
|
+
model_name=model_name,
|
57
|
+
provider=provider,
|
58
|
+
)
|
59
|
+
|
60
|
+
# Set the search engine if specified
|
61
|
+
search_engine = None
|
62
|
+
if search_tool:
|
63
|
+
search_engine = get_search(search_tool, llm_instance=llm)
|
64
|
+
if search_engine is None:
|
65
|
+
logger.warning(
|
66
|
+
f"Could not create search engine '{search_tool}', using default."
|
67
|
+
)
|
68
|
+
|
60
69
|
# Create search system with custom parameters
|
61
|
-
system = AdvancedSearchSystem()
|
62
|
-
|
70
|
+
system = AdvancedSearchSystem(llm=llm, search=search_engine)
|
71
|
+
|
63
72
|
# Override default settings with user-provided values
|
64
|
-
system.max_iterations = iterations
|
73
|
+
system.max_iterations = iterations
|
65
74
|
system.questions_per_iteration = questions_per_iteration
|
66
|
-
|
67
|
-
|
68
|
-
# Set the search engine if specified
|
69
|
-
if search_tool:
|
70
|
-
search_engine = get_search(search_tool)
|
71
|
-
if search_engine:
|
72
|
-
system.search = search_engine
|
73
|
-
else:
|
74
|
-
logger.warning(f"Could not create search engine '{search_tool}', using default.")
|
75
|
-
|
75
|
+
|
76
76
|
# Set progress callback if provided
|
77
77
|
if progress_callback:
|
78
78
|
system.set_progress_callback(progress_callback)
|
79
|
-
|
79
|
+
|
80
|
+
return system
|
81
|
+
|
82
|
+
|
83
|
+
def quick_summary(
|
84
|
+
query: str,
|
85
|
+
**kwargs: Any,
|
86
|
+
) -> Dict[str, Any]:
|
87
|
+
"""
|
88
|
+
Generate a quick research summary for a given query.
|
89
|
+
|
90
|
+
Args:
|
91
|
+
query: The research query to analyze
|
92
|
+
**kwargs: Configuration for the search system. Will be forwarded to
|
93
|
+
`_init_search_system()`.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
Dictionary containing the research results with keys:
|
97
|
+
- 'summary': The generated summary text
|
98
|
+
- 'findings': List of detailed findings from each search
|
99
|
+
- 'iterations': Number of iterations performed
|
100
|
+
- 'questions': Questions generated during research
|
101
|
+
"""
|
102
|
+
logger.info("Generating quick summary for query: %s", query)
|
103
|
+
|
104
|
+
system = _init_search_system(**kwargs)
|
105
|
+
|
80
106
|
# Perform the search and analysis
|
81
107
|
results = system.analyze_topic(query)
|
82
|
-
|
108
|
+
|
83
109
|
# Extract the summary from the current knowledge
|
84
110
|
if results and "current_knowledge" in results:
|
85
111
|
summary = results["current_knowledge"]
|
86
112
|
else:
|
87
113
|
summary = "Unable to generate summary for the query."
|
88
|
-
|
114
|
+
|
89
115
|
# Prepare the return value
|
90
116
|
return {
|
91
117
|
"summary": summary,
|
@@ -93,100 +119,60 @@ def quick_summary(
|
|
93
119
|
"iterations": results.get("iterations", 0),
|
94
120
|
"questions": results.get("questions", {}),
|
95
121
|
"formatted_findings": results.get("formatted_findings", ""),
|
96
|
-
"sources": results.get("all_links_of_system", [])
|
122
|
+
"sources": results.get("all_links_of_system", []),
|
97
123
|
}
|
98
124
|
|
99
125
|
|
100
126
|
def generate_report(
|
101
127
|
query: str,
|
102
|
-
search_tool: Optional[str] = None,
|
103
|
-
iterations: int = 2,
|
104
|
-
questions_per_iteration: int = 2,
|
105
|
-
searches_per_section: int = 2,
|
106
|
-
max_results: int = 50,
|
107
|
-
max_filtered_results: int = 5,
|
108
|
-
region: str = "us",
|
109
|
-
time_period: str = "y",
|
110
|
-
safe_search: bool = True,
|
111
|
-
temperature: float = 0.7,
|
112
128
|
output_file: Optional[str] = None,
|
113
129
|
progress_callback: Optional[Callable] = None,
|
130
|
+
searches_per_section: int = 2,
|
131
|
+
**kwargs: Any,
|
114
132
|
) -> Dict[str, Any]:
|
115
133
|
"""
|
116
134
|
Generate a comprehensive, structured research report for a given query.
|
117
|
-
|
135
|
+
|
118
136
|
Args:
|
119
137
|
query: The research query to analyze
|
120
|
-
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
121
|
-
iterations: Number of research cycles to perform
|
122
|
-
questions_per_iteration: Number of questions to generate per cycle
|
123
|
-
searches_per_section: Number of searches to perform per report section
|
124
|
-
max_results: Maximum number of search results to consider
|
125
|
-
max_filtered_results: Maximum results after relevance filtering
|
126
|
-
region: Search region/locale
|
127
|
-
time_period: Time period for search results (d=day, w=week, m=month, y=year)
|
128
|
-
safe_search: Whether to enable safe search
|
129
|
-
temperature: LLM temperature for generation
|
130
138
|
output_file: Optional path to save report markdown file
|
131
139
|
progress_callback: Optional callback function to receive progress updates
|
132
|
-
|
140
|
+
searches_per_section: The number of searches to perform for each
|
141
|
+
section in the report.
|
142
|
+
|
133
143
|
Returns:
|
134
144
|
Dictionary containing the research report with keys:
|
135
145
|
- 'content': The full report content in markdown format
|
136
146
|
- 'metadata': Report metadata including generated timestamp and query
|
137
147
|
"""
|
138
|
-
logger.info(
|
139
|
-
|
148
|
+
logger.info("Generating comprehensive research report for query: %s", query)
|
149
|
+
|
150
|
+
system = _init_search_system(**kwargs)
|
140
151
|
|
141
|
-
# Get language model with custom temperature
|
142
|
-
llm = get_llm(temperature=temperature)
|
143
|
-
|
144
|
-
# Create search system with custom parameters
|
145
|
-
system = AdvancedSearchSystem()
|
146
|
-
|
147
|
-
# Override default settings with user-provided values
|
148
|
-
system.max_iterations = iterations
|
149
|
-
system.questions_per_iteration = questions_per_iteration
|
150
|
-
system.model = llm # Ensure the model is directly attached to the system
|
151
|
-
|
152
|
-
# Set the search engine if specified
|
153
|
-
if search_tool:
|
154
|
-
search_engine = get_search(
|
155
|
-
search_tool,
|
156
|
-
llm_instance=llm,
|
157
|
-
max_results=max_results,
|
158
|
-
max_filtered_results=max_filtered_results,
|
159
|
-
region=region,
|
160
|
-
time_period=time_period,
|
161
|
-
safe_search=safe_search
|
162
|
-
)
|
163
|
-
if search_engine:
|
164
|
-
system.search = search_engine
|
165
|
-
else:
|
166
|
-
logger.warning(f"Could not create search engine '{search_tool}', using default.")
|
167
|
-
|
168
152
|
# Set progress callback if provided
|
169
153
|
if progress_callback:
|
170
154
|
system.set_progress_callback(progress_callback)
|
171
|
-
|
155
|
+
|
172
156
|
# Perform the initial research
|
173
157
|
initial_findings = system.analyze_topic(query)
|
174
|
-
|
158
|
+
|
175
159
|
# Generate the structured report
|
176
|
-
report_generator = IntegratedReportGenerator(
|
177
|
-
|
160
|
+
report_generator = IntegratedReportGenerator(
|
161
|
+
search_system=system,
|
162
|
+
llm=system.model,
|
163
|
+
searches_per_section=searches_per_section,
|
164
|
+
)
|
178
165
|
report = report_generator.generate_report(initial_findings, query)
|
179
|
-
|
166
|
+
|
180
167
|
# Save report to file if path is provided
|
181
168
|
if output_file and report and "content" in report:
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
169
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
170
|
+
f.write(report["content"])
|
171
|
+
logger.info(f"Report saved to {output_file}")
|
172
|
+
report["file_path"] = output_file
|
186
173
|
return report
|
187
174
|
|
188
175
|
|
189
|
-
|
190
176
|
def analyze_documents(
|
191
177
|
query: str,
|
192
178
|
collection_name: str,
|
@@ -197,7 +183,7 @@ def analyze_documents(
|
|
197
183
|
) -> Dict[str, Any]:
|
198
184
|
"""
|
199
185
|
Search and analyze documents in a specific local collection.
|
200
|
-
|
186
|
+
|
201
187
|
Args:
|
202
188
|
query: The search query
|
203
189
|
collection_name: Name of the local document collection to search
|
@@ -205,58 +191,63 @@ def analyze_documents(
|
|
205
191
|
temperature: LLM temperature for summary generation
|
206
192
|
force_reindex: Whether to force reindexing the collection
|
207
193
|
output_file: Optional path to save analysis results to a file
|
208
|
-
|
194
|
+
|
209
195
|
Returns:
|
210
196
|
Dictionary containing:
|
211
197
|
- 'summary': Summary of the findings
|
212
198
|
- 'documents': List of matching documents with content and metadata
|
213
199
|
"""
|
214
|
-
logger.info(
|
215
|
-
|
200
|
+
logger.info(
|
201
|
+
f"Analyzing documents in collection '{collection_name}' for query: {query}"
|
202
|
+
)
|
216
203
|
|
217
204
|
# Get language model with custom temperature
|
218
205
|
llm = get_llm(temperature=temperature)
|
219
|
-
|
206
|
+
|
220
207
|
# Get search engine for the specified collection
|
221
208
|
search = get_search(collection_name, llm_instance=llm)
|
222
|
-
|
209
|
+
|
223
210
|
if not search:
|
224
211
|
return {
|
225
212
|
"summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
|
226
|
-
"documents": []
|
213
|
+
"documents": [],
|
227
214
|
}
|
228
|
-
|
215
|
+
|
229
216
|
# Set max results
|
230
217
|
search.max_results = max_results
|
231
|
-
|
218
|
+
|
232
219
|
# Force reindex if requested
|
233
|
-
if force_reindex and hasattr(search,
|
234
|
-
|
235
|
-
|
220
|
+
if force_reindex and hasattr(search, "embedding_manager"):
|
221
|
+
for folder_path in search.folder_paths:
|
222
|
+
search.embedding_manager.index_folder(folder_path, force_reindex=True)
|
236
223
|
|
237
224
|
# Perform the search
|
238
225
|
results = search.run(query)
|
239
|
-
|
226
|
+
|
240
227
|
if not results:
|
241
228
|
return {
|
242
229
|
"summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
|
243
|
-
"documents": []
|
230
|
+
"documents": [],
|
244
231
|
}
|
245
|
-
|
232
|
+
|
246
233
|
# Get LLM to generate a summary of the results
|
247
234
|
|
248
|
-
docs_text = "\n\n".join(
|
249
|
-
|
250
|
-
|
235
|
+
docs_text = "\n\n".join(
|
236
|
+
[
|
237
|
+
f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
|
238
|
+
for i, doc in enumerate(results[:5])
|
239
|
+
]
|
240
|
+
) # Limit to first 5 docs and 1000 chars each
|
241
|
+
|
251
242
|
summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
|
252
|
-
|
243
|
+
|
253
244
|
{docs_text}
|
254
|
-
|
245
|
+
|
255
246
|
Provide a concise summary of the key information found in these documents related to the query.
|
256
247
|
"""
|
257
|
-
|
248
|
+
|
258
249
|
summary_response = llm.invoke(summary_prompt)
|
259
|
-
if hasattr(summary_response,
|
250
|
+
if hasattr(summary_response, "content"):
|
260
251
|
summary = remove_think_tags(summary_response.content)
|
261
252
|
else:
|
262
253
|
summary = str(summary_response)
|
@@ -266,38 +257,42 @@ def analyze_documents(
|
|
266
257
|
"summary": summary,
|
267
258
|
"documents": results,
|
268
259
|
"collection": collection_name,
|
269
|
-
"document_count": len(results)
|
260
|
+
"document_count": len(results),
|
270
261
|
}
|
271
|
-
|
262
|
+
|
272
263
|
# Save to file if requested
|
273
264
|
if output_file:
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
f.
|
284
|
-
|
285
|
-
|
286
|
-
|
265
|
+
with open(output_file, "w", encoding="utf-8") as f:
|
266
|
+
f.write(f"# Document Analysis: {query}\n\n")
|
267
|
+
f.write(f"## Summary\n\n{summary}\n\n")
|
268
|
+
f.write(f"## Documents Found: {len(results)}\n\n")
|
269
|
+
|
270
|
+
for i, doc in enumerate(results):
|
271
|
+
f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
|
272
|
+
f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
|
273
|
+
f.write(
|
274
|
+
f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
|
275
|
+
)
|
276
|
+
f.write("---\n\n")
|
277
|
+
|
278
|
+
analysis_result["file_path"] = output_file
|
279
|
+
logger.info(f"Analysis saved to {output_file}")
|
287
280
|
|
288
281
|
return analysis_result
|
289
282
|
|
283
|
+
|
290
284
|
def get_available_search_engines() -> Dict[str, str]:
|
291
285
|
"""
|
292
286
|
Get a dictionary of available search engines.
|
293
|
-
|
287
|
+
|
294
288
|
Returns:
|
295
289
|
Dictionary mapping engine names to descriptions
|
296
290
|
"""
|
297
291
|
|
298
292
|
from ..web_search_engines.search_engine_factory import get_available_engines
|
293
|
+
|
299
294
|
engines = get_available_engines()
|
300
|
-
|
295
|
+
|
301
296
|
# Add some descriptions for common engines
|
302
297
|
descriptions = {
|
303
298
|
"auto": "Automatic selection based on query type",
|
@@ -306,25 +301,24 @@ def get_available_search_engines() -> Dict[str, str]:
|
|
306
301
|
"pubmed": "Medical and biomedical literature",
|
307
302
|
"semantic_scholar": "Academic papers across all fields",
|
308
303
|
"github": "Code repositories and technical documentation",
|
309
|
-
"local_all": "All local document collections"
|
304
|
+
"local_all": "All local document collections",
|
310
305
|
}
|
311
|
-
|
306
|
+
|
312
307
|
return {engine: descriptions.get(engine, "Search engine") for engine in engines}
|
313
308
|
|
314
309
|
|
315
310
|
def get_available_collections() -> Dict[str, Dict[str, Any]]:
|
316
311
|
"""
|
317
312
|
Get a dictionary of available local document collections.
|
318
|
-
|
313
|
+
|
319
314
|
Returns:
|
320
315
|
Dictionary mapping collection names to their configuration
|
321
316
|
"""
|
322
317
|
|
318
|
+
from ..config.config_files import LOCAL_COLLECTIONS_FILE
|
323
319
|
|
324
|
-
from ..config import LOCAL_COLLECTIONS_FILE
|
325
|
-
|
326
320
|
if os.path.exists(LOCAL_COLLECTIONS_FILE):
|
327
|
-
|
328
|
-
|
321
|
+
collections = toml.load(LOCAL_COLLECTIONS_FILE)
|
322
|
+
return collections
|
329
323
|
|
330
324
|
return {}
|
@@ -1,10 +1,12 @@
|
|
1
1
|
# citation_handler.py
|
2
2
|
|
3
|
+
from typing import Any, Dict, List, Union
|
4
|
+
|
3
5
|
from langchain_core.documents import Document
|
4
|
-
|
5
|
-
import
|
6
|
-
from .
|
7
|
-
|
6
|
+
|
7
|
+
from .config.config_files import settings
|
8
|
+
from .utilities.db_utils import get_db_setting
|
9
|
+
|
8
10
|
|
9
11
|
class CitationHandler:
|
10
12
|
def __init__(self, llm):
|
@@ -13,7 +15,10 @@ class CitationHandler:
|
|
13
15
|
def _create_documents(
|
14
16
|
self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
|
15
17
|
) -> List[Document]:
|
16
|
-
"""
|
18
|
+
"""
|
19
|
+
Convert search results to LangChain documents format and add index
|
20
|
+
to original search results.
|
21
|
+
"""
|
17
22
|
documents = []
|
18
23
|
if isinstance(search_results, str):
|
19
24
|
return documents
|
@@ -22,14 +27,14 @@ class CitationHandler:
|
|
22
27
|
if isinstance(result, dict):
|
23
28
|
# Add index to the original search result dictionary
|
24
29
|
result["index"] = str(i + nr_of_links + 1)
|
25
|
-
|
30
|
+
|
26
31
|
content = result.get("full_content", result.get("snippet", ""))
|
27
32
|
documents.append(
|
28
33
|
Document(
|
29
34
|
page_content=content,
|
30
35
|
metadata={
|
31
|
-
"source": result.get("link", f"source_{i+1}"),
|
32
|
-
"title": result.get("title", f"Source {i+1}"),
|
36
|
+
"source": result.get("link", f"source_{i + 1}"),
|
37
|
+
"title": result.get("title", f"Source {i + 1}"),
|
33
38
|
"index": i + nr_of_links + 1,
|
34
39
|
},
|
35
40
|
)
|
@@ -57,19 +62,20 @@ Question: {query}
|
|
57
62
|
Sources:
|
58
63
|
{formatted_sources}
|
59
64
|
|
60
|
-
Provide a detailed analysis with citations
|
65
|
+
Provide a detailed analysis with citations. Do not create the bibliography, it will be provided automatically. Never make up sources. Never write or create urls. Only write text relevant to the question. Example format: "According to the research [1], ..."
|
61
66
|
"""
|
62
67
|
|
63
68
|
response = self.llm.invoke(prompt)
|
64
|
-
|
65
|
-
|
69
|
+
if not isinstance(response, str):
|
70
|
+
response = response.content
|
71
|
+
return {"content": response, "documents": documents}
|
66
72
|
|
67
73
|
def analyze_followup(
|
68
74
|
self,
|
69
75
|
question: str,
|
70
76
|
search_results: Union[str, List[Dict]],
|
71
77
|
previous_knowledge: str,
|
72
|
-
nr_of_links
|
78
|
+
nr_of_links: int,
|
73
79
|
) -> Dict[str, Any]:
|
74
80
|
"""Process follow-up analysis with citations."""
|
75
81
|
documents = self._create_documents(search_results, nr_of_links=nr_of_links)
|
@@ -80,7 +86,7 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
80
86
|
2. Identify and flag any contradictions
|
81
87
|
3. Verify basic facts (dates, company names, ownership)
|
82
88
|
4. Note when sources disagree
|
83
|
-
|
89
|
+
|
84
90
|
Previous Knowledge:
|
85
91
|
{previous_knowledge}
|
86
92
|
|
@@ -88,8 +94,11 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
88
94
|
{formatted_sources}
|
89
95
|
|
90
96
|
Return any inconsistencies or conflicts found."""
|
91
|
-
if
|
92
|
-
|
97
|
+
if get_db_setting(
|
98
|
+
"general.enable_fact_checking", settings.general.enable_fact_checking
|
99
|
+
):
|
100
|
+
fact_check_response = self.llm.invoke(fact_check_prompt).content
|
101
|
+
|
93
102
|
else:
|
94
103
|
fact_check_response = ""
|
95
104
|
|
@@ -108,4 +117,4 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
|
|
108
117
|
|
109
118
|
response = self.llm.invoke(prompt)
|
110
119
|
|
111
|
-
return {"content":
|
120
|
+
return {"content": response.content, "documents": documents}
|