local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported public registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in those registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
local_deep_research/search_system.py
@@ -1,306 +1,176 @@
- from typing import Dict, List, Optional, Callable
- from datetime import datetime
- from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
- import os
- from .utilties.enums import KnowledgeAccumulationApproach
- from .config import settings, get_llm, get_search
- from .citation_handler import CitationHandler
- from datetime import datetime
- from .utilties.search_utilities import extract_links_from_search_results
+ # src/local_deep_research/search_system/search_system.py
  import logging
+ from typing import Callable, Dict
+
+ from langchain_core.language_models import BaseChatModel
+
+ from .advanced_search_system.findings.repository import FindingsRepository
+ from .advanced_search_system.questions.standard_question import (
+ StandardQuestionGenerator,
+ )
+ from .advanced_search_system.strategies.iterdrag_strategy import IterDRAGStrategy
+ from .advanced_search_system.strategies.parallel_search_strategy import (
+ ParallelSearchStrategy,
+ )
+ from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
+ from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
+ from .citation_handler import CitationHandler
+ from .config.config_files import settings
+ from .config.llm_config import get_llm
+ from .config.search_config import get_search
+ from .utilities.db_utils import get_db_setting
+ from .web_search_engines.search_engine_base import BaseSearchEngine
+
  logger = logging.getLogger(__name__)
- class AdvancedSearchSystem:
- def __init__(self):

-
- # Get fresh configuration

- self.search = get_search()
- self.model = get_llm()
- self.max_iterations = settings.search.iterations
- self.questions_per_iteration = settings.search.questions_per_iteration
-
- self.context_limit = settings.general.knowledge_accumulation_context_limit
- self.questions_by_iteration = {}
- self.citation_handler = CitationHandler(self.model)
- self.progress_callback = None
- self.all_links_of_system = list()
-
- # Check if search is available, log warning if not
- if self.search is None:
- logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
- self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
-
-
+ class AdvancedSearchSystem:
+ """
+ Advanced search system that coordinates different search strategies.
+ """
+
+ def __init__(
+ self,
+ strategy_name: str = "parallel",
+ include_text_content: bool = True,
+ use_cross_engine_filter: bool = True,
+ llm: BaseChatModel | None = None,
+ search: BaseSearchEngine | None = None,
+ ):
+ """Initialize the advanced search system.

- def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
- """Set a callback function to receive progress updates.
-
  Args:
- callback: Function that takes (message, progress_percent, metadata)
+ strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
+ include_text_content: If False, only includes metadata and links in search results
+ use_cross_engine_filter: Whether to filter results across search
+ engines.
+ llm: LLM to use. If not provided, it will use the default one.
+ search: Search engine to use. If not provided, it will use the
+ default one.
  """
- self.progress_callback = callback
+ # Get configuration
+ self.model = llm
+ if llm is None:
+ self.model = get_llm()
+ self.search = search
+ if search is None:
+ self.search = get_search(llm_instance=self.model)
+ self.max_iterations = get_db_setting(
+ "search.iterations", settings.search.iterations
+ )
+ self.questions_per_iteration = get_db_setting(
+ "search.questions_per_iteration", settings.search.questions_per_iteration
+ )

- def _update_progress(self, message: str, progress_percent: int = None, metadata: dict = None) -> None:
- """Send a progress update via the callback if available.
-
- Args:
- message: Description of the current progress state
- progress_percent: Progress percentage (0-100), if applicable
- metadata: Additional data about the progress state
- """
- if self.progress_callback:
- self.progress_callback(message, progress_percent, metadata or {})
+ # Log the strategy name that's being used
+ logger.info(
+ f"Initializing AdvancedSearchSystem with strategy_name='{strategy_name}'"
+ )

- def _get_follow_up_questions(self, current_knowledge: str, query: str) -> List[str]:
- now = datetime.now()
- current_time = now.strftime("%Y-%m-%d")
-
- self._update_progress("Generating follow-up questions...", None, {"iteration": len(self.questions_by_iteration)})
-
- if self.questions_by_iteration:
- prompt = f"""Critically reflect current knowledge (e.g., timeliness), what {self.questions_per_iteration} high-quality internet search questions remain unanswered to exactly answer the query?
- Query: {query}
- Today: {current_time}
- Past questions: {str(self.questions_by_iteration)}
- Knowledge: {current_knowledge}
- Include questions that critically reflect current knowledge.
- \n\n\nFormat: One question per line, e.g. \n Q: question1 \n Q: question2\n\n"""
+ # Initialize components
+ self.citation_handler = CitationHandler(self.model)
+ self.question_generator = StandardQuestionGenerator(self.model)
+ self.findings_repository = FindingsRepository(self.model)
+
+ # Initialize strategy based on name
+ if strategy_name.lower() == "iterdrag":
+ logger.info("Creating IterDRAGStrategy instance")
+ self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
+ elif strategy_name.lower() == "parallel":
+ logger.info("Creating ParallelSearchStrategy instance")
+ self.strategy = ParallelSearchStrategy(
+ model=self.model,
+ search=self.search,
+ include_text_content=include_text_content,
+ use_cross_engine_filter=use_cross_engine_filter,
+ )
+ elif strategy_name.lower() == "rapid":
+ logger.info("Creating RapidSearchStrategy instance")
+ self.strategy = RapidSearchStrategy(model=self.model, search=self.search)
  else:
- prompt = f" You will have follow up questions. First, identify if your knowledge is outdated (high chance). Today: {current_time}. Generate {self.questions_per_iteration} high-quality internet search questions to exactly answer: {query}\n\n\nFormat: One question per line, e.g. \n Q: question1 \n Q: question2\n\n"
+ logger.info("Creating StandardSearchStrategy instance")
+ self.strategy = StandardSearchStrategy(model=self.model, search=self.search)

- response = self.model.invoke(prompt)
- questions = [
- q.replace("Q:", "").strip()
- for q in remove_think_tags(response.content).split("\n")
- if q.strip().startswith("Q:")
- ][: self.questions_per_iteration]
-
- self._update_progress(
- f"Generated {len(questions)} follow-up questions",
- None,
- {"questions": questions}
- )
-
- return questions
+ # Log the actual strategy class
+ logger.info(f"Created strategy of type: {type(self.strategy).__name__}")

- def _compress_knowledge(self, current_knowledge: str, query: str, section_links) -> List[str]:
- self._update_progress("Compressing and summarizing knowledge...", None)
+ # For backward compatibility
+ self.questions_by_iteration = {}
+ self.progress_callback = lambda _1, _2, _3: None
+ self.all_links_of_system = list()

- now = datetime.now()
- current_time = now.strftime("%Y-%m-%d")
- formatted_links = format_links(links=section_links)
- if self.questions_by_iteration:
- prompt = f"""First provide a high-quality 1 page explanation with IEEE Referencing Style e.g. [1,2]. Never make up sources. Than provide a exact high-quality one sentence-long answer to the query.
+ # Configure the strategy with our attributes
+ if hasattr(self, "progress_callback") and self.progress_callback:
+ self.strategy.set_progress_callback(self.progress_callback)

- Knowledge: {current_knowledge}
- Query: {query}
- I will append following text to your output for the sources (dont repeat it):\n\n {formatted_links}"""
- response = self.model.invoke(prompt)
-
- self._update_progress("Knowledge compression complete", None)
- response = remove_think_tags(response.content)
- response = str(response) #+ "\n\n" + str(formatted_links)
+ def _progress_callback(self, message: str, progress: int, metadata: dict) -> None:
+ """Handle progress updates from the strategy."""
+ logger.info(f"Progress: {progress}% - {message}")
+ if hasattr(self, "progress_callback"):
+ self.progress_callback(message, progress, metadata)

- return response
+ def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
+ """Set a callback function to receive progress updates."""
+ self.progress_callback = callback
+ if hasattr(self, "strategy"):
+ self.strategy.set_progress_callback(callback)

  def analyze_topic(self, query: str) -> Dict:
- logger.info(f"Starting research on topic: {query}")
-
+ """Analyze a topic using the current strategy.

+ Args:
+ query: The research query to analyze
+ """

- findings = []
- current_knowledge = ""
- iteration = 0
- total_iterations = self.max_iterations
- section_links = list()
-
- self._update_progress("Initializing research system", 5, {
- "phase": "init",
- "iterations_planned": total_iterations
- })
-
- # Check if search engine is available
- if self.search is None:
- error_msg = "Error: No search engine available. Please check your configuration."
- self._update_progress(error_msg, 100, {
- "phase": "error",
- "error": "No search engine available",
- "status": "failed"
- })
- return {
- "findings": [],
- "iterations": 0,
- "questions": {},
- "formatted_findings": "Error: Unable to conduct research without a search engine.",
- "current_knowledge": "",
- "error": error_msg
- }
-
- while iteration < self.max_iterations:
- iteration_progress_base = (iteration / total_iterations) * 100
- self._update_progress(f"Starting iteration {iteration + 1} of {total_iterations}",
- int(iteration_progress_base),
- {"phase": "iteration_start", "iteration": iteration + 1})
-
- # Generate questions for this iteration
- questions = self._get_follow_up_questions(current_knowledge, query)
- self.questions_by_iteration[iteration] = questions
- logger.info(f"Generated questions: {questions}")
- question_count = len(questions)
- for q_idx, question in enumerate(questions):
- question_progress_base = iteration_progress_base + (((q_idx+1) / question_count) * (100/total_iterations) * 0.5)
-
- self._update_progress(f"Searching for: {question}",
- int(question_progress_base),
- {"phase": "search", "iteration": iteration + 1, "question_index": q_idx + 1})
-
- try:
- if self.search is None:
- self._update_progress(f"Search engine unavailable, skipping search for: {question}",
- int(question_progress_base + 2),
- {"phase": "search_error", "error": "No search engine available"})
- search_results = []
- else:
- search_results = self.search.run(question)
- except Exception as e:
- error_msg = f"Error during search: {str(e)}"
- logger.info(f"SEARCH ERROR: {error_msg}")
- self._update_progress(error_msg,
- int(question_progress_base + 2),
- {"phase": "search_error", "error": str(e)})
- search_results = []
-
- if search_results is None:
- self._update_progress(f"No search results found for question: {question}",
- int(question_progress_base + 2),
- {"phase": "search_complete", "result_count": 0})
- search_results = [] # Initialize to empty list instead of None
- continue
-
- self._update_progress(f"Found {len(search_results)} results for question: {question}",
- int(question_progress_base + 2),
- {"phase": "search_complete", "result_count": len(search_results)})
-
- logger.info(f"len search: {len(search_results)}")
-
- if len(search_results) == 0:
- continue
-
- self._update_progress(f"Analyzing results for: {question}",
- int(question_progress_base + 5),
- {"phase": "analysis"})
-
+ # Send progress message with LLM info
+ self.progress_callback(
+ f"Using {get_db_setting('llm.provider')} model: {get_db_setting('llm.model')}",
+ 1, # Low percentage to show this as an early step
+ {
+ "phase": "setup",
+ "llm_info": {
+ "name": get_db_setting("llm.model"),
+ "provider": get_db_setting("llm.provider"),
+ },
+ },
+ )
+ # Send progress message with search strategy info
+ search_tool = get_db_setting("search.tool")
+
+ self.progress_callback(
+ f"Using search tool: {search_tool}",
+ 1.5, # Between setup and processing steps
+ {
+ "phase": "setup",
+ "search_info": {
+ "tool": search_tool,
+ },
+ },
+ )

- try:
- result = self.citation_handler.analyze_followup(
- question, search_results, current_knowledge, nr_of_links=len(self.all_links_of_system)
- )
- links = extract_links_from_search_results(search_results)
- self.all_links_of_system.extend(links)
- section_links.extend(links)
- formatted_links = ""
- if links:
- formatted_links=format_links(links=links)
-
- logger.info(f"Generated questions: {formatted_links}")
- if result is not None:
- results_with_links = str(result["content"])
- findings.append(
- {
- "phase": f"Follow-up {iteration}.{questions.index(question) + 1}",
- "content": results_with_links,
- "question": question,
- "search_results": search_results,
- "documents": result["documents"],
- }
+ # Use the strategy to analyze the topic
+ result = self.strategy.analyze_topic(query)
+
+ # Update our attributes for backward compatibility
+ if hasattr(self.strategy, "questions_by_iteration"):
+ self.questions_by_iteration = self.strategy.questions_by_iteration
+ # Send progress message with search info
+ self.progress_callback(
+ f"Processed questions: {self.strategy.questions_by_iteration}",
+ 2, # Low percentage to show this as an early step
+ {
+ "phase": "setup",
+ "search_info": {
+ "questions_by_iteration": len(
+ self.strategy.questions_by_iteration
  )
+ },
+ },
+ )
+ if hasattr(self.strategy, "all_links_of_system"):
+ self.all_links_of_system = self.strategy.all_links_of_system

- if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
- current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
-
- if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
- logger.info("Compressing knowledge")
- self._update_progress(f"Compress Knowledge for: {question}",
- int(question_progress_base + 0),
- {"phase": "analysis"})
- current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
-
- self._update_progress(f"Analysis complete for question: {question}",
- int(question_progress_base + 10),
- {"phase": "analysis_complete"})
- except Exception as e:
- error_msg = f"Error analyzing results: {str(e)}"
- logger.info(f"ANALYSIS ERROR: {error_msg}")
- self._update_progress(error_msg,
- int(question_progress_base + 10),
- {"phase": "analysis_error", "error": str(e)})
- iteration += 1
-
- self._update_progress(f"Compressing knowledge after iteration {iteration}",
- int((iteration / total_iterations) * 100 - 5),
- {"phase": "knowledge_compression"})
- logger.info(str(iteration))
- logger.info(settings.general.knowledge_accumulation)
- logger.info(str(KnowledgeAccumulationApproach.ITERATION.value))
- if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
- try:
- logger.info("ITERATION - Compressing Knowledge")
- current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
- logger.info("FINISHED ITERATION - Compressing Knowledge")
- except Exception as e:
- error_msg = f"Error compressing knowledge: {str(e)}"
- logger.info(f"COMPRESSION ERROR: {error_msg}")
- self._update_progress(error_msg,
- int((iteration / total_iterations) * 100 - 3),
- {"phase": "compression_error", "error": str(e)})
-
-
-
- self._update_progress(f"Iteration {iteration} complete",
- int((iteration / total_iterations) * 100),
- {"phase": "iteration_complete", "iteration": iteration})
-
- try:
- formatted_findings = self._save_findings(findings, current_knowledge, query)
- except Exception as e:
- error_msg = f"Error saving findings: {str(e)}"
- logger.info(f"SAVE ERROR: {error_msg}")
- self._update_progress(error_msg,
- int((iteration / total_iterations) * 100),
- {"phase": "save_error", "error": str(e)})
- formatted_findings = "Error: Could not format findings due to an error."
-
- self._update_progress("Research complete", 95, {"phase": "complete"})
-
- return {
- "findings": findings,
- "iterations": iteration,
- "questions": self.questions_by_iteration,
- "formatted_findings": formatted_findings,
- "current_knowledge": current_knowledge
- }
-
- def _save_findings(self, findings: List[Dict], current_knowledge: str, query: str):
- logger.info("Saving findings ...")
- self._update_progress("Saving research findings...", None)
-
- formatted_findings = format_findings_to_text(
- findings, current_knowledge, self.questions_by_iteration
- )
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[
- :50
- ]
- safe_query = safe_query.replace(" ", "_").lower()
- import local_deep_research.config as conf
- output_dir = f"{conf.get_config_dir()}/research_outputs"
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
-
- filename = os.path.join(output_dir, f"formatted_output_{safe_query}.txt")
+ # Include the search system instance for access to citations
+ result["search_system"] = self

- with open(filename, "w", encoding="utf-8") as text_file:
- text_file.write(formatted_findings)
- logger.info("Saved findings")
- self._update_progress("Research findings saved", None, {"filename": filename})
- return formatted_findings
+ return result
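
For orientation, the hunk above replaces the old self-contained research loop with a thin coordinator that delegates to a pluggable strategy. The following is a minimal usage sketch based only on the constructor signature, set_progress_callback, and analyze_topic shown in this diff; the import path is inferred from the file list above, and the contents of the returned dict (beyond the "search_system" key added here) depend on the chosen strategy and are not documented in this diff.

# Sketch only; assumes a working LLM/search configuration so that
# get_llm() and get_search() succeed when no explicit llm/search is passed.
from local_deep_research.search_system import AdvancedSearchSystem

# "parallel" is the default in the new __init__; "standard", "iterdrag",
# and "rapid" are the other branches visible above.
system = AdvancedSearchSystem(strategy_name="parallel")

# Progress callbacks are now forwarded to the active strategy.
system.set_progress_callback(
    lambda message, percent, metadata: print(f"[{percent}%] {message}")
)

result = system.analyze_topic("example research question")

# The diff only guarantees that the instance attaches itself to the result;
# the remaining keys come from the strategy.
assert result["search_system"] is system
print(system.questions_by_iteration)  # kept for backward compatibility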
local_deep_research/setup_data_dir.py
@@ -0,0 +1,35 @@
+ #!/usr/bin/env python
+ """
+ Data directory setup script for Local Deep Research.
+ Creates the data directory for the application database if it doesn't exist.
+ """
+
+ import os
+
+
+ def setup_data_dir():
+ """Set up the data directory for the application."""
+ # Get the project root directory (3 levels up from this file)
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ project_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
+
+ # Define the data directory path
+ data_dir = os.path.join(project_root, "data")
+
+ # Create the data directory if it doesn't exist
+ if not os.path.exists(data_dir):
+ os.makedirs(data_dir)
+ print(f"Created data directory at: {data_dir}")
+ else:
+ print(f"Data directory already exists at: {data_dir}")
+
+ # Return the path to the data directory
+ return data_dir
+
+
+ if __name__ == "__main__":
+ data_dir = setup_data_dir()
+ db_path = os.path.join(data_dir, "ldr.db")
+ print(f"Database path: {db_path}")
+ print("Run the following command to migrate your database:")
+ print("python -m src.local_deep_research.migrate_db --backup")