local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,33 +1,34 @@
1
1
  """
2
- Local Deep Research - AI-powered research assistant
3
-
4
- A powerful AI research system with iterative analysis capabilities
5
- and multiple search engines integration.
2
+ Local Deep Research - A tool for conducting deep research using AI.
6
3
  """
7
4
 
8
5
  __version__ = "0.1.0"
6
+ __author__ = "Your Name"
7
+ __description__ = "A tool for conducting deep research using AI"
8
+
9
+ from .config.llm_config import get_llm
10
+ from .config.search_config import get_search
11
+ from .report_generator import get_report_generator
12
+
13
+
14
+ def get_advanced_search_system(strategy_name: str = "iterdrag"):
15
+ """
16
+ Get an instance of the advanced search system.
17
+
18
+ Args:
19
+ strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
9
20
 
10
- # Initialize configuration on module import
11
- from .utilties.setup_utils import setup_user_directories
21
+ Returns:
22
+ AdvancedSearchSystem: An instance of the advanced search system
23
+ """
24
+ from .search_system import AdvancedSearchSystem
12
25
 
13
- # Import main components
14
- from .search_system import AdvancedSearchSystem
15
- from .report_generator import IntegratedReportGenerator
16
- from .config import get_llm, get_search
26
+ return AdvancedSearchSystem(strategy_name=strategy_name)
17
27
 
18
- # Import API functions
19
- from .api import quick_summary, generate_report, analyze_documents
20
- from .api import get_available_search_engines, get_available_collections
21
28
 
22
- # Export it
23
29
  __all__ = [
24
- "AdvancedSearchSystem",
25
- "IntegratedReportGenerator",
26
30
  "get_llm",
27
31
  "get_search",
28
- "quick_summary",
29
- "generate_report",
30
- "analyze_documents",
31
- "get_available_search_engines",
32
- "get_available_collections"
33
- ]
32
+ "get_report_generator",
33
+ "get_advanced_search_system",
34
+ ]
@@ -0,0 +1,16 @@
1
+ """
2
+ Main entry point when running the package with `python -m local_deep_research`.
3
+ This avoids circular imports by directly importing the main function after
4
+ the package is fully loaded.
5
+ """
6
+
7
+
8
def main():
    """
    Run the package's command-line entry point.

    The real ``main`` lives in the sibling ``main`` module. It is imported
    here, at call time, rather than at module import time so that it is only
    loaded once the package itself has finished initializing.
    """
    from .main import main as run_cli

    run_cli()


if __name__ == "__main__":
    main()
@@ -0,0 +1,7 @@
1
+ # Search System Package
2
+ # AdvancedSearchSystem is now in search_system.py
3
+
4
+ # We cannot directly import AdvancedSearchSystem here due to circular imports
5
+ # The web code should import from the search_system module instead
6
+
7
+ __all__ = []
@@ -0,0 +1,8 @@
1
+ # src/local_deep_research/advanced_search_system/filters/__init__.py
2
+ """
3
+ Filters for search results.
4
+ """
5
+
6
+ from .cross_engine_filter import CrossEngineFilter
7
+
8
+ __all__ = ["CrossEngineFilter"]
@@ -0,0 +1,38 @@
1
+ # src/local_deep_research/advanced_search_system/filters/base_filter.py
2
+ """
3
+ Base class for search result filters.
4
+ """
5
+
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from typing import Dict, List
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class BaseFilter(ABC):
    """Common interface that every search-result filter implements."""

    def __init__(self, model=None):
        """
        Remember the language model the filter may consult.

        Args:
            model: The language model to use for relevance assessments
        """
        self.model = model

    @abstractmethod
    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
        """
        Reduce ``results`` to the entries relevant to ``query``.

        Concrete filters must override this method; the base implementation
        does nothing and returns ``None``.

        Args:
            results: List of search result dictionaries
            query: The original search query
            **kwargs: Additional filter-specific parameters

        Returns:
            Filtered list of search results
        """
@@ -0,0 +1,200 @@
1
+ """
2
+ Cross-engine search result filter implementation.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import Dict, List
8
+
9
+ from ...utilities.search_utilities import remove_think_tags
10
+ from .base_filter import BaseFilter
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class CrossEngineFilter(BaseFilter):
    """Filter that ranks and filters results from multiple search engines.

    The filter shows a preview of the combined results to the language model,
    asks for a JSON array of result indices ordered by relevance, and keeps
    only the results the model selected. Whenever the model cannot be used or
    its answer cannot be interpreted, the leading results are returned
    unfiltered instead of raising.
    """

    # With this many results or fewer the model is not consulted at all.
    _MIN_RESULTS_TO_FILTER = 10
    # Only this many leading results are previewed to the model.
    _MAX_CONTEXT_ITEMS = 30
    # Snippets longer than this are cut and suffixed with "...".
    _MAX_SNIPPET_CHARS = 200
    # Number of originals returned when the model's ranking keeps nothing.
    _FALLBACK_RESULTS = 10

    def __init__(
        self, model, max_results=20, default_reorder=True, default_reindex=True
    ):
        """
        Initialize the cross-engine filter.

        Args:
            model: Language model to use for relevance assessment
            max_results: Maximum number of results to keep after filtering
            default_reorder: Default setting for reordering results by relevance
            default_reindex: Default setting for reindexing results after filtering
        """
        super().__init__(model)
        self.max_results = max_results
        self.default_reorder = default_reorder
        self.default_reindex = default_reindex

    def filter_results(
        self,
        results: List[Dict],
        query: str,
        reorder=None,
        reindex=None,
        start_index=0,
        **kwargs,
    ) -> List[Dict]:
        """
        Filter and rank search results from multiple engines by relevance.

        Result dictionaries are not copied: when reindexing is active, the
        ``"index"`` key is written onto the dictionaries that are returned.

        Args:
            results: Combined list of search results from all engines
            query: The original search query
            reorder: Whether to reorder results by relevance (default: use instance default)
            reindex: Whether to update result indices after filtering (default: use instance default)
            start_index: Starting index for the results (used for continuous indexing)
            **kwargs: Additional parameters (unused here)

        Returns:
            Filtered list of search results, at most ``max_results`` long
        """
        # Use instance defaults if not specified
        if reorder is None:
            reorder = self.default_reorder
        if reindex is None:
            reindex = self.default_reindex

        # Don't filter without a model or when there are only a few results.
        if not self.model or len(results) <= self._MIN_RESULTS_TO_FILTER:
            return self._finalize(results, self.max_results, reindex, start_index)

        # Only these results are previewed to the model, so only their
        # positions are meaningful indices in its answer.
        shown = results[: self._MAX_CONTEXT_ITEMS]

        try:
            prompt = self._build_prompt(query, shown)
            response = self.model.invoke(prompt)

            # Chat models expose the text as ``.content``; anything else is
            # stringified.
            raw_text = response.content if hasattr(response, "content") else str(response)
            response_text = remove_think_tags(raw_text).strip()

            # Find the JSON array in the response, tolerating surrounding prose.
            start_idx = response_text.find("[")
            end_idx = response_text.rfind("]")
            if start_idx < 0 or end_idx <= start_idx:
                logger.info(
                    "Could not find JSON array in response, returning original results"
                )
                return self._finalize(
                    results, self.max_results, reindex, start_index
                )

            parsed = json.loads(response_text[start_idx : end_idx + 1])
            ranked_indices = self._valid_indices(parsed, len(shown))

            if not reorder:
                # Keep only the selected results, in their original order.
                kept = [results[idx] for idx in sorted(ranked_indices)]
                final_results = self._finalize(
                    kept, self.max_results, reindex, start_index
                )
                logger.info(
                    "Cross-engine filtering kept %d out of %d results without reordering",
                    len(final_results),
                    len(results),
                )
                return final_results

            ranked_results = [results[idx] for idx in ranked_indices]

            # If filtering removed everything, return the top originals.
            if not ranked_results:
                logger.info(
                    "Cross-engine filtering removed all results, returning top 10 originals instead"
                )
                return self._finalize(
                    results, self._FALLBACK_RESULTS, reindex, start_index
                )

            final_results = self._finalize(
                ranked_results, self.max_results, reindex, start_index
            )
            logger.info(
                "Cross-engine filtering kept %d out of %d results with reordering=%s, reindex=%s",
                len(final_results),
                len(results),
                reorder,
                reindex,
            )
            return final_results

        except Exception as e:
            # Deliberate best-effort boundary: a failing model call or an
            # unparsable answer must not break the search, so fall back to
            # the unfiltered leading results.
            logger.error(f"Cross-engine filtering error: {e}")
            return self._finalize(results, self.max_results, reindex, start_index)

    def _build_prompt(self, query: str, shown: List[Dict]) -> str:
        """Build the ranking prompt that previews ``shown`` to the model."""
        preview_context = []
        for i, result in enumerate(shown):
            # ``or`` guards against explicit ``None`` values, which would
            # otherwise fail on ``.strip()``.
            title = str(result.get("title") or "Untitled").strip()
            snippet = str(result.get("snippet") or "").strip()
            engine = result.get("engine", "Unknown engine")

            # Clean up snippet if too long
            if len(snippet) > self._MAX_SNIPPET_CHARS:
                snippet = snippet[: self._MAX_SNIPPET_CHARS] + "..."

            preview_context.append(
                f"[{i}] Engine: {engine} | Title: {title}\nSnippet: {snippet}"
            )

        context = "\n\n".join(preview_context)

        return (
            "You are a search result filter. Your task is to rank search results "
            "from multiple engines by relevance to a query.\n"
            "\n"
            f'Query: "{query}"\n'
            "\n"
            "Search Results:\n"
            f"{context}\n"
            "\n"
            "Return the search results as a JSON array of indices, ranked from "
            "most to least relevant to the query.\n"
            "Only include indices of results that are actually relevant to the query.\n"
            "For example: [3, 0, 7, 1]\n"
            "\n"
            "If no results seem relevant to the query, return an empty array: []"
        )

    @staticmethod
    def _valid_indices(candidates, upper_bound: int) -> List[int]:
        """Return the usable indices from the model's answer, order preserved.

        Entries that are not plain integers, fall outside
        ``0 <= idx < upper_bound``, or repeat an earlier entry are dropped.
        Dropping them individually keeps the rest of the ranking usable and
        prevents negative values from silently indexing from the end.
        """
        seen = set()
        valid = []
        for candidate in candidates:
            # ``bool`` is a subclass of ``int``; a JSON ``true`` is not an index.
            if isinstance(candidate, bool) or not isinstance(candidate, int):
                continue
            if 0 <= candidate < upper_bound and candidate not in seen:
                seen.add(candidate)
                valid.append(candidate)
        return valid

    @staticmethod
    def _finalize(
        results: List[Dict], limit: int, reindex: bool, start_index: int
    ) -> List[Dict]:
        """Truncate ``results`` to ``limit`` and optionally renumber them.

        When ``reindex`` is true each kept result gets ``"index"`` set to its
        1-based position, offset by ``start_index``, as a string.
        """
        kept = results[:limit]
        if reindex:
            for position, result in enumerate(kept, start=start_index + 1):
                result["index"] = str(position)
        return kept
@@ -0,0 +1,81 @@
1
+ """
2
+ Base class for all findings repositories.
3
+ Defines the common interface and shared functionality for different findings management approaches.
4
+ """
5
+
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from typing import Dict, List
9
+
10
+ from langchain_core.language_models import BaseLLM
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class BaseFindingsRepository(ABC):
    """Interface shared by every findings repository implementation."""

    def __init__(self, model: BaseLLM):
        """
        Create an empty repository bound to a language model.

        Args:
            model: The language model to use for findings operations
        """
        # Findings start out empty; the mapping is keyed by string.
        self.findings: Dict[str, List[str]] = {}
        self.model = model

    @abstractmethod
    def add_finding(self, query: str, finding: Dict | str) -> None:
        """
        Record a finding in the repository.

        Args:
            query: The query associated with the finding
            finding: The finding to add
        """

    @abstractmethod
    def get_findings(self, query: str) -> List[str]:
        """
        Look up the findings recorded for a query.

        Args:
            query: The query to get findings for

        Returns:
            List[str]: List of findings for the query
        """

    @abstractmethod
    def clear_findings(self, query: str) -> None:
        """
        Discard the findings recorded for a query.

        Args:
            query: The query to clear findings for
        """

    @abstractmethod
    def synthesize_findings(
        self,
        query: str,
        sub_queries: List[str],
        findings: List[str],
        accumulated_knowledge: str,
    ) -> str:
        """
        Combine the findings of the sub-queries into one final answer.

        Args:
            query: The original query
            sub_queries: List of sub-queries
            findings: List of findings for each sub-query
            accumulated_knowledge: Accumulated knowledge from previous findings

        Returns:
            str: Synthesized final answer
        """