local-deep-research 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,153 @@
1
+ """
2
+ Search engine that searches across all local collections
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, List, Any, Optional
7
+ from langchain_core.language_models import BaseLLM
8
+
9
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
10
+ from local_deep_research.web_search_engines.search_engine_factory import create_search_engine
11
+ from local_deep_research import config
12
+
13
+ # Setup logging
14
+ logger = logging.getLogger(__name__)
15
+
16
class LocalAllSearchEngine(BaseSearchEngine):
    """
    Search engine that searches across all local document collections.

    Acts as a meta search engine specifically for local collections: one
    engine is created per enabled collection, previews from every engine are
    merged and ordered by their "similarity" value, and full-content
    retrieval is delegated back to the engine of the collection each preview
    came from.
    """

    def __init__(
        self,
        llm: Optional[BaseLLM] = None,
        max_results: int = 10,
        max_filtered_results: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the local all-collections search engine.

        Args:
            llm: Language model for relevance filtering
            max_results: Maximum number of previews returned by
                ``_get_previews`` after merging all collections
            max_filtered_results: Maximum results after filtering
            **kwargs: Accepted for interface compatibility. They are not
                forwarded to the per-collection engines by this class.
        """
        # Initialize the base search engine
        super().__init__(llm=llm, max_filtered_results=max_filtered_results)

        self.max_results = max_results

        # Maps collection id -> {"engine", "name", "description"}
        self.local_engines = {}

        # NOTE(review): this is a top-level import, not
        # `local_deep_research.local_collections` -- confirm the module is
        # importable under this name when the package is installed.
        try:
            from local_collections import LOCAL_COLLECTIONS
        except ImportError:
            logger.warning("No local collections configuration found")
            return

        for collection_id, collection in LOCAL_COLLECTIONS.items():
            # Collections are enabled unless explicitly switched off
            if not collection.get("enabled", True):
                continue

            # Create a search engine for this collection; a failure for one
            # collection must not prevent the others from being registered.
            try:
                engine = create_search_engine(
                    collection_id,
                    llm=llm,
                    max_filtered_results=max_filtered_results
                )

                if engine:
                    self.local_engines[collection_id] = {
                        "engine": engine,
                        "name": collection.get("name", collection_id),
                        "description": collection.get("description", "")
                    }
            except Exception as e:
                logger.error(f"Error creating search engine for collection '{collection_id}': {e}")

    @staticmethod
    def _similarity_score(preview: Dict[str, Any]) -> float:
        """
        Return the preview's "similarity" value as a float for sorting.

        Missing, ``None`` or non-numeric values count as 0.0 so that one
        malformed preview cannot abort the ranking of all the others.
        """
        try:
            return float(preview.get("similarity", 0))
        except (TypeError, ValueError):
            return 0.0

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for documents from all local collections.

        Each preview is tagged in place with "collection_id",
        "collection_name" and "collection_description" so that
        ``_get_full_content`` can route it back to the right engine.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries, ordered by descending similarity
            and truncated to ``self.max_results``. Empty when no collection
            produced any preview.
        """
        logger.info(f"Searching across all local collections for query: {query}")

        all_previews = []

        # Get previews from each local search engine
        for collection_id, engine_info in self.local_engines.items():
            engine = engine_info["engine"]
            try:
                # Get previews from this engine
                previews = engine._get_previews(query)

                # Add collection info to each preview
                for preview in previews:
                    preview["collection_id"] = collection_id
                    preview["collection_name"] = engine_info["name"]
                    preview["collection_description"] = engine_info["description"]

                all_previews.extend(previews)
            except Exception as e:
                # One failing collection should not hide results from the rest
                logger.error(f"Error searching collection '{collection_id}': {e}")

        if not all_previews:
            logger.info(f"No local documents found for query: {query}")
            return []

        # Sort by similarity score if available (stable, highest first)
        all_previews.sort(key=self._similarity_score, reverse=True)

        # Limit to max_results
        return all_previews[:self.max_results]

    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant documents.
        Delegates to the appropriate collection's search engine.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content. Items whose
            collection engine failed, items that belong to no known
            collection, and items missing from an engine's results are
            returned as they were passed in.
        """
        # Group items by collection
        items_by_collection: Dict[Any, List[Dict[str, Any]]] = {}
        for item in relevant_items:
            collection_id = item.get("collection_id")
            if collection_id and collection_id in self.local_engines:
                items_by_collection.setdefault(collection_id, []).append(item)

        # Process each collection's items with its own engine
        all_results = []
        for collection_id, items in items_by_collection.items():
            engine = self.local_engines[collection_id]["engine"]
            try:
                results = engine._get_full_content(items)
                all_results.extend(results)
            except Exception as e:
                logger.error(f"Error getting full content from collection '{collection_id}': {e}")
                # Fall back to returning the items without full content
                all_results.extend(items)

        # Add any items that weren't processed. Entries are matched by their
        # "id" key when present; entries without an "id" are matched by object
        # identity instead of raising KeyError.
        processed_ids = {result["id"] for result in all_results if "id" in result}
        processed_objects = {id(result) for result in all_results}
        for item in relevant_items:
            if "id" in item:
                if item["id"] not in processed_ids:
                    all_results.append(item)
            elif id(item) not in processed_objects:
                all_results.append(item)

        return all_results