local-deep-research 0.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the package exactly as it appears in the public registry.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,24 @@
+ """
+ Local Deep Research - AI-powered research assistant
+
+ A powerful AI research system with iterative analysis capabilities
+ and integration with multiple search engines.
+ """
+
+ __version__ = "0.1.0"
+
+ # Initialize configuration on module import
+ from .utilties.setup_utils import setup_user_directories
+
+ # Import main components
+ from .search_system import AdvancedSearchSystem
+ from .report_generator import IntegratedReportGenerator
+ from .config import get_llm, get_search
+
+ # Export the public API
+ __all__ = [
+     "AdvancedSearchSystem",
+     "IntegratedReportGenerator",
+     "get_llm",
+     "get_search"
+ ]
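
For orientation, a minimal usage sketch of the API exported above. get_llm and get_search are defined in config.py (shown later in this diff); the AdvancedSearchSystem constructor signature is not visible here, so the no-argument call is an assumption:

    # Hedged sketch: model/engine availability depends on local configuration.
    from local_deep_research import AdvancedSearchSystem, get_llm, get_search

    llm = get_llm()                  # default provider/model from llm_config.py
    search = get_search()            # engine selected by settings.search.tool
    system = AdvancedSearchSystem()  # assumed no-argument construction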
@@ -0,0 +1,113 @@
+ # citation_handler.py
+
+ from langchain_core.documents import Document
+ from typing import Dict, List, Union, Any
+ import re
+ from .utilties.search_utilities import remove_think_tags
+ from .config import settings
+
+ class CitationHandler:
+     def __init__(self, llm):
+         self.llm = llm
+
+     def _create_documents(
+         self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
+     ) -> List[Document]:
+         """Convert search results to LangChain document format and add an index to the original search results."""
+         documents = []
+         if isinstance(search_results, str):
+             return documents
+
+         for i, result in enumerate(search_results):
+             if isinstance(result, dict):
+                 # Add index to the original search result dictionary
+                 result["index"] = str(i + nr_of_links + 1)
+
+                 content = result.get("full_content", result.get("snippet", ""))
+                 documents.append(
+                     Document(
+                         page_content=content,
+                         metadata={
+                             "source": result.get("link", f"source_{i+1}"),
+                             "title": result.get("title", f"Source {i+1}"),
+                             "index": i + nr_of_links + 1,
+                         },
+                     )
+                 )
+         return documents
+
+     def _format_sources(self, documents: List[Document]) -> str:
+         """Format sources with numbers for citation."""
+         sources = []
+         for doc in documents:
+             source_id = doc.metadata["index"]
+             sources.append(f"[{source_id}] {doc.page_content}")
+         return "\n\n".join(sources)
+
+     def analyze_initial(
+         self, query: str, search_results: Union[str, List[Dict]]
+     ) -> Dict[str, Any]:
+
+         documents = self._create_documents(search_results)
+         formatted_sources = self._format_sources(documents)
+         print(formatted_sources)
+         prompt = f"""Analyze the following information concerning the question and include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source.
+
+ Question: {query}
+
+ Sources:
+ {formatted_sources}
+
+ Provide a detailed analysis with citations and always keep URLs. Never make up sources. Example format: "According to the research [1], ..."
+ """
+
+         response = self.llm.invoke(prompt)
+
+         return {"content": remove_think_tags(response.content), "documents": documents}
+
+     def analyze_followup(
+         self,
+         question: str,
+         search_results: Union[str, List[Dict]],
+         previous_knowledge: str,
+         nr_of_links: int
+     ) -> Dict[str, Any]:
+         """Process follow-up analysis with citations."""
+         documents = self._create_documents(search_results, nr_of_links=nr_of_links)
+         formatted_sources = self._format_sources(documents)
+         print(formatted_sources)
+         # Add fact-checking step
+         fact_check_prompt = f"""Analyze these sources for factual consistency:
+ 1. Cross-reference major claims between sources
+ 2. Identify and flag any contradictions
+ 3. Verify basic facts (dates, company names, ownership)
+ 4. Note when sources disagree
+
+ Previous Knowledge:
+ {previous_knowledge}
+
+ New Sources:
+ {formatted_sources}
+
+ Return any inconsistencies or conflicts found."""
+         if settings.GENERAL.ENABLE_FACT_CHECKING:
+             fact_check_response = remove_think_tags(self.llm.invoke(fact_check_prompt).content)
+         else:
+             fact_check_response = ""
+
+         prompt = f"""Using the previous knowledge and new sources, answer the question. Include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source. Reflect information from sources critically.
+
+ Previous Knowledge:
+ {previous_knowledge}
+
+ Question: {question}
+
+ New Sources:
+ {formatted_sources}
+ Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
+ Provide a detailed answer with citations. Example format: "According to [1], ..."
+ """
+
+         response = self.llm.invoke(prompt)
+
+         return {"content": remove_think_tags(response.content), "documents": documents}
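
A short usage sketch for CitationHandler, assuming an LLM obtained via get_llm(). The result-dict keys (title, link, snippet, full_content) match what _create_documents reads above; the values are purely illustrative:

    from local_deep_research import get_llm
    from local_deep_research.citation_handler import CitationHandler

    handler = CitationHandler(get_llm())
    results = [{
        "title": "Example Source",        # illustrative values only
        "link": "https://example.org/a",
        "snippet": "Excerpt used when no full_content is present.",
    }]
    answer = handler.analyze_initial("What does the source say?", results)
    print(answer["content"])    # analysis with [1]-style citations
    print(results[0]["index"])  # "1" -- _create_documents indexes results in place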
@@ -0,0 +1,166 @@
+ # local_deep_research/config.py
+ from dynaconf import Dynaconf
+ from pathlib import Path
+ import logging
+ from platformdirs import user_documents_dir
+ import os
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Get config directory
+ def get_config_dir():
+     from platformdirs import user_config_dir
+     config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
+     print(f"Looking for config in: {config_dir}")
+     return config_dir
+
+ # Define config paths
+ CONFIG_DIR = get_config_dir() / "config"
+ CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+ SETTINGS_FILE = CONFIG_DIR / "settings.toml"
+ SECRETS_FILE = CONFIG_DIR / ".secrets.toml"
+ LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
+ SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"
+
+ LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
+ print("CONFIGDIR:", CONFIG_DIR)
+ print("SECRETS_FILE:", SECRETS_FILE)
+ print("SETTINGS_FILE:", SETTINGS_FILE)
+
+
+ # Set environment variable for Dynaconf to use
+ docs_base = Path(user_documents_dir()) / "local_deep_research"
+ os.environ["DOCS_DIR"] = str(docs_base)
+
+
+
+
+
+
+
+
+ # Expose get_llm function
+ def get_llm(*args, **kwargs):
+     """
+     Helper function to get LLM from llm_config.py
+     """
+     # Import here to avoid circular imports
+     import importlib.util
+     import sys
+
+     llm_config_path = CONFIG_DIR / "llm_config.py"
+
+     # If llm_config.py exists, use it
+     if llm_config_path.exists():
+         if str(CONFIG_DIR) not in sys.path:
+             sys.path.insert(0, str(CONFIG_DIR))
+
+         spec = importlib.util.spec_from_file_location("llm_config", llm_config_path)
+         llm_config = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(llm_config)
+
+         if hasattr(llm_config, "get_llm"):
+             return llm_config.get_llm(*args, **kwargs)
+
+     # Fallback to utility function
+     from .utilties.llm_utils import get_model
+     return get_model(*args, **kwargs)
+
+ # Expose get_search function
+ def get_search(search_tool=None):
+     """
+     Helper function to get search engine
+     """
+
+     # Use specified tool or default from settings
+     tool = search_tool or settings.search.tool
+     logger.info(f"Search tool is: {tool}")
+
+     # Import here to avoid circular imports
+     from .web_search_engines.search_engine_factory import get_search as factory_get_search
+
+     # Get search parameters
+     params = {
+         "search_tool": tool,
+         "llm_instance": get_llm(),
+         "max_results": settings.get("max_results"),
+         "region": settings.get("region"),
+         "time_period": settings.get("time_period"),
+         "safe_search": settings.get("safe_search"),
+         "search_snippets_only": settings.get("snippets_only"),
+         "search_language": settings.get("search_language"),
+         "max_filtered_results": settings.get("max_filtered_results")
+     }
+
+     # Create and return search engine
+     return factory_get_search(**params)
+
+ def init_config_files():
+     """Initialize config files if they don't exist"""
+     import shutil
+     from importlib.resources import files
+
+     # Get default files path
+     try:
+         defaults_dir = files('local_deep_research.defaults')
+     except ImportError:
+         # Fallback for older Python versions
+         from pkg_resources import resource_filename
+         defaults_dir = Path(resource_filename('local_deep_research', 'defaults'))
+
+     # Create settings.toml if it doesn't exist
+     settings_file = CONFIG_DIR / "settings.toml"
+     if not settings_file.exists():
+         shutil.copy(defaults_dir / "main.toml", settings_file)
+         logger.info(f"Created settings.toml at {settings_file}")
+
+     # Create llm_config.py if it doesn't exist
+     llm_config_file = CONFIG_DIR / "llm_config.py"
+     if not llm_config_file.exists():
+         shutil.copy(defaults_dir / "llm_config.py", llm_config_file)
+         logger.info(f"Created llm_config.py at {llm_config_file}")
+
+     # Create local_collections.toml if it doesn't exist
+     collections_file = CONFIG_DIR / "local_collections.toml"
+     if not collections_file.exists():
+         shutil.copy(defaults_dir / "local_collections.toml", collections_file)
+         logger.info(f"Created local_collections.toml at {collections_file}")
+
+     # Create search_engines.toml if it doesn't exist
+     search_engines_file = CONFIG_DIR / "search_engines.toml"
+     if not search_engines_file.exists():
+         shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
+         logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+     secrets_file = CONFIG_DIR / ".secrets.toml"
+     if not secrets_file.exists():
+         with open(secrets_file, "w") as f:
+             f.write("""
+ # ANTHROPIC_API_KEY = "your-api-key-here"
+ # OPENAI_API_KEY = "your-openai-key-here"
+ # GOOGLE_API_KEY = "your-google-key-here"
+ # SERP_API_KEY = "your-api-key-here"
+ # GUARDIAN_API_KEY = "your-api-key-here"
+ # GOOGLE_PSE_API_KEY = "your-google-api-key-here"
+ # GOOGLE_PSE_ENGINE_ID = "your-programmable-search-engine-id-here"
+ """)
+
+ # Initialize config files on import
+ init_config_files()
+
+ # Use an absolute path to your .secrets.toml for testing
+ secrets_file = Path(SECRETS_FILE)
+
+ settings = Dynaconf(
+     settings_files=[
+         str(SETTINGS_FILE),
+         str(LOCAL_COLLECTIONS_FILE),
+         str(SEARCH_ENGINES_FILE),
+     ],
+     secrets=str(SECRETS_FILE),
+     env_prefix="LDR",
+     load_dotenv=True,
+     envvar_prefix="LDR",
+     env_file=str(CONFIG_DIR / ".env"),
+ )
+
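
Because the Dynaconf instance above sets envvar_prefix="LDR", settings can be overridden from the environment. A sketch: max_results is one of the keys get_search reads above, and "wikipedia" is assumed to be a valid tool name from search_engines.toml:

    import os
    os.environ["LDR_MAX_RESULTS"] = "25"   # Dynaconf maps LDR_<KEY> onto settings

    from local_deep_research.config import settings, get_search

    print(settings.get("max_results"))     # 25, courtesy of the env override
    engine = get_search("wikipedia")       # assumed engine name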
@@ -0,0 +1,44 @@
+ """
+ Default configuration module for Local Deep Research.
+
+ This module is responsible for loading and initializing default
+ configuration files and resources used throughout the application.
+ """
+ import os
+ import logging
+ from pathlib import Path
+
+ logger = logging.getLogger(__name__)
+
+ # Define the path to the package's defaults directory
+ DEFAULTS_DIR = Path(__file__).parent
+
+ # Default files available in this package
+ DEFAULT_FILES = {
+     "main.toml": DEFAULTS_DIR / "main.toml",
+     "local_collections.toml": DEFAULTS_DIR / "local_collections.toml",
+     "search_engines.toml": DEFAULTS_DIR / "search_engines.toml",
+     "llm_config.py": DEFAULTS_DIR / "llm_config.py"
+ }
+
+ def get_default_file_path(filename):
+     """Get the path to a default configuration file."""
+     if filename in DEFAULT_FILES:
+         return DEFAULT_FILES[filename]
+     return None
+
+ def list_default_files():
+     """List all available default configuration files."""
+     return list(DEFAULT_FILES.keys())
+
+ def ensure_defaults_exist():
+     """Verify that all expected default files exist in the package."""
+     missing = []
+     for filename, filepath in DEFAULT_FILES.items():
+         if not filepath.exists():
+             missing.append(filename)
+
+     if missing:
+         logger.warning(f"The following default files are missing from the package: {', '.join(missing)}")
+         return False
+     return True
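
A quick sketch of how these helpers can be used before first-run setup (config.py's init_config_files copies these same files into the user config directory):

    from local_deep_research.defaults import (
        ensure_defaults_exist,
        get_default_file_path,
        list_default_files,
    )

    if ensure_defaults_exist():                    # logs a warning if anything is missing
        print(list_default_files())                # ['main.toml', 'local_collections.toml', ...]
        print(get_default_file_path("main.toml"))  # absolute path inside the package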
@@ -0,0 +1,269 @@
+ """
+ LLM configuration for Local Deep Research.
+
+ This file controls which language models are available and how they're configured.
+ You can customize model selection, parameters, and fallbacks here.
+ """
+
+ from langchain_anthropic import ChatAnthropic
+ from langchain_openai import ChatOpenAI
+ from langchain_ollama import ChatOllama
+ from langchain_community.llms import VLLM
+ from local_deep_research.config import settings
+ import os
+ import logging
+ from enum import Enum, auto
+
+ # Initialize environment
+ logger = logging.getLogger(__name__)
+
+ # Provider enum
+ class ModelProvider(Enum):
+     OLLAMA = auto()
+     OPENAI = auto()
+     ANTHROPIC = auto()
+     VLLM = auto()
+     OPENAI_ENDPOINT = auto()
+     NONE = auto()
+
+ # ================================
+ # USER CONFIGURATION SECTION
+ # ================================
+
+ # Set your preferred model provider here
+ DEFAULT_PROVIDER = ModelProvider.OLLAMA  # Change this to your preferred provider
+
+ # Set your default model name here
+ DEFAULT_MODEL = "mistral"  # Your default model
+
+ # Set default model parameters
+ DEFAULT_TEMPERATURE = 0.7
+ MAX_TOKENS = 30000
+
+ # Server URLs
+ OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"  # For OpenRouter or compatible services
+ OLLAMA_BASE_URL = "http://localhost:11434"  # URL for Ollama server
+
+
+
+
+ # ================================
+ # LLM FUNCTIONS
+ # ================================
+
+
+
+
+
+ def get_llm(model_name=None, temperature=None, provider=None):
+     """
+     Get LLM instance based on model name and provider.
+
+     Args:
+         model_name: Name of the model to use (if None, uses DEFAULT_MODEL)
+         temperature: Model temperature (if None, uses DEFAULT_TEMPERATURE)
+         provider: Provider to use (if None, uses DEFAULT_PROVIDER)
+
+     Returns:
+         A LangChain LLM instance
+     """
+     if model_name is None:
+         model_name = DEFAULT_MODEL
+
+     if temperature is None:
+         temperature = DEFAULT_TEMPERATURE
+
+     if provider is None:
+         provider = DEFAULT_PROVIDER
+
+     # Common parameters for all models
+     common_params = {
+         "temperature": temperature,
+         "max_tokens": MAX_TOKENS,
+     }
+
+     # Handle different providers
+     if provider == ModelProvider.ANTHROPIC:
+         api_key = settings.get('ANTHROPIC_API_KEY', '')
+         if not api_key:
+             logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
+             return get_fallback_model(temperature)
+
+         return ChatAnthropic(
+             model=model_name, anthropic_api_key=api_key, **common_params
+         )
+
+     elif provider == ModelProvider.OPENAI:
+         api_key = settings.get('OPENAI_API_KEY', '')
+         if not api_key:
+             logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
+             return get_fallback_model(temperature)
+
+         return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+
+     elif provider == ModelProvider.OPENAI_ENDPOINT:
+         api_key = settings.OPENAI_ENDPOINT_API_KEY
+
+         if not api_key:
+             logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
+             return get_fallback_model(temperature)
+
+         return ChatOpenAI(
+             model=model_name,
+             api_key=api_key,
+             openai_api_base=OPENAI_ENDPOINT_URL,
+             **common_params
+         )
+
+     elif provider == ModelProvider.VLLM:
+         try:
+             return VLLM(
+                 model=model_name,
+                 trust_remote_code=True,
+                 max_new_tokens=128,
+                 top_k=10,
+                 top_p=0.95,
+                 temperature=temperature,
+             )
+         except Exception as e:
+             logger.error(f"Error loading VLLM model: {e}")
+             logger.warning("Falling back.")
+             return get_fallback_model(temperature)
+
+     elif provider == ModelProvider.OLLAMA:
+         try:
+             # Use the configurable Ollama base URL
+             base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+             return ChatOllama(model=model_name, base_url=base_url, **common_params)
+         except Exception as e:
+             logger.error(f"Error loading Ollama model: {e}")
+             return get_fallback_model(temperature)
+
+     else:
+         return get_fallback_model(temperature)
+
+ def get_fallback_model(temperature=DEFAULT_TEMPERATURE):
+     """Create a dummy model for when no providers are available"""
+     from langchain_community.llms.fake import FakeListLLM
+     return FakeListLLM(
+         responses=["No language models are available. Please install Ollama or set up API keys."]
+     )
+
+ # ================================
+ # COMPATIBILITY FUNCTIONS
+ # ================================
+
+ def get_available_provider_types():
+     """Return available model providers"""
+     providers = {}
+
+     if is_ollama_available():
+         providers["ollama"] = "Ollama (local models)"
+
+     if is_openai_available():
+         providers["openai"] = "OpenAI API"
+
+     if is_anthropic_available():
+         providers["anthropic"] = "Anthropic API"
+
+     if is_openai_endpoint_available():
+         providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
+
+     # Check for VLLM capability
+     try:
+         import torch
+         import transformers
+         providers["vllm"] = "VLLM (local models)"
+     except ImportError:
+         pass
+
+     # Default fallback
+     if not providers:
+         providers["none"] = "No model providers available"
+
+     return providers
+
+
+
+
+ # ================================
+ # HELPER FUNCTIONS
+ # ================================
+
+ def is_openai_available():
+     """Check if OpenAI is available"""
+     try:
+         api_key = settings.api_keys.get('OPENAI_API_KEY', '')
+         return bool(api_key)
+     except:
+         return False
+
+ def is_anthropic_available():
+     """Check if Anthropic is available"""
+     try:
+         api_key = settings.api_keys.get('ANTHROPIC_API_KEY', '')
+         return bool(api_key)
+     except:
+         return False
+
+
+
+ def is_openai_endpoint_available():
+     """Check if OpenAI endpoint is available"""
+     print(os.getenv("OPENAI_ENDPOINT_API_KEY"))
+     try:
+         api_key = settings.OPENAI_ENDPOINT_API_KEY
+         return bool(api_key)
+     except:
+         return False
+
+ def is_ollama_available():
+     """Check if Ollama is running"""
+     try:
+         import requests
+         base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+         response = requests.get(f"{base_url}/api/tags", timeout=1.0)
+         return response.status_code == 200
+     except:
+         return False
+
+ def is_vllm_available():
+     """Check if VLLM capability is available"""
+     try:
+         import torch
+         import transformers
+         return True
+     except ImportError:
+         return False
+
+ def get_available_providers():
+     """Get dictionary of available providers"""
+     providers = {}
+
+     if is_ollama_available():
+         providers[ModelProvider.OLLAMA] = "Ollama (local models)"
+
+     if is_openai_available():
+         providers[ModelProvider.OPENAI] = "OpenAI API"
+
+     if is_anthropic_available():
+         providers[ModelProvider.ANTHROPIC] = "Anthropic API"
+
+     if is_openai_endpoint_available():
+         providers[ModelProvider.OPENAI_ENDPOINT] = "OpenAI-compatible Endpoint"
+
+     if is_vllm_available():
+         providers[ModelProvider.VLLM] = "VLLM (local models)"
+
+     if not providers:
+         providers[ModelProvider.NONE] = "No model providers available"
+
+     return providers
+
+ # Log which providers are available
+ AVAILABLE_PROVIDERS = get_available_providers()
+ logger.info(f"Available providers: {[p.name for p in AVAILABLE_PROVIDERS.keys()]}")
+
+ # Check if selected provider is available
+ if DEFAULT_PROVIDER not in AVAILABLE_PROVIDERS and DEFAULT_PROVIDER != ModelProvider.NONE:
+     logger.warning(f"Selected provider {DEFAULT_PROVIDER.name} is not available.")
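
A sketch of requesting a specific provider and model through get_llm as defined above; "llama3" is an arbitrary example model name, and the import targets the packaged default module (your editable copy lives in the user config directory):

    from local_deep_research.defaults.llm_config import get_llm, ModelProvider

    llm = get_llm(model_name="llama3", temperature=0.2,
                  provider=ModelProvider.OLLAMA)
    response = llm.invoke("Summarize the trade-offs of local inference.")
    print(response.content)  # AIMessage content from ChatOllama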
@@ -0,0 +1,47 @@
+ # Default local document collections configuration
+ # Each collection functions as an independent search engine
+
+ # Project Documents Collection
+ [project_docs]
+ name = "Project Documents"
+ description = "Project documentation and specifications"
+ paths = ["@format ${DOCS_DIR}/project_documents"]
+ enabled = true
+ embedding_model = "all-MiniLM-L6-v2"
+ embedding_device = "cpu"
+ embedding_model_type = "sentence_transformers"
+ max_results = 20
+ max_filtered_results = 5
+ chunk_size = 1000
+ chunk_overlap = 200
+ cache_dir = "__CACHE_DIR__/local_search/project_docs"
+
+ # Research Papers Collection
+ [research_papers]
+ name = "Research Papers"
+ description = "Academic research papers and articles"
+ paths = ["@format ${DOCS_DIR}/research_papers"]
+ enabled = true
+ embedding_model = "all-MiniLM-L6-v2"
+ embedding_device = "cpu"
+ embedding_model_type = "sentence_transformers"
+ max_results = 20
+ max_filtered_results = 5
+ chunk_size = 800
+ chunk_overlap = 150
+ cache_dir = "__CACHE_DIR__/local_search/research_papers"
+
+ # Personal Notes Collection
+ [personal_notes]
+ name = "Personal Notes"
+ description = "Personal notes and documents"
+ paths = ["@format ${DOCS_DIR}/personal_notes"]
+ enabled = true
+ embedding_model = "all-MiniLM-L6-v2"
+ embedding_device = "cpu"
+ embedding_model_type = "sentence_transformers"
+ max_results = 30
+ max_filtered_results = 10
+ chunk_size = 500
+ chunk_overlap = 100
+ cache_dir = "__CACHE_DIR__/local_search/personal_notes"
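
Since config.py loads this file into the shared Dynaconf settings object, each table above is addressable by name. A sketch, assuming the shipped defaults are unchanged:

    from local_deep_research.config import settings

    for key in ("project_docs", "research_papers", "personal_notes"):
        collection = settings.get(key)
        if collection and collection.get("enabled"):
            print(key, collection.embedding_model, collection.chunk_size)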