local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/__init__.py
@@ -0,0 +1,24 @@
+"""
+Local Deep Research - AI-powered research assistant
+
+A powerful AI research system with iterative analysis capabilities
+and multiple search engines integration.
+"""
+
+__version__ = "0.1.0"
+
+# Initialize configuration on module import
+from .utilties.setup_utils import setup_user_directories
+
+# Import main components
+from .search_system import AdvancedSearchSystem
+from .report_generator import IntegratedReportGenerator
+from .config import get_llm, get_search
+
+# Export it
+__all__ = [
+    "AdvancedSearchSystem",
+    "IntegratedReportGenerator",
+    "get_llm",
+    "get_search"
+]
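For orientation, a minimal usage sketch against the exported API above. Only the names re-exported by `__init__.py` are assumed; the `AdvancedSearchSystem` constructor is defined in `search_system.py` (not shown in this section), so calling it without arguments is an assumption.

```python
# Hypothetical sketch -- assumes only the names exported above.
from local_deep_research import AdvancedSearchSystem, get_llm, get_search

llm = get_llm()        # resolved via llm_config.py in the user config directory, if present
search = get_search()  # builds the engine named by settings.search.tool

system = AdvancedSearchSystem()  # assumption: signature lives in search_system.py, not shown here
```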
local_deep_research/citation_handler.py
@@ -0,0 +1,113 @@
+# citation_handler.py
+
+from langchain_core.documents import Document
+from typing import Dict, List, Union, Any
+import re
+from .utilties.search_utilities import remove_think_tags
+from .config import settings
+
+class CitationHandler:
+    def __init__(self, llm):
+        self.llm = llm
+
+    def _create_documents(
+        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
+    ) -> List[Document]:
+        """Convert search results to LangChain documents format and add index to original search results."""
+        documents = []
+        if isinstance(search_results, str):
+            return documents
+
+        for i, result in enumerate(search_results):
+            if isinstance(result, dict):
+                # Add index to the original search result dictionary
+                result["index"] = str(i + nr_of_links + 1)
+
+                content = result.get("full_content", result.get("snippet", ""))
+                documents.append(
+                    Document(
+                        page_content=content,
+                        metadata={
+                            "source": result.get("link", f"source_{i+1}"),
+                            "title": result.get("title", f"Source {i+1}"),
+                            "index": i + nr_of_links + 1,
+                        },
+                    )
+                )
+        return documents
+
+    def _format_sources(self, documents: List[Document]) -> str:
+        """Format sources with numbers for citation."""
+        sources = []
+        for doc in documents:
+            source_id = doc.metadata["index"]
+            sources.append(f"[{source_id}] {doc.page_content}")
+        return "\n\n".join(sources)
+
+    def analyze_initial(
+        self, query: str, search_results: Union[str, List[Dict]]
+    ) -> Dict[str, Any]:
+
+        documents = self._create_documents(search_results)
+        formatted_sources = self._format_sources(documents)
+        print(formatted_sources)
+        prompt = f"""Analyze the following information concerning the question and include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source.
+
+Question: {query}
+
+Sources:
+{formatted_sources}
+
+Provide a detailed analysis with citations and always keep URLS. Never make up sources. Example format: "According to the research [1], ..."
+        """
+
+        response = self.llm.invoke(prompt)
+
+        return {"content": remove_think_tags(response.content), "documents": documents}
+
+    def analyze_followup(
+        self,
+        question: str,
+        search_results: Union[str, List[Dict]],
+        previous_knowledge: str,
+        nr_of_links: int
+    ) -> Dict[str, Any]:
+        """Process follow-up analysis with citations."""
+        documents = self._create_documents(search_results, nr_of_links=nr_of_links)
+        formatted_sources = self._format_sources(documents)
+        print(formatted_sources)
+        # Add fact-checking step
+        fact_check_prompt = f"""Analyze these sources for factual consistency:
+1. Cross-reference major claims between sources
+2. Identify and flag any contradictions
+3. Verify basic facts (dates, company names, ownership)
+4. Note when sources disagree
+
+Previous Knowledge:
+{previous_knowledge}
+
+New Sources:
+{formatted_sources}
+
+Return any inconsistencies or conflicts found."""
+        if settings.GENERAL.ENABLE_FACT_CHECKING:
+            fact_check_response = remove_think_tags(self.llm.invoke(fact_check_prompt).content)
+        else:
+            fact_check_response = ""
+
+        prompt = f"""Using the previous knowledge and new sources, answer the question. Include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source. Reflect information from sources critically.
+
+Previous Knowledge:
+{previous_knowledge}
+
+Question: {question}
+
+New Sources:
+{formatted_sources}
+Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
+Provide a detailed answer with citations. Example format: "According to [1], ..."
+        """
+
+        response = self.llm.invoke(prompt)
+
+        return {"content": remove_think_tags(response.content), "documents": documents}
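A hedged sketch of driving `CitationHandler` directly. The question text and result dictionaries are invented for illustration, but the keys (`title`, `link`, `snippet`/`full_content`) match what `_create_documents()` reads, and the return shape matches the methods above.

```python
# Illustrative only -- the query and search results below are made up.
from local_deep_research.citation_handler import CitationHandler
from local_deep_research.config import get_llm

handler = CitationHandler(get_llm())

results = [
    {"title": "Example source", "link": "https://example.org", "snippet": "Some snippet text."},
]

initial = handler.analyze_initial("What does the source claim?", results)
print(initial["content"])         # LLM answer with [1]-style citations
print(len(initial["documents"]))  # LangChain Documents built from the results

# Follow-up rounds carry accumulated knowledge plus the number of sources already numbered
followup = handler.analyze_followup(
    "Are there any contradictions?",
    results,
    previous_knowledge=initial["content"],
    nr_of_links=len(results),
)
```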
local_deep_research/config.py
@@ -0,0 +1,166 @@
+# local_deep_research/config.py
+from dynaconf import Dynaconf
+from pathlib import Path
+import logging
+from platformdirs import user_documents_dir
+import os
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Get config directory
+def get_config_dir():
+    from platformdirs import user_config_dir
+    config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
+    print(f"Looking for config in: {config_dir}")
+    return config_dir
+
+# Define config paths
+CONFIG_DIR = get_config_dir() / "config"
+CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+SETTINGS_FILE = CONFIG_DIR / "settings.toml"
+SECRETS_FILE = CONFIG_DIR / ".secrets.toml"
+LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
+SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"
+
+LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
+print("CONFIGDIR:", CONFIG_DIR)
+print("SECRETS_FILE:", SECRETS_FILE)
+print("SETTINGS_FILE:", SETTINGS_FILE)
+
+
+# Set environment variable for Dynaconf to use
+docs_base = Path(user_documents_dir()) / "local_deep_research"
+os.environ["DOCS_DIR"] = str(docs_base)
+
+
+
+
+
+
+
+
+# Expose get_llm function
+def get_llm(*args, **kwargs):
+    """
+    Helper function to get LLM from llm_config.py
+    """
+    # Import here to avoid circular imports
+    import importlib.util
+    import sys
+
+    llm_config_path = CONFIG_DIR / "llm_config.py"
+
+    # If llm_config.py exists, use it
+    if llm_config_path.exists():
+        if str(CONFIG_DIR) not in sys.path:
+            sys.path.insert(0, str(CONFIG_DIR))
+
+        spec = importlib.util.spec_from_file_location("llm_config", llm_config_path)
+        llm_config = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(llm_config)
+
+        if hasattr(llm_config, "get_llm"):
+            return llm_config.get_llm(*args, **kwargs)
+
+    # Fallback to utility function
+    from .utilties.llm_utils import get_model
+    return get_model(*args, **kwargs)
+
+# Expose get_search function
+def get_search(search_tool=None):
+    """
+    Helper function to get search engine
+    """
+
+    # Use specified tool or default from settings
+    tool = search_tool or settings.search.tool
+    logger.info(f"Search tool is: {tool}")
+
+    # Import here to avoid circular imports
+    from .web_search_engines.search_engine_factory import get_search as factory_get_search
+
+    # Get search parameters
+    params = {
+        "search_tool": tool,
+        "llm_instance": get_llm(),
+        "max_results": settings.get("max_results"),
+        "region": settings.get("region"),
+        "time_period": settings.get("time_period"),
+        "safe_search": settings.get("safe_search"),
+        "search_snippets_only": settings.get("snippets_only"),
+        "search_language": settings.get("search_language"),
+        "max_filtered_results": settings.get("max_filtered_results")
+    }
+
+    # Create and return search engine
+    return factory_get_search(**params)
+
+def init_config_files():
+    """Initialize config files if they don't exist"""
+    import shutil
+    from importlib.resources import files
+
+    # Get default files path
+    try:
+        defaults_dir = files('local_deep_research.defaults')
+    except ImportError:
+        # Fallback for older Python versions
+        from pkg_resources import resource_filename
+        defaults_dir = Path(resource_filename('local_deep_research', 'defaults'))
+
+    # Create settings.toml if it doesn't exist
+    settings_file = CONFIG_DIR / "settings.toml"
+    if not settings_file.exists():
+        shutil.copy(defaults_dir / "main.toml", settings_file)
+        logger.info(f"Created settings.toml at {settings_file}")
+
+    # Create llm_config.py if it doesn't exist
+    llm_config_file = CONFIG_DIR / "llm_config.py"
+    if not llm_config_file.exists():
+        shutil.copy(defaults_dir / "llm_config.py", llm_config_file)
+        logger.info(f"Created llm_config.py at {llm_config_file}")
+
+    # Create local_collections.toml if it doesn't exist
+    collections_file = CONFIG_DIR / "local_collections.toml"
+    if not collections_file.exists():
+        shutil.copy(defaults_dir / "local_collections.toml", collections_file)
+        logger.info(f"Created local_collections.toml at {collections_file}")
+
+    # Create search_engines.toml if it doesn't exist
+    search_engines_file = CONFIG_DIR / "search_engines.toml"
+    if not search_engines_file.exists():
+        shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
+        logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+    secrets_file = CONFIG_DIR / ".secrets.toml"
+    if not secrets_file.exists():
+        with open(secrets_file, "w") as f:
+            f.write("""
+# ANTHROPIC_API_KEY = "your-api-key-here"
+# OPENAI_API_KEY = "your-openai-key-here"
+# GOOGLE_API_KEY = "your-google-key-here"
+# SERP_API_KEY = "your-api-key-here"
+# GUARDIAN_API_KEY = "your-api-key-here"
+# GOOGLE_PSE_API_KEY = "your-google-api-key-here"
+# GOOGLE_PSE_ENGINE_ID = "your-programmable-search-engine-id-here"
+""")
+
+# Initialize config files on import
+init_config_files()
+
+# Use an absolute path to your .secrets.toml for testing
+secrets_file = Path(SECRETS_FILE)
+
+settings = Dynaconf(
+    settings_files=[
+        str(SETTINGS_FILE),
+        str(LOCAL_COLLECTIONS_FILE),
+        str(SEARCH_ENGINES_FILE),
+    ],
+    secrets=str(SECRETS_FILE),
+    env_prefix="LDR",
+    load_dotenv=True,
+    envvar_prefix="LDR",
+    env_file=str(CONFIG_DIR / ".env"),
+)
+
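Since `settings` is a Dynaconf object created with `envvar_prefix="LDR"`, individual options can be overridden from the environment. A minimal sketch, assuming Dynaconf's usual `PREFIX_SECTION__KEY` naming for nested keys and using `wikipedia` purely as an example value:

```python
# Sketch: override a nested setting through the LDR_ prefix before the first import.
import os

os.environ["LDR_SEARCH__TOOL"] = "wikipedia"  # assumption: Dynaconf nested-key syntax

from local_deep_research.config import settings, get_search

print(settings.search.tool)  # expected to reflect the override
engine = get_search()        # the factory then builds the "wikipedia" engine
```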
local_deep_research/defaults/__init__.py
@@ -0,0 +1,44 @@
+"""
+Default configuration module for Local Deep Research.
+
+This module is responsible for loading and initializing default
+configuration files and resources used throughout the application.
+"""
+import os
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Define the path to the package's defaults directory
+DEFAULTS_DIR = Path(__file__).parent
+
+# Default files available in this package
+DEFAULT_FILES = {
+    "main.toml": DEFAULTS_DIR / "main.toml",
+    "local_collections.toml": DEFAULTS_DIR / "local_collections.toml",
+    "search_engines.toml": DEFAULTS_DIR / "search_engines.toml",
+    "llm_config.py": DEFAULTS_DIR / "llm_config.py"
+}
+
+def get_default_file_path(filename):
+    """Get the path to a default configuration file."""
+    if filename in DEFAULT_FILES:
+        return DEFAULT_FILES[filename]
+    return None
+
+def list_default_files():
+    """List all available default configuration files."""
+    return list(DEFAULT_FILES.keys())
+
+def ensure_defaults_exist():
+    """Verify that all expected default files exist in the package."""
+    missing = []
+    for filename, filepath in DEFAULT_FILES.items():
+        if not filepath.exists():
+            missing.append(filename)
+
+    if missing:
+        logger.warning(f"The following default files are missing from the package: {', '.join(missing)}")
+        return False
+    return True
local_deep_research/defaults/llm_config.py
@@ -0,0 +1,269 @@
+"""
+LLM configuration for Local Deep Research.
+
+This file controls which language models are available and how they're configured.
+You can customize model selection, parameters, and fallbacks here.
+"""
+
+from langchain_anthropic import ChatAnthropic
+from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
+from langchain_community.llms import VLLM
+from local_deep_research.config import settings
+import os
+import logging
+from enum import Enum, auto
+
+# Initialize environment
+logger = logging.getLogger(__name__)
+
+# Provider enum
+class ModelProvider(Enum):
+    OLLAMA = auto()
+    OPENAI = auto()
+    ANTHROPIC = auto()
+    VLLM = auto()
+    OPENAI_ENDPOINT = auto()
+    NONE = auto()
+
+# ================================
+# USER CONFIGURATION SECTION
+# ================================
+
+# Set your preferred model provider here
+DEFAULT_PROVIDER = ModelProvider.OLLAMA  # Change this to your preferred provider
+
+# Set your default model name here
+DEFAULT_MODEL = "mistral"  # Your default model
+
+# Set default model parameters
+DEFAULT_TEMPERATURE = 0.7
+MAX_TOKENS = 30000
+
+# Server URLs
+OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"  # For OpenRouter or compatible services
+OLLAMA_BASE_URL = "http://localhost:11434"  # URL for Ollama server
+
+
+
+
+# ================================
+# LLM FUNCTIONS
+# ================================
+
+
+
+
+
+def get_llm(model_name=None, temperature=None, provider=None):
+    """
+    Get LLM instance based on model name and provider.
+
+    Args:
+        model_name: Name of the model to use (if None, uses DEFAULT_MODEL)
+        temperature: Model temperature (if None, uses DEFAULT_TEMPERATURE)
+        provider: Provider to use (if None, uses DEFAULT_PROVIDER)
+
+    Returns:
+        A LangChain LLM instance
+    """
+    if model_name is None:
+        model_name = DEFAULT_MODEL
+
+    if temperature is None:
+        temperature = DEFAULT_TEMPERATURE
+
+    if provider is None:
+        provider = DEFAULT_PROVIDER
+
+    # Common parameters for all models
+    common_params = {
+        "temperature": temperature,
+        "max_tokens": MAX_TOKENS,
+    }
+
+    # Handle different providers
+    if provider == ModelProvider.ANTHROPIC:
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
+        if not api_key:
+            logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatAnthropic(
+            model=model_name, anthropic_api_key=api_key, **common_params
+        )
+
+    elif provider == ModelProvider.OPENAI:
+        api_key = settings.get('OPENAI_API_KEY', '')
+        if not api_key:
+            logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+
+    elif provider == ModelProvider.OPENAI_ENDPOINT:
+        api_key = settings.OPENAI_ENDPOINT_API_KEY
+
+        if not api_key:
+            logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatOpenAI(
+            model=model_name,
+            api_key=api_key,
+            openai_api_base=OPENAI_ENDPOINT_URL,
+            **common_params
+        )
+
+    elif provider == ModelProvider.VLLM:
+        try:
+            return VLLM(
+                model=model_name,
+                trust_remote_code=True,
+                max_new_tokens=128,
+                top_k=10,
+                top_p=0.95,
+                temperature=temperature,
+            )
+        except Exception as e:
+            logger.error(f"Error loading VLLM model: {e}")
+            logger.warning("Falling back.")
+            return get_fallback_model(temperature)
+
+    elif provider == ModelProvider.OLLAMA:
+        try:
+            # Use the configurable Ollama base URL
+            base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+            return ChatOllama(model=model_name, base_url=base_url, **common_params)
+        except Exception as e:
+            logger.error(f"Error loading Ollama model: {e}")
+            return get_fallback_model(temperature)
+
+    else:
+        return get_fallback_model(temperature)
+
+def get_fallback_model(temperature=DEFAULT_TEMPERATURE):
+    """Create a dummy model for when no providers are available"""
+    from langchain_community.llms.fake import FakeListLLM
+    return FakeListLLM(
+        responses=["No language models are available. Please install Ollama or set up API keys."]
+    )
+
+# ================================
+# COMPATIBILITY FUNCTIONS
+# ================================
+
+def get_available_provider_types():
+    """Return available model providers"""
+    providers = {}
+
+    if is_ollama_available():
+        providers["ollama"] = "Ollama (local models)"
+
+    if is_openai_available():
+        providers["openai"] = "OpenAI API"
+
+    if is_anthropic_available():
+        providers["anthropic"] = "Anthropic API"
+
+    if is_openai_endpoint_available():
+        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
+
+    # Check for VLLM capability
+    try:
+        import torch
+        import transformers
+        providers["vllm"] = "VLLM (local models)"
+    except ImportError:
+        pass
+
+    # Default fallback
+    if not providers:
+        providers["none"] = "No model providers available"
+
+    return providers
+
+
+
+
+# ================================
+# HELPER FUNCTIONS
+# ================================
+
+def is_openai_available():
+    """Check if OpenAI is available"""
+    try:
+        api_key = settings.api_keys.get('OPENAI_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_anthropic_available():
+    """Check if Anthropic is available"""
+    try:
+        api_key = settings.api_keys.get('ANTHROPIC_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+
+
+def is_openai_endpoint_available():
+    """Check if OpenAI endpoint is available"""
+    print(os.getenv("OPENAI_ENDPOINT_API_KEY"))
+    try:
+        api_key = settings.OPENAI_ENDPOINT_API_KEY
+        return bool(api_key)
+    except:
+        return False
+
+def is_ollama_available():
+    """Check if Ollama is running"""
+    try:
+        import requests
+        base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+        response = requests.get(f"{base_url}/api/tags", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_vllm_available():
+    """Check if VLLM capability is available"""
+    try:
+        import torch
+        import transformers
+        return True
+    except ImportError:
+        return False
+
+def get_available_providers():
+    """Get dictionary of available providers"""
+    providers = {}
+
+    if is_ollama_available():
+        providers[ModelProvider.OLLAMA] = "Ollama (local models)"
+
+    if is_openai_available():
+        providers[ModelProvider.OPENAI] = "OpenAI API"
+
+    if is_anthropic_available():
+        providers[ModelProvider.ANTHROPIC] = "Anthropic API"
+
+    if is_openai_endpoint_available():
+        providers[ModelProvider.OPENAI_ENDPOINT] = "OpenAI-compatible Endpoint"
+
+    if is_vllm_available():
+        providers[ModelProvider.VLLM] = "VLLM (local models)"
+
+    if not providers:
+        providers[ModelProvider.NONE] = "No model providers available"
+
+    return providers
+
+# Log which providers are available
+AVAILABLE_PROVIDERS = get_available_providers()
+logger.info(f"Available providers: {[p.name for p in AVAILABLE_PROVIDERS.keys()]}")
+
+# Check if selected provider is available
+if DEFAULT_PROVIDER not in AVAILABLE_PROVIDERS and DEFAULT_PROVIDER != ModelProvider.NONE:
+    logger.warning(f"Selected provider {DEFAULT_PROVIDER.name} is not available.")
local_deep_research/defaults/local_collections.toml
@@ -0,0 +1,47 @@
+# Default local document collections configuration
+# Each collection functions as an independent search engine
+
+# Project Documents Collection
+[project_docs]
+name = "Project Documents"
+description = "Project documentation and specifications"
+paths = ["@format ${DOCS_DIR}/project_documents"]
+enabled = true
+embedding_model = "all-MiniLM-L6-v2"
+embedding_device = "cpu"
+embedding_model_type = "sentence_transformers"
+max_results = 20
+max_filtered_results = 5
+chunk_size = 1000
+chunk_overlap = 200
+cache_dir = "__CACHE_DIR__/local_search/project_docs"
+
+# Research Papers Collection
+[research_papers]
+name = "Research Papers"
+description = "Academic research papers and articles"
+paths = ["@format ${DOCS_DIR}/research_papers"]
+enabled = true
+embedding_model = "all-MiniLM-L6-v2"
+embedding_device = "cpu"
+embedding_model_type = "sentence_transformers"
+max_results = 20
+max_filtered_results = 5
+chunk_size = 800
+chunk_overlap = 150
+cache_dir = "__CACHE_DIR__/local_search/research_papers"
+
+# Personal Notes Collection
+[personal_notes]
+name = "Personal Notes"
+description = "Personal notes and documents"
+paths = ["@format ${DOCS_DIR}/personal_notes"]
+enabled = true
+embedding_model = "all-MiniLM-L6-v2"
+embedding_device = "cpu"
+embedding_model_type = "sentence_transformers"
+max_results = 30
+max_filtered_results = 10
+chunk_size = 500
+chunk_overlap = 100
+cache_dir = "__CACHE_DIR__/local_search/personal_notes"