local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
local_deep_research/config/llm_config.py (new file)

```diff
@@ -0,0 +1,472 @@
+import logging
+import os
+from pathlib import Path
+
+from dynaconf.vendor.box.exceptions import BoxKeyError
+from langchain_anthropic import ChatAnthropic
+from langchain_community.llms import VLLM
+from langchain_ollama import ChatOllama
+from langchain_openai import ChatOpenAI
+
+from ..utilities.db_utils import get_db_setting
+from ..utilities.search_utilities import remove_think_tags
+from .config_files import CONFIG_DIR, settings
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Valid provider options
+VALID_PROVIDERS = [
+    "ollama",
+    "openai",
+    "anthropic",
+    "vllm",
+    "openai_endpoint",
+    "lmstudio",
+    "llamacpp",
+    "none",
+]
+SECRETS_FILE = CONFIG_DIR / ".secrets.toml"
+
+
+def get_llm(model_name=None, temperature=None, provider=None, openai_endpoint_url=None):
+    """
+    Get LLM instance based on model name and provider.
+
+    Args:
+        model_name: Name of the model to use (if None, uses database setting)
+        temperature: Model temperature (if None, uses database setting)
+        provider: Provider to use (if None, uses database setting)
+        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
+            setting)
+
+    Returns:
+        A LangChain LLM instance with automatic think-tag removal
+    """
+
+    # Use database values for parameters if not provided
+    if model_name is None:
+        model_name = get_db_setting("llm.model", settings.llm.model)
+    if temperature is None:
+        temperature = get_db_setting("llm.temperature", settings.llm.temperature)
+    if provider is None:
+        provider = get_db_setting("llm.provider", settings.llm.provider)
+
+    # Clean model name: remove quotes and extra whitespace
+    if model_name:
+        model_name = model_name.strip().strip("\"'").strip()
+
+    # Clean provider: remove quotes and extra whitespace
+    if provider:
+        provider = provider.strip().strip("\"'").strip()
+
+    # Normalize provider: convert to lowercase
+    provider = provider.lower() if provider else None
+
+    # Validate provider
+    if provider not in VALID_PROVIDERS:
+        logger.error(f"Invalid provider in settings: {provider}")
+        raise ValueError(
+            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
+        )
+    print(
+        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
+    )
+
+    # Common parameters for all models
+    common_params = {
+        "temperature": temperature,
+    }
+    try:
+        common_params["max_tokens"] = settings.llm.max_tokens
+    except BoxKeyError:
+        # Some providers don't support this parameter, in which case it can
+        # be omitted.
+        pass
+
+    # Handle different providers
+    if provider == "anthropic":
+        api_key_name = "ANTHROPIC_API_KEY"
+        api_key = settings.get(api_key_name, "")
+        if not api_key:
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
+        if not api_key:
+            logger.warning(
+                "ANTHROPIC_API_KEY not found. Falling back to default model."
+            )
+            return get_fallback_model(temperature)
+
+        llm = ChatAnthropic(
+            model=model_name, anthropic_api_key=api_key, **common_params
+        )
+        return wrap_llm_without_think_tags(llm)
+
+    elif provider == "openai":
+        api_key_name = "OPENAI_API_KEY"
+        api_key = settings.get(api_key_name, "")
+        if not api_key:
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
+        if not api_key:
+            logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        llm = ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+        return wrap_llm_without_think_tags(llm)
+
+    elif provider == "openai_endpoint":
+        api_key_name = "OPENAI_ENDPOINT_API_KEY"
+        api_key = settings.get(api_key_name, "")
+        if not api_key:
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
+        if not api_key:
+            logger.warning(
+                "OPENAI_ENDPOINT_API_KEY not found. Falling back to default model."
+            )
+            return get_fallback_model(temperature)
+
+        # Get endpoint URL from settings
+        if openai_endpoint_url is not None:
+            openai_endpoint_url = get_db_setting(
+                "llm.openai_endpoint_url", settings.llm.openai_endpoint_url
+            )
+
+        llm = ChatOpenAI(
+            model=model_name,
+            api_key=api_key,
+            openai_api_base=openai_endpoint_url,
+            **common_params,
+        )
+        return wrap_llm_without_think_tags(llm)
+
+    elif provider == "vllm":
+        try:
+            llm = VLLM(
+                model=model_name,
+                trust_remote_code=True,
+                max_new_tokens=128,
+                top_k=10,
+                top_p=0.95,
+                temperature=temperature,
+            )
+            return wrap_llm_without_think_tags(llm)
+        except Exception as e:
+            logger.error(f"Error loading VLLM model: {e}")
+            logger.warning("Falling back.")
+            return get_fallback_model(temperature)
+
+    elif provider == "ollama":
+        try:
+            # Use the configurable Ollama base URL
+            base_url = os.getenv(
+                "OLLAMA_BASE_URL",
+                settings.llm.get("ollama_base_url", "http://localhost:11434"),
+            )
+
+            # Check if Ollama is available before trying to use it
+            if not is_ollama_available():
+                logger.error(
+                    f"Ollama not available at {base_url}. Falling back to dummy model."
+                )
+                return get_fallback_model(temperature)
+
+            # Check if the requested model exists
+            import requests
+
+            try:
+                logger.info(f"Checking if model '{model_name}' exists in Ollama")
+                response = requests.get(f"{base_url}/api/tags", timeout=3.0)
+                if response.status_code == 200:
+                    # Handle both newer and older Ollama API formats
+                    data = response.json()
+                    models = []
+                    if "models" in data:
+                        # Newer Ollama API
+                        models = data.get("models", [])
+                    else:
+                        # Older Ollama API format
+                        models = data
+
+                    # Get list of model names
+                    model_names = [m.get("name", "").lower() for m in models]
+                    logger.info(
+                        f"Available Ollama models: {', '.join(model_names[:5])}{' and more' if len(model_names) > 5 else ''}"
+                    )
+
+                    if model_name.lower() not in model_names:
+                        logger.error(
+                            f"Model '{model_name}' not found in Ollama. Available models: {', '.join(model_names[:5])}"
+                        )
+                        return get_fallback_model(temperature)
+            except Exception as model_check_error:
+                logger.error(
+                    f"Error checking for model '{model_name}' in Ollama: {str(model_check_error)}"
+                )
+                # Continue anyway, let ChatOllama handle potential errors
+
+            logger.info(
+                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
+            )
+            try:
+                llm = ChatOllama(model=model_name, base_url=base_url, **common_params)
+                # Test invoke to validate model works
+                logger.info("Testing Ollama model with simple invocation")
+                test_result = llm.invoke("Hello")
+                logger.info(
+                    f"Ollama test successful. Response type: {type(test_result)}"
+                )
+                return wrap_llm_without_think_tags(llm)
+            except Exception as chat_error:
+                logger.error(f"Error creating or testing ChatOllama: {str(chat_error)}")
+                return get_fallback_model(temperature)
+        except Exception as e:
+            logger.error(f"Error in Ollama provider section: {str(e)}")
+            return get_fallback_model(temperature)
+
+    elif provider == "lmstudio":
+        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
+        lmstudio_url = settings.llm.get("lmstudio_url", "http://localhost:1234")
+        lmstudio_url = get_db_setting("llm.lmstudio_url", lmstudio_url)
+
+        llm = ChatOpenAI(
+            model=model_name,
+            api_key="lm-studio",  # LM Studio doesn't require a real API key
+            base_url=f"{lmstudio_url}/v1",  # Use the configured URL with /v1 endpoint
+            temperature=temperature,
+            max_tokens=get_db_setting("llm.max_tokens", settings.llm.max_tokens),
+        )
+        return wrap_llm_without_think_tags(llm)
+
+    elif provider == "llamacpp":
+        # Import LlamaCpp
+        from langchain_community.llms import LlamaCpp
+
+        # Get LlamaCpp model path from settings
+        model_path = settings.llm.get("llamacpp_model_path", "")
+        model_path = get_db_setting("llm.llamacpp_model_path", model_path)
+        if not model_path:
+            logger.error("llamacpp_model_path not set in settings")
+            raise ValueError("llamacpp_model_path not set in settings")
+
+        # Get additional LlamaCpp parameters
+        n_gpu_layers = settings.llm.get("llamacpp_n_gpu_layers", 1)
+        n_gpu_layers = get_db_setting("llm.llamacpp_n_gpu_layers", n_gpu_layers)
+        n_batch = settings.llm.get("llamacpp_n_batch", 512)
+        n_batch = get_db_setting("llm.llamacpp_n_batch", n_batch)
+        f16_kv = settings.llm.get("llamacpp_f16_kv", True)
+        f16_kv = get_db_setting("llm.llamacpp_f16_kv", f16_kv)
+
+        # Create LlamaCpp instance
+        llm = LlamaCpp(
+            model_path=model_path,
+            temperature=temperature,
+            max_tokens=get_db_setting("llm.max_tokens", settings.llm.max_tokens),
+            n_gpu_layers=n_gpu_layers,
+            n_batch=n_batch,
+            f16_kv=f16_kv,
+            verbose=True,
+        )
+        return wrap_llm_without_think_tags(llm)
+
+    else:
+        return wrap_llm_without_think_tags(get_fallback_model(temperature))
+
+
+def get_fallback_model(temperature=None):
+    """Create a dummy model for when no providers are available"""
+    from langchain_community.llms.fake import FakeListLLM
+
+    return FakeListLLM(
+        responses=[
+            "No language models are available. Please install Ollama or set up API keys."
+        ]
+    )
+
+
+def wrap_llm_without_think_tags(llm):
+    """Create a wrapper class that processes LLM outputs with remove_think_tags"""
+
+    class ProcessingLLMWrapper:
+        def __init__(self, base_llm):
+            self.base_llm = base_llm
+
+        def invoke(self, *args, **kwargs):
+            response = self.base_llm.invoke(*args, **kwargs)
+
+            # Process the response content if it has a content attribute
+            if hasattr(response, "content"):
+                response.content = remove_think_tags(response.content)
+            elif isinstance(response, str):
+                response = remove_think_tags(response)
+
+            return response
+
+        # Pass through any other attributes to the base LLM
+        def __getattr__(self, name):
+            return getattr(self.base_llm, name)
+
+    return ProcessingLLMWrapper(llm)
+
+
+def get_available_provider_types():
+    """Return available model providers"""
+    providers = {}
+
+    if is_ollama_available():
+        providers["ollama"] = "Ollama (local models)"
+
+    if is_openai_available():
+        providers["openai"] = "OpenAI API"
+
+    if is_anthropic_available():
+        providers["anthropic"] = "Anthropic API"
+
+    if is_openai_endpoint_available():
+        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
+
+    if is_lmstudio_available():
+        providers["lmstudio"] = "LM Studio (local models)"
+
+    if is_llamacpp_available():
+        providers["llamacpp"] = "LlamaCpp (local models)"
+
+    # Check for VLLM capability
+    try:
+        import torch  # noqa: F401
+        import transformers  # noqa: F401
+
+        providers["vllm"] = "VLLM (local models)"
+    except ImportError:
+        pass
+
+    # Default fallback
+    if not providers:
+        providers["none"] = "No model providers available"
+
+    return providers
+
+
+def is_openai_available():
+    """Check if OpenAI is available"""
+    try:
+        api_key = settings.get("OPENAI_API_KEY", "")
+        if not api_key:
+            api_key = os.getenv("OPENAI_API_KEY")
+        return bool(api_key)
+    except Exception:
+        return False
+
+
+def is_anthropic_available():
+    """Check if Anthropic is available"""
+    try:
+        api_key = settings.get("ANTHROPIC_API_KEY", "")
+        if not api_key:
+            api_key = os.getenv("ANTHROPIC_API_KEY")
+        return bool(api_key)
+    except Exception:
+        return False
+
+
+def is_openai_endpoint_available():
+    """Check if OpenAI endpoint is available"""
+    try:
+        api_key = settings.get("OPENAI_ENDPOINT_API_KEY", "")
+        if not api_key:
+            api_key = os.getenv("OPENAI_ENDPOINT_API_KEY")
+        return bool(api_key)
+    except Exception:
+        return False
+
+
+def is_ollama_available():
+    """Check if Ollama is running"""
+    try:
+        import requests
+
+        base_url = os.getenv(
+            "OLLAMA_BASE_URL",
+            settings.llm.get("ollama_base_url", "http://localhost:11434"),
+        )
+        logger.info(f"Checking Ollama availability at {base_url}/api/tags")
+
+        try:
+            response = requests.get(f"{base_url}/api/tags", timeout=3.0)
+            if response.status_code == 200:
+                logger.info(f"Ollama is available. Status code: {response.status_code}")
+                # Log first 100 chars of response to debug
+                logger.info(f"Response preview: {str(response.text)[:100]}")
+                return True
+            else:
+                logger.warning(
+                    f"Ollama API returned status code: {response.status_code}"
+                )
+                return False
+        except requests.exceptions.RequestException as req_error:
+            logger.error(f"Request error when checking Ollama: {str(req_error)}")
+            return False
+        except Exception as e:
+            logger.error(f"Unexpected error when checking Ollama: {str(e)}")
+            return False
+    except Exception as outer_e:
+        logger.error(f"Error in is_ollama_available: {str(outer_e)}")
+        return False
+
+
+def is_vllm_available():
+    """Check if VLLM capability is available"""
+    try:
+        import torch  # noqa: F401
+        import transformers  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
+
+
+def is_lmstudio_available():
+    """Check if LM Studio is available"""
+    try:
+        import requests
+
+        lmstudio_url = settings.llm.get("lmstudio_url", "http://localhost:1234")
+        lmstudio_url = get_db_setting("llm.lmstudio_url", lmstudio_url)
+        # LM Studio typically uses OpenAI-compatible endpoints
+        response = requests.get(f"{lmstudio_url}/v1/models", timeout=1.0)
+        return response.status_code == 200
+    except Exception:
+        return False
+
+
+def is_llamacpp_available():
+    """Check if LlamaCpp is available and configured"""
+    try:
+        from langchain_community.llms import LlamaCpp  # noqa: F401
+
+        model_path = settings.llm.get("llamacpp_model_path", "")
+        model_path = get_db_setting("llm.llamacpp_model_path", model_path)
+        return bool(model_path) and os.path.exists(model_path)
+    except Exception:
+        return False
+
+
+def get_available_providers():
+    """Get dictionary of available providers"""
+    return get_available_provider_types()
+
+
+secrets_file = Path(SECRETS_FILE)
+AVAILABLE_PROVIDERS = get_available_providers()
+selected_provider = get_db_setting("llm.provider", settings.llm.provider).lower()
+
+# Log which providers are available
+logger.info(f"Available providers: {list(AVAILABLE_PROVIDERS.keys())}")
+
+# Check if selected provider is available
+if selected_provider not in AVAILABLE_PROVIDERS and selected_provider != "none":
+    logger.warning(f"Selected provider {selected_provider} is not available.")
```
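For orientation, here is a minimal usage sketch of the new `config/llm_config.py` module (not part of the package); the import path comes from the file list above, while the model name and provider values are illustrative placeholders. Arguments left as `None` fall back to database settings, and an unsupported provider raises `ValueError`.

```python
# Minimal sketch: exercising the new llm_config API (values are illustrative).
from local_deep_research.config.llm_config import get_available_providers, get_llm

# Providers detected in the current environment (Ollama reachable, API keys
# set, local model files present, ...).
print(get_available_providers())

# "llama3" / "ollama" are placeholders; any argument left as None falls back
# to the database setting. The returned object is wrapped so think-tags are
# removed from responses.
llm = get_llm(model_name="llama3", temperature=0.7, provider="ollama")
print(llm.invoke("Say hello in one sentence."))
```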
local_deep_research/config/search_config.py (new file)

```diff
@@ -0,0 +1,77 @@
+# local_deep_research/config.py
+import logging
+
+from ..utilities.db_utils import get_db_setting
+from ..web_search_engines.search_engine_factory import get_search as factory_get_search
+from .config_files import settings
+from .llm_config import get_llm
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+
+# Whether to check the quality search results using the LLM.
+QUALITY_CHECK_DDG_URLS = True
+# Whether to only retrieve snippets instead of full search results.
+SEARCH_SNIPPETS_ONLY = get_db_setting(
+    "search.snippets_only", settings.search.snippets_only
+)
+
+
+# Expose get_search function
+def get_search(search_tool=None, llm_instance=None):
+    """
+    Helper function to get search engine
+
+    Args:
+        search_tool: Override the search tool setting (e.g. searxng, wikipedia)
+        llm_instance: Override the LLM instance
+    """
+
+    # Use specified tool or default from settings
+    tool = search_tool or get_db_setting("search.tool", settings.search.tool)
+    logger.info(f"Creating search engine with tool: {tool}")
+
+    # Get LLM instance (use provided or get fresh one)
+    llm = llm_instance or get_llm()
+
+    # Get search parameters
+    params = {
+        "search_tool": tool,
+        "llm_instance": llm,
+        "max_results": get_db_setting(
+            "search.max_results", settings.search.max_results
+        ),
+        "region": get_db_setting("search.region", settings.search.region),
+        "time_period": get_db_setting(
+            "search.time_period", settings.search.time_period
+        ),
+        "safe_search": get_db_setting(
+            "search.safe_search", settings.search.safe_search
+        ),
+        "search_snippets_only": SEARCH_SNIPPETS_ONLY,
+        "search_language": get_db_setting(
+            "search.search_language", settings.search.search_language
+        ),
+        "max_filtered_results": get_db_setting(
+            "search.max_filtered_results", settings.search.max_filtered_results
+        ),
+    }
+
+    # Log NULL parameters for debugging
+    logger.info(
+        f"Search config: tool={tool}, max_results={params['max_results']}, time_period={params['time_period']}"
+    )
+
+    # Create search engine
+    search_engine = factory_get_search(**params)
+
+    # Log the created engine type
+    if search_engine:
+        logger.info(
+            f"Successfully created search engine of type: {type(search_engine).__name__}"
+        )
+    else:
+        logger.error(f"Failed to create search engine for tool: {tool}")
+
+    return search_engine
```
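Likewise, a small sketch of the new `config/search_config.py` helper, assuming the default settings are usable in the environment; the tool name is illustrative, and `get_search` returns `None` when the factory cannot build an engine.

```python
# Minimal sketch: building a search engine from settings (tool name illustrative).
from local_deep_research.config.search_config import get_search

# Override only the tool; max_results, region, time_period, etc. are pulled
# from database settings with the TOML defaults as fallback.
engine = get_search(search_tool="wikipedia")
print(type(engine).__name__ if engine is not None else "no engine created")
```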
local_deep_research/defaults/__init__.py

```diff
@@ -4,8 +4,9 @@ Default configuration module for Local Deep Research.
 This module is responsible for loading and initializing default
 configuration files and resources used throughout the application.
 """
-
+
 import logging
+import os
 from pathlib import Path
 
 logger = logging.getLogger(__name__)
@@ -18,27 +19,31 @@ DEFAULT_FILES = {
     "main.toml": DEFAULTS_DIR / "main.toml",
     "local_collections.toml": DEFAULTS_DIR / "local_collections.toml",
     "search_engines.toml": DEFAULTS_DIR / "search_engines.toml",
-    "llm_config.py": DEFAULTS_DIR / "llm_config.py"
 }
 
+
 def get_default_file_path(filename):
     """Get the path to a default configuration file."""
     if filename in DEFAULT_FILES:
         return DEFAULT_FILES[filename]
     return None
 
+
 def list_default_files():
     """List all available default configuration files."""
     return list(DEFAULT_FILES.keys())
 
+
 def ensure_defaults_exist():
     """Verify that all expected default files exist in the package."""
     missing = []
     for filename, filepath in DEFAULT_FILES.items():
         if not filepath.exists():
             missing.append(filename)
-
+
     if missing:
-        logger.warning(
+        logger.warning(
+            f"The following default files are missing from the package: {', '.join(missing)}"
+        )
         return False
-    return True
+    return True
```
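A brief sketch of the updated `defaults` package helpers, assuming the 0.2.2 wheel is installed; note that `llm_config.py` is no longer tracked in `DEFAULT_FILES`.

```python
# Minimal sketch: inspecting the packaged default configuration files.
from local_deep_research.defaults import (
    ensure_defaults_exist,
    get_default_file_path,
    list_default_files,
)

# Expected to list main.toml, local_collections.toml and search_engines.toml.
print(list_default_files())

# Path to a default file inside the installed package, or None if unknown.
print(get_default_file_path("main.toml"))

# True when every tracked default exists; otherwise a warning is logged.
print(ensure_defaults_exist())
```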
local_deep_research/defaults/main.toml

```diff
@@ -38,7 +38,7 @@ enable_fact_checking = false
 
 
 [search]
-# Search tool to use (auto, wikipedia, arxiv, duckduckgo,
+# Search tool to use (auto, wikipedia, arxiv, duckduckgo, serpapi, google_pse,etc.)
 # "auto" intelligently selects based on query content (recommended)
 # "local_all" searches only local document collections
 tool = "auto"
@@ -71,7 +71,7 @@ safe_search = true
 search_language = "English"
 
 # Return only snippets, not full content (faster but less detailed)
-snippets_only =
+snippets_only = true
 
 # Skip relevance filtering (return all results)
 skip_relevance_filter = false
```
|