local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
--- a/local_deep_research/defaults/llm_config.py
+++ /dev/null
@@ -1,338 +0,0 @@
-"""
-LLM configuration for Local Deep Research.
-
-This file controls which language models are available and how they're configured.
-You can customize model selection, parameters, and fallbacks here.
-"""
-
-from langchain_anthropic import ChatAnthropic
-from langchain_openai import ChatOpenAI
-from langchain_ollama import ChatOllama
-from langchain_community.llms import VLLM
-from local_deep_research.utilties.search_utilities import remove_think_tags
-from local_deep_research.config import settings
-import os
-import logging
-
-# Initialize environment
-logger = logging.getLogger(__name__)
-
-# Valid provider options
-VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "lmstudio", "llamacpp", "none"]
-
-# ================================
-# LLM FUNCTIONS
-# ================================
-
-
-
-def get_llm(model_name=None, temperature=None, provider=None):
-    """
-    Get LLM instance based on model name and provider.
-
-    Args:
-        model_name: Name of the model to use (if None, uses settings.llm.model)
-        temperature: Model temperature (if None, uses settings.llm.temperature)
-        provider: Provider to use (if None, uses settings.llm.provider)
-
-    Returns:
-        A LangChain LLM instance with automatic think-tag removal
-    """
-    # Use settings values for parameters if not provided
-    if model_name is None:
-        model_name = settings.llm.model
-
-    if temperature is None:
-        temperature = settings.llm.temperature
-
-    if provider is None:
-        provider = settings.llm.provider.lower()
-        if provider not in VALID_PROVIDERS:
-            logger.error(f"Invalid provider in settings: {provider}")
-            raise ValueError(f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}")
-
-    # Common parameters for all models
-    common_params = {
-        "temperature": temperature,
-        "max_tokens": settings.llm.max_tokens,
-    }
-
-    # Handle different providers
-    if provider == "anthropic":
-        api_key_name = 'ANTHROPIC_API_KEY'
-        api_key = settings.get(api_key_name, '')
-        if not api_key:
-            api_key = os.getenv(api_key_name)
-        if not api_key:
-            api_key = os.getenv("LDR_" + api_key_name)
-        if not api_key:
-            logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
-            return get_fallback_model(temperature)
-
-        llm = ChatAnthropic(
-            model=model_name, anthropic_api_key=api_key, **common_params
-        )
-        return wrap_llm_without_think_tags(llm)
-
-    elif provider == "openai":
-        api_key_name = 'OPENAI_API_KEY'
-        api_key = settings.get(api_key_name, '')
-        if not api_key:
-            api_key = os.getenv(api_key_name)
-        if not api_key:
-            api_key = os.getenv("LDR_" + api_key_name)
-        if not api_key:
-            logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
-            return get_fallback_model(temperature)
-
-        llm = ChatOpenAI(model=model_name, api_key=api_key, **common_params)
-        return wrap_llm_without_think_tags(llm)
-
-    elif provider == "openai_endpoint":
-        api_key_name = 'OPENAI_ENDPOINT_API_KEY'
-        api_key = settings.get(api_key_name, '')
-        if not api_key:
-            api_key = os.getenv(api_key_name)
-        if not api_key:
-            api_key = os.getenv("LDR_" + api_key_name)
-        if not api_key:
-            logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
-            return get_fallback_model(temperature)
-
-        # Get endpoint URL from settings
-        openai_endpoint_url = settings.llm.openai_endpoint_url
-
-        llm = ChatOpenAI(
-            model=model_name,
-            api_key=api_key,
-            openai_api_base=openai_endpoint_url,
-            **common_params
-        )
-        return wrap_llm_without_think_tags(llm)
-
-    elif provider == "vllm":
-        try:
-            llm = VLLM(
-                model=model_name,
-                trust_remote_code=True,
-                max_new_tokens=128,
-                top_k=10,
-                top_p=0.95,
-                temperature=temperature,
-            )
-            return wrap_llm_without_think_tags(llm)
-        except Exception as e:
-            logger.error(f"Error loading VLLM model: {e}")
-            logger.warning("Falling back.")
-            return get_fallback_model(temperature)
-
-    elif provider == "ollama":
-        try:
-            # Use the configurable Ollama base URL
-            base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
-            llm = ChatOllama(model=model_name, base_url=base_url, **common_params)
-            return wrap_llm_without_think_tags(llm)
-        except Exception as e:
-            logger.error(f"Error loading Ollama model: {e}")
-            return get_fallback_model(temperature)
-
-    elif provider == "lmstudio":
-        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
-        lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
-
-        llm = ChatOpenAI(
-            model=model_name,
-            api_key="lm-studio", # LM Studio doesn't require a real API key
-            base_url=f"{lmstudio_url}/v1", # Use the configured URL with /v1 endpoint
-            temperature=temperature,
-            max_tokens=settings.llm.max_tokens
-        )
-        return wrap_llm_without_think_tags(llm)
-
-    elif provider == "llamacpp":
-        # Import LlamaCpp
-        from langchain_community.llms import LlamaCpp
-
-        # Get LlamaCpp model path from settings
-        model_path = settings.llm.get('llamacpp_model_path', "")
-        if not model_path:
-            logger.error("llamacpp_model_path not set in settings")
-            raise ValueError("llamacpp_model_path not set in settings.toml")
-
-        # Get additional LlamaCpp parameters
-        n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
-        n_batch = settings.llm.get('llamacpp_n_batch', 512)
-        f16_kv = settings.llm.get('llamacpp_f16_kv', True)
-
-        # Create LlamaCpp instance
-        llm = LlamaCpp(
-            model_path=model_path,
-            temperature=temperature,
-            max_tokens=settings.llm.max_tokens,
-            n_gpu_layers=n_gpu_layers,
-            n_batch=n_batch,
-            f16_kv=f16_kv,
-            verbose=True
-        )
-        return wrap_llm_without_think_tags(llm)
-
-    else:
-        return wrap_llm_without_think_tags(get_fallback_model(temperature))
-
-def get_fallback_model(temperature=None):
-    """Create a dummy model for when no providers are available"""
-    from langchain_community.llms.fake import FakeListLLM
-    return FakeListLLM(
-        responses=["No language models are available. Please install Ollama or set up API keys."]
-    )
-
-# ================================
-# COMPATIBILITY FUNCTIONS
-# ================================
-
-def wrap_llm_without_think_tags(llm):
-    """Create a wrapper class that processes LLM outputs with remove_think_tags"""
-
-
-    class ProcessingLLMWrapper:
-        def __init__(self, base_llm):
-            self.base_llm = base_llm
-
-        def invoke(self, *args, **kwargs):
-            response = self.base_llm.invoke(*args, **kwargs)
-
-            # Process the response content if it has a content attribute
-            if hasattr(response, 'content'):
-                response.content = remove_think_tags(response.content)
-            elif isinstance(response, str):
-                response = remove_think_tags(response)
-
-            return response
-
-        # Pass through any other attributes to the base LLM
-        def __getattr__(self, name):
-            return getattr(self.base_llm, name)
-
-    return ProcessingLLMWrapper(llm)
-
-def get_available_provider_types():
-    """Return available model providers"""
-    providers = {}
-
-    if is_ollama_available():
-        providers["ollama"] = "Ollama (local models)"
-
-    if is_openai_available():
-        providers["openai"] = "OpenAI API"
-
-    if is_anthropic_available():
-        providers["anthropic"] = "Anthropic API"
-
-    if is_openai_endpoint_available():
-        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
-
-    if is_lmstudio_available():
-        providers["lmstudio"] = "LM Studio (local models)"
-
-    if is_llamacpp_available():
-        providers["llamacpp"] = "LlamaCpp (local models)"
-
-    # Check for VLLM capability
-    try:
-        import torch
-        import transformers
-        providers["vllm"] = "VLLM (local models)"
-    except ImportError:
-        pass
-
-    # Default fallback
-    if not providers:
-        providers["none"] = "No model providers available"
-
-    return providers
-
-# ================================
-# HELPER FUNCTIONS
-# ================================
-
-def is_openai_available():
-    """Check if OpenAI is available"""
-    try:
-        api_key = settings.get('OPENAI_API_KEY', '')
-        if not api_key:
-            api_key = os.getenv('OPENAI_API_KEY')
-        return bool(api_key)
-    except:
-        return False
-
-def is_anthropic_available():
-    """Check if Anthropic is available"""
-    try:
-        api_key = settings.get('ANTHROPIC_API_KEY', '')
-        if not api_key:
-            api_key = os.getenv('ANTHROPIC_API_KEY')
-        return bool(api_key)
-    except:
-        return False
-
-def is_openai_endpoint_available():
-    """Check if OpenAI endpoint is available"""
-    try:
-        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
-        if not api_key:
-            api_key = os.getenv('OPENAI_ENDPOINT_API_KEY')
-        return bool(api_key)
-    except:
-        return False
-
-def is_ollama_available():
-    """Check if Ollama is running"""
-    try:
-        import requests
-        base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
-        response = requests.get(f"{base_url}/api/tags", timeout=1.0)
-        return response.status_code == 200
-    except:
-        return False
-
-def is_vllm_available():
-    """Check if VLLM capability is available"""
-    try:
-        import torch
-        import transformers
-        return True
-    except ImportError:
-        return False
-
-def is_lmstudio_available():
-    """Check if LM Studio is available"""
-    try:
-        import requests
-        lmstudio_url = settings.llm.get('lmstudio_url', 'http://localhost:1234')
-        # LM Studio typically uses OpenAI-compatible endpoints
-        response = requests.get(f"{lmstudio_url}/v1/models", timeout=1.0)
-        return response.status_code == 200
-    except:
-        return False
-
-def is_llamacpp_available():
-    """Check if LlamaCpp is available and configured"""
-    try:
-        from langchain_community.llms import LlamaCpp
-        model_path = settings.llm.get('llamacpp_model_path', '')
-        return bool(model_path) and os.path.exists(model_path)
-    except:
-        return False
-
-def get_available_providers():
-    """Get dictionary of available providers"""
-    return get_available_provider_types()
-
-# Log which providers are available
-AVAILABLE_PROVIDERS = get_available_providers()
-logger.info(f"Available providers: {list(AVAILABLE_PROVIDERS.keys())}")
-
-# Check if selected provider is available
-selected_provider = settings.llm.provider.lower()
-if selected_provider not in AVAILABLE_PROVIDERS and selected_provider != "none":
-    logger.warning(f"Selected provider {selected_provider} is not available.")
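For context, a minimal usage sketch of the deleted module follows; it is not part of the diff. In 0.2.2 the equivalent logic lives in `local_deep_research/config/llm_config.py` (see the file list above). The model name and prompt are illustrative assumptions:

```python
# Hedged sketch against the removed 0.1.26 module (not part of the diff).
# "mistral" and the prompt text are illustrative assumptions.
from local_deep_research.defaults.llm_config import (
    get_available_providers,
    get_llm,
)

# Each call re-probes which providers are reachable, e.g.
# {"ollama": "Ollama (local models)"} when an Ollama server responds.
print(get_available_providers())

# Arguments left as None fall back to settings.llm.*; the returned object is
# wrapped so <think>...</think> spans are stripped from responses on invoke().
llm = get_llm(model_name="mistral", temperature=0.7, provider="ollama")
response = llm.invoke("Summarize this package release.")
print(response.content if hasattr(response, "content") else response)
```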
--- a/local_deep_research/utilties/search_utilities.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import re
-
-
-def remove_think_tags(text: str) -> str:
-    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
-    return text
-
-
-def extract_links_from_search_results(search_results: list) -> list:
-    """
-    Extracts links and titles from a list of search result dictionaries.
-
-    Each dictionary is expected to have at least the keys "title" and "link".
-
-    Returns a list of dictionaries with 'title' and 'url' keys.
-    """
-    links = []
-    for result in search_results:
-
-        try:
-
-            title = result.get("title", "").strip()
-            url = result.get("link", "").strip()
-            index = result.get("index", "").strip()
-
-            if title and url:
-                links.append({"title": title, "url": url, "index": index})
-        except Exception:
-            continue
-    return links
-
-def format_links(links):
-    formatted_links =""
-    formatted_links += "SOURCES:\n"
-    for i, link in enumerate(links, 1):
-        formatted_links += f"{link['index']}. {link['title']}\n URL: {link['url']}\n"
-    formatted_links += "\n"
-    return formatted_links
-
-
-def format_findings_to_text(findings_list, current_knowledge, questions_by_iteration):
-    formatted_text = "COMPLETE RESEARCH OUTPUT \n\n"
-
-    # Store the full current knowledge
-
-    formatted_text += f"{current_knowledge}\n\n"
-    formatted_text += "=" * 80 + "\n\n"
-
-    # Store questions by iteration
-    formatted_text += "SEARCH QUESTIONS BY ITERATION:\n"
-    for iter_num, questions in questions_by_iteration.items():
-        formatted_text += f"\nIteration {iter_num}:\n"
-        for i, q in enumerate(questions, 1):
-            formatted_text += f"{i}. {q}\n"
-    formatted_text += "\n" + "=" * 80 + "\n\n"
-
-    # Store detailed findings
-    formatted_text += "DETAILED FINDINGS:\n\n"
-    all_links = [] # To collect all sources
-
-    for finding in findings_list:
-        # Phase header
-        formatted_text += f"{'='*80}\n"
-        formatted_text += f"PHASE: {finding['phase']}\n"
-        formatted_text += f"{'='*80}\n\n"
-
-        # If this is a follow-up phase, show the corresponding question
-        if finding["phase"].startswith("Follow-up"):
-            iteration = int(finding["phase"].split(".")[0].split()[-1])
-            question_index = int(finding["phase"].split(".")[-1]) - 1
-            if iteration in questions_by_iteration and question_index < len(
-                questions_by_iteration[iteration]
-            ):
-                formatted_text += f"SEARCH QUESTION:\n{questions_by_iteration[iteration][question_index]}\n\n"
-
-        # Content
-        formatted_text += f"CONTENT:\n{finding['content']}\n\n"
-
-        # Search results if they exist
-        if "search_results" in finding:
-            # formatted_text += "SEARCH RESULTS:\n"
-            # formatted_text += f"{finding['search_results']}\n\n"
-
-            # Extract and format links for this finding
-            links = extract_links_from_search_results(finding["search_results"])
-            if links:
-                formatted_text += "SOURCES USED IN THIS SECTION:\n"
-                for i, link in enumerate(links, 1):
-                    formatted_text += f"{i}. {link['title']}\n URL: {link['url']}\n"
-                formatted_text += "\n"
-                all_links.extend(links)
-
-        formatted_text += f"{'_'*80}\n\n"
-
-    # Add summary of all sources at the end
-    if all_links:
-        formatted_text += "\nALL SOURCES USED IN RESEARCH:\n"
-        formatted_text += "=" * 80 + "\n\n"
-        seen_urls = set() # To prevent duplicates
-        for i, link in enumerate(all_links, 1):
-            if link["url"] not in seen_urls:
-                formatted_text += f"{i}. {link['title']}\n URL: {link['url']}\n"
-                seen_urls.add(link["url"])
-        formatted_text += "\n" + "=" * 80 + "\n"
-
-    return formatted_text
-
-def print_search_results(search_results):
-    formatted_text=""
-    links = extract_links_from_search_results(search_results)
-    if links:
-        formatted_text=format_links(links=links)
-    logger.info(formatted_text)
-
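Likewise, a small sketch of the removed helpers, again not part of the diff; their 0.2.2 successors live in `local_deep_research/utilities/search_utilities.py` per the file list. The sample search results are invented for illustration (note that `print_search_results` is omitted here, since the removed file references an undefined `logger`):

```python
# Hedged sketch against the removed 0.1.26 helpers (not part of the diff).
# The sample data below is invented for illustration.
from local_deep_research.utilties.search_utilities import (
    extract_links_from_search_results,
    format_links,
    remove_think_tags,
)

print(remove_think_tags("<think>scratch work</think>Final answer."))  # "Final answer."

search_results = [
    {"title": "Local Deep Research", "link": "https://example.org/ldr", "index": "1"},
    {"title": "", "link": "https://example.org/dropped"},  # skipped: empty title
]
links = extract_links_from_search_results(search_results)
# -> [{"title": "Local Deep Research", "url": "https://example.org/ldr", "index": "1"}]
print(format_links(links))  # "SOURCES:" header plus the numbered links with URLs
```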