PyPI - local-deep-research - Versions diffs - 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

local_deep_research/test_migration.py ADDED Viewed

@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+"""
+Migration test script for Local Deep Research.
+This script checks the contents of both the legacy and new databases to diagnose migration issues.
+"""
+import os
+import sqlite3
+import sys
+import time
+def check_db_content(db_path, description):
+    """Print the tables, row counts and a few sample rows of a SQLite database.
+    Args:
+        db_path: Filesystem path of the SQLite database file.
+        description: Human-readable label used in the printed messages.
+    Returns:
+        True if the file exists, contains at least one user table and every
+        query succeeded; False if the file is missing, has no user tables,
+        or an error occurred while examining it.
+    """
+    if not os.path.exists(db_path):
+        print(f"❌ {description} database not found at: {db_path}")
+        return False
+    print(f"📊 Examining {description} database at: {db_path}")
+    # Columns worth sampling for the tables this script knows about.
+    key_columns_by_table = {
+        "research_history": ("id", "query", "status"),
+        "settings": ("id", "key", "value"),
+        "research_logs": ("id", "message"),
+        "research": ("id", "message"),
+    }
+    def quote(identifier):
+        # Double-quote an identifier so names with spaces or keywords stay valid SQL.
+        return '"' + identifier.replace('"', '""') + '"'
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+        # Get list of tables, leaving out SQLite's internal ones
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
+        tables = [
+            row[0] for row in cursor.fetchall() if not row[0].startswith("sqlite_")
+        ]
+        if not tables:
+            print("   ℹ️ No user tables found in database")
+            return False
+        print(f"   📋 Tables found: {', '.join(tables)}")
+        for table in tables:
+            cursor.execute(f"SELECT COUNT(*) FROM {quote(table)}")
+            count = cursor.fetchone()[0]
+            print(f"   📝 Table '{table}' has {count} rows")
+            if count == 0:
+                continue
+            # Reading one row is enough to learn the column names
+            cursor.execute(f"SELECT * FROM {quote(table)} LIMIT 1")
+            columns = [column[0] for column in cursor.description]
+            print(f"      Columns: {', '.join(columns)}")
+            # Only sample key columns that really exist, so a schema that
+            # differs from the expected one does not abort the whole report.
+            existing = {column.lower() for column in columns}
+            key_cols = [
+                column
+                for column in key_columns_by_table.get(table, ())
+                if column in existing
+            ]
+            if not key_cols:
+                continue
+            cursor.execute(
+                f"SELECT {', '.join(map(quote, key_cols))} FROM {quote(table)} LIMIT 3"
+            )
+            for row in cursor.fetchall():
+                print(f"      Sample data: {row}")
+        return True
+    except Exception as e:
+        print(f"❌ Error examining database: {e}")
+        return False
+    finally:
+        # Close the connection on every path, including errors and early returns
+        if conn is not None:
+            conn.close()
+def main():
+    """Report on the legacy and new databases, optionally running a test migration.
+    Returns:
+        0 when the script ran to completion, 1 if an unexpected error was raised.
+    """
+    rule = "=" * 60
+    try:
+        # The project root is two directories above this script
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        root = os.path.abspath(os.path.join(script_dir, "..", ".."))
+        data_root = os.path.join(root, "data")
+        ldr_db = os.path.join(data_root, "ldr.db")
+        history_db = os.path.join(
+            root, "src", "local_deep_research", "research_history.db"
+        )
+        deep_db = os.path.join(data_root, "deep_research.db")
+        # Show every path up front so they can be verified by eye
+        for line in (
+            rule,
+            "DATABASE PATHS",
+            rule,
+            f"New database path: {ldr_db}",
+            f"Legacy research history DB: {history_db}",
+            f"Legacy deep research DB: {deep_db}",
+            rule,
+        ):
+            print(line)
+        check_db_content(history_db, "Legacy research_history")
+        check_db_content(deep_db, "Legacy deep_research")
+        if not os.path.exists(ldr_db):
+            print(f"ℹ️ New database doesn't exist yet at: {ldr_db}")
+            print("Would you like to run a test migration? (y/n)")
+            if input("> ").lower() == "y":
+                # Prefer the "src." layout; fall back to the installed package name
+                try:
+                    from src.local_deep_research.setup_data_dir import setup_data_dir
+                except ImportError:
+                    sys.path.append(os.path.dirname(script_dir))
+                    from local_deep_research.setup_data_dir import setup_data_dir
+                setup_data_dir()
+                try:
+                    from src.local_deep_research.web.database.migrate_to_ldr_db import (
+                        migrate_to_ldr_db,
+                    )
+                except ImportError:
+                    from local_deep_research.web.database.migrate_to_ldr_db import (
+                        migrate_to_ldr_db,
+                    )
+                print("Running migration...")
+                migrated = migrate_to_ldr_db()
+                # Give the file system a moment before reading the new file
+                time.sleep(1)
+                if not migrated:
+                    print("❌ Migration failed")
+                else:
+                    print("\n✅ Migration completed. Checking new database:")
+                    check_db_content(ldr_db, "New ldr")
+        else:
+            check_db_content(ldr_db, "New ldr")
+        # Print the paths the database module itself uses, for comparison
+        try:
+            try:
+                from src.local_deep_research.web.models.database import (
+                    DB_PATH,
+                    LEGACY_DEEP_RESEARCH_DB,
+                    LEGACY_RESEARCH_HISTORY_DB,
+                )
+            except ImportError:
+                from local_deep_research.web.models.database import (
+                    DB_PATH,
+                    LEGACY_DEEP_RESEARCH_DB,
+                    LEGACY_RESEARCH_HISTORY_DB,
+                )
+            for line in (
+                "\n" + rule,
+                "PATHS FROM DATABASE MODULE",
+                rule,
+                f"DB_PATH: {DB_PATH}",
+                f"LEGACY_RESEARCH_HISTORY_DB: {LEGACY_RESEARCH_HISTORY_DB}",
+                f"LEGACY_DEEP_RESEARCH_DB: {LEGACY_DEEP_RESEARCH_DB}",
+            ):
+                print(line)
+        except ImportError as e:
+            print(f"Could not import paths from database module: {e}")
+    except Exception as e:
+        print(f"Error in test script: {e}")
+        return 1
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

local_deep_research/utilities/__init__.py ADDED Viewed

File without changes

local_deep_research/utilities/db_utils.py ADDED Viewed

@@ -0,0 +1,49 @@
+import logging
+import os
+from functools import cache
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from ..web.services.settings_manager import SettingsManager
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Database location: DATA_DIR is the "data" directory two levels above this
+# file's directory, and DB_PATH is the "ldr.db" file inside it.
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "data"))
+DB_PATH = os.path.join(DATA_DIR, "ldr.db")
+@cache
+def get_db_session() -> Session:
+    """Build the SQLite session on first use and reuse it on every later call.
+    Returns:
+        The cached session, bound to an engine that points at DB_PATH.
+    """
+    make_session = sessionmaker(bind=create_engine(f"sqlite:///{DB_PATH}"))
+    return make_session()
+@cache
+def get_settings_manager() -> SettingsManager:
+    """Build the settings manager on first use and reuse it on every later call.
+    Returns:
+        The cached settings manager, backed by the shared DB session.
+    """
+    shared_session = get_db_session()
+    return SettingsManager(db_session=shared_session)
+def get_db_setting(key, default_value=None):
+    """Look up a setting in the database, falling back to ``default_value``.
+    Args:
+        key: Name of the setting to read.
+        default_value: Returned when the setting is unset or the lookup fails.
+    Returns:
+        The stored value, or ``default_value`` if it is None or an error occurred.
+    """
+    try:
+        # The settings manager takes care of the database access
+        stored = get_settings_manager().get_setting(key)
+    except Exception as e:
+        logger.error(f"Error getting setting {key} from database: {e}")
+        return default_value
+    return default_value if stored is None else stored

local_deep_research/{utilties → utilities}/enums.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # config/enums.py
-from enum import Enum, auto
+from enum import Enum
 class KnowledgeAccumulationApproach(Enum):
     """Closed set of knowledge accumulation approach names.
     Every member's value is the same string as its name. What each approach
     does is not visible in this module; see the code that consumes the enum.
     """
     QUESTION = "QUESTION"
     ITERATION = "ITERATION"
     NO_KNOWLEDGE = "NO_KNOWLEDGE"
     MAX_NR_OF_CHARACTERS = "MAX_NR_OF_CHARACTERS"

local_deep_research/{utilties → utilities}/llm_utils.py RENAMED Viewed

@@ -1,4 +1,4 @@
-# utilties/llm_utils.py
+# utilities/llm_utils.py
 """
 LLM utilities for Local Deep Research.
@@ -6,64 +6,75 @@ This module provides utility functions for working with language models
 when the user's llm_config.py is missing or incomplete.
 """
-import os
 import logging
-from typing import Dict, Any, Optional
+import os
+from typing import Any, Optional
 # Setup logging
 logger = logging.getLogger(__name__)
 def get_model(
     model_name: Optional[str] = None,
     model_type: Optional[str] = None,
     temperature: Optional[float] = None,
-    **kwargs
+    **kwargs,
 ) -> Any:
     """
     Get a language model instance as fallback when llm_config.get_llm is not available.
     Args:
         model_name: Name of the model to use
         model_type: Type of the model provider
         temperature: Model temperature
         **kwargs: Additional parameters
     Returns:
         LangChain language model instance
     """
     # Get default values from kwargs or use reasonable defaults
-    model_name = model_name or kwargs.get('DEFAULT_MODEL', 'mistral')
-    model_type = model_type or kwargs.get('DEFAULT_MODEL_TYPE', 'ollama')
-    temperature = temperature or kwargs.get('DEFAULT_TEMPERATURE', 0.7)
-    max_tokens = kwargs.get('max_tokens', kwargs.get('MAX_TOKENS', 30000))
+    model_name = model_name or kwargs.get("DEFAULT_MODEL", "mistral")
+    model_type = model_type or kwargs.get("DEFAULT_MODEL_TYPE", "ollama")
+    temperature = temperature or kwargs.get("DEFAULT_TEMPERATURE", 0.7)
+    max_tokens = kwargs.get("max_tokens", kwargs.get("MAX_TOKENS", 30000))
     # Common parameters
     common_params = {
         "temperature": temperature,
         "max_tokens": max_tokens,
     }
     # Add additional kwargs
     for key, value in kwargs.items():
-        if key not in ['DEFAULT_MODEL', 'DEFAULT_MODEL_TYPE', 'DEFAULT_TEMPERATURE', 'MAX_TOKENS']:
+        if key not in [
+            "DEFAULT_MODEL",
+            "DEFAULT_MODEL_TYPE",
+            "DEFAULT_TEMPERATURE",
+            "MAX_TOKENS",
+        ]:
             common_params[key] = value
     # Try to load the model based on type
     if model_type == "ollama":
         try:
             from langchain_ollama import ChatOllama
             return ChatOllama(model=model_name, **common_params)
         except ImportError:
             try:
                 from langchain_community.llms import Ollama
                 return Ollama(model=model_name, **common_params)
             except ImportError:
-                logger.error("Neither langchain_ollama nor langchain_community.llms.Ollama available")
+                logger.error(
+                    "Neither langchain_ollama nor langchain_community.llms.Ollama available"
+                )
                 raise
     elif model_type == "openai":
         try:
             from langchain_openai import ChatOpenAI
             api_key = os.getenv("OPENAI_API_KEY")
             if not api_key:
                 raise ValueError("OPENAI_API_KEY environment variable not set")
@@ -71,46 +82,69 @@ def get_model(
         except ImportError:
             logger.error("langchain_openai not available")
             raise
     elif model_type == "anthropic":
         try:
             from langchain_anthropic import ChatAnthropic
             api_key = os.getenv("ANTHROPIC_API_KEY")
             if not api_key:
                 raise ValueError("ANTHROPIC_API_KEY environment variable not set")
-            return ChatAnthropic(model=model_name, anthropic_api_key=api_key, **common_params)
+            return ChatAnthropic(
+                model=model_name, anthropic_api_key=api_key, **common_params
+            )
         except ImportError:
             logger.error("langchain_anthropic not available")
             raise
     elif model_type == "openai_endpoint":
         try:
             from langchain_openai import ChatOpenAI
             api_key = os.getenv("OPENAI_ENDPOINT_API_KEY")
             if not api_key:
                 raise ValueError("OPENAI_ENDPOINT_API_KEY environment variable not set")
-            endpoint_url = kwargs.get("OPENAI_ENDPOINT_URL", "https://openrouter.ai/api/v1")
-            if model_name is None and not kwargs.get("OPENAI_ENDPOINT_REQUIRES_MODEL", True):
-                return ChatOpenAI(api_key=api_key, openai_api_base=endpoint_url, **common_params)
+            endpoint_url = kwargs.get(
+                "OPENAI_ENDPOINT_URL", "https://openrouter.ai/api/v1"
+            )
+            if model_name is None and not kwargs.get(
+                "OPENAI_ENDPOINT_REQUIRES_MODEL", True
+            ):
+                return ChatOpenAI(
+                    api_key=api_key, openai_api_base=endpoint_url, **common_params
+                )
             else:
-                return ChatOpenAI(model=model_name, api_key=api_key, openai_api_base=endpoint_url, **common_params)
+                return ChatOpenAI(
+                    model=model_name,
+                    api_key=api_key,
+                    openai_api_base=endpoint_url,
+                    **common_params,
+                )
         except ImportError:
             logger.error("langchain_openai not available")
             raise
     # Default fallback
     try:
         from langchain_ollama import ChatOllama
         logger.warning(f"Unknown model type '{model_type}', defaulting to Ollama")
         return ChatOllama(model=model_name, **common_params)
     except (ImportError, Exception) as e:
         logger.error(f"Failed to load any model: {e}")
         # Last resort: create a dummy model
         try:
             from langchain_community.llms.fake import FakeListLLM
-            return FakeListLLM(responses=["No language models are available. Please install Ollama or set up API keys."])
+            return FakeListLLM(
+                responses=[
+                    "No language models are available. Please install Ollama or set up API keys."
+                ]
+            )
         except ImportError:
-            raise ValueError("No language models available and could not create dummy model")
+            raise ValueError(
+                "No language models available and could not create dummy model"
+            )

local_deep_research/utilities/search_utilities.py ADDED Viewed

@@ -0,0 +1,242 @@
+import logging
+import re
+from typing import Dict, List
+logger = logging.getLogger(__name__)
+def remove_think_tags(text: str) -> str:
+    """Strip every ``<think>...</think>`` span (newlines included) and trim the result."""
+    think_block = re.compile(r"<think>.*?</think>", re.DOTALL)
+    return think_block.sub("", text).strip()
+def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
+    """
+    Extracts links and titles from a list of search result dictionaries.
+    Each dictionary is expected to have at least the keys "title" and "link";
+    an optional "index" key carries the citation index of the result.
+    Results with an empty or missing title or link are skipped, and a result
+    that cannot be processed is logged and skipped.
+    Returns a list of dictionaries with 'title', 'url' and 'index' keys, all
+    whitespace-stripped strings ('index' is "" when the result has none).
+    """
+    links = []
+    if not search_results:
+        return links
+    for result in search_results:
+        try:
+            title = result.get("title", "")
+            url = result.get("link", "")
+            index = result.get("index", "")
+            # Apply strip() only if the values are not None
+            title = title.strip() if title is not None else ""
+            url = url.strip() if url is not None else ""
+            # An index may be a number rather than a string; convert it so
+            # strip() cannot fail and the result is kept instead of dropped.
+            index = str(index).strip() if index is not None else ""
+            if title and url:
+                links.append({"title": title, "url": url, "index": index})
+        except Exception as e:
+            # Log the specific error for debugging
+            logger.error(f"Error extracting link from result: {str(e)}")
+            continue
+    return links
+def format_links(links: List[Dict]) -> str:
+    """Format links as text; a thin alias for ``format_links_to_markdown``."""
+    return format_links_to_markdown(links)
+def format_links_to_markdown(all_links: List[Dict]) -> str:
+    """Render links as a text list with one entry per unique URL.
+    Args:
+        all_links: Dictionaries with a "url" key and optional "title" and
+            "index" keys. Entries without a URL are ignored.
+    Returns:
+        An empty string when ``all_links`` is empty. Otherwise one
+        "[indices] title / URL: url" entry per unique URL in order of first
+        appearance, followed by a trailing newline. Each entry uses the title
+        of the URL's first occurrence and lists its distinct indices in the
+        order they were first seen.
+    """
+    if not all_links:
+        return ""
+    # url -> (title of first occurrence, indices seen). The inner dict is used
+    # as an ordered set: it removes duplicates but, unlike set(), keeps a
+    # deterministic first-seen order for the citation numbers.
+    grouped = {}
+    for link in all_links:
+        url = link.get("url")
+        if not url:
+            continue
+        if url not in grouped:
+            grouped[url] = (link.get("title", "Untitled"), {})
+        grouped[url][1][link.get("index", "")] = None
+    entries = [
+        # Format as [1, 3, 5] if multiple indices, or just [1] if single
+        f"[{', '.join(map(str, indices))}] {title}\n   URL: {url}\n\n"
+        for url, (title, indices) in grouped.items()
+    ]
+    return "".join(entries) + "\n"
+def _phase_question_heading(phase, questions_by_iteration):
+    """Build the "#### <question>" heading that belongs to a phase label.
+    Args:
+        phase: Phase label of a finding, e.g. "Follow-up Iteration 2.1" or
+            "Sub-query 3" (question numbers in the label are 1-based).
+        questions_by_iteration: Dictionary mapping iteration numbers to lists of questions
+    Returns:
+        The heading text, or None when the label is not a follow-up/sub-query
+        label or names no known question (a warning is logged in that case).
+    """
+    if not isinstance(phase, str):
+        return None
+    if phase.startswith("Follow-up"):
+        try:
+            parts = phase.replace("Follow-up Iteration ", "").split(".")
+            if len(parts) != 2:
+                logger.warning(f"Could not parse iteration/index from phase: {phase}")
+                return None
+            iteration = int(parts[0])
+            question_index = int(parts[1]) - 1
+        except ValueError:
+            logger.warning(f"Could not parse iteration/index from phase: {phase}")
+            return None
+        if iteration in questions_by_iteration and 0 <= question_index < len(
+            questions_by_iteration[iteration]
+        ):
+            return f"#### {questions_by_iteration[iteration][question_index]}\n\n"
+        logger.warning(f"Could not find matching question for phase: {phase}")
+        return None
+    # Handle Sub-query phases from IterDRAG strategy
+    if phase.startswith("Sub-query"):
+        try:
+            # Extract the index number from "Sub-query X"
+            query_index = int(phase.replace("Sub-query ", "")) - 1
+        except ValueError:
+            logger.warning(f"Could not parse question index from phase: {phase}")
+            return None
+        # In IterDRAG, sub-queries are stored in iteration 0. The lower bound
+        # matters: without it "Sub-query 0" would become index -1 and silently
+        # pick the last question instead of being reported as unmatched.
+        if 0 in questions_by_iteration and 0 <= query_index < len(
+            questions_by_iteration[0]
+        ):
+            return f"#### {questions_by_iteration[0][query_index]}\n\n"
+        logger.warning(f"Could not find matching question for phase: {phase}")
+        return None
+    return None
+def format_findings(
+    findings_list: List[Dict],
+    synthesized_content: str,
+    questions_by_iteration: Dict[int, List[str]],
+) -> str:
+    """Format findings into a detailed text output.
+    The output consists of, in order: the synthesized content, the list of
+    sources, the search questions grouped by iteration, one detailed section
+    per finding, and a closing list of all sources.
+    Args:
+        findings_list: List of finding dictionaries (None is treated as empty)
+        synthesized_content: The synthesized content from the LLM.
+        questions_by_iteration: Dictionary mapping iteration numbers to lists of
+            questions (None is treated as empty)
+    Returns:
+        str: Formatted text output
+    """
+    # Accept None for either collection instead of failing in len()
+    findings_list = findings_list or []
+    questions_by_iteration = questions_by_iteration or {}
+    logger.info(
+        f"Inside format_findings utility. Findings count: {len(findings_list)}, Questions iterations: {len(questions_by_iteration)}"
+    )
+    # Extract all sources from findings
+    all_links = []
+    for finding in findings_list:
+        search_results = finding.get("search_results", [])
+        if search_results:
+            try:
+                all_links.extend(extract_links_from_search_results(search_results))
+            except Exception as link_err:
+                logger.error(f"Error processing search results/links: {link_err}")
+    # Pieces are collected in a list and joined once at the end
+    chunks = [
+        f"{synthesized_content}\n\n",
+        # Sources section right after the synthesized content
+        format_links_to_markdown(all_links),
+        "\n\n",
+    ]
+    # Add Search Questions by Iteration section
+    if questions_by_iteration:
+        chunks.append("## SEARCH QUESTIONS BY ITERATION\n")
+        chunks.append("\n")
+        for iter_num, questions in questions_by_iteration.items():
+            chunks.append(f"\n #### Iteration {iter_num}:\n")
+            chunks.extend(f"{i}. {q}\n" for i, q in enumerate(questions, 1))
+        chunks.append("\n" + "\n\n")
+    else:
+        logger.warning("No questions by iteration found to format.")
+    # Add Detailed Findings section
+    if findings_list:
+        chunks.append("## DETAILED FINDINGS\n\n")
+        logger.info(f"Formatting {len(findings_list)} detailed finding items.")
+        for idx, finding in enumerate(findings_list):
+            logger.debug(f"Formatting finding item {idx}. Keys: {list(finding.keys())}")
+            # Use .get() for safety
+            phase = finding.get("phase", "Unknown Phase")
+            content = finding.get("content", "No content available.")
+            search_results = finding.get("search_results", [])
+            # Phase header
+            chunks.append("\n")
+            chunks.append(f"### {phase}\n")
+            chunks.append("\n\n")
+            # Prefer the question the phase label points at; otherwise fall
+            # back to a question stored in the finding itself
+            heading = _phase_question_heading(phase, questions_by_iteration)
+            if heading is not None:
+                chunks.append(heading)
+            elif finding.get("question"):
+                chunks.append(f"### SEARCH QUESTION:\n{finding['question']}\n\n")
+            # Content
+            chunks.append(f"\n\n{content}\n\n")
+            # Search results if they exist
+            if search_results:
+                try:
+                    links = extract_links_from_search_results(search_results)
+                    if links:
+                        chunks.append("### SOURCES USED IN THIS SECTION:\n")
+                        chunks.append(format_links(links) + "\n\n")
+                except Exception as link_err:
+                    logger.error(
+                        f"Error processing search results/links for finding {idx}: {link_err}"
+                    )
+            else:
+                logger.debug(f"No search_results found for finding item {idx}.")
+            chunks.append(f"{'_' * 80}\n\n")
+    else:
+        logger.warning("No detailed findings found to format.")
+    # Add summary of all sources at the end
+    if all_links:
+        chunks.append("## ALL SOURCES:\n")
+        chunks.append(format_links_to_markdown(all_links))
+    else:
+        logger.info("No unique sources found across all findings to list.")
+    logger.info("Finished format_findings utility.")
+    return "".join(chunks)
+def print_search_results(search_results):
+    """Log the formatted links of ``search_results`` (an empty message if there are none)."""
+    links = extract_links_from_search_results(search_results)
+    message = format_links(links=links) if links else ""
+    logger.info(message)

local_deep_research/{utilties → utilities}/setup_utils.py RENAMED Viewed

@@ -1,6 +1,8 @@
 """Setup utilities (legacy wrapper)."""
 def setup_user_directories():
     """Set up directories and ensure config files exist."""
-    from local_deep_research.config import init_config_files
-    init_config_files()
+    from ..config.config_files import init_config_files
+    init_config_files()

local_deep_research/web/__init__.py CHANGED Viewed

	@@ -1,2 +1 @@
1 1	"""Web interface for Local Deep Research"""
2	- from . import app

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl