local-deep-research 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/config.py +108 -53
- local_deep_research/defaults/search_engines.toml +39 -18
- local_deep_research/search_system.py +16 -10
- local_deep_research/utilties/enums.py +4 -4
- local_deep_research/web/app.py +3 -2
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +3 -5
- local_deep_research/web_search_engines/engines/search_engine_brave.py +3 -5
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -3
- local_deep_research/web_search_engines/engines/search_engine_github.py +2 -4
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +2 -4
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +323 -78
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +3 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +3 -4
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +3 -2
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +1128 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +2 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +2 -4
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +2 -4
- local_deep_research/web_search_engines/search_engine_base.py +12 -4
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/METADATA +1 -1
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/RECORD +25 -25
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +0 -623
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.12.dist-info → local_deep_research-0.1.14.dist-info}/top_level.txt +0 -0
local_deep_research/config.py
CHANGED
@@ -9,11 +9,19 @@ logger = logging.getLogger(__name__)
 
 # Get config directory
 def get_config_dir():
-
-
+    import platform
+
+    if platform.system() == "Windows":
+        # Windows: Use Documents directory
+        from platformdirs import user_documents_dir
+        config_dir = Path(user_documents_dir()) / "LearningCircuit" / "local-deep-research"
+    else:
+        # Linux/Mac: Use standard config directory
+        from platformdirs import user_config_dir
+        config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
+
     print(f"Looking for config in: {config_dir}")
     return config_dir
-
 # Define config paths
 CONFIG_DIR = get_config_dir() / "config"
 CONFIG_DIR.mkdir(parents=True, exist_ok=True)
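The platform branch above is the heart of the 0.1.14 config relocation. A rough sketch of where `get_config_dir()` now points, assuming platformdirs' usual conventions (illustrative paths only):

```python
# Sketch: resolving the new config location (paths vary by user and OS).
import platform
from pathlib import Path
from platformdirs import user_config_dir, user_documents_dir

if platform.system() == "Windows":
    # e.g. C:\Users\<user>\Documents\LearningCircuit\local-deep-research
    base = Path(user_documents_dir()) / "LearningCircuit" / "local-deep-research"
else:
    # e.g. ~/.config/local_deep_research on Linux,
    # ~/Library/Application Support/local_deep_research on macOS
    base = Path(user_config_dir("local_deep_research", "LearningCircuit"))

print(base / "config")  # CONFIG_DIR appends the trailing "config", per the hunk above
```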
@@ -83,68 +91,115 @@ def get_search(search_tool=None):
     params = {
         "search_tool": tool,
         "llm_instance": get_llm(),
-        "max_results": settings.
-        "region": settings.
-        "time_period": settings.
-        "safe_search": settings.
-        "search_snippets_only": settings.
-        "search_language": settings.
-        "max_filtered_results": settings.
+        "max_results": settings.search.max_results,
+        "region": settings.search.region,
+        "time_period": settings.search.time_period,
+        "safe_search": settings.search.safe_search,
+        "search_snippets_only": settings.search.snippets_only,
+        "search_language": settings.search.search_language,
+        "max_filtered_results": settings.search.max_filtered_results
     }
-
+    logger.info(f"Search config params: {params}")
     # Create and return search engine
     return factory_get_search(**params)
 
 def init_config_files():
     """Initialize config files if they don't exist"""
     import shutil
-
-
-
-    try:
-        defaults_dir = files('local_deep_research.defaults')
-    except ImportError:
-        # Fallback for older Python versions
-        from pkg_resources import resource_filename
-        defaults_dir = Path(resource_filename('local_deep_research', 'defaults'))
+    import os
+    import sys
+    import platform
 
-    #
-
-    if not settings_file.exists():
-        shutil.copy(defaults_dir / "main.toml", settings_file)
-        logger.info(f"Created settings.toml at {settings_file}")
+    # Ensure CONFIG_DIR exists with explicit creation
+    os.makedirs(CONFIG_DIR, exist_ok=True)
 
-    #
-
-
-
-
+    # Get default files path with more reliable approach for Windows
+    if platform.system() == "Windows":
+        # Use a more reliable method on Windows
+        from pkg_resources import resource_filename
+        try:
+            defaults_dir = Path(resource_filename('local_deep_research', 'defaults'))
+            logger.info(f"Using pkg_resources for Windows: {defaults_dir}")
+
+            # Create settings.toml if it doesn't exist (with explicit Windows paths)
+            settings_file = os.path.join(CONFIG_DIR, "settings.toml")
+            default_settings = os.path.join(defaults_dir, "main.toml")
+            if not os.path.exists(settings_file) and os.path.exists(default_settings):
+                shutil.copyfile(default_settings, settings_file)
+                logger.info(f"Created settings.toml at {settings_file}")
+
+            # Create llm_config.py if it doesn't exist
+            llm_config_file = os.path.join(CONFIG_DIR, "llm_config.py")
+            default_llm = os.path.join(defaults_dir, "llm_config.py")
+            if not os.path.exists(llm_config_file) and os.path.exists(default_llm):
+                shutil.copyfile(default_llm, llm_config_file)
+                logger.info(f"Created llm_config.py at {llm_config_file}")
+
+            # Create local_collections.toml if it doesn't exist
+            collections_file = os.path.join(CONFIG_DIR, "local_collections.toml")
+            default_collections = os.path.join(defaults_dir, "local_collections.toml")
+            if not os.path.exists(collections_file) and os.path.exists(default_collections):
+                shutil.copyfile(default_collections, collections_file)
+                logger.info(f"Created local_collections.toml at {collections_file}")
+
+            # Create search_engines.toml if it doesn't exist
+            search_engines_file = os.path.join(CONFIG_DIR, "search_engines.toml")
+            default_engines = os.path.join(defaults_dir, "search_engines.toml")
+            if not os.path.exists(search_engines_file) and os.path.exists(default_engines):
+                shutil.copyfile(default_engines, search_engines_file)
+                logger.info(f"Created search_engines.toml at {search_engines_file}")
+        except Exception as e:
+            logger.error(f"Error initializing Windows config files: {e}")
+    else:
+        """Initialize config files if they don't exist"""
+        import shutil
+        from importlib.resources import files
 
-
-
-
-
-
-
-
-    search_engines_file = CONFIG_DIR / "search_engines.toml"
-    if not search_engines_file.exists():
-        shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
-        logger.info(f"Created search_engines.toml at {search_engines_file}")
+        # Get default files path
+        try:
+            defaults_dir = files('local_deep_research.defaults')
+        except ImportError:
+            # Fallback for older Python versions
+            from pkg_resources import resource_filename
+            defaults_dir = Path(resource_filename('local_deep_research', 'defaults'))
 
-
-
-
-
-
-# OPENAI_API_KEY = "your-openai-key-here"
-# GOOGLE_API_KEY = "your-google-key-here"
-# SERP_API_KEY = "your-api-key-here"
-# GUARDIAN_API_KEY = "your-api-key-here"
-# GOOGLE_PSE_API_KEY = "your-google-api-key-here"
-# GOOGLE_PSE_ENGINE_ID = "your-programmable-search-engine-id-here"
-""")
+        # Create settings.toml if it doesn't exist
+        settings_file = CONFIG_DIR / "settings.toml"
+        if not settings_file.exists():
+            shutil.copy(defaults_dir / "main.toml", settings_file)
+            logger.info(f"Created settings.toml at {settings_file}")
 
+        # Create llm_config.py if it doesn't exist
+        llm_config_file = CONFIG_DIR / "llm_config.py"
+        if not llm_config_file.exists():
+            shutil.copy(defaults_dir / "llm_config.py", llm_config_file)
+            logger.info(f"Created llm_config.py at {llm_config_file}")
+
+        # Create local_collections.toml if it doesn't exist
+        collections_file = CONFIG_DIR / "local_collections.toml"
+        if not collections_file.exists():
+            shutil.copy(defaults_dir / "local_collections.toml", collections_file)
+            logger.info(f"Created local_collections.toml at {collections_file}")
+
+        # Create search_engines.toml if it doesn't exist
+        search_engines_file = CONFIG_DIR / "search_engines.toml"
+        if not search_engines_file.exists():
+            shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
+            logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+        secrets_file = CONFIG_DIR / ".secrets.toml"
+        if not secrets_file.exists():
+            with open(secrets_file, "w") as f:
+                f.write("""
+# ANTHROPIC_API_KEY = "your-api-key-here"
+# OPENAI_API_KEY = "your-openai-key-here"
+# GOOGLE_API_KEY = "your-google-key-here"
+# SERP_API_KEY = "your-api-key-here"
+# GUARDIAN_API_KEY = "your-api-key-here"
+# GOOGLE_PSE_API_KEY = "your-google-api-key-here"
+# GOOGLE_PSE_ENGINE_ID = "your-programmable-search-engine-id-here"
+""")
+
 # Initialize config files on import
 init_config_files()
 
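On the non-Windows branch, importing the package should leave five files under CONFIG_DIR. A minimal sketch to confirm (CONFIG_DIR and the on-import call to init_config_files() are both visible in the hunk above):

```python
# Sketch: list the config files created on first import.
from pathlib import Path
from local_deep_research.config import CONFIG_DIR  # importing triggers init_config_files()

for name in ("settings.toml", "llm_config.py", "local_collections.toml",
             "search_engines.toml", ".secrets.toml"):
    print(name, (Path(CONFIG_DIR) / name).exists())
```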
local_deep_research/defaults/search_engines.toml
CHANGED
@@ -186,9 +186,48 @@ strengths = ["searches all local collections", "personal documents", "offline ac
 weaknesses = ["may return too many results", "requires indexing"]
 requires_llm = true
 
+[semantic_scholar]
+module_path = "local_deep_research.web_search_engines.engines.search_engine_semantic_scholar"
+class_name = "SemanticScholarSearchEngine"
+requires_api_key = false
+api_key_env = "S2_API_KEY"
+reliability = 0.95
+strengths = [
+    "comprehensive scientific literature",
+    "extensive citation network",
+    "AI-generated summaries (TLDRs)",
+    "academic paper metadata",
+    "cross-disciplinary coverage",
+    "200M+ papers across all fields",
+    "usable without API key"
+]
+weaknesses = [
+    "rate limited (1000 requests/day) without API key",
+    "limited to academic content"
+]
+supports_full_search = true
+requires_llm = false
+
+[semantic_scholar.default_params]
+max_results = 20
+get_abstracts = true
+get_tldr = true
+get_references = false
+get_citations = false
+get_embeddings = false
+citation_limit = 10
+reference_limit = 10
+optimize_queries = true
+max_retries = 5
+retry_backoff_factor = 1.0
+
 # Default search engine to use if none specified
 DEFAULT_SEARCH_ENGINE = "wikipedia"
 
+
+
+
+
 # Additional search engines can be added below
 # Uncomment and modify these templates as needed
 
@@ -224,21 +263,3 @@ DEFAULT_SEARCH_ENGINE = "wikipedia"
 #
 # [guardian.default_params]
 # order_by = "relevance"
-
-# [medrxiv]
-# module_path = "local_deep_research.web_search_engines.engines.search_engine_medrxiv"
-# class_name = "MedRxivSearchEngine"
-# requires_api_key = false
-# reliability = 0.85
-# strengths = [
-#     "medical preprints", "health research", "covid-19 research",
-#     "clinical studies", "medical sciences", "preliminary results"
-# ]
-# weaknesses = ["not peer-reviewed", "preliminary findings", "limited to medical research"]
-# requires_llm = true
-#
-# [medrxiv.default_params]
-# sort_by = "relevance_score"
-# sort_order = "desc"
-# include_full_text = false
-# optimize_queries = true
local_deep_research/search_system.py
CHANGED
@@ -182,7 +182,7 @@ class AdvancedSearchSystem:
                                       int(question_progress_base + 2),
                                       {"phase": "search_complete", "result_count": len(search_results)})
 
-                logger.info("len search
+                logger.info(f"len search: {len(search_results)}")
 
                 if len(search_results) == 0:
                     continue
@@ -216,11 +216,12 @@ class AdvancedSearchSystem:
                     }
                 )
 
-                if settings.general.knowledge_accumulation != KnowledgeAccumulationApproach.NO_KNOWLEDGE:
+                if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
                     current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
 
-
-                if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.QUESTION:
+                logger.info(settings.general.knowledge_accumulation)
+                if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
+                    logger.info("Compressing knowledge")
                     self._update_progress(f"Compress Knowledge for: {question}",
                                           int(question_progress_base + 0),
                                           {"phase": "analysis"})
@@ -240,10 +241,14 @@ class AdvancedSearchSystem:
             self._update_progress(f"Compressing knowledge after iteration {iteration}",
                                   int((iteration / total_iterations) * 100 - 5),
                                   {"phase": "knowledge_compression"})
-
-
+            logger.info(str(iteration))
+            logger.info(settings.general.knowledge_accumulation)
+            logger.info(str(KnowledgeAccumulationApproach.ITERATION.value))
+            if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
                 try:
+                    logger.info("ITERATION - Compressing Knowledge")
                     current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
+                    logger.info("FINISHED ITERATION - Compressing Knowledge")
                 except Exception as e:
                     error_msg = f"Error compressing knowledge: {str(e)}"
                     print(f"COMPRESSION ERROR: {error_msg}")
@@ -278,6 +283,7 @@ class AdvancedSearchSystem:
         }
 
     def _save_findings(self, findings: List[Dict], current_knowledge: str, query: str):
+        logger.info("Saving findings ...")
         self._update_progress("Saving research findings...", None)
 
         formatted_findings = format_findings_to_text(
@@ -287,15 +293,15 @@
             :50
         ]
         safe_query = safe_query.replace(" ", "_").lower()
-
-        output_dir = "research_outputs"
+        import local_deep_research.config as conf
+        output_dir = f"{conf.get_config_dir()}/research_outputs"
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
-
+
         filename = os.path.join(output_dir, f"formatted_output_{safe_query}.txt")
 
         with open(filename, "w", encoding="utf-8") as text_file:
             text_file.write(formatted_findings)
-
+        logger.info("Saved findings")
         self._update_progress("Research findings saved", None, {"filename": filename})
         return formatted_findings
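_save_findings now writes under the per-user config directory instead of a research_outputs folder relative to the working directory. A quick way to see the new location, using only what the hunk above shows:

```python
# Sketch: where research outputs land after this change.
import local_deep_research.config as conf

output_dir = f"{conf.get_config_dir()}/research_outputs"
print(output_dir)  # e.g. ~/.config/local_deep_research/research_outputs on Linux
```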
local_deep_research/utilties/enums.py
CHANGED
@@ -2,8 +2,8 @@
 from enum import Enum, auto
 
 class KnowledgeAccumulationApproach(Enum):
-    QUESTION = auto()
-    ITERATION = auto()
-    NO_KNOWLEDGE = auto()
-    MAX_NR_OF_CHARACTERS = auto()
+    QUESTION = "QUESTION"
+    ITERATION = "ITERATION"
+    NO_KNOWLEDGE = "NO_KNOWLEDGE"
+    MAX_NR_OF_CHARACTERS = "MAX_NR_OF_CHARACTERS"
 
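The switch from auto() to string values matters because settings.general.knowledge_accumulation is read from settings.toml as a plain string, and a string never compares equal to an auto()-valued Enum member. That is exactly what the str(...)/.value comparisons in the search_system.py hunks above work around. A self-contained illustration:

```python
# Why string-valued enum members fix the comparisons in search_system.py.
from enum import Enum, auto

class OldApproach(Enum):
    ITERATION = auto()

class NewApproach(Enum):
    ITERATION = "ITERATION"

setting = "ITERATION"                          # value as loaded from settings.toml
print(setting == OldApproach.ITERATION)        # False: str vs Enum member
print(setting == NewApproach.ITERATION.value)  # True: str vs str
```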
local_deep_research/web/app.py
CHANGED
@@ -1009,14 +1009,15 @@ def run_research_process(research_id, query, mode):
 
                 # Check if formatted_findings exists in results
                 if 'formatted_findings' not in results:
-
+                    logger.info("WARNING: 'formatted_findings' not found in results, using fallback")
                     # Create fallback formatted findings from available data
                     raw_formatted_findings = "# Research Findings\n\n"
+                    raw_formatted_findings = raw_formatted_findings + str(results.get('current_knowledge'))
                     for i, finding in enumerate(results.get('findings', [])):
                         raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
                 else:
                     raw_formatted_findings = results['formatted_findings']
-
+                    logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
 
                 try:
                     # ADDED CODE: Convert debug output to clean markdown
local_deep_research/web_search_engines/engines/search_engine_arxiv.py
CHANGED
@@ -32,11 +32,9 @@ class ArXivSearchEngine(BaseSearchEngine):
             llm: Language model for relevance filtering
             max_filtered_results: Maximum number of results to keep after filtering
         """
-        # Initialize the BaseSearchEngine with
-        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
-        #max_results = min(max_results, 20) # required for arxiv
-        self.max_results = 20 # TODO this needs to be corrected.
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
+        self.max_results=max(self.max_results,25)
         self.sort_by = sort_by
         self.sort_order = sort_order
         self.include_full_text = include_full_text
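The same constructor change repeats in the Brave, DuckDuckGo, GitHub, and Google PSE engines below: each now forwards max_results to the base class instead of assigning it locally. The matching search_engine_base.py diff (+12 -4 in the file list) is not shown on this page, so the following is only a hedged sketch of the signature these call sites imply, not the released code:

```python
# Hypothetical sketch of the implied BaseSearchEngine change (signature assumed).
class BaseSearchEngine:
    def __init__(self, llm=None, max_filtered_results=None, max_results=10):
        self.llm = llm
        self.max_filtered_results = max_filtered_results
        # Subclasses may still adjust the stored value afterwards, e.g. the
        # arXiv engine's self.max_results = max(self.max_results, 25) above.
        self.max_results = max_results
```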
local_deep_research/web_search_engines/engines/search_engine_brave.py
CHANGED
@@ -19,7 +19,7 @@ class BraveSearchEngine(BaseSearchEngine):
                  api_key: Optional[str] = None,
                  language_code_mapping: Optional[Dict[str, str]] = None,
                  llm: Optional[BaseLLM] = None,
-                 include_full_content: bool =
+                 include_full_content: bool = True,
                  max_filtered_results: Optional[int] = None,
                  **kwargs):
         """
@@ -38,10 +38,8 @@ class BraveSearchEngine(BaseSearchEngine):
             max_filtered_results: Maximum number of results to keep after filtering
             **kwargs: Additional parameters (ignored but accepted for compatibility)
         """
-        # Initialize the BaseSearchEngine with
-        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
-        self.max_results = max_results
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
         self.include_full_content = include_full_content
 
         # Set up language code mapping
local_deep_research/web_search_engines/engines/search_engine_ddg.py
CHANGED
@@ -14,7 +14,8 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
                  safe_search: bool = True,
                  llm: Optional[BaseLLM] = None,
                  language: str = "English",
-                 include_full_content: bool = False
+                 include_full_content: bool = False,
+                 max_filtered_results=5):
         """
         Initialize the DuckDuckGo search engine.
 
@@ -26,8 +27,8 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
             language: Language for content processing
             include_full_content: Whether to include full webpage content in results
         """
-
-
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
         self.region = region
         self.safe_search = safe_search
         self.language = language
local_deep_research/web_search_engines/engines/search_engine_github.py
CHANGED
@@ -39,10 +39,8 @@ class GitHubSearchEngine(BaseSearchEngine):
             llm: Language model for relevance filtering
             max_filtered_results: Maximum number of results to keep after filtering
         """
-        # Initialize the BaseSearchEngine with
-        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
-        self.max_results = max_results
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
         self.api_key = api_key or os.getenv("GITHUB_API_KEY")
         self.search_type = search_type
         self.include_readme = include_readme
local_deep_research/web_search_engines/engines/search_engine_google_pse.py
CHANGED
@@ -47,10 +47,8 @@ class GooglePSESearchEngine(BaseSearchEngine):
             retry_delay: Base delay in seconds between retry attempts
             **kwargs: Additional parameters (ignored but accepted for compatibility)
         """
-        # Initialize the BaseSearchEngine with
-        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
-        self.max_results = max_results
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
         self.include_full_content = include_full_content
 
         # Retry configuration
|