PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/advanced_search_system/strategies/standard_strategy.py CHANGED Viewed

@@ -19,7 +19,11 @@ class StandardSearchStrategy(BaseSearchStrategy):
     """Standard iterative search strategy that generates follow-up questions."""
     def __init__(
-        self, search=None, model=None, citation_handler=None, all_links_of_system=None
+        self,
+        search=None,
+        model=None,
+        citation_handler=None,
+        all_links_of_system=None,
     ):
         """Initialize with optional dependency injection for testing."""
         super().__init__(all_links_of_system=all_links_of_system)
@@ -73,9 +77,7 @@ class StandardSearchStrategy(BaseSearchStrategy):
         # Check if search engine is available
         if self.search is None:
-            error_msg = (
-                "Error: No search engine available. Please check your configuration."
-            )
+            error_msg = "Error: No search engine available. Please check your configuration."
             self._update_progress(
                 error_msg,
                 100,
@@ -127,7 +129,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
             )
             for q_idx, question in enumerate(questions):
                 question_progress_base = iteration_progress_base + (
-                    ((q_idx + 1) / question_count) * (100 / total_iterations) * 0.5
+                    ((q_idx + 1) / question_count)
+                    * (100 / total_iterations)
+                    * 0.5
                 )
                 self._update_progress(
@@ -156,7 +160,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
                 except Exception as e:
                     error_msg = f"Error during search: {str(e)}"
                     logger.exception(f"SEARCH ERROR: {error_msg}")
-                    self._handle_search_error(error_msg, question_progress_base + 10)
+                    self._handle_search_error(
+                        error_msg, question_progress_base + 10
+                    )
                     search_results = []
                 if search_results is None:
@@ -171,7 +177,10 @@ Iteration: {iteration + 1} of {total_iterations}"""
                 self._update_progress(
                     f"Found {len(search_results)} results for question: {question}",
                     int(question_progress_base + 2),
-                    {"phase": "search_complete", "result_count": len(search_results)},
+                    {
+                        "phase": "search_complete",
+                        "result_count": len(search_results),
+                    },
                 )
                 logger.info(f"len search: {len(search_results)}")
@@ -240,7 +249,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
                 except Exception as e:
                     error_msg = f"Error analyzing results: {str(e)}"
                     logger.exception(f"ANALYSIS ERROR: {error_msg}")
-                    self._handle_search_error(error_msg, question_progress_base + 10)
+                    self._handle_search_error(
+                        error_msg, question_progress_base + 10
+                    )
             iteration += 1
@@ -250,11 +261,16 @@ Iteration: {iteration + 1} of {total_iterations}"""
                 {"phase": "knowledge_compression"},
             )
-            if knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
+            if (
+                knowledge_accumulation
+                == KnowledgeAccumulationApproach.ITERATION.value
+            ):
                 try:
                     logger.info("ITERATION - Compressing Knowledge")
-                    current_knowledge = self.knowledge_generator.compress_knowledge(
-                        current_knowledge, query, section_links
+                    current_knowledge = (
+                        self.knowledge_generator.compress_knowledge(
+                            current_knowledge, query, section_links
+                        )
                     )
                     logger.info("FINISHED ITERATION - Compressing Knowledge")
                 except Exception as e:
@@ -271,7 +287,9 @@ Iteration: {iteration + 1} of {total_iterations}"""
             )
             # Extract content from findings for synthesis
-            finding_contents = [f["content"] for f in findings if "content" in f]
+            finding_contents = [
+                f["content"] for f in findings if "content" in f
+            ]
             # First synthesize findings to get coherent content
             synthesized_content = self.findings_repository.synthesize_findings(
@@ -288,8 +306,10 @@ Iteration: {iteration + 1} of {total_iterations}"""
             )
             # Now format the findings with search questions and sources
-            formatted_findings = self.findings_repository.format_findings_to_text(
-                findings, synthesized_content
+            formatted_findings = (
+                self.findings_repository.format_findings_to_text(
+                    findings, synthesized_content
+                )
             )
             # Add the synthesized content to the repository

local_deep_research/advanced_search_system/tools/base_tool.py CHANGED Viewed

@@ -75,7 +75,10 @@ class BaseTool(ABC):
                 logger.error(f"Invalid type for parameter {param_name}")
                 return False
-            if "enum" in param_schema and param_value not in param_schema["enum"]:
+            if (
+                "enum" in param_schema
+                and param_value not in param_schema["enum"]
+            ):
                 logger.error(f"Invalid value for parameter {param_name}")
                 return False
@@ -97,4 +100,6 @@ class BaseTool(ABC):
         Args:
             result: The result of the tool execution
         """
-        logger.info(f"Tool {self.name} execution completed with result: {result}")
+        logger.info(
+            f"Tool {self.name} execution completed with result: {result}"
+        )

local_deep_research/api/benchmark_functions.py CHANGED Viewed

@@ -235,7 +235,9 @@ def compare_configurations(
     import time
     timestamp = time.strftime("%Y%m%d_%H%M%S")
-    report_file = os.path.join(output_dir, f"comparison_{dataset_type}_{timestamp}.md")
+    report_file = os.path.join(
+        output_dir, f"comparison_{dataset_type}_{timestamp}.md"
+    )
     with open(report_file, "w") as f:
         f.write(f"# Configuration Comparison - {dataset_type.capitalize()}\n\n")
@@ -247,7 +249,9 @@ def compare_configurations(
         for result in results:
             accuracy = result.get("metrics", {}).get("accuracy", 0)
-            avg_time = result.get("metrics", {}).get("average_processing_time", 0)
+            avg_time = result.get("metrics", {}).get(
+                "average_processing_time", 0
+            )
             examples = result.get("total_examples", 0)
             f.write(

local_deep_research/api/research_functions.py CHANGED Viewed

@@ -21,9 +21,9 @@ def _init_search_system(
     openai_endpoint_url: str | None = None,
     progress_callback: Callable[[str, int, dict], None] | None = None,
     search_tool: Optional[str] = None,
+    search_strategy: str = "source_based",
     iterations: int = 1,
     questions_per_iteration: int = 1,
-    search_strategy: str = "source_based",
 ) -> AdvancedSearchSystem:
     """
     Initializes the advanced search system with specified parameters. This function sets up
@@ -39,6 +39,7 @@ def _init_search_system(
             setting)
         progress_callback: Optional callback function to receive progress updates
         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
+        search_strategy: Search strategy to use (modular, source_based, etc.). If None, uses default
         iterations: Number of research cycles to perform
         questions_per_iteration: Number of questions to generate per cycle
         search_strategy: The name of the search strategy to use.
@@ -220,7 +221,9 @@ def analyze_documents(
     # Force reindex if requested
     if force_reindex and hasattr(search, "embedding_manager"):
         for folder_path in search.folder_paths:
-            search.embedding_manager.index_folder(folder_path, force_reindex=True)
+            search.embedding_manager.index_folder(
+                folder_path, force_reindex=True
+            )
     # Perform the search
     results = search.run(query)
@@ -235,7 +238,8 @@ def analyze_documents(
     docs_text = "\n\n".join(
         [
-            f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
+            f"Document {i + 1}:"
+            f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
             for i, doc in enumerate(results[:5])
         ]
     )  # Limit to first 5 docs and 1000 chars each
@@ -269,7 +273,9 @@ def analyze_documents(
             f.write(f"## Documents Found: {len(results)}\n\n")
             for i, doc in enumerate(results):
-                f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
+                f.write(
+                    f"### Document {i + 1}: {doc.get('title', 'Untitled')}\n\n"
+                )
                 f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
                 f.write(
                     f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"

local_deep_research/benchmarks/__init__.py CHANGED Viewed

@@ -10,21 +10,25 @@ __version__ = "0.2.0"
 # Core benchmark functionality
 from .datasets import get_available_datasets, load_dataset
 from .metrics import (
+    calculate_combined_score,
     calculate_metrics,
     calculate_quality_metrics,
-    calculate_speed_metrics,
     calculate_resource_metrics,
-    calculate_combined_score,
+    calculate_speed_metrics,
     generate_report,
 )
-from .runners import run_benchmark, run_browsecomp_benchmark, run_simpleqa_benchmark
 # Optimization functionality
 from .optimization import (
-    optimize_parameters,
+    optimize_for_efficiency,
     optimize_for_quality,
     optimize_for_speed,
-    optimize_for_efficiency,
+    optimize_parameters,
+)
+from .runners import (
+    run_benchmark,
+    run_browsecomp_benchmark,
+    run_simpleqa_benchmark,
 )
 __all__ = [
@@ -36,13 +40,11 @@ __all__ = [
     "get_available_datasets",
     "calculate_metrics",
     "generate_report",
     # Metrics for optimization
     "calculate_quality_metrics",
     "calculate_speed_metrics",
     "calculate_resource_metrics",
     "calculate_combined_score",
     # Optimization functionality
     "optimize_parameters",
     "optimize_for_quality",

local_deep_research/benchmarks/benchmark_functions.py CHANGED Viewed

@@ -300,7 +300,9 @@ def compare_configurations(
     import time
     timestamp = time.strftime("%Y%m%d_%H%M%S")
-    report_file = os.path.join(output_dir, f"comparison_{dataset_type}_{timestamp}.md")
+    report_file = os.path.join(
+        output_dir, f"comparison_{dataset_type}_{timestamp}.md"
+    )
     with open(report_file, "w") as f:
         f.write(f"# Configuration Comparison - {dataset_type.capitalize()}\n\n")
@@ -312,7 +314,9 @@ def compare_configurations(
         for result in results:
             accuracy = result.get("metrics", {}).get("accuracy", 0)
-            avg_time = result.get("metrics", {}).get("average_processing_time", 0)
+            avg_time = result.get("metrics", {}).get(
+                "average_processing_time", 0
+            )
             examples = result.get("total_examples", 0)
             f.write(

local_deep_research/benchmarks/cli/benchmark_commands.py CHANGED Viewed

@@ -38,7 +38,10 @@ def setup_benchmark_parser(subparsers):
         help="Number of search iterations (default: 3)",
     )
     benchmark_parent.add_argument(
-        "--questions", type=int, default=3, help="Questions per iteration (default: 3)"
+        "--questions",
+        type=int,
+        default=3,
+        help="Questions per iteration (default: 3)",
     )
     benchmark_parent.add_argument(
         "--search-tool",
@@ -75,15 +78,21 @@ def setup_benchmark_parser(subparsers):
         "--search-model", type=str, help="Model to use for the search system"
     )
     benchmark_parent.add_argument(
-        "--search-provider", type=str, help="Provider to use for the search system"
+        "--search-provider",
+        type=str,
+        help="Provider to use for the search system",
     )
     benchmark_parent.add_argument(
-        "--endpoint-url", type=str, help="Endpoint URL for OpenRouter or other API services"
+        "--endpoint-url",
+        type=str,
+        help="Endpoint URL for OpenRouter or other API services",
     )
     benchmark_parent.add_argument(
-        "--search-strategy", type=str, default="source_based",
+        "--search-strategy",
+        type=str,
+        default="source_based",
         choices=["source_based", "standard", "rapid", "parallel", "iterdrag"],
-        help="Search strategy to use (default: source_based)"
+        help="Search strategy to use (default: source_based)",
     )
     # SimpleQA benchmark command
@@ -94,12 +103,16 @@ def setup_benchmark_parser(subparsers):
     # BrowseComp benchmark command
     browsecomp_parser = subparsers.add_parser(
-        "browsecomp", parents=[benchmark_parent], help="Run BrowseComp benchmark"
+        "browsecomp",
+        parents=[benchmark_parent],
+        help="Run BrowseComp benchmark",
     )
     browsecomp_parser.set_defaults(func=run_browsecomp_cli)
     # List available benchmarks command
-    list_parser = subparsers.add_parser("list", help="List available benchmarks")
+    list_parser = subparsers.add_parser(
+        "list", help="List available benchmarks"
+    )
     list_parser.set_defaults(func=list_benchmarks_cli)
     # Compare configurations command
@@ -304,11 +317,14 @@ def main():
     Main entry point for benchmark CLI.
     """
     parser = argparse.ArgumentParser(
-        description="Local Deep Research Benchmarking Tool", prog="ldr-benchmark"
+        description="Local Deep Research Benchmarking Tool",
+        prog="ldr-benchmark",
     )
     # Set up logging
-    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    parser.add_argument(
+        "--verbose", action="store_true", help="Enable verbose logging"
+    )
     # Create subparsers
     subparsers = parser.add_subparsers(
@@ -324,7 +340,8 @@ def main():
     # Set up logging
     log_level = logging.DEBUG if args.verbose else logging.INFO
     logging.basicConfig(
-        level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        level=log_level,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
     # Run command

local_deep_research/benchmarks/cli.py CHANGED Viewed

@@ -10,7 +10,6 @@ import logging
 import os
 import sys
 from datetime import datetime
-from typing import Any, Dict, List, Optional
 from .comparison import compare_configurations
 from .efficiency import ResourceMonitor, SpeedProfiler
@@ -18,7 +17,8 @@ from .optimization import optimize_parameters
 # Configure logging
 logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 )
 logger = logging.getLogger(__name__)
@@ -44,7 +44,9 @@ Examples:
     subparsers = parser.add_subparsers(dest="command", help="Command to run")
     # Optimizer parser
-    optimize_parser = subparsers.add_parser("optimize", help="Optimize parameters")
+    optimize_parser = subparsers.add_parser(
+        "optimize", help="Optimize parameters"
+    )
     optimize_parser.add_argument("query", help="Research query to optimize for")
     optimize_parser.add_argument(
         "--output-dir",
@@ -67,21 +69,32 @@ Examples:
         "--timeout", type=int, help="Maximum seconds to run optimization"
     )
     optimize_parser.add_argument(
-        "--n-jobs", type=int, default=1, help="Number of parallel jobs for optimization"
+        "--n-jobs",
+        type=int,
+        default=1,
+        help="Number of parallel jobs for optimization",
+    )
+    optimize_parser.add_argument(
+        "--study-name", help="Name of the Optuna study"
     )
-    optimize_parser.add_argument("--study-name", help="Name of the Optuna study")
     optimize_parser.add_argument(
         "--speed-focus", action="store_true", help="Focus optimization on speed"
     )
     optimize_parser.add_argument(
-        "--quality-focus", action="store_true", help="Focus optimization on quality"
+        "--quality-focus",
+        action="store_true",
+        help="Focus optimization on quality",
     )
     # Comparison parser
-    compare_parser = subparsers.add_parser("compare", help="Compare configurations")
+    compare_parser = subparsers.add_parser(
+        "compare", help="Compare configurations"
+    )
     compare_parser.add_argument("query", help="Research query to compare with")
     compare_parser.add_argument(
-        "--configs", required=True, help="JSON file with configurations to compare"
+        "--configs",
+        required=True,
+        help="JSON file with configurations to compare",
     )
     compare_parser.add_argument(
         "--output-dir",
@@ -99,7 +112,9 @@ Examples:
     )
     # Profiling parser
-    profile_parser = subparsers.add_parser("profile", help="Profile resource usage")
+    profile_parser = subparsers.add_parser(
+        "profile", help="Profile resource usage"
+    )
     profile_parser.add_argument("query", help="Research query to profile")
     profile_parser.add_argument(
         "--output-dir",
@@ -203,7 +218,9 @@ def run_comparison(args):
     for i, result in enumerate(
         [r for r in results["results"] if r.get("success", False)]
     ):
-        print(f"{i+1}. {result['name']}: {result.get('overall_score', 0):.4f}")
+        print(
+            f"{i + 1}. {result['name']}: {result.get('overall_score', 0):.4f}"
+        )
     print(f"\nResults saved to: {results.get('report_path', args.output_dir)}")
@@ -275,12 +292,18 @@ def run_profiling(args):
             if name != "total_duration" and name.endswith("_duration"):
                 component = name.replace("_duration", "")
                 duration = value
-                percent = (duration / total_duration * 100) if total_duration > 0 else 0
+                percent = (
+                    (duration / total_duration * 100)
+                    if total_duration > 0
+                    else 0
+                )
                 print(f"- {component}: {duration:.2f}s ({percent:.1f}%)")
         # Resource summary
         print("\nResource Usage Summary:")
-        print(f"Peak memory: {resource_results.get('process_memory_max_mb', 0):.1f} MB")
+        print(
+            f"Peak memory: {resource_results.get('process_memory_max_mb', 0):.1f} MB"
+        )
         print(
             f"Average memory: {resource_results.get('process_memory_avg_mb', 0):.1f} MB"
         )
@@ -308,7 +331,9 @@ def run_profiling(args):
                     "timing_results": timing_results,
                     "resource_results": resource_results,
                     "findings_count": len(results.get("findings", [])),
-                    "knowledge_length": len(results.get("current_knowledge", "")),
+                    "knowledge_length": len(
+                        results.get("current_knowledge", "")
+                    ),
                     "timestamp": timestamp,
                 },
                 f,

local_deep_research/benchmarks/comparison/__init__.py CHANGED Viewed

@@ -5,8 +5,10 @@ This module provides tools for comparing the performance of different
 parameters, models, and search engines.
 """
-from local_deep_research.benchmarks.comparison.evaluator import compare_configurations
+from local_deep_research.benchmarks.comparison.evaluator import (
+    compare_configurations,
+)
 __all__ = [
-    'compare_configurations',
+    "compare_configurations",
 ]

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl