PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/benchmarks/metrics/visualization.py CHANGED Viewed

@@ -6,8 +6,7 @@ of benchmark and optimization results.
 """
 import logging
-import os
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional
 import numpy as np
@@ -21,7 +20,9 @@ try:
     MATPLOTLIB_AVAILABLE = True
 except ImportError:
     MATPLOTLIB_AVAILABLE = False
-    logger.warning("Matplotlib not available. Visualization functions will be limited.")
+    logger.warning(
+        "Matplotlib not available. Visualization functions will be limited."
+    )
 def plot_optimization_history(
@@ -98,13 +99,13 @@ def plot_parameter_importance(
     fig, ax = plt.subplots(figsize=(10, 6))
     y_pos = range(len(sorted_names))
     # Create horizontal bar chart
     ax.barh(y_pos, sorted_values, align="center")
     ax.set_yticks(y_pos)
     ax.set_yticklabels(sorted_names)
     ax.invert_yaxis()  # Labels read top-to-bottom
     # Add labels and title
     ax.set_xlabel("Importance")
     ax.set_title(title)
@@ -144,31 +145,43 @@ def plot_quality_vs_speed(
         return None
     fig, ax = plt.subplots(figsize=(10, 8))
     # Create scatter plot
     scatter = ax.scatter(
-        speed_scores,
-        quality_scores,
-        c=np.arange(len(quality_scores)),
-        cmap="viridis",
+        speed_scores,
+        quality_scores,
+        c=np.arange(len(quality_scores)),
+        cmap="viridis",
         alpha=0.7,
-        s=100
+        s=100,
     )
     # Add colorbar to show trial number
     cbar = plt.colorbar(scatter)
     cbar.set_label("Trial Number")
     # Add labels and title
     ax.set_xlabel("Speed Score (higher = faster)")
     ax.set_ylabel("Quality Score (higher = better)")
     ax.set_title(title)
     ax.grid(True, linestyle="--", alpha=0.5)
     # Add reference lines
-    ax.axhline(y=0.7, color="r", linestyle="--", alpha=0.3, label="Good Quality Threshold")
-    ax.axvline(x=0.7, color="g", linestyle="--", alpha=0.3, label="Good Speed Threshold")
+    ax.axhline(
+        y=0.7,
+        color="r",
+        linestyle="--",
+        alpha=0.3,
+        label="Good Quality Threshold",
+    )
+    ax.axvline(
+        x=0.7,
+        color="g",
+        linestyle="--",
+        alpha=0.3,
+        label="Good Speed Threshold",
+    )
     # Mark Pareto frontier
     if len(quality_scores) > 2:
         try:
@@ -178,13 +191,19 @@ def plot_quality_vs_speed(
                 is_pareto = True
                 for j in range(len(quality_scores)):
                     if i != j:
-                        if quality_scores[j] >= quality_scores[i] and speed_scores[j] >= speed_scores[i]:
-                            if quality_scores[j] > quality_scores[i] or speed_scores[j] > speed_scores[i]:
+                        if (
+                            quality_scores[j] >= quality_scores[i]
+                            and speed_scores[j] >= speed_scores[i]
+                        ):
+                            if (
+                                quality_scores[j] > quality_scores[i]
+                                or speed_scores[j] > speed_scores[i]
+                            ):
                                 is_pareto = False
                                 break
                 if is_pareto:
                     pareto_points.append((speed_scores[i], quality_scores[i]))
             # Sort pareto points by speed score
             pareto_points.sort()
             if pareto_points:
@@ -193,13 +212,13 @@ def plot_quality_vs_speed(
                 ax.scatter(pareto_x, pareto_y, c="red", s=50, alpha=0.8)
         except Exception as e:
             logger.warning(f"Error calculating Pareto frontier: {e}")
     ax.legend()
     # Save or return
     if output_file:
         fig.tight_layout()
         fig.savefig(output_file, dpi=300, bbox_inches="tight")
         logger.info(f"Saved quality vs. speed plot to {output_file}")
-    return fig
+    return fig

local_deep_research/benchmarks/metrics.py CHANGED Viewed

@@ -8,4 +8,4 @@ New code should use the metrics package directly.
 from .metrics.calculation import calculate_metrics
 from .metrics.reporting import generate_report
-__all__ = ["calculate_metrics", "generate_report"]
+__all__ = ["calculate_metrics", "generate_report"]

local_deep_research/benchmarks/optimization/__init__.py CHANGED Viewed

@@ -17,7 +17,9 @@ from local_deep_research.benchmarks.optimization.metrics import (
     calculate_resource_metrics,
     calculate_speed_metrics,
 )
-from local_deep_research.benchmarks.optimization.optuna_optimizer import OptunaOptimizer
+from local_deep_research.benchmarks.optimization.optuna_optimizer import (
+    OptunaOptimizer,
+)
 __all__ = [
     "OptunaOptimizer",

local_deep_research/benchmarks/optimization/api.py CHANGED Viewed

@@ -257,7 +257,13 @@ def get_default_param_space() -> Dict[str, Any]:
         },
         "search_strategy": {
             "type": "categorical",
-            "choices": ["iterdrag", "standard", "rapid", "parallel", "source_based"],
+            "choices": [
+                "iterdrag",
+                "standard",
+                "rapid",
+                "parallel",
+                "source_based",
+            ],
         },
         "max_results": {
             "type": "int",

local_deep_research/benchmarks/optimization/optuna_optimizer.py CHANGED Viewed

@@ -24,8 +24,12 @@ from optuna.visualization import (
     plot_slice,
 )
-from local_deep_research.benchmarks.efficiency.speed_profiler import SpeedProfiler
-from local_deep_research.benchmarks.evaluators import CompositeBenchmarkEvaluator
+from local_deep_research.benchmarks.efficiency.speed_profiler import (
+    SpeedProfiler,
+)
+from local_deep_research.benchmarks.evaluators import (
+    CompositeBenchmarkEvaluator,
+)
 # Import benchmark evaluator components
@@ -108,7 +112,9 @@ class OptunaOptimizer:
         # Initialize benchmark evaluator with weights
         self.benchmark_weights = benchmark_weights or {"simpleqa": 1.0}
-        self.benchmark_evaluator = CompositeBenchmarkEvaluator(self.benchmark_weights)
+        self.benchmark_evaluator = CompositeBenchmarkEvaluator(
+            self.benchmark_weights
+        )
         # Normalize weights to sum to 1.0
         total_weight = sum(self.metric_weights.values())
@@ -200,7 +206,9 @@ class OptunaOptimizer:
             # Create visualizations
             self._create_visualizations()
-            logger.info(f"Optimization complete. Best parameters: {self.best_params}")
+            logger.info(
+                f"Optimization complete. Best parameters: {self.best_params}"
+            )
             logger.info(f"Best value: {self.study.best_value}")
             # Report completion
@@ -281,7 +289,9 @@ class OptunaOptimizer:
             },
         }
-    def _objective(self, trial: optuna.Trial, param_space: Dict[str, Any]) -> float:
+    def _objective(
+        self, trial: optuna.Trial, param_space: Dict[str, Any]
+    ) -> float:
         """
         Objective function for Optuna optimization.
@@ -496,7 +506,9 @@ class OptunaOptimizer:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         # Save trial history
-        history_file = os.path.join(self.output_dir, f"{self.study_name}_history.json")
+        history_file = os.path.join(
+            self.output_dir, f"{self.study_name}_history.json"
+        )
         with open(history_file, "w") as f:
             # Convert numpy values to native Python types for JSON serialization
             clean_history = []
@@ -517,7 +529,11 @@ class OptunaOptimizer:
             json.dump(clean_history, f, indent=2)
         # Save current best parameters
-        if self.study and hasattr(self.study, "best_params") and self.study.best_params:
+        if (
+            self.study
+            and hasattr(self.study, "best_params")
+            and self.study.best_params
+        ):
             best_params_file = os.path.join(
                 self.output_dir, f"{self.study_name}_best_params.json"
             )
@@ -541,7 +557,9 @@ class OptunaOptimizer:
         # Save the Optuna study
         if self.study:
-            study_file = os.path.join(self.output_dir, f"{self.study_name}_study.pkl")
+            study_file = os.path.join(
+                self.output_dir, f"{self.study_name}_study.pkl"
+            )
             joblib.dump(self.study, study_file)
         logger.info(f"Results saved to {self.output_dir}")
@@ -549,7 +567,9 @@ class OptunaOptimizer:
     def _create_visualizations(self):
         """Create and save comprehensive visualizations of the optimization results."""
         if not PLOTTING_AVAILABLE:
-            logger.warning("Matplotlib not available, skipping visualization creation")
+            logger.warning(
+                "Matplotlib not available, skipping visualization creation"
+            )
             return
         if not self.study or len(self.study.trials) < 2:
@@ -570,7 +590,11 @@ class OptunaOptimizer:
     def _create_quick_visualizations(self):
         """Create a smaller set of visualizations for intermediate progress."""
-        if not PLOTTING_AVAILABLE or not self.study or len(self.study.trials) < 2:
+        if (
+            not PLOTTING_AVAILABLE
+            or not self.study
+            or len(self.study.trials) < 2
+        ):
             return
         # Create directory for visualizations
@@ -582,7 +606,8 @@ class OptunaOptimizer:
             fig = plot_optimization_history(self.study)
             fig.write_image(
                 os.path.join(
-                    viz_dir, f"{self.study_name}_optimization_history_current.png"
+                    viz_dir,
+                    f"{self.study_name}_optimization_history_current.png",
                 )
             )
         except Exception as e:
@@ -602,7 +627,8 @@ class OptunaOptimizer:
             fig = plot_optimization_history(self.study)
             fig.write_image(
                 os.path.join(
-                    viz_dir, f"{self.study_name}_optimization_history_{timestamp}.png"
+                    viz_dir,
+                    f"{self.study_name}_optimization_history_{timestamp}.png",
                 )
             )
         except Exception as e:
@@ -613,7 +639,8 @@ class OptunaOptimizer:
             fig = plot_param_importances(self.study)
             fig.write_image(
                 os.path.join(
-                    viz_dir, f"{self.study_name}_param_importances_{timestamp}.png"
+                    viz_dir,
+                    f"{self.study_name}_param_importances_{timestamp}.png",
                 )
             )
         except Exception as e:
@@ -625,7 +652,8 @@ class OptunaOptimizer:
                 fig = plot_slice(self.study, [param_name])
                 fig.write_image(
                     os.path.join(
-                        viz_dir, f"{self.study_name}_slice_{param_name}_{timestamp}.png"
+                        viz_dir,
+                        f"{self.study_name}_slice_{param_name}_{timestamp}.png",
                     )
                 )
         except Exception as e:
@@ -684,7 +712,9 @@ class OptunaOptimizer:
         # Extract data from successful trials
         successful_trials = [
-            t for t in self.trials_history if t.get("result", {}).get("success", False)
+            t
+            for t in self.trials_history
+            if t.get("result", {}).get("success", False)
         ]
         if not successful_trials:
@@ -715,7 +745,9 @@ class OptunaOptimizer:
                 questions_values.append(questions)
             # Create scatter plot with size based on iterations*questions
-            sizes = [i * q * 5 for i, q in zip(iterations_values, questions_values)]
+            sizes = [
+                i * q * 5 for i, q in zip(iterations_values, questions_values)
+            ]
             scatter = plt.scatter(
                 quality_scores,
                 speed_scores,
@@ -727,12 +759,15 @@ class OptunaOptimizer:
             # Highlight best trial
             best_trial = max(
-                successful_trials, key=lambda x: x.get("result", {}).get("score", 0)
+                successful_trials,
+                key=lambda x: x.get("result", {}).get("score", 0),
             )
             best_quality = best_trial["result"].get("quality_score", 0)
             best_speed = best_trial["result"].get("speed_score", 0)
             best_iter = best_trial["params"].get("iterations", 0)
-            best_questions = best_trial["params"].get("questions_per_iteration", 0)
+            best_questions = best_trial["params"].get(
+                "questions_per_iteration", 0
+            )
             plt.scatter(
                 [best_quality],
@@ -745,7 +780,9 @@ class OptunaOptimizer:
             )
             # Add annotations for key points
-            for i, (q, s, l) in enumerate(zip(quality_scores, speed_scores, labels)):
+            for i, (q, s, label) in enumerate(
+                zip(quality_scores, speed_scores, labels)
+            ):
                 if i % max(1, len(quality_scores) // 5) == 0:  # Label ~5 points
                     plt.annotate(
                         f"{iterations_values[i]}×{questions_values[i]}",
@@ -762,7 +799,9 @@ class OptunaOptimizer:
             weights_str = ", ".join(
                 [f"{k}:{v:.1f}" for k, v in self.benchmark_weights.items()]
             )
-            plt.title(f"Quality vs. Speed Trade-off\nBenchmark Weights: {weights_str}")
+            plt.title(
+                f"Quality vs. Speed Trade-off\nBenchmark Weights: {weights_str}"
+            )
             plt.xlabel("Quality Score (Benchmark Accuracy)")
             plt.ylabel("Speed Score")
             plt.grid(True, linestyle="--", alpha=0.7)
@@ -786,7 +825,8 @@ class OptunaOptimizer:
             plt.tight_layout()
             plt.savefig(
                 os.path.join(
-                    viz_dir, f"{self.study_name}_quality_vs_speed_{timestamp}.png"
+                    viz_dir,
+                    f"{self.study_name}_quality_vs_speed_{timestamp}.png",
                 )
             )
             plt.close()
@@ -895,7 +935,9 @@ class OptunaOptimizer:
                 duration = trial.get("duration", 0)
                 score = trial.get("score", 0)
                 iterations = trial.get("params", {}).get("iterations", 1)
-                questions = trial.get("params", {}).get("questions_per_iteration", 1)
+                questions = trial.get("params", {}).get(
+                    "questions_per_iteration", 1
+                )
                 trial_durations.append(duration)
                 trial_scores.append(score)
@@ -903,13 +945,17 @@ class OptunaOptimizer:
                 trial_questions.append(questions)
             # Total questions per trial
-            total_questions = [i * q for i, q in zip(trial_iterations, trial_questions)]
+            total_questions = [
+                i * q for i, q in zip(trial_iterations, trial_questions)
+            ]
             # Create scatter plot with size based on total questions
             plt.scatter(
                 trial_durations,
                 trial_scores,
-                s=[q * 5 for q in total_questions],  # Size based on total questions
+                s=[
+                    q * 5 for q in total_questions
+                ],  # Size based on total questions
                 alpha=0.7,
                 c=range(len(trial_durations)),
                 cmap="viridis",
@@ -923,7 +969,9 @@ class OptunaOptimizer:
             # Add trial number annotations for selected points
             for i, (d, s) in enumerate(zip(trial_durations, trial_scores)):
-                if i % max(1, len(trial_durations) // 5) == 0:  # Annotate ~5 points
+                if (
+                    i % max(1, len(trial_durations) // 5) == 0
+                ):  # Annotate ~5 points
                     plt.annotate(
                         f"{trial_iterations[i]}×{trial_questions[i]}",
                         (d, s),
@@ -935,7 +983,8 @@ class OptunaOptimizer:
             plt.tight_layout()
             plt.savefig(
                 os.path.join(
-                    viz_dir, f"{self.study_name}_duration_vs_score_{timestamp}.png"
+                    viz_dir,
+                    f"{self.study_name}_duration_vs_score_{timestamp}.png",
                 )
             )
             plt.close()

local_deep_research/benchmarks/runners.py CHANGED Viewed

@@ -92,10 +92,14 @@ def run_benchmark(
         # Load the examples
         dataset = dataset_instance.load()
-        logger.info(f"Loaded {len(dataset)} examples using dataset class {type(dataset_instance).__name__}")
+        logger.info(
+            f"Loaded {len(dataset)} examples using dataset class {type(dataset_instance).__name__}"
+        )
     except Exception as e:
         # Fallback to legacy function if there's any issue
-        logger.warning(f"Error using dataset class: {e}. Falling back to legacy function.")
+        logger.warning(
+            f"Error using dataset class: {e}. Falling back to legacy function."
+        )
         dataset = load_dataset(
             dataset_type=dataset_type,
             dataset_path=dataset_path,
@@ -105,11 +109,15 @@ def run_benchmark(
     # Set up output files
     timestamp = time.strftime("%Y%m%d_%H%M%S")
-    results_file = os.path.join(output_dir, f"{dataset_type}_{timestamp}_results.jsonl")
+    results_file = os.path.join(
+        output_dir, f"{dataset_type}_{timestamp}_results.jsonl"
+    )
     evaluation_file = os.path.join(
         output_dir, f"{dataset_type}_{timestamp}_evaluation.jsonl"
     )
-    report_file = os.path.join(output_dir, f"{dataset_type}_{timestamp}_report.md")
+    report_file = os.path.join(
+        output_dir, f"{dataset_type}_{timestamp}_report.md"
+    )
     # Make sure output files don't exist
     for file in [results_file, evaluation_file, report_file]:
@@ -135,11 +143,16 @@ def run_benchmark(
     for i, example in enumerate(dataset):
         # Extract question and answer in a way that uses the dataset class when available
-        if 'dataset_instance' in locals() and isinstance(dataset_instance, DatasetRegistry.get_dataset_class(dataset_type.lower())):
+        if "dataset_instance" in locals() and isinstance(
+            dataset_instance,
+            DatasetRegistry.get_dataset_class(dataset_type.lower()),
+        ):
             # Use the dataset class methods to extract question and answer
             question = dataset_instance.get_question(example)
             correct_answer = dataset_instance.get_answer(example)
-            logger.debug(f"Using dataset class methods to extract question and answer")
+            logger.debug(
+                "Using dataset class methods to extract question and answer"
+            )
         else:
             # Fallback to the legacy approach
             if dataset_type.lower() == "simpleqa":
@@ -163,7 +176,9 @@ def run_benchmark(
                     "current": i + 1,
                     "total": total_examples,
                     "question": (
-                        question[:50] + "..." if len(question) > 50 else question
+                        question[:50] + "..."
+                        if len(question) > 50
+                        else question
                     ),
                 },
             )
@@ -181,7 +196,9 @@ def run_benchmark(
             search_result = quick_summary(
                 query=formatted_query,
                 iterations=search_config.get("iterations", 3),
-                questions_per_iteration=search_config.get("questions_per_iteration", 3),
+                questions_per_iteration=search_config.get(
+                    "questions_per_iteration", 3
+                ),
                 search_tool=search_config.get("search_tool", "searxng"),
             )
@@ -278,7 +295,9 @@ def run_benchmark(
             logger.info("Running human evaluation...")
             evaluation_results = evaluate(
-                results_file=results_file, output_file=evaluation_file, interactive=True
+                results_file=results_file,
+                output_file=evaluation_file,
+                interactive=True,
             )
         else:
             logger.info("Running automated evaluation...")
@@ -349,7 +368,9 @@ def run_benchmark(
         # Generate report
         if progress_callback:
-            progress_callback("Generating report", 95, {"status": "generating_report"})
+            progress_callback(
+                "Generating report", 95, {"status": "generating_report"}
+            )
         dataset_name = dataset_type.capitalize()
         report_path = generate_report(
@@ -366,7 +387,9 @@ def run_benchmark(
                     "questions_per_iteration", 3
                 ),
                 "Search tool": search_config.get("search_tool", "searxng"),
-                "Evaluation method": "Human" if human_evaluation else "Automated",
+                "Evaluation method": "Human"
+                if human_evaluation
+                else "Automated",
             },
         )
@@ -375,7 +398,11 @@ def run_benchmark(
             progress_callback(
                 "Benchmark complete",
                 100,
-                {"status": "complete", "metrics": metrics, "report_path": report_path},
+                {
+                    "status": "complete",
+                    "metrics": metrics,
+                    "report_path": report_path,
+                },
             )
         return {
@@ -417,10 +444,14 @@ def run_simpleqa_benchmark(num_examples: int = 100, **kwargs) -> Dict[str, Any]:
     Returns:
         Dictionary with benchmark results
     """
-    return run_benchmark(dataset_type="simpleqa", num_examples=num_examples, **kwargs)
+    return run_benchmark(
+        dataset_type="simpleqa", num_examples=num_examples, **kwargs
+    )
-def run_browsecomp_benchmark(num_examples: int = 100, **kwargs) -> Dict[str, Any]:
+def run_browsecomp_benchmark(
+    num_examples: int = 100, **kwargs
+) -> Dict[str, Any]:
     """
     Run BrowseComp benchmark with default settings.
@@ -431,4 +462,6 @@ def run_browsecomp_benchmark(num_examples: int = 100, **kwargs) -> Dict[str, Any
     Returns:
         Dictionary with benchmark results
     """
-    return run_benchmark(dataset_type="browsecomp", num_examples=num_examples, **kwargs)
+    return run_benchmark(
+        dataset_type="browsecomp", num_examples=num_examples, **kwargs
+    )

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl