PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/benchmarks/datasets/simpleqa.py CHANGED Viewed

@@ -14,11 +14,11 @@ logger = logging.getLogger(__name__)
 class SimpleQADataset(BenchmarkDataset):
     """SimpleQA benchmark dataset.
     This class handles loading and processing the SimpleQA dataset, which
     contains straightforward question-answering pairs.
     """
     @classmethod
     def get_dataset_info(cls) -> Dict[str, str]:
         """Get basic information about the dataset."""
@@ -28,47 +28,47 @@ class SimpleQADataset(BenchmarkDataset):
             "description": "Simple question-answering evaluation dataset",
             "url": cls.get_default_dataset_path(),
         }
     @classmethod
     def get_default_dataset_path(cls) -> str:
         """Get the default URL for the dataset."""
         return "https://openaipublic.blob.core.windows.net/simple-evals/simple_qa_test_set.csv"
     def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
         """Process a single example from the dataset.
         SimpleQA examples are already in plaintext format, so this just
         ensures that the necessary fields are present.
         Args:
             example: Raw example from the dataset.
         Returns:
             Processed example ready for use.
         """
         # Make a copy to avoid modifying the original
         processed = dict(example)
         # Ensure problem field exists
         if "problem" not in processed:
             logger.warning("SimpleQA example missing 'problem' field")
             processed["problem"] = ""
         # Ensure answer field exists
         if "answer" not in processed:
             logger.warning("SimpleQA example missing 'answer' field")
             processed["answer"] = ""
         # Add correct_answer field if not present
         if "correct_answer" not in processed:
             processed["correct_answer"] = processed["answer"]
         return processed
     def get_question(self, example: Dict[str, Any]) -> str:
         """Extract the question from an example."""
         return example.get("problem", "")
     def get_answer(self, example: Dict[str, Any]) -> str:
         """Extract the answer from an example."""
-        return example.get("answer", "")
+        return example.get("answer", "")

local_deep_research/benchmarks/datasets/utils.py CHANGED Viewed

@@ -8,7 +8,7 @@ decryption, encoding detection, etc.
 import base64
 import hashlib
 import logging
-from typing import Dict, Any
+from typing import Dict
 logger = logging.getLogger(__name__)
@@ -29,27 +29,32 @@ def decrypt(ciphertext_b64: str, password: str) -> str:
     # Skip decryption for non-encoded strings
     if not isinstance(ciphertext_b64, str) or len(ciphertext_b64) < 8:
         return ciphertext_b64
     # Skip if the string doesn't look like base64
-    if not all(c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' for c in ciphertext_b64):
+    if not all(
+        c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
+        for c in ciphertext_b64
+    ):
         return ciphertext_b64
     # Attempt standard decryption
     try:
         encrypted = base64.b64decode(ciphertext_b64)
         key = derive_key(password, len(encrypted))
         decrypted = bytes(a ^ b for a, b in zip(encrypted, key))
         # Check if the result looks like valid text
-        result = decrypted.decode('utf-8', errors='replace')
+        result = decrypted.decode("utf-8", errors="replace")
         # Heuristic check - if the decrypted text is mostly ASCII and contains spaces
-        if all(32 <= ord(c) < 127 for c in result[:50]) and ' ' in result[:50]:
-            logger.debug(f"Successfully decrypted with standard method: {result[:50]}...")
+        if all(32 <= ord(c) < 127 for c in result[:50]) and " " in result[:50]:
+            logger.debug(
+                f"Successfully decrypted with standard method: {result[:50]}..."
+            )
             return result
     except Exception as e:
         logger.debug(f"Standard decryption failed: {str(e)}")
     # Alternative method - try using just the first part of the password
     try:
         if len(password) > 30:
@@ -57,14 +62,19 @@ def decrypt(ciphertext_b64: str, password: str) -> str:
             encrypted = base64.b64decode(ciphertext_b64)
             key = derive_key(alt_password, len(encrypted))
             decrypted = bytes(a ^ b for a, b in zip(encrypted, key))
-            result = decrypted.decode('utf-8', errors='replace')
-            if all(32 <= ord(c) < 127 for c in result[:50]) and ' ' in result[:50]:
-                logger.debug(f"Successfully decrypted with alternate method 1: {result[:50]}...")
+            result = decrypted.decode("utf-8", errors="replace")
+            if (
+                all(32 <= ord(c) < 127 for c in result[:50])
+                and " " in result[:50]
+            ):
+                logger.debug(
+                    f"Successfully decrypted with alternate method 1: {result[:50]}..."
+                )
                 return result
     except Exception:
         pass
     # Alternative method 2 - try using the GUID part
     try:
         if "GUID" in password:
@@ -72,40 +82,49 @@ def decrypt(ciphertext_b64: str, password: str) -> str:
             encrypted = base64.b64decode(ciphertext_b64)
             key = derive_key(guid_part, len(encrypted))
             decrypted = bytes(a ^ b for a, b in zip(encrypted, key))
-            result = decrypted.decode('utf-8', errors='replace')
-            if all(32 <= ord(c) < 127 for c in result[:50]) and ' ' in result[:50]:
-                logger.debug(f"Successfully decrypted with GUID method: {result[:50]}...")
+            result = decrypted.decode("utf-8", errors="replace")
+            if (
+                all(32 <= ord(c) < 127 for c in result[:50])
+                and " " in result[:50]
+            ):
+                logger.debug(
+                    f"Successfully decrypted with GUID method: {result[:50]}..."
+                )
                 return result
     except Exception:
         pass
     # Alternative method 3 - hardcoded key for BrowseComp
     try:
         hardcoded_key = "MHGGF2022!"  # Known key for BrowseComp dataset
         encrypted = base64.b64decode(ciphertext_b64)
         key = derive_key(hardcoded_key, len(encrypted))
         decrypted = bytes(a ^ b for a, b in zip(encrypted, key))
-        result = decrypted.decode('utf-8', errors='replace')
-        if all(32 <= ord(c) < 127 for c in result[:50]) and ' ' in result[:50]:
-            logger.debug(f"Successfully decrypted with hardcoded key: {result[:50]}...")
+        result = decrypted.decode("utf-8", errors="replace")
+        if all(32 <= ord(c) < 127 for c in result[:50]) and " " in result[:50]:
+            logger.debug(
+                f"Successfully decrypted with hardcoded key: {result[:50]}..."
+            )
             return result
     except Exception:
         pass
     # If all attempts fail, return the original
-    logger.debug(f"All decryption attempts failed for: {ciphertext_b64[:20]}...")
+    logger.debug(
+        f"All decryption attempts failed for: {ciphertext_b64[:20]}..."
+    )
     return ciphertext_b64
 def get_known_answer_map() -> Dict[str, str]:
     """Get a mapping of known encrypted answers to their decrypted values.
     This function maintains a catalog of known encrypted strings that
     couldn't be automatically decrypted, along with their verified
     plaintext values.
     Returns:
         Dictionary mapping encrypted strings to their plaintext values.
     """
@@ -113,4 +132,4 @@ def get_known_answer_map() -> Dict[str, str]:
         "dFoTn+K+bcdyWg==": "Tooth Rock",
         "ERFIwA==": "1945",
         # Add more mappings as they are discovered during benchmark runs
-    }
+    }

local_deep_research/benchmarks/datasets.py CHANGED Viewed

@@ -14,18 +14,11 @@ Notes on BrowseComp dataset:
   3. Use a manual mapping for specific encrypted strings that have been verified
 """
-import logging
-from typing import Any, Dict, List, Optional
-from .datasets import DatasetRegistry, load_dataset
-logger = logging.getLogger(__name__)
-# Re-export the default dataset URLs
-from .datasets import DEFAULT_DATASET_URLS
+from .datasets import load_dataset
 # Re-export the get_available_datasets function
-from .datasets import get_available_datasets
+# Re-export the default dataset URLs
+from .datasets import DEFAULT_DATASET_URLS, get_available_datasets
 # Re-export the load_dataset function
-__all__ = ['DEFAULT_DATASET_URLS', 'get_available_datasets', 'load_dataset']
+__all__ = ["DEFAULT_DATASET_URLS", "get_available_datasets", "load_dataset"]

local_deep_research/benchmarks/efficiency/__init__.py CHANGED Viewed

@@ -5,10 +5,14 @@ This module provides tools for measuring and optimizing execution speed
 and resource usage of the research system.
 """
-from local_deep_research.benchmarks.efficiency.speed_profiler import SpeedProfiler
-from local_deep_research.benchmarks.efficiency.resource_monitor import ResourceMonitor
+from local_deep_research.benchmarks.efficiency.resource_monitor import (
+    ResourceMonitor,
+)
+from local_deep_research.benchmarks.efficiency.speed_profiler import (
+    SpeedProfiler,
+)
 __all__ = [
-    'SpeedProfiler',
-    'ResourceMonitor',
+    "SpeedProfiler",
+    "ResourceMonitor",
 ]

local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl