local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,12 +5,17 @@ Local Deep Research - A tool for conducting deep research using AI.
5
5
  __author__ = "Your Name"
6
6
  __description__ = "A tool for conducting deep research using AI"
7
7
 
8
+ from loguru import logger
9
+
8
10
  from .__version__ import __version__
9
11
  from .config.llm_config import get_llm
10
12
  from .config.search_config import get_search
11
13
  from .report_generator import get_report_generator
12
14
  from .web.app import main
13
15
 
16
+ # Disable logging by default to not interfere with user setup.
17
+ logger.disable("local_deep_research")
18
+
14
19
 
15
20
  def get_advanced_search_system(strategy_name: str = "iterdrag"):
16
21
  """
@@ -32,4 +37,6 @@ __all__ = [
32
37
  "get_search",
33
38
  "get_report_generator",
34
39
  "get_advanced_search_system",
40
+ "main",
41
+ "__version__",
35
42
  ]
@@ -1 +1 @@
1
- __version__ = "0.4.4"
1
+ __version__ = "0.5.0"
@@ -0,0 +1,5 @@
1
+ """Answer decoding module for BrowseComp."""
2
+
3
+ from .browsecomp_answer_decoder import BrowseCompAnswerDecoder
4
+
5
+ __all__ = ["BrowseCompAnswerDecoder"]
@@ -0,0 +1,421 @@
1
+ """
2
+ BrowseComp Answer Decoding Pipeline
3
+
4
+ This module handles encoded answers found in BrowseComp datasets.
5
+ Some BrowseComp answers appear to be encoded (e.g., "Y00Qh+ep") and need
6
+ decoding to extract the actual answer.
7
+
8
+ Based on BROWSECOMP_IMPROVEMENT_STRATEGY.md recommendations.
9
+ """
10
+
11
+ import base64
12
+ import logging
13
+ import re
14
+ import urllib.parse
15
+ from typing import Optional, Tuple
16
+
17
logger = logging.getLogger(__name__)


class BrowseCompAnswerDecoder:
    """
    Handle encoded BrowseComp answers with multiple decoding schemes.

    Features:
    1. Automatic encoding detection
    2. Multiple decoding scheme support
    3. Answer validation
    4. Fallback to original if decoding fails

    Detection and validation are heuristics: they look at word tokens,
    regular-expression shapes and character-class ratios, not at meaning.
    """

    # Tokens that mark an answer as ordinary English. They are compared
    # against whole tokens: a substring test would flag almost every encoded
    # string, because "a", "or", "in", ... occur inside most of them.
    _ENGLISH_INDICATORS = frozenset(
        {
            "the",
            "and",
            "or",
            "of",
            "in",
            "to",
            "a",
            "an",
            "company",
            "group",
            "inc",
            "ltd",
            "corp",
            "corporation",
            "person",
            "people",
            "event",
            "year",
            "years",
            "million",
            "billion",
            "thousand",
        }
    )

    # Words that earn a bonus in `_english_score` (whole-token match).
    _COMMON_WORDS = (
        "the",
        "and",
        "of",
        "to",
        "a",
        "in",
        "is",
        "it",
        "you",
        "that",
    )

    # Letters ordered from most to least common in English (approximate).
    _LETTERS_BY_FREQUENCY = "etaoinshrdlcumwfgypbvkjxqz"

    # Shapes of short answers that are plaintext even without spaces.
    _PLAINTEXT_PATTERNS = (
        r"^\d{4}$",  # Year
        r"^\$?\d+\.?\d*[KMB]?$",  # Number/money
        r"^[A-Z][a-z]+ [A-Z][a-z]+$",  # Name format
        r"^\d+%$",  # Percentage
    )

    # Labels for the default `encoded_patterns`, in the same order.
    _PATTERN_LABELS = ("base64", "hex", "url", "random")

    # Punctuation removed from both ends of a token before word comparison.
    _EDGE_PUNCTUATION = ".,;:!?\"'()[]{}"

    def __init__(self):
        # Schemes are tried in this order by `decode_answer`.
        self.encoding_schemes = [
            "base64",
            "hex",
            "url_encoding",
            "rot13",
            "caesar_cipher",
        ]

        # Patterns that suggest encoded content
        self.encoded_patterns = [
            r"^[A-Za-z0-9+/]+=*$",  # Base64 pattern
            r"^[0-9A-Fa-f]+$",  # Hex pattern
            r"%[0-9A-Fa-f]{2}",  # URL encoded
            r"^[A-Za-z0-9]{8,}$",  # Random string pattern
        ]

    @classmethod
    def _word_tokens(cls, text: str) -> set:
        """Return the lower-cased whitespace-separated tokens of ``text``.

        Punctuation listed in ``_EDGE_PUNCTUATION`` is stripped from both
        ends of every token; tokens that become empty are dropped.
        """
        trimmed = (
            token.strip(cls._EDGE_PUNCTUATION) for token in text.lower().split()
        )
        return {token for token in trimmed if token}

    def decode_answer(self, raw_answer: str) -> Tuple[str, Optional[str]]:
        """
        Attempt to decode a potentially encoded answer.

        Args:
            raw_answer: The raw answer string that may be encoded

        Returns:
            Tuple of (decoded_answer, encoding_scheme_used).
            If the answer is empty, looks like plaintext, or no scheme
            produces a valid and different text, returns
            (original_answer, None); the original is stripped of
            surrounding whitespace unless it was empty or blank.
        """
        if not raw_answer or not raw_answer.strip():
            return raw_answer, None

        clean_answer = raw_answer.strip()

        if self.is_likely_direct_answer(clean_answer):
            logger.debug("Answer appears to be plaintext: %s", clean_answer)
            return clean_answer, None

        logger.info(
            "Attempting to decode potentially encoded answer: %s", clean_answer
        )

        for scheme in self.encoding_schemes:
            decoded = self.apply_decoding_scheme(clean_answer, scheme)
            if not decoded:
                continue

            if decoded == clean_answer:
                # The scheme changed nothing (for example URL-unquoting a
                # text without escapes), so nothing was decoded and no
                # scheme may be reported. Stopping here also keeps the
                # substitution ciphers further down the list, whose output
                # validates whenever their input does, from rewriting an
                # answer that was never encoded.
                logger.debug(
                    "Scheme %s left the answer unchanged: %s",
                    scheme,
                    clean_answer,
                )
                return clean_answer, None

            if self.validate_decoded_answer(decoded):
                logger.info(
                    "Successfully decoded using %s: %s -> %s",
                    scheme,
                    clean_answer,
                    decoded,
                )
                return decoded, scheme

        logger.warning(
            "Could not decode answer, returning original: %s", clean_answer
        )
        return clean_answer, None

    def is_likely_direct_answer(self, answer: str) -> bool:
        """
        Check if answer looks like plaintext rather than encoded.

        Args:
            answer: The answer string to check

        Returns:
            True if answer appears to be plaintext
        """
        # Very short answers are likely plaintext
        if len(answer) < 4:
            return True

        # A whole token that is a common English word
        if self._word_tokens(answer) & self._ENGLISH_INDICATORS:
            return True

        # Has spaces and multiple words - likely plaintext
        if " " in answer and len(answer.split()) > 1:
            return True

        # Year, number/money, "First Last" name, percentage
        if any(re.match(shape, answer) for shape in self._PLAINTEXT_PATTERNS):
            return True

        # Few distinct characters relative to length: treated as encoded
        if len(set(answer)) / len(answer) < 0.3:
            return False

        # If none of the encoded patterns match, probably plaintext
        return not any(
            re.search(pattern, answer) for pattern in self.encoded_patterns
        )

    def apply_decoding_scheme(self, text: str, scheme: str) -> Optional[str]:
        """
        Apply a specific decoding scheme to text.

        Args:
            text: Text to decode
            scheme: Decoding scheme to use

        Returns:
            Decoded text or None if decoding fails or the scheme is unknown
        """
        decoders = {
            "base64": self._decode_base64,
            "hex": self._decode_hex,
            "url_encoding": self._decode_url,
            "rot13": self._decode_rot13,
            "caesar_cipher": self._decode_caesar,
        }

        decoder = decoders.get(scheme)
        if decoder is None:
            logger.warning("Unknown decoding scheme: %s", scheme)
            return None

        try:
            return decoder(text)
        except Exception as e:
            # Deliberate best-effort boundary: a failing decoder must never
            # abort the attempt to try the remaining schemes.
            logger.debug("Failed to apply %s decoding: %s", scheme, e)
            return None

    def _decode_base64(self, text: str) -> Optional[str]:
        """Decode base64 text to UTF-8, or return None if it is not base64.

        Whitespace is removed and missing ``=`` padding is added first.
        Any other character outside the base64 alphabet makes the decode
        fail instead of being silently discarded.
        """
        try:
            compact = "".join(text.split())

            # Add padding if needed
            remainder = len(compact) % 4
            if remainder:
                compact += "=" * (4 - remainder)

            # binascii.Error and UnicodeDecodeError are both ValueErrors.
            return base64.b64decode(compact, validate=True).decode("utf-8")
        except (ValueError, TypeError, AttributeError):
            return None

    def _decode_hex(self, text: str) -> Optional[str]:
        """Decode hexadecimal text to UTF-8, or return None if it is not hex.

        Only whitespace is removed; a text containing any other non-hex
        character is rejected rather than decoded from its leftovers.
        """
        try:
            compact = "".join(text.split())

            # Must be non-empty, pure hex, and have even length
            if len(compact) % 2 != 0:
                return None
            if not re.fullmatch(r"[0-9A-Fa-f]+", compact):
                return None

            return bytes.fromhex(compact).decode("utf-8")
        except (ValueError, TypeError, AttributeError):
            return None

    def _decode_url(self, text: str) -> Optional[str]:
        """Decode percent-escapes; text without escapes comes back unchanged."""
        try:
            return urllib.parse.unquote(text)
        except (ValueError, TypeError, AttributeError):
            return None

    def _decode_rot13(self, text: str) -> Optional[str]:
        """Decode ROT13 encoded text."""
        try:
            import codecs

            return codecs.decode(text, "rot13")
        except (ValueError, TypeError, AttributeError):
            return None

    def _decode_caesar(self, text: str) -> Optional[str]:
        """
        Try different Caesar cipher shifts.

        Returns the candidate with the highest `_english_score`; on a tie
        the smallest shift wins. Returns None when the best score does not
        exceed 0.3.
        """
        best_result = None
        best_score = 0

        # Try shifts 1-25
        for shift in range(1, 26):
            candidate = self._caesar_shift(text, shift)
            score = self._english_score(candidate)
            if score > best_score:
                best_score = score
                best_result = candidate

        # NOTE(review): the per-letter score can reach 2 before it is capped
        # at 1.0, so 0.3 is a low bar that most alphabetic text clears.
        return best_result if best_score > 0.3 else None

    def _caesar_shift(self, text: str, shift: int) -> str:
        """Shift ASCII letters forward by ``shift`` places with wraparound.

        Case is preserved. Every other character, including non-ASCII
        letters, is copied unchanged because the modulo-26 arithmetic is
        only meaningful inside A-Z and a-z.
        """
        shifted_chars = []

        for char in text:
            if "a" <= char <= "z":
                base = ord("a")
            elif "A" <= char <= "Z":
                base = ord("A")
            else:
                shifted_chars.append(char)
                continue

            shifted_chars.append(chr((ord(char) - base + shift) % 26 + base))

        return "".join(shifted_chars)

    def _english_score(self, text: str) -> float:
        """
        Score how English-like a text appears, from 0.0 to 1.0.

        Each letter scores 2 if it is among the ten most common English
        letters and 1 if among the next ten; the average over all letters
        is the base score. Every entry of ``_COMMON_WORDS`` that occurs as
        a whole token adds 0.1. The total is capped at 1.0.
        """
        if not text:
            return 0.0

        text_lower = text.lower()
        letters = [char for char in text_lower if char.isalpha()]
        if not letters:
            return 0.0

        top_ten = set(self._LETTERS_BY_FREQUENCY[:10])
        top_twenty = set(self._LETTERS_BY_FREQUENCY[:20])
        letter_score = sum(
            2 if char in top_ten else 1 if char in top_twenty else 0
            for char in letters
        )
        base_score = letter_score / len(letters)

        # Bonus for common English words (whole tokens, not substrings)
        tokens = self._word_tokens(text_lower)
        word_bonus = sum(1 for word in self._COMMON_WORDS if word in tokens)

        return min(1.0, base_score + word_bonus * 0.1)

    def validate_decoded_answer(self, decoded: str) -> bool:
        """
        Validate that decoded text looks like a reasonable answer.

        Args:
            decoded: The decoded text to validate

        Returns:
            True if, after stripping surrounding whitespace, the text is
            1-1000 characters long, at least 80% printable, free of control
            characters other than tab/newline/carriage return, at least 30%
            letters and at most 50% punctuation.
        """
        if not decoded or not decoded.strip():
            return False

        decoded = decoded.strip()
        total_chars = len(decoded)

        # Check length - should be reasonable
        if total_chars > 1000:
            return False

        # Check for readable characters
        printable_count = sum(1 for c in decoded if c.isprintable())
        if printable_count / total_chars < 0.8:  # At least 80% printable
            return False

        # Check for control characters (bad sign)
        if any(ord(c) < 32 and c not in "\t\n\r" for c in decoded):
            return False

        # Should have some letters
        alpha_count = sum(1 for c in decoded if c.isalpha())
        if alpha_count / total_chars < 0.3:
            return False

        # Shouldn't be mostly punctuation
        punct_count = sum(
            1 for c in decoded if not c.isalnum() and not c.isspace()
        )
        if punct_count / total_chars > 0.5:
            return False

        return True

    def analyze_answer_encoding(self, answer: str) -> dict:
        """
        Analyze an answer to determine likely encoding type.

        Returns analysis results for debugging/logging: the original text,
        its length, the plaintext verdict, the ``encoded_patterns`` that
        match, and the outcome of every decoding scheme.
        """
        analysis = {
            "original": answer,
            "length": len(answer),
            "likely_plaintext": self.is_likely_direct_answer(answer),
            "pattern_matches": [],
            "attempted_decodings": {},
        }

        # Check which patterns match
        for index, pattern in enumerate(self.encoded_patterns):
            if not re.search(pattern, answer):
                continue
            # Patterns appended beyond the built-in four have no label.
            if index < len(self._PATTERN_LABELS):
                label = self._PATTERN_LABELS[index]
            else:
                label = "unknown"
            analysis["pattern_matches"].append(
                {"pattern": pattern, "type": label}
            )

        # Try each decoding scheme
        for scheme in self.encoding_schemes:
            try:
                decoded = self.apply_decoding_scheme(answer, scheme)
                is_valid = (
                    self.validate_decoded_answer(decoded) if decoded else False
                )

                analysis["attempted_decodings"][scheme] = {
                    "decoded": decoded,
                    "valid": is_valid,
                    "length": len(decoded) if decoded else 0,
                }
            except Exception as e:
                analysis["attempted_decodings"][scheme] = {"error": str(e)}

        return analysis
@@ -0,0 +1,219 @@
1
+ # Candidate Exploration System
2
+
3
+ This module provides an inheritance-based candidate exploration system for discovering and collecting candidates in the Local Deep Research framework.
4
+
5
+ ## Architecture
6
+
7
+ The system is built around inheritance and provides multiple exploration strategies:
8
+
9
+ ### Base Class
10
+ - **`BaseCandidateExplorer`**: Abstract base class defining the exploration interface
11
+
12
+ ### Concrete Implementations
13
+ - **`ParallelExplorer`**: Runs multiple searches in parallel for speed and breadth
14
+ - **`AdaptiveExplorer`**: Learns which search strategies work best and adapts
15
+ - **`ConstraintGuidedExplorer`**: Uses constraints to guide the exploration process
16
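+ - **`DiversityExplorer`**: Prioritizes finding diverse candidates across categories
+ - **`ProgressiveExplorer`**: Also exported by this package (see `progressive_explorer.py`); not otherwise covered in this README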
17
+
18
+ ### Supporting Components
19
+ - **`ExplorationResult`**: Data class containing exploration results and metadata
20
+ - **`ExplorationStrategy`**: Enum defining different exploration approaches
21
+
22
+ ## Usage Examples
23
+
24
+ ### Using ParallelExplorer
25
+ ```python
26
+ from local_deep_research.advanced_search_system.candidate_exploration import ParallelExplorer
27
+
28
+ explorer = ParallelExplorer(
29
+ model=llm,
30
+ search_engine=search,
31
+ max_workers=5, # Parallel search threads
32
+ queries_per_round=8, # Queries generated per round
33
+ max_rounds=3 # Maximum exploration rounds
34
+ )
35
+
36
+ result = explorer.explore(
37
+ initial_query="hiking locations",
38
+ constraints=constraints,
39
+ entity_type="location"
40
+ )
41
+ ```
42
+
43
+ ### Using AdaptiveExplorer
44
+ ```python
45
+ from local_deep_research.advanced_search_system.candidate_exploration import AdaptiveExplorer
46
+
47
+ explorer = AdaptiveExplorer(
48
+ model=llm,
49
+ search_engine=search,
50
+ initial_strategies=["direct_search", "synonym_expansion", "category_exploration"],
51
+ adaptation_threshold=5 # Adapt after 5 searches
52
+ )
53
+
54
+ result = explorer.explore("scenic viewpoints", constraints, "viewpoint")
55
+ ```
56
+
57
+ ### Using ConstraintGuidedExplorer
58
+ ```python
59
+ from local_deep_research.advanced_search_system.candidate_exploration import ConstraintGuidedExplorer
60
+
61
+ explorer = ConstraintGuidedExplorer(
62
+ model=llm,
63
+ search_engine=search,
64
+ constraint_weight_threshold=0.7, # Focus on high-weight constraints
65
+ early_validation=True # Validate during exploration
66
+ )
67
+
68
+ result = explorer.explore("mountain peaks", constraints, "mountain")
69
+ ```
70
+
71
+ ### Using DiversityExplorer
72
+ ```python
73
+ from local_deep_research.advanced_search_system.candidate_exploration import DiversityExplorer
74
+
75
+ explorer = DiversityExplorer(
76
+ model=llm,
77
+ search_engine=search,
78
+ diversity_threshold=0.7, # Minimum diversity score
79
+ category_limit=10, # Max per category
80
+ similarity_threshold=0.8 # Similarity threshold
81
+ )
82
+
83
+ result = explorer.explore("natural landmarks", constraints, "landmark")
84
+ ```
85
+
86
+ ## Creating Custom Variants
87
+
88
+ To create your own exploration strategy:
89
+
90
+ 1. **Inherit from BaseCandidateExplorer**:
91
+ ```python
92
+ from .base_explorer import BaseCandidateExplorer, ExplorationResult
93
+
94
+ class MyCustomExplorer(BaseCandidateExplorer):
95
+ def __init__(self, *args, my_param=0.5, **kwargs):
96
+ super().__init__(*args, **kwargs)
97
+ self.my_param = my_param
98
+ ```
99
+
100
+ 2. **Implement required methods**:
101
+ ```python
102
+ def explore(self, initial_query, constraints=None, entity_type=None):
103
+ # Your exploration implementation
104
+ return ExplorationResult(...)
105
+
106
+ def generate_exploration_queries(self, base_query, found_candidates, constraints=None):
107
+ # Your query generation logic
108
+ return ["query1", "query2", "query3"]
109
+ ```
110
+
111
+ 3. **Add custom exploration logic**:
112
+ ```python
113
+ def _my_custom_search_strategy(self, query, context):
114
+ # Your custom search approach
115
+ pass
116
+ ```
117
+
118
+ ## Integration with Strategies
119
+
120
+ Use in your strategy by initializing the explorer:
121
+
122
+ ```python
123
+ class MyStrategy(BaseStrategy):
124
+ def __init__(self, *args, **kwargs):
125
+ super().__init__(*args, **kwargs)
126
+
127
+ # Choose your explorer
128
+ self.explorer = AdaptiveExplorer(
129
+ model=self.model,
130
+ search_engine=self.search,
131
+ max_candidates=50,
132
+ max_search_time=120.0
133
+ )
134
+
135
+ def find_candidates(self, query, constraints):
136
+ result = self.explorer.explore(
137
+ initial_query=query,
138
+ constraints=constraints,
139
+ entity_type=self._detect_entity_type(query)
140
+ )
141
+
142
+ return result.candidates
143
+ ```
144
+
145
+ ## Available Explorers
146
+
147
+ ### ParallelExplorer
148
+ - **Best for**: Fast, broad candidate discovery
149
+ - **Strategy**: Breadth-first parallel search
150
+ - **Parameters**: `max_workers`, `queries_per_round`, `max_rounds`
151
+ - **Output**: Many candidates found quickly
152
+
153
+ ### AdaptiveExplorer
154
+ - **Best for**: Learning optimal search approaches
155
+ - **Strategy**: Adapts based on search success
156
+ - **Parameters**: `initial_strategies`, `adaptation_threshold`
157
+ - **Output**: Candidates found using best-performing strategies
158
+
159
+ ### ConstraintGuidedExplorer
160
+ - **Best for**: Constraint-driven discovery
161
+ - **Strategy**: Constraint-guided search prioritization
162
+ - **Parameters**: `constraint_weight_threshold`, `early_validation`
163
+ - **Output**: Candidates likely to satisfy constraints
164
+
165
+ ### DiversityExplorer
166
+ - **Best for**: Diverse candidate sets
167
+ - **Strategy**: Diversity-focused exploration
168
+ - **Parameters**: `diversity_threshold`, `category_limit`, `similarity_threshold`
169
+ - **Output**: Diverse candidates across categories
170
+
171
+ ## ExplorationResult Structure
172
+
173
+ ```python
174
+ @dataclass
175
+ class ExplorationResult:
176
+ candidates: List[Candidate] # Found candidates
177
+ total_searched: int # Number of searches performed
178
+ unique_candidates: int # Number of unique candidates
179
+ exploration_paths: List[str] # Search path descriptions
180
+ metadata: Dict # Strategy-specific metadata
181
+ elapsed_time: float # Time taken for exploration
182
+ strategy_used: ExplorationStrategy # Strategy that was used
183
+ ```
184
+
185
+ ## Performance Considerations
186
+
187
+ ### Speed vs. Quality Trade-offs
188
+ - **ParallelExplorer**: Fastest, good breadth
189
+ - **AdaptiveExplorer**: Medium speed, learns over time
190
+ - **ConstraintGuidedExplorer**: Medium speed, higher constraint satisfaction
191
+ - **DiversityExplorer**: Slower, but most diverse results
192
+
193
+ ### Memory Usage
194
+ - All explorers track found candidates to avoid duplicates
195
+ - Large candidate sets may use significant memory
196
+ - Consider using the `max_candidates` parameter to limit memory usage
197
+
198
+ ### Search Engine Load
199
+ - Parallel explorers generate more concurrent search requests
200
+ - Consider rate limiting or using fewer `max_workers`
201
+ - Monitor search engine response times
202
+
203
+ ## Extending the System
204
+
205
+ The inheritance-based design makes it easy to:
206
+
207
+ 1. **Create domain-specific explorers** (e.g., GeoExplorer, PersonExplorer)
208
+ 2. **Combine exploration strategies** (e.g., parallel + adaptive)
209
+ 3. **Add new search patterns** and query generation methods
210
+ 4. **Implement caching strategies** for discovered candidates
211
+ 5. **Add quality scoring** for candidate ranking
212
+
213
+ ## Best Practices
214
+
215
+ 1. **Choose the right explorer** for your use case
216
+ 2. **Set appropriate limits** (`max_candidates`, `max_search_time`)
217
+ 3. **Provide good constraints** when using ConstraintGuidedExplorer
218
+ 4. **Monitor diversity scores** when using DiversityExplorer
219
+ 5. **Let AdaptiveExplorer learn** over multiple runs for best results
@@ -0,0 +1,25 @@
1
+ """
2
+ Candidate exploration system for discovering and refining candidates.
3
+
4
+ This module provides inheritance-based components for exploring and discovering
5
+ candidates through different search strategies and approaches.
6
+ """
7
+
8
+ from .adaptive_explorer import AdaptiveExplorer
9
+ from .base_explorer import BaseCandidateExplorer, ExplorationResult
10
+ from .constraint_guided_explorer import ConstraintGuidedExplorer
11
+ from .diversity_explorer import DiversityExplorer
12
+ from .parallel_explorer import ParallelExplorer
13
+ from .progressive_explorer import ProgressiveExplorer
14
+
15
+ __all__ = [
16
+ # Base class and result container (NOTE(review): the README also documents ExplorationStrategy, which is not re-exported here - confirm this is intentional)
17
+ "BaseCandidateExplorer",
18
+ "ExplorationResult",
19
+ # Concrete explorer implementations, one per sibling *_explorer module imported above
20
+ "ParallelExplorer",
21
+ "AdaptiveExplorer",
22
+ "ConstraintGuidedExplorer",
23
+ "DiversityExplorer",
24
+ "ProgressiveExplorer",
25
+ ]