local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,255 @@
1
+ """
2
+ Progressive explorer for BrowseComp-style systematic search exploration.
3
+ """
4
+
5
+ import concurrent.futures
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from typing import Dict, List, Set, Tuple
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
@dataclass
class SearchProgress:
    """Mutable record of what a search session has tried and found so far.

    All container fields start empty and ``search_depth`` starts at 0.
    """

    # Queries recorded as already executed.
    searched_terms: Set[str] = field(default_factory=set)
    # name -> confidence
    found_candidates: Dict[str, float] = field(default_factory=dict)
    # fact -> source
    verified_facts: Dict[str, str] = field(default_factory=dict)
    # entity_type -> searched_entities (stored lower-cased)
    entity_coverage: Dict[str, Set[str]] = field(default_factory=dict)
    search_depth: int = 0

    def update_coverage(self, entity_type: str, entity: str):
        """Record ``entity`` (lower-cased) as covered under ``entity_type``."""
        self.entity_coverage.setdefault(entity_type, set()).add(entity.lower())

    def get_uncovered_entities(
        self, entities: Dict[str, List[str]]
    ) -> Dict[str, List[str]]:
        """Return the entities that have not been recorded as covered.

        Args:
            entities: Mapping of entity type to the entity strings to check.

        Returns:
            A mapping with the same keys, restricted to entities whose
            lower-cased form is not in ``entity_coverage`` for that type.
            Types with nothing left are omitted; input order is preserved.
        """
        remaining: Dict[str, List[str]] = {}
        for kind, names in entities.items():
            seen = self.entity_coverage.get(kind, set())
            pending = [name for name in names if name.lower() not in seen]
            if pending:
                remaining[kind] = pending
        return remaining
48
+
49
+
50
class ProgressiveExplorer:
    """
    Explorer that implements progressive search strategies for BrowseComp.

    Key features:
    1. Tracks search progress to avoid redundancy
    2. Progressively combines entities
    3. Identifies and pursues promising candidates
    4. Maintains simple approach without over-filtering
    """

    def __init__(self, search_engine, model):
        """Store the collaborators and start with empty progress.

        Args:
            search_engine: Object exposing ``run(query)``; a falsy return
                value is treated as "no results".
            model: Kept on the instance; the methods of this class do not
                call it.
        """
        self.search_engine = search_engine
        self.model = model
        self.progress = SearchProgress()
        self.max_results_per_search = 20  # Keep more results

    def explore(
        self,
        queries: List[str],
        constraints: List = None,
        max_workers: int = 5,
        extracted_entities: Dict[str, List[str]] = None,
    ) -> Tuple[List, "SearchProgress"]:
        """
        Execute progressive exploration with entity tracking.

        Args:
            queries: Search strings to run, in parallel.
            constraints: Accepted for interface compatibility; not read here.
            max_workers: Thread-pool size used for the searches.
            extracted_entities: Optional mapping of entity type to entity
                strings; used only to record which entities a query mentions.

        Returns:
            ``(all_results, progress)``: every raw result from every query
            (unfiltered, in query order) and the updated ``SearchProgress``.
        """
        all_results = []
        extracted_entities = extracted_entities or {}
        found = self.progress.found_candidates

        # Execute searches in parallel (like source-based strategy)
        for query, results in self._parallel_search(queries, max_workers):
            self.progress.searched_terms.add(query.lower())

            # Track which entities were covered in this search
            self._update_entity_coverage(query, extracted_entities)

            # Keep the highest confidence seen so far for each candidate.
            candidates = self._extract_candidates_from_results(results, query)
            for candidate_name, confidence in candidates.items():
                found[candidate_name] = max(
                    found.get(candidate_name, confidence), confidence
                )

            # Keep all results for final synthesis (no filtering here).
            all_results.extend(results)

        self.progress.search_depth += 1

        return all_results, self.progress

    def generate_verification_searches(
        self,
        candidates: Dict[str, float],
        constraints: List,
        max_searches: int = 5,
    ) -> List[str]:
        """Generate targeted searches to verify top candidates.

        Pairs each of the three highest-confidence candidates with the
        ``description`` of the first two constraints, skipping searches whose
        lower-cased text is already in ``progress.searched_terms``.

        Args:
            candidates: Mapping of candidate name to confidence.
            constraints: Objects exposing a ``description`` attribute.
            max_searches: Upper bound on the number of searches returned.

        Returns:
            At most ``max_searches`` query strings; empty if there are no
            candidates.
        """
        if not candidates:
            return []

        # Get top candidates by confidence
        top_candidates = sorted(
            candidates.items(), key=lambda item: item[1], reverse=True
        )[:3]

        already_searched = self.progress.searched_terms
        verification_searches = []
        for candidate_name, _confidence in top_candidates:
            for constraint in constraints[:2]:  # Verify top constraints
                search = f'"{candidate_name}" {constraint.description}'
                if search.lower() not in already_searched:
                    verification_searches.append(search)

        return verification_searches[:max_searches]

    def _extract_candidates_from_results(
        self, results: List[Dict], query: str
    ) -> Dict[str, float]:
        """Extract potential answer candidates from search results.

        Only the first ten results are inspected. Double-quoted spans of
        3-49 characters found in ``title + snippet`` are set to 0.3; every
        1-3 word title phrase longer than three characters that starts with
        an uppercase character gains 0.2 per occurrence.

        Args:
            results: Result dicts; the ``"title"`` and ``"snippet"`` keys are
                read, each defaulting to ``""``.
            query: Unused; kept for interface compatibility.

        Returns:
            Mapping of candidate text to heuristic confidence.
        """
        # Imported once per call instead of once per result; the module-level
        # imports of this file do not include ``re``.
        import re

        quoted_pattern = re.compile(r'"([^"]+)"')
        candidates: Dict[str, float] = {}

        # Simple extraction based on titles and snippets.
        # This is simplified - in practice, might use NER or more
        # sophisticated extraction.
        for result in results[:10]:  # Focus on top results
            title = result.get("title", "")
            snippet = result.get("snippet", "")
            combined_text = f"{title} {snippet}"

            # Extract quoted terms as potential candidates
            for term in quoted_pattern.findall(combined_text):
                if 2 < len(term) < 50:  # Reasonable length for an answer
                    # NOTE(review): this assignment resets any boost the same
                    # text accumulated from an earlier title - confirm that
                    # is intended before changing it.
                    candidates[term] = 0.3  # Base confidence from appearance

            # Titles often contain the actual answer
            if title:
                title_words = title.split()
                word_count = len(title_words)
                for start in range(word_count):
                    stop_limit = min(start + 4, word_count + 1)
                    for stop in range(start + 1, stop_limit):
                        phrase = " ".join(title_words[start:stop])
                        # Likely proper noun
                        if len(phrase) > 3 and phrase[0].isupper():
                            candidates[phrase] = candidates.get(phrase, 0) + 0.2

        return candidates

    def _update_entity_coverage(
        self, query: str, entities: Dict[str, List[str]]
    ):
        """Mark every entity whose lower-cased text occurs in ``query``."""
        query_lower = query.lower()

        for entity_type, entity_list in entities.items():
            for entity in entity_list:
                if entity.lower() in query_lower:
                    self.progress.update_coverage(entity_type, entity)

    def suggest_next_searches(
        self, entities: Dict[str, List[str]], max_suggestions: int = 5
    ) -> List[str]:
        """Suggest next searches based on coverage and findings.

        If candidates exist, the highest-confidence one is combined with up
        to two uncovered entities per type. Otherwise uncovered ``"temporal"``
        entities are paired with a key term (first name, else first
        descriptor, else nothing) and uncovered names are paired with
        uncovered descriptors.

        Args:
            entities: Mapping of entity type to entity strings.
            max_suggestions: Upper bound on the number of suggestions.

        Returns:
            At most ``max_suggestions`` query strings not yet present
            (lower-cased) in ``progress.searched_terms``.
        """
        suggestions = []
        already_searched = self.progress.searched_terms

        # 1. Check uncovered entities
        uncovered = self.progress.get_uncovered_entities(entities)

        # 2. If we have candidates, verify them with uncovered constraints
        if self.progress.found_candidates:
            top_candidate = max(
                self.progress.found_candidates.items(),
                key=lambda item: item[1],
            )[0]

            # Combine candidate with uncovered entities
            for entity_list in uncovered.values():
                for entity in entity_list[:2]:
                    search = f'"{top_candidate}" {entity}'
                    if search.lower() not in already_searched:
                        suggestions.append(search)

        # 3. Otherwise, create new combinations of uncovered entities
        else:
            # Focus on systematic coverage
            if uncovered.get("temporal"):
                # Year-by-year with key term. ``or [""]`` also covers a key
                # that is present but empty, which previously raised
                # IndexError on ``[0]``.
                names = entities.get("names") or [""]
                descriptors = entities.get("descriptors") or [""]
                key_term = names[0] or descriptors[0]
                for year in uncovered["temporal"][:3]:
                    search = f"{key_term} {year}".strip()
                    if search.lower() not in already_searched:
                        suggestions.append(search)

            if uncovered.get("names") and uncovered.get("descriptors"):
                # Combine names with descriptors
                for name in uncovered["names"][:2]:
                    for desc in uncovered["descriptors"][:2]:
                        search = f"{name} {desc}"
                        if search.lower() not in already_searched:
                            suggestions.append(search)

        return suggestions[:max_suggestions]

    def _parallel_search(
        self, queries: List[str], max_workers: int
    ) -> List[Tuple[str, List[Dict]]]:
        """Execute searches in parallel and return results.

        Args:
            queries: Search strings to run.
            max_workers: Thread-pool size.

        Returns:
            One ``(query, results)`` pair per query, in the same order as
            ``queries``. A query whose search raises is logged and paired
            with an empty list.
        """

        def search_query(query):
            try:
                search_results = self.search_engine.run(query)
                return (query, search_results or [])
            except Exception as e:
                logger.error(f"Error searching '{query}': {e}")
                return (query, [])

        # ``map`` yields in submission order, so the output (and everything
        # derived from it) no longer depends on which thread finishes first.
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_workers
        ) as executor:
            return list(executor.map(search_query, queries))
@@ -0,0 +1,5 @@
1
+ # Candidates System Package
2
+
3
+ from .base_candidate import Candidate
4
+
5
+ __all__ = ["Candidate"]
@@ -0,0 +1,59 @@
1
+ """
2
+ Base candidate class for tracking potential answers.
3
+ """
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List
7
+
8
+ from ..constraints.base_constraint import Constraint
9
+ from ..evidence.base_evidence import Evidence
10
+
11
+
12
@dataclass
class Candidate:
    """A named potential answer together with the evidence gathered for it."""

    name: str
    # constraint id -> evidence collected for that constraint
    evidence: Dict[str, Evidence] = field(default_factory=dict)
    score: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_evidence(self, constraint_id: str, evidence: Evidence):
        """Store ``evidence`` under ``constraint_id``, replacing any previous entry."""
        self.evidence[constraint_id] = evidence

    def calculate_score(self, constraints: List[Constraint]) -> float:
        """Compute the weighted mean confidence over constraints with evidence.

        Constraints without (truthy) evidence contribute to neither the sum
        nor the weight total. The result is stored in ``self.score`` and
        returned; it is 0.0 when no weight was accumulated.

        Args:
            constraints: Constraints to score against; ``id`` and ``weight``
                are read from each.

        Returns:
            The new score. For an empty ``constraints`` list, 0.0 is returned
            and ``self.score`` is left untouched.
        """
        if not constraints:
            return 0.0

        weighted_sum = 0.0
        weight_sum = 0.0
        for constraint in constraints:
            found = self.evidence.get(constraint.id)
            if not found:
                continue
            weighted_sum += found.confidence * constraint.weight
            weight_sum += constraint.weight

        if weight_sum > 0:
            self.score = weighted_sum / weight_sum
        else:
            self.score = 0.0
        return self.score

    def get_unverified_constraints(
        self, constraints: List[Constraint]
    ) -> List[Constraint]:
        """Return the constraints whose ``id`` has no entry in ``evidence``."""
        return [item for item in constraints if item.id not in self.evidence]

    def get_weak_evidence(self, threshold: float = 0.5) -> List[str]:
        """Return constraint ids whose evidence confidence is below ``threshold``."""
        return [
            key
            for key, item in self.evidence.items()
            if item.confidence < threshold
        ]
@@ -0,0 +1,150 @@
1
+ # Constraint Checking System
2
+
3
+ This module provides an inheritance-based constraint checking system for validating candidates against constraints in the Local Deep Research framework.
4
+
5
+ ## Architecture
6
+
7
+ The system is built around inheritance and provides multiple implementations:
8
+
9
+ ### Base Class
10
+ - **`BaseConstraintChecker`**: Abstract base class defining the interface
11
+
12
+ ### Concrete Implementations
13
+ - **`DualConfidenceChecker`**: Uses positive/negative/uncertainty confidence scoring
14
+ - **`ThresholdChecker`**: Simple threshold-based checking
15
+ - **`StrictChecker`**: Example of very strict constraint validation
16
+
17
+ ### Supporting Components
18
+ - **`EvidenceAnalyzer`**: Analyzes evidence using dual confidence scoring
19
+ - **`RejectionEngine`**: Makes rejection decisions based on evidence
20
+ - **`ConstraintCheckResult`**: Data class containing evaluation results
21
+
22
+ ## Usage Examples
23
+
24
+ ### Using DualConfidenceChecker
25
+ ```python
26
+ from local_deep_research.advanced_search_system.constraint_checking import DualConfidenceChecker
27
+
28
+ checker = DualConfidenceChecker(
29
+ model=llm,
30
+ evidence_gatherer=evidence_function,
31
+ negative_threshold=0.25, # Reject if negative evidence > 25%
32
+ positive_threshold=0.4, # Reject if positive evidence < 40%
33
+ )
34
+
35
+ result = checker.check_candidate(candidate, constraints)
36
+ ```
37
+
38
+ ### Using ThresholdChecker
39
+ ```python
40
+ from local_deep_research.advanced_search_system.constraint_checking import ThresholdChecker
41
+
42
+ checker = ThresholdChecker(
43
+ model=llm,
44
+ evidence_gatherer=evidence_function,
45
+ satisfaction_threshold=0.7, # Individual constraint threshold
46
+ required_satisfaction_rate=0.8 # Overall satisfaction rate needed
47
+ )
48
+
49
+ result = checker.check_candidate(candidate, constraints)
50
+ ```
51
+
52
+ ### Using StrictChecker
53
+ ```python
54
+ from local_deep_research.advanced_search_system.constraint_checking import StrictChecker
55
+
56
+ checker = StrictChecker(
57
+ model=llm,
58
+ evidence_gatherer=evidence_function,
59
+ strict_threshold=0.9, # Very high threshold
60
+ name_pattern_required=True # NAME_PATTERN constraints are mandatory
61
+ )
62
+
63
+ result = checker.check_candidate(candidate, constraints)
64
+ ```
65
+
66
+ ## Creating Custom Variants
67
+
68
+ To create your own constraint checker variant:
69
+
70
+ 1. **Inherit from BaseConstraintChecker**:
71
+ ```python
72
+ from .base_constraint_checker import BaseConstraintChecker, ConstraintCheckResult
73
+
74
+ class MyCustomChecker(BaseConstraintChecker):
75
+ def __init__(self, *args, my_param=0.5, **kwargs):
76
+ super().__init__(*args, **kwargs)
77
+ self.my_param = my_param
78
+ ```
79
+
80
+ 2. **Implement required methods**:
81
+ ```python
82
+ def check_candidate(self, candidate, constraints):
83
+ # Your implementation here
84
+ return ConstraintCheckResult(...)
85
+
86
+ def should_reject_candidate(self, candidate, constraint, evidence_data):
87
+ # Your rejection logic here
88
+ return should_reject, reason
89
+ ```
90
+
91
+ 3. **Add custom logic**:
92
+ ```python
93
+ def _my_custom_evaluation(self, candidate, constraint):
94
+ # Your custom evaluation logic
95
+ pass
96
+ ```
97
+
98
+ ## Integration with Strategies
99
+
100
+ Use in your strategy by initializing the checker:
101
+
102
+ ```python
103
+ class MyStrategy(BaseStrategy):
104
+ def __init__(self, *args, **kwargs):
105
+ super().__init__(*args, **kwargs)
106
+
107
+ # Choose your constraint checker
108
+ self.constraint_checker = DualConfidenceChecker(
109
+ model=self.model,
110
+ evidence_gatherer=self._gather_evidence_for_constraint,
111
+ # ... other parameters
112
+ )
113
+
114
+ def _evaluate_candidate(self, candidate):
115
+ result = self.constraint_checker.check_candidate(candidate, self.constraints)
116
+
117
+ # Process result
118
+ candidate.evaluation_results = result.detailed_results
119
+ candidate.score = result.total_score
120
+
121
+ return result.total_score
122
+ ```
123
+
124
+ ## Available Checkers
125
+
126
+ ### DualConfidenceChecker
127
+ - **Best for**: Nuanced evaluation with detailed confidence scoring
128
+ - **Parameters**: `negative_threshold`, `positive_threshold`, `uncertainty_penalty`, `negative_weight`
129
+ - **Output**: Detailed positive/negative/uncertainty scores per constraint
130
+
131
+ ### ThresholdChecker
132
+ - **Best for**: Fast, simple constraint checking
133
+ - **Parameters**: `satisfaction_threshold`, `required_satisfaction_rate`
134
+ - **Output**: Simple satisfied/not satisfied per constraint
135
+
136
+ ### StrictChecker
137
+ - **Best for**: Cases requiring very high confidence
138
+ - **Parameters**: `strict_threshold`, `name_pattern_required`
139
+ - **Output**: Binary pass/fail with strict requirements
140
+
141
+ ## Extending the System
142
+
143
+ The inheritance-based design makes it easy to:
144
+
145
+ 1. **Create specialized checkers** for specific domains
146
+ 2. **Mix and match components** (e.g., use DualConfidence evidence analysis with custom rejection logic)
147
+ 3. **Add new constraint types** with custom handling
148
+ 4. **Implement domain-specific optimizations**
149
+
150
+ See `strict_checker.py` for an example of creating a custom variant.
@@ -0,0 +1,35 @@
1
+ """
2
+ Constraint checking and candidate assessment system.
3
+
4
+ This module provides inheritance-based components for checking candidates
5
+ against constraints, with different implementations available.
6
+ """
7
+
8
+ from .base_constraint_checker import (
9
+ BaseConstraintChecker,
10
+ ConstraintCheckResult,
11
+ )
12
+
13
+ # Checker implementations and supporting components (ConstraintChecker is the legacy one, kept for backward compatibility)
14
+ from .constraint_checker import ConstraintChecker
15
+ from .dual_confidence_checker import DualConfidenceChecker
16
+ from .evidence_analyzer import ConstraintEvidence, EvidenceAnalyzer
17
+ from .rejection_engine import RejectionEngine
18
+ from .strict_checker import StrictChecker
19
+ from .threshold_checker import ThresholdChecker
20
+
21
# Public API of the constraint_checking package. Order is kept stable so
# that `from ... import *` and generated docs do not change.
__all__ = [
    # Abstract interface and its result container
    "BaseConstraintChecker",
    "ConstraintCheckResult",
    # Ready-to-use checker implementations
    "DualConfidenceChecker",
    "ThresholdChecker",
    "StrictChecker",
    # Building blocks shared by the checkers
    "EvidenceAnalyzer",
    "ConstraintEvidence",
    "RejectionEngine",
    # Retained for backward compatibility
    "ConstraintChecker",
]
@@ -0,0 +1,122 @@
1
+ """
2
+ Base constraint checker for inheritance-based constraint checking system.
3
+
4
+ This module provides the base interface and common functionality for
5
+ constraint checking implementations.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from typing import Dict, List, Optional, Tuple
11
+
12
+ from langchain_core.language_models import BaseChatModel
13
+ from loguru import logger
14
+
15
+ from ..candidates.base_candidate import Candidate
16
+ from ..constraints.base_constraint import Constraint
17
+
18
+
19
@dataclass
class ConstraintCheckResult:
    """Outcome of evaluating a single candidate against a set of constraints.

    Instances are produced by ``check_candidate`` implementations of the
    constraint checkers.
    """

    # The candidate that was evaluated.
    candidate: Candidate
    # Overall score of the candidate across all constraints.
    total_score: float
    # Per-constraint score data keyed by string.
    # NOTE(review): key/value layout is defined by the concrete checker.
    constraint_scores: Dict[str, Dict]
    # True when the checker decided the candidate must be discarded.
    should_reject: bool
    # Explanation for the rejection; may be None.
    rejection_reason: Optional[str]
    # Detailed per-constraint result records (format depends on the checker).
    detailed_results: List[Dict]
29
+
30
+
31
class BaseConstraintChecker(ABC):
    """
    Base class for constraint checking implementations.

    Subclasses must implement ``check_candidate`` and
    ``should_reject_candidate``. This class supplies the shared plumbing:
    storing the model and evidence gatherer, delegating evidence collection,
    logging per-constraint results and computing weighted averages.
    """

    def __init__(
        self,
        model: BaseChatModel,
        evidence_gatherer=None,  # Will be passed in from strategy
        **kwargs,
    ):
        """
        Initialize the base constraint checker.

        Args:
            model: Language model for evidence analysis
            evidence_gatherer: Callable taking ``(candidate, constraint)``
                and returning the gathered evidence (supplied by the
                strategy). May be None, in which case no evidence is
                gathered.
            **kwargs: Additional parameters for specific implementations.
                They are accepted and ignored here.
        """
        self.model = model
        self.evidence_gatherer = evidence_gatherer

    @abstractmethod
    def check_candidate(
        self, candidate: Candidate, constraints: List[Constraint]
    ) -> ConstraintCheckResult:
        """
        Check a candidate against all constraints.

        Args:
            candidate: The candidate to check
            constraints: List of constraints to check against

        Returns:
            ConstraintCheckResult: Complete evaluation result
        """
        pass

    @abstractmethod
    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        # Annotated as ``object`` rather than the builtin function ``any``,
        # which is not a type; the concrete format is up to the subclass.
        evidence_data: object,
    ) -> Tuple[bool, str]:
        """
        Determine if a candidate should be rejected for a specific constraint.

        Args:
            candidate: The candidate being evaluated
            constraint: The constraint being checked
            evidence_data: Evidence data (format depends on implementation)

        Returns:
            Tuple[bool, str]: (should_reject, reason)
        """
        pass

    def _gather_evidence_for_constraint(
        self, candidate: Candidate, constraint: Constraint
    ) -> List[Dict]:
        """
        Gather evidence for a constraint using the provided evidence gatherer.

        Returns:
            Whatever the evidence gatherer returns, or an empty list (after
            logging a warning) when no gatherer was configured.
        """
        if self.evidence_gatherer:
            return self.evidence_gatherer(candidate, constraint)

        logger.warning(
            "No evidence gatherer provided - cannot gather evidence"
        )
        return []

    def _log_constraint_result(
        self,
        candidate: Candidate,
        constraint: Constraint,
        score: float,
        details: Dict,
    ):
        """
        Log constraint evaluation result.

        The line is prefixed with a symbol derived from the score:
        "✓" for >= 0.8, "○" for >= 0.5, "✗" otherwise.

        Args:
            candidate: Candidate whose ``name`` is logged
            constraint: Constraint whose ``value`` is logged
            score: Score for this constraint, logged as a percentage
            details: Currently unused; kept for interface compatibility
        """
        symbol = "✓" if score >= 0.8 else "○" if score >= 0.5 else "✗"
        logger.info(
            f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}%"
        )

    def _calculate_weighted_score(
        self, constraint_scores: List[float], weights: List[float]
    ) -> float:
        """
        Calculate weighted average score.

        Scores and weights are paired positionally. If the lists differ in
        length, unpaired entries add nothing to the numerator, while the
        denominator is always the sum of all weights.

        Args:
            constraint_scores: Score for each constraint
            weights: Weight for each constraint

        Returns:
            The weighted average, or 0.0 when either list is empty or the
            weights sum to zero (avoids a ZeroDivisionError).
        """
        if not constraint_scores or not weights:
            return 0.0

        total_weight = sum(weights)
        if total_weight == 0:
            # All-zero weights carry no information; treat as "no score"
            # instead of crashing the whole candidate evaluation.
            return 0.0

        weighted_sum = sum(s * w for s, w in zip(constraint_scores, weights))
        return weighted_sum / total_weight