local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,424 @@
1
+ """
2
+ Dual confidence constraint checker implementation.
3
+
4
+ This implementation uses dual confidence scoring (positive/negative/uncertainty)
5
+ to evaluate constraints and make rejection decisions.
6
+ """
7
+
8
+ from typing import Dict, List, Tuple
9
+
10
+ from loguru import logger
11
+
12
+ from ..candidates.base_candidate import Candidate
13
+ from ..constraints.base_constraint import Constraint
14
+ from .base_constraint_checker import (
15
+ BaseConstraintChecker,
16
+ ConstraintCheckResult,
17
+ )
18
+ from .evidence_analyzer import ConstraintEvidence, EvidenceAnalyzer
19
+
20
+
21
class DualConfidenceChecker(BaseConstraintChecker):
    """
    Constraint checker using dual confidence scoring.

    This checker:
    1. Optionally pre-screens the candidate with a single LLM quality rating
    2. Analyzes evidence using positive/negative/uncertainty scores
    3. Re-gathers evidence when the averaged uncertainty is too high
    4. Makes rejection decisions based on confidence thresholds
    5. Provides a detailed scoring breakdown per constraint

    NOTE(review): ``self.model``, ``_gather_evidence_for_constraint`` and
    ``_calculate_weighted_score`` are not defined in this class; they are
    presumably supplied by ``BaseConstraintChecker`` - confirm against it.
    """

    def __init__(
        self,
        *args,
        negative_threshold: float = 0.25,  # Reject if negative evidence > 25%
        positive_threshold: float = 0.4,  # Reject if positive evidence < 40%
        uncertainty_penalty: float = 0.2,
        negative_weight: float = 0.5,
        uncertainty_threshold: float = 0.6,  # Re-evaluate if uncertainty > 60%
        max_reevaluations: int = 2,  # Maximum re-evaluation rounds
        **kwargs,
    ):
        """
        Initialize dual confidence checker.

        Args:
            *args: Positional arguments forwarded to the base checker.
            negative_threshold: Reject when average negative confidence
                exceeds this value.
            positive_threshold: Reject when average positive confidence
                falls below this value.
            uncertainty_penalty: Penalty for uncertain evidence; forwarded
                to the evidence analyzer and subtracted from the neutral
                score (0.5) when no evidence is found.
            negative_weight: Weight for negative evidence; forwarded to the
                evidence analyzer.
            uncertainty_threshold: Re-evaluate when average uncertainty
                exceeds this value.
            max_reevaluations: Maximum number of re-evaluation rounds.
            **kwargs: Keyword arguments forwarded to the base checker.
        """
        super().__init__(*args, **kwargs)

        self.negative_threshold = negative_threshold
        self.positive_threshold = positive_threshold
        self.uncertainty_penalty = uncertainty_penalty
        self.negative_weight = negative_weight
        self.uncertainty_threshold = uncertainty_threshold
        self.max_reevaluations = max_reevaluations

        # The analyzer shares the checker's model.
        self.evidence_analyzer = EvidenceAnalyzer(self.model)

    def check_candidate(
        self,
        candidate: Candidate,
        constraints: List[Constraint],
        original_query: str = None,
    ) -> ConstraintCheckResult:
        """
        Check a candidate using dual confidence analysis with LLM pre-screening.

        Args:
            candidate: Candidate to evaluate.
            constraints: Constraints the candidate is evaluated against.
            original_query: Original user question; when falsy the LLM
                pre-screen is skipped.

        Returns:
            A ConstraintCheckResult. ``total_score`` is 0.0 when the
            candidate is rejected or when no constraints were evaluated;
            otherwise it is the weighted score over all constraints.
        """
        logger.info(f"Checking candidate: {candidate.name} (dual confidence)")

        # LLM PRE-SCREENING: Check all constraints in one call to save SearXNG capacity
        pre_screen_result = self._llm_prescreen_candidate(
            candidate, constraints, original_query
        )
        if pre_screen_result["should_reject"]:
            logger.info(
                f"🚫 LLM pre-screen rejected {candidate.name}: {pre_screen_result['reason']}"
            )
            # Populate the same fields as the regular return below so both
            # paths build the result consistently.
            return ConstraintCheckResult(
                candidate=candidate,
                total_score=0.0,
                constraint_scores={},
                should_reject=True,
                rejection_reason=pre_screen_result["reason"],
                detailed_results=pre_screen_result["detailed_results"],
            )

        constraint_scores = {}
        detailed_results = []
        rejection_reason = None
        should_reject = False

        for constraint in constraints:
            # Initial evaluation, repeated while the result stays too uncertain.
            result = self._evaluate_constraint_with_reevaluation(
                candidate, constraint
            )

            avg_positive = result["positive"]
            avg_negative = result["negative"]
            avg_uncertainty = result["uncertainty"]
            score = result["score"]
            reevaluation_count = result.get("reevaluation_count", 0)

            # Check for rejection based on final results
            reject, reason = self.should_reject_candidate_from_averages(
                candidate, constraint, avg_positive, avg_negative
            )

            # Only the first rejection is recorded; later constraints are
            # still evaluated so the breakdown stays complete.
            if reject and not should_reject:
                should_reject = True
                rejection_reason = reason

            # NOTE: keyed by constraint value, so two constraints with the
            # same value share one entry (the later one wins).
            constraint_scores[constraint.value] = {
                "total": score,
                "positive": avg_positive,
                "negative": avg_negative,
                "uncertainty": avg_uncertainty,
                "weight": constraint.weight,
                "reevaluation_count": reevaluation_count,
            }

            detailed_results.append(
                {
                    "constraint": constraint.value,
                    "score": score,
                    "positive": avg_positive,
                    "negative": avg_negative,
                    "uncertainty": avg_uncertainty,
                    "weight": constraint.weight,
                    "type": constraint.type.value,
                    "reevaluation_count": reevaluation_count,
                }
            )

            self._log_constraint_result_detailed(
                candidate,
                constraint,
                score,
                avg_positive,
                avg_negative,
                avg_uncertainty,
                reevaluation_count,
            )

        # A rejected candidate, or one with nothing evaluated, scores zero.
        if should_reject or not detailed_results:
            total_score = 0.0
        else:
            weights = [r["weight"] for r in detailed_results]
            scores = [r["score"] for r in detailed_results]
            total_score = self._calculate_weighted_score(scores, weights)

        logger.info(f"Final score for {candidate.name}: {total_score:.2%}")

        return ConstraintCheckResult(
            candidate=candidate,
            total_score=total_score,
            constraint_scores=constraint_scores,
            should_reject=should_reject,
            rejection_reason=rejection_reason,
            detailed_results=detailed_results,
        )

    @staticmethod
    def _average_confidences(
        dual_evidence: List[ConstraintEvidence],
    ) -> Tuple[float, float, float]:
        """Return (positive, negative, uncertainty) means of a non-empty list."""
        count = len(dual_evidence)
        avg_positive = sum(e.positive_confidence for e in dual_evidence) / count
        avg_negative = sum(e.negative_confidence for e in dual_evidence) / count
        avg_uncertainty = sum(e.uncertainty for e in dual_evidence) / count
        return avg_positive, avg_negative, avg_uncertainty

    def _evaluate_constraint_with_reevaluation(
        self, candidate: Candidate, constraint: Constraint
    ) -> Dict:
        """
        Evaluate a constraint, re-gathering evidence while uncertainty is high.

        Returns:
            Dict with keys ``positive``, ``negative``, ``uncertainty``,
            ``score``, ``evidence_list`` and ``reevaluation_count``.
        """
        reevaluation_count = 0
        # A negative setting means "never re-evaluate"; the first
        # evaluation must still run.
        max_rounds = max(self.max_reevaluations, 0)

        while True:
            # Gather evidence (fresh each time for re-evaluation)
            evidence_list = self._gather_evidence_for_constraint(
                candidate, constraint
            )

            if not evidence_list:
                # No evidence found: fully uncertain, neutral score minus penalty.
                return {
                    "positive": 0.0,
                    "negative": 0.0,
                    "uncertainty": 1.0,
                    "score": 0.5 - self.uncertainty_penalty,
                    "evidence_list": [],
                    "reevaluation_count": reevaluation_count,
                }

            # Analyze with dual confidence
            dual_evidence = [
                self.evidence_analyzer.analyze_evidence_dual_confidence(
                    e, constraint
                )
                for e in evidence_list
            ]
            avg_positive, avg_negative, avg_uncertainty = (
                self._average_confidences(dual_evidence)
            )

            # NOTE(review): evaluate_evidence_list receives the raw evidence
            # and may analyze it a second time - confirm in EvidenceAnalyzer.
            score = self.evidence_analyzer.evaluate_evidence_list(
                evidence_list,
                constraint,
                self.uncertainty_penalty,
                self.negative_weight,
            )

            # Retry only if rounds remain, uncertainty is high, and the
            # averages would not already trigger a rejection.
            needs_reevaluation = (
                reevaluation_count < max_rounds
                and avg_uncertainty > self.uncertainty_threshold
                and not self._should_early_reject(avg_positive, avg_negative)
            )
            if needs_reevaluation:
                reevaluation_count += 1
                logger.info(
                    f"🔄 Re-evaluating {candidate.name} | {constraint.value} "
                    f"(round {reevaluation_count}) - high uncertainty: {avg_uncertainty:.0%}"
                )
                continue

            # Final result or early rejection
            if reevaluation_count > 0:
                logger.info(
                    f"✅ Final evaluation for {candidate.name} | {constraint.value} "
                    f"after {reevaluation_count} re-evaluation(s)"
                )

            return {
                "positive": avg_positive,
                "negative": avg_negative,
                "uncertainty": avg_uncertainty,
                "score": score,
                "evidence_list": evidence_list,
                "reevaluation_count": reevaluation_count,
            }

    def _should_early_reject(
        self, avg_positive: float, avg_negative: float
    ) -> bool:
        """Check if candidate should be rejected early (before re-evaluation)."""
        return (
            avg_negative > self.negative_threshold
            or avg_positive < self.positive_threshold
        )

    def should_reject_candidate_from_averages(
        self,
        candidate: Candidate,
        constraint: Constraint,
        avg_positive: float,
        avg_negative: float,
    ) -> Tuple[bool, str]:
        """
        Determine rejection based on average confidence scores.

        Returns:
            ``(True, reason)`` when a threshold is violated, otherwise
            ``(False, "")``. The negative-evidence check takes precedence.
        """
        # PRIMARY REJECTION: High negative evidence
        if avg_negative > self.negative_threshold:
            reason = f"High negative evidence ({avg_negative:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        # SECONDARY REJECTION: Low positive evidence
        if avg_positive < self.positive_threshold:
            reason = f"Insufficient positive evidence ({avg_positive:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        return False, ""

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        dual_evidence: List[ConstraintEvidence],
    ) -> Tuple[bool, str]:
        """
        Determine rejection based on dual confidence scores.

        An empty evidence list never rejects. Otherwise the evidence is
        averaged and the decision is delegated to
        ``should_reject_candidate_from_averages`` so both entry points
        apply the same thresholds.
        """
        if not dual_evidence:
            return False, ""

        avg_positive, avg_negative, _ = self._average_confidences(dual_evidence)
        return self.should_reject_candidate_from_averages(
            candidate, constraint, avg_positive, avg_negative
        )

    def _log_constraint_result_detailed(
        self,
        candidate,
        constraint,
        score,
        positive,
        negative,
        uncertainty,
        reevaluation_count=0,
    ):
        """Log one constraint's score, confidence breakdown and re-evaluation count."""
        if score >= 0.8:
            symbol = "✓"
        elif score >= 0.5:
            symbol = "○"
        else:
            symbol = "✗"

        # Add re-evaluation indicator
        reeval_indicator = (
            f" [R{reevaluation_count}]" if reevaluation_count > 0 else ""
        )

        logger.info(
            f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}% "
            f"(+{int(positive * 100)}% -{int(negative * 100)}% ?{int(uncertainty * 100)}%){reeval_indicator}"
        )

    @staticmethod
    def _response_text(response) -> str:
        """Return the text of a model response (plain string or object with ``content``)."""
        content = getattr(response, "content", response)
        return content if isinstance(content, str) else str(content)

    def _llm_prescreen_candidate(
        self, candidate, constraints, original_query=None
    ):
        """
        Simple quality check for answer candidates.

        Asks the model to rate the candidate as an answer to
        ``original_query`` on a 0-100 scale and rejects ratings below 50.
        ``constraints`` is accepted for interface compatibility but is not
        used in the prompt.

        Returns:
            Dict with ``should_reject`` (bool), ``reason`` (str) and
            ``detailed_results`` (list). The candidate is accepted when no
            query is given, when the score cannot be parsed, or when the
            model call fails (best-effort pre-screen).
        """
        if not original_query:
            return {
                "should_reject": False,
                "reason": "No original query provided",
                "detailed_results": [],
            }

        prompt = f"""Question: {original_query}
Answer: {candidate.name}

Is this a good answer to the question? Rate 0-100 where:
- 90-100: Excellent direct answer
- 70-89: Good answer
- 50-69: Partial answer
- 30-49: Weak answer
- 0-29: Poor/wrong answer

Just give the number:"""

        try:
            import re

            # Prefer ``invoke``: chat models return a message object from it,
            # whereas ``generate`` on such models does not accept a bare
            # string. Fall back to ``generate`` for models lacking ``invoke``.
            invoke = getattr(self.model, "invoke", None)
            if callable(invoke):
                response = invoke(prompt)
            else:
                response = self.model.generate(prompt)
            response_text = self._response_text(response)

            # Parse confidence score
            confidence_match = re.search(r"(\d{1,3})", response_text.strip())

            if confidence_match:
                # Clamp so a reply such as "150" cannot yield a confidence > 1.
                quality_score = min(int(confidence_match.group(1)), 100)

                # Accept good answers (50+ out of 100)
                if quality_score >= 50:
                    return {
                        "should_reject": False,
                        "reason": f"Good answer quality: {quality_score}%",
                        "detailed_results": [
                            {
                                "constraint": "answer_quality",
                                "positive_confidence": quality_score / 100.0,
                                "source": "answer_quality_check",
                            }
                        ],
                    }

                return {
                    "should_reject": True,
                    "reason": f"Poor answer quality: {quality_score}%",
                    "detailed_results": [
                        {
                            "constraint": "answer_quality",
                            "negative_confidence": (100 - quality_score)
                            / 100.0,
                            "source": "answer_quality_check",
                        }
                    ],
                }

            # Parsing failed - accept by default
            return {
                "should_reject": False,
                "reason": "Could not parse quality score - accepting",
                "detailed_results": [],
            }

        except Exception as e:
            # Deliberate best-effort: a failing pre-screen must not block
            # the full evidence-based evaluation.
            logger.warning(
                f"Fast LLM pre-screening failed for {candidate.name}: {e}"
            )
            return {
                "should_reject": False,
                "reason": "",
                "detailed_results": [],
            }
@@ -0,0 +1,174 @@
1
+ """
2
+ Evidence analysis for constraint checking.
3
+
4
+ This module provides dual confidence evidence analysis that separates
5
+ positive evidence, negative evidence, and uncertainty.
6
+ """
7
+
8
+ import re
9
+ from dataclasses import dataclass
10
+ from typing import Dict, List
11
+
12
+ from langchain_core.language_models import BaseChatModel
13
+ from loguru import logger
14
+
15
+ from ..constraints.base_constraint import Constraint
16
+
17
+
18
@dataclass
class ConstraintEvidence:
    """One piece of evidence for a constraint, scored on three separate axes.

    Positive and negative confidence are tracked independently, with the
    remaining doubt recorded as its own value rather than folded into a
    single score.
    """

    # How sure we are that the constraint IS satisfied.
    positive_confidence: float
    # How sure we are that the constraint is NOT satisfied.
    negative_confidence: float
    # How uncertain we are (neither positive nor negative).
    uncertainty: float
    # The evidence text (or an excerpt of it) that was scored.
    evidence_text: str
    # Label describing where the evidence came from.
    source: str
29
+
30
+
31
class EvidenceAnalyzer:
    """
    Analyzes evidence using dual confidence scoring.

    This approach separates:
    - Positive confidence: Evidence that constraint IS satisfied
    - Negative confidence: Evidence that constraint is NOT satisfied
    - Uncertainty: Lack of clear evidence either way
    """

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence analyzer.

        Args:
            model: Chat model whose ``invoke`` method is called to rate
                each piece of evidence.
        """
        self.model = model

    def analyze_evidence_dual_confidence(
        self, evidence: Dict, constraint: Constraint
    ) -> ConstraintEvidence:
        """Analyze evidence to extract dual confidence scores.

        The model is asked for POSITIVE / NEGATIVE / UNCERTAINTY scores,
        which are then normalized so they sum to 1.0.

        Args:
            evidence: Evidence dictionary; the ``"text"`` and ``"source"``
                keys are read (both optional).
            constraint: Constraint being checked; its ``value`` and
                ``type.value`` are interpolated into the prompt.

        Returns:
            ConstraintEvidence with the normalized scores. If the model call
            or the parsing raises, or all three scores are zero, the result
            is the high-uncertainty default (0.1 / 0.1 / 0.8).
        """
        # ``or ""`` also covers an explicit ``"text": None`` entry, which
        # would otherwise raise on slicing before the try block is entered.
        text = evidence.get("text") or ""
        source = evidence.get("source", "search")

        # Use LLM to analyze evidence with dual confidence
        prompt = f"""
Analyze this evidence for the constraint "{constraint.value}" (type: {constraint.type.value}).

Evidence:
{text[:1000]}

Provide three confidence scores (0-1):
1. POSITIVE_CONFIDENCE: How confident are you that this constraint IS satisfied?
2. NEGATIVE_CONFIDENCE: How confident are you that this constraint is NOT satisfied?
3. UNCERTAINTY: How uncertain are you (lack of clear evidence)?

The three scores should approximately sum to 1.0.

Format:
POSITIVE: [score]
NEGATIVE: [score]
UNCERTAINTY: [score]
"""

        try:
            response = self.model.invoke(prompt).content

            # Extract scores. Note that a label that cannot be parsed falls
            # back to 0.1, so a wholly unparseable response normalizes to
            # equal thirds rather than hitting the zero-total branch below.
            positive = self._extract_score(response, "POSITIVE")
            negative = self._extract_score(response, "NEGATIVE")
            uncertainty = self._extract_score(response, "UNCERTAINTY")

            # Normalize if needed
            total = positive + negative + uncertainty
            if total > 0:
                positive /= total
                negative /= total
                uncertainty /= total
            else:
                # Default to high uncertainty
                positive, negative, uncertainty = 0.1, 0.1, 0.8

        except Exception as e:
            # Best-effort boundary around the model call: log and degrade
            # to high uncertainty instead of failing the whole check.
            logger.error(f"Error analyzing evidence: {e}")
            positive, negative, uncertainty = 0.1, 0.1, 0.8

        return ConstraintEvidence(
            positive_confidence=positive,
            negative_confidence=negative,
            uncertainty=uncertainty,
            evidence_text=text[:500],
            source=source,
        )

    def _extract_score(self, text: str, label: str) -> float:
        """Extract confidence score from LLM response.

        Matches ``LABEL: score`` case-insensitively, with optional square
        brackets around the number. The longer ``LABEL_CONFIDENCE:`` form
        used in the prompt's own wording is accepted as well.

        Args:
            text: Raw response text from the model.
            label: Score label to look for, e.g. ``"POSITIVE"``.

        Returns:
            The parsed score, or 0.1 when the label is absent or the number
            cannot be converted.
        """
        # re.escape keeps a label containing regex metacharacters literal.
        pattern = (
            rf"{re.escape(label)}(?:_CONFIDENCE)?:\s*\[?(\d*\.?\d+)\]?"
        )
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            return 0.1  # Default low score
        try:
            return float(match.group(1))
        except ValueError:
            return 0.1  # Default low score

    def evaluate_evidence_list(
        self,
        evidence_list: List[Dict],
        constraint: Constraint,
        uncertainty_penalty: float = 0.2,
        negative_weight: float = 0.5,
    ) -> float:
        """
        Evaluate a list of evidence using dual confidence scoring.

        Args:
            evidence_list: List of evidence dictionaries
            constraint: The constraint being evaluated
            uncertainty_penalty: Penalty for uncertainty
            negative_weight: Weight for negative evidence

        Returns:
            float: Overall score between 0.0 and 1.0
        """
        if not evidence_list:
            # No evidence means high uncertainty. Clamp so that a large
            # penalty cannot push the result outside the documented range.
            return max(0.0, min(1.0, 0.5 - uncertainty_penalty))

        # Convert evidence to dual confidence format
        constraint_evidence = [
            self.analyze_evidence_dual_confidence(evidence, constraint)
            for evidence in evidence_list
        ]

        # Average each axis over all pieces of evidence
        evidence_count = len(constraint_evidence)
        avg_positive = (
            sum(e.positive_confidence for e in constraint_evidence)
            / evidence_count
        )
        avg_negative = (
            sum(e.negative_confidence for e in constraint_evidence)
            / evidence_count
        )
        avg_uncertainty = (
            sum(e.uncertainty for e in constraint_evidence) / evidence_count
        )

        # Calculate final score
        # High positive + low negative = high score
        # Low positive + high negative = low score
        # High uncertainty = penalty
        score = (
            avg_positive
            - (avg_negative * negative_weight)
            - (avg_uncertainty * uncertainty_penalty)
        )

        # Clamp to [0, 1]
        return max(0.0, min(1.0, score))