local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,219 @@
|
|
1
|
+
"""
|
2
|
+
Enhanced dual confidence strategy with early rejection of candidates.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from loguru import logger
|
6
|
+
|
7
|
+
from ..constraint_checking import DualConfidenceChecker
|
8
|
+
from .dual_confidence_strategy import DualConfidenceStrategy
|
9
|
+
|
10
|
+
|
11
|
+
class DualConfidenceWithRejectionStrategy(DualConfidenceStrategy):
    """
    Enhanced dual confidence strategy that rejects candidates early when they have
    high negative evidence for any constraint.

    A candidate is scored constraint by constraint, in ``constraint_ranking``
    order. For each constraint the gathered evidence is analysed into
    positive / negative / uncertainty confidences. As soon as the average
    negative confidence for one constraint exceeds
    ``EARLY_REJECTION_NEGATIVE`` the candidate is scored 0.0 and the remaining
    constraints are not examined.
    """

    # Average negative confidence above which a candidate is rejected outright.
    # NOTE: this is a fixed cut-off; it is *not* driven by ``rejection_threshold``.
    EARLY_REJECTION_NEGATIVE = 0.25

    def __init__(
        self,
        *args,
        rejection_threshold: float = 0.3,  # If negative > 30% and positive < threshold
        positive_threshold: float = 0.2,  # Minimum positive needed to overcome negative
        critical_constraint_rejection: float = 0.2,  # Even stricter for critical constraints
        **kwargs,
    ):
        """
        Set up the rejection thresholds and the modular constraint checker.

        Args:
            *args: Forwarded unchanged to ``DualConfidenceStrategy``.
            rejection_threshold: Negative-confidence level that, combined with
                ``positive_threshold``, triggers the "mixed evidence" warning.
            positive_threshold: Positive-confidence level that, combined with
                ``rejection_threshold``, triggers the "mixed evidence" warning.
            critical_constraint_rejection: Stored on the instance; it is not
                read by any method of this class.
            **kwargs: Forwarded unchanged to ``DualConfidenceStrategy``.
        """
        super().__init__(*args, **kwargs)
        self.rejection_threshold = rejection_threshold
        self.positive_threshold = positive_threshold
        self.critical_constraint_rejection = critical_constraint_rejection

        # Initialize constraint checker (using new inheritance-based system).
        # The checker is configured with its own fixed thresholds, independent
        # of the three keyword arguments above.
        self.constraint_checker = DualConfidenceChecker(
            model=self.model,
            evidence_gatherer=self._gather_evidence_for_constraint,
            negative_threshold=0.25,  # 25% negative evidence threshold
            positive_threshold=0.4,  # 40% minimum positive evidence
            uncertainty_penalty=self.uncertainty_penalty,
            negative_weight=self.negative_weight,
        )

    def _record_candidate_result(
        self, candidate, score: float, detailed_results
    ) -> float:
        """Store a finished evaluation on the candidate and update shared tracking.

        Writes ``evaluation_results`` and ``score`` onto ``candidate``, then,
        while holding ``evaluation_lock``, records the score in
        ``evaluated_candidates``, promotes the candidate to ``best_candidate``
        if it beats ``best_score``, and sets ``found_answer`` once the score
        reaches ``early_stop_threshold``.

        Returns:
            ``score``, so callers can return the helper's result directly.
        """
        candidate.evaluation_results = detailed_results
        candidate.score = score

        with self.evaluation_lock:
            self.evaluated_candidates[candidate.name] = score

            if score > self.best_score:
                self.best_score = score
                self.best_candidate = candidate
                logger.info(f"New best: {candidate.name} with {score:.2%}")

            # Check for early stop
            if score >= self.early_stop_threshold:
                logger.info(
                    f"🎯 EARLY STOP: {candidate.name} reached {score:.2%}!"
                )
                self.found_answer.set()

        return score

    def _evaluate_candidate_immediately(self, candidate) -> float:
        """Enhanced evaluation with early rejection based on negative evidence.

        Args:
            candidate: Candidate to score; must expose ``name``.

        Returns:
            The weighted average of the per-constraint scores, or 0.0 when the
            candidate is rejected early or an exception occurs. On those two
            0.0 paths nothing is stored on the candidate or in the tracking
            tables.
        """
        try:
            logger.info(
                f"Evaluating candidate: {candidate.name} with early rejection"
            )

            total_score = 0.0
            constraint_scores = []
            detailed_results = []

            for constraint in self.constraint_ranking:
                # Gather evidence for this constraint
                evidence = self._gather_evidence_for_constraint(
                    candidate, constraint
                )

                if not evidence:
                    # No evidence - high uncertainty
                    score = 0.5 - self.uncertainty_penalty
                    constraint_scores.append(score)
                    logger.info(
                        f"? {candidate.name} | {constraint.value}: No evidence found"
                    )
                    continue

                # Analyze evidence with dual confidence
                dual_evidence = [
                    self._analyze_evidence_dual_confidence(e, constraint)
                    for e in evidence
                ]

                # Calculate average scores
                evidence_count = len(dual_evidence)
                avg_positive = (
                    sum(e.positive_confidence for e in dual_evidence)
                    / evidence_count
                )
                avg_negative = (
                    sum(e.negative_confidence for e in dual_evidence)
                    / evidence_count
                )
                avg_uncertainty = (
                    sum(e.uncertainty for e in dual_evidence) / evidence_count
                )

                # EARLY REJECTION LOGIC
                # Reject if negative evidence is above 25% - simplified approach
                if avg_negative > self.EARLY_REJECTION_NEGATIVE:
                    logger.info(
                        f"❌ EARLY REJECTION: {candidate.name} - Constraint '{constraint.value}' "
                        f"has significant negative evidence ({avg_negative:.0%})"
                    )
                    return 0.0  # Immediate rejection

                # If high negative but also decent positive, continue but penalize.
                # NOTE: this only logs a warning. It can fire only when
                # ``rejection_threshold`` is below EARLY_REJECTION_NEGATIVE;
                # with the default 0.3 any such candidate was rejected above.
                if (
                    avg_negative > self.rejection_threshold
                    and avg_positive > self.positive_threshold
                ):
                    logger.warning(
                        f"⚠️ Mixed evidence for {candidate.name} - {constraint.value}: "
                        f"+{avg_positive:.0%} -{avg_negative:.0%}"
                    )

                # Calculate score using parent method
                score = self._evaluate_evidence(evidence, constraint)
                constraint_scores.append(score)

                detailed_results.append(
                    {
                        "constraint": constraint.value,
                        "score": score,
                        "positive": avg_positive,
                        "negative": avg_negative,
                        "uncertainty": avg_uncertainty,
                        "weight": constraint.weight,
                    }
                )

                # Visual feedback
                symbol = (
                    "✓" if score >= 0.8 else "○" if score >= 0.5 else "✗"
                )
                logger.info(
                    f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}% "
                    f"(+{int(avg_positive * 100)}% -{int(avg_negative * 100)}% ?{int(avg_uncertainty * 100)}%)"
                )

                # Skip remaining constraints if this one failed badly
                if score < 0.2 and constraint.weight > 0.5:
                    logger.info(
                        "⚠️ Skipping remaining constraints due to poor score on important constraint"
                    )
                    break

            # Calculate weighted average. Scores are appended in ranking order
            # and the loop only ever stops early, so the first
            # len(constraint_scores) constraints are exactly the scored ones.
            if constraint_scores:
                weights = [
                    c.weight
                    for c in self.constraint_ranking[: len(constraint_scores)]
                ]
                total_weight = sum(weights)
                # Guard against an all-zero weighting instead of relying on
                # the broad ``except`` below to swallow a ZeroDivisionError.
                if total_weight:
                    total_score = (
                        sum(s * w for s, w in zip(constraint_scores, weights))
                        / total_weight
                    )

            # Log detailed breakdown
            logger.info(f"\nDetailed analysis for {candidate.name}:")
            for result in detailed_results:
                logger.info(
                    f"  {result['constraint']}: {result['score']:.2%} "
                    f"(+{result['positive']:.0%} -{result['negative']:.0%} ?{result['uncertainty']:.0%}) "
                    f"[weight: {result['weight']:.1f}]"
                )

            logger.info(f"Final score for {candidate.name}: {total_score:.2%}")

            # Store results on the candidate and update best/early-stop tracking
            return self._record_candidate_result(
                candidate, total_score, detailed_results
            )

        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error evaluating {candidate.name}: {e}")
            return 0.0

    def _evaluate_candidate_with_constraint_checker(self, candidate) -> float:
        """
        Evaluate candidate using the new modular constraint checking system.

        This method can be used as an alternative to the existing evaluation logic.

        Returns:
            ``total_score`` from the constraint checker's result, or 0.0 if
            an exception occurs.
        """
        try:
            # Use the constraint checker
            result = self.constraint_checker.check_candidate(
                candidate, self.constraint_ranking
            )

            # Store results on the candidate and update best/early-stop tracking
            return self._record_candidate_result(
                candidate, result.total_score, result.detailed_results
            )

        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error evaluating {candidate.name}: {e}")
            return 0.0
|
@@ -0,0 +1,369 @@
|
|
1
|
+
"""
|
2
|
+
Early-stop constrained search strategy that evaluates candidates immediately
|
3
|
+
and stops when finding a very high confidence match.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import concurrent.futures
|
7
|
+
import threading
|
8
|
+
from typing import Dict, List
|
9
|
+
|
10
|
+
from loguru import logger
|
11
|
+
|
12
|
+
from ..candidates.base_candidate import Candidate
|
13
|
+
from ..constraints.base_constraint import Constraint
|
14
|
+
from .parallel_constrained_strategy import ParallelConstrainedStrategy
|
15
|
+
|
16
|
+
|
17
|
+
class EarlyStopConstrainedStrategy(ParallelConstrainedStrategy):
|
18
|
+
"""
|
19
|
+
Enhanced constrained strategy that:
|
20
|
+
1. Evaluates candidates as soon as they're found
|
21
|
+
2. Stops early when finding a very high confidence match (99%+)
|
22
|
+
3. Runs evaluation and search concurrently
|
23
|
+
"""
|
24
|
+
|
25
|
+
def __init__(
    self,
    *args,
    early_stop_threshold: float = 0.99,
    concurrent_evaluation: bool = True,
    **kwargs,
):
    """Configure early stopping and create the shared evaluation state.

    Args:
        *args: Passed straight through to ``ParallelConstrainedStrategy``.
        early_stop_threshold: Score stored as ``self.early_stop_threshold``.
        concurrent_evaluation: Flag stored as ``self.concurrent_evaluation``.
        **kwargs: Passed straight through to ``ParallelConstrainedStrategy``.
    """
    super().__init__(*args, **kwargs)

    # Configuration supplied by the caller.
    self.concurrent_evaluation = concurrent_evaluation
    self.early_stop_threshold = early_stop_threshold

    # Synchronisation primitives for the evaluation state below.
    self.evaluation_lock = threading.Lock()
    self.found_answer = threading.Event()

    # Best result seen so far; nothing has been evaluated yet.
    self.best_score = 0.0
    self.best_candidate = None

    # Bookkeeping: names submitted for evaluation, and final score per name.
    self.evaluated_candidates = {}
    self.evaluating_candidates = set()
|
45
|
+
|
46
|
+
def _parallel_search(self, combinations: List) -> List[Candidate]:
    """Execute searches in parallel with immediate candidate evaluation.

    Every combination is submitted to a thread pool. As each search
    finishes, its candidates are collected and, when
    ``concurrent_evaluation`` is enabled, each not-yet-seen candidate name is
    submitted to the same pool for ``_evaluate_candidate_immediately``.

    Once ``found_answer`` is set, result processing stops and searches that
    have not started yet are cancelled. Evaluations that were already
    submitted are still waited for.

    Args:
        combinations: Search combinations handed one by one to
            ``_execute_combination_search``.

    Returns:
        The candidates from every search processed before the method
        stopped (all of them when no early stop happens).
    """
    all_candidates = []
    evaluation_futures = []

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=self.parallel_workers
    ) as executor:
        # Submit all searches
        search_futures = {
            executor.submit(self._execute_combination_search, combo): combo
            for combo in combinations
        }

        # Process results as they complete
        for future in concurrent.futures.as_completed(search_futures):
            # Check if we should stop early
            if self.found_answer.is_set():
                logger.info(
                    f"Early stop triggered - found answer: {self.best_candidate}"
                )
                # Leaving the ``with`` block waits for every submitted task,
                # so without this the queued searches would all still run.
                # cancel() is a no-op for running or finished futures.
                for pending in search_futures:
                    pending.cancel()
                break

            combo = search_futures[future]
            try:
                candidates = future.result()
                all_candidates.extend(candidates)

                # Start evaluating candidates immediately if concurrent evaluation is enabled
                if self.concurrent_evaluation:
                    for candidate in candidates:
                        # Only this thread touches evaluating_candidates, so
                        # the check-then-add needs no lock.
                        if candidate.name not in self.evaluating_candidates:
                            self.evaluating_candidates.add(candidate.name)
                            eval_future = executor.submit(
                                self._evaluate_candidate_immediately,
                                candidate,
                            )
                            evaluation_futures.append(eval_future)

                if self.progress_callback:
                    self.progress_callback(
                        f"Found {len(candidates)} candidates, evaluating...",
                        None,
                        {
                            "phase": "parallel_search_with_eval",
                            "candidates": len(all_candidates),
                            "best_score": self.best_score,
                            "best_candidate": self.best_candidate,
                        },
                    )

            except Exception as e:
                logger.error(f"Search failed for {combo.query}: {e}")

        # Wait for evaluation futures to complete
        for future in concurrent.futures.as_completed(evaluation_futures):
            try:
                future.result()
            except Exception as e:
                logger.error(f"Evaluation failed: {e}")

    return all_candidates
|
108
|
+
|
109
|
+
def _evaluate_candidate_immediately(self, candidate: Candidate) -> float:
    """Score one candidate against the ranked constraints as soon as it is found.

    Constraints are checked in ``constraint_ranking`` order. Checking stops
    at the first constraint with weight above 0.8 whose score is below 0.3.
    The result is the plain mean of the scores collected up to that point.
    The score is recorded in ``evaluated_candidates``; a new best updates
    ``best_score`` / ``best_candidate``, and reaching
    ``early_stop_threshold`` sets ``found_answer``.

    Args:
        candidate: Candidate to evaluate; its ``name`` is used as the key.

    Returns:
        The mean constraint score, or 0.0 if no constraint was scored or an
        exception occurred.
    """
    try:
        logger.info(f"Immediately evaluating candidate: {candidate.name}")

        # One score per constraint actually checked.
        per_constraint = []

        for constraint in self.constraint_ranking:
            # Collect evidence for this constraint and turn it into a score.
            found = self._gather_evidence_for_constraint(
                candidate, constraint
            )
            score = self._evaluate_evidence(found, constraint)
            per_constraint.append(score)

            # Report the per-constraint result to the progress listener.
            if self.progress_callback:
                if score >= 0.8:
                    symbol = "✓"
                else:
                    symbol = "○"
                details = {
                    "phase": "immediate_evaluation",
                    "candidate": candidate.name,
                    "constraint": constraint.value,
                    "score": score,
                }
                self.progress_callback(
                    f"{symbol} {candidate.name} | {constraint.type.value}: {int(score * 100)}%",
                    None,
                    details,
                )

            # A bad miss on a heavily weighted constraint ends the checks.
            if score < 0.3 and constraint.weight > 0.8:
                logger.info(
                    f"Candidate {candidate.name} failed critical constraint early"
                )
                break

        # Plain mean of the collected scores; 0.0 when nothing was scored.
        total_score = (
            sum(per_constraint) / len(per_constraint)
            if per_constraint
            else 0.0
        )

        # NOTE(review): the flattened source does not show how deeply the
        # early-stop check is nested; it is placed under the new-best
        # branch, inside the lock - confirm against the original file.
        with self.evaluation_lock:
            self.evaluated_candidates[candidate.name] = total_score

            if total_score > self.best_score:
                self.best_score = total_score
                self.best_candidate = candidate.name

                logger.info(
                    f"New best candidate: {candidate.name} with score {total_score:.2f}"
                )

                # A new best that reaches the threshold ends the search.
                if total_score >= self.early_stop_threshold:
                    logger.info(
                        f"EARLY STOP: Found {candidate.name} with {total_score:.2f} confidence!"
                    )
                    self.found_answer.set()

                    if self.progress_callback:
                        summary = {
                            "phase": "early_stop",
                            "final_answer": candidate.name,
                            "confidence": total_score,
                        }
                        self.progress_callback(
                            f"Found answer: {candidate.name} ({int(total_score * 100)}% confidence)",
                            95,
                            summary,
                        )

        return total_score

    except Exception as e:
        logger.error(f"Error evaluating candidate {candidate.name}: {e}")
        return 0.0
|
186
|
+
|
187
|
+
def _progressive_constraint_search(self):
    """Override to implement early stopping.

    Runs at most three search rounds, each using a looser set of
    constraint combinations (strict, then relaxed, then individual).
    The loop ends as soon as ``self.found_answer`` is set, or when at
    least ``self.min_results_threshold`` unique candidates exist and
    ``self.best_score`` has reached 0.9.

    Side effects:
        Sets ``self.entity_type``, ``self.candidates``,
        ``self.final_answer`` and ``self.confidence``.
    """
    current_candidates = []
    # Bound before the loop: if ``found_answer`` is already set on entry,
    # the loop body never runs and the final selection below would
    # otherwise fail with UnboundLocalError.
    unique_candidates = []
    search_iterations = 0
    max_search_iterations = 3

    # Detect entity type
    self.entity_type = self._detect_entity_type()
    logger.info(f"Detected entity type: {self.entity_type}")

    while (
        search_iterations < max_search_iterations
        and not self.found_answer.is_set()
    ):
        search_iterations += 1

        # Create search combinations based on iteration
        if search_iterations == 1:
            combinations = self._create_strict_combinations()
            strictness = "strict"
        elif search_iterations == 2:
            combinations = self._create_relaxed_combinations()
            strictness = "relaxed"
        else:
            combinations = self._create_individual_combinations()
            strictness = "individual"

        logger.info(
            f"Iteration {search_iterations}: {strictness} mode with {len(combinations)} combinations"
        )

        # Run searches in parallel with immediate evaluation
        new_candidates = self._parallel_search(combinations)
        current_candidates.extend(new_candidates)

        # Check if we have enough results or found the answer
        unique_candidates = self._deduplicate_candidates(current_candidates)

        if self.found_answer.is_set():
            logger.info(f"Early stop - found answer: {self.best_candidate}")
            break

        if len(unique_candidates) >= self.min_results_threshold:
            logger.info(
                f"Found {len(unique_candidates)} candidates - checking if we need more"
            )
            # Continue only if best score is below threshold
            if self.best_score >= 0.9:
                logger.info(
                    f"Best score {self.best_score:.2f} is high enough - stopping search"
                )
                break

    # Set final candidates: keep only the entries matching the best name.
    self.candidates = [
        c for c in unique_candidates if c.name == self.best_candidate
    ]
    if not self.candidates and unique_candidates:
        # If best candidate wasn't in the list somehow, use top scored candidates
        scored_candidates = sorted(
            unique_candidates,
            key=lambda c: self.evaluated_candidates.get(c.name, 0),
            reverse=True,
        )
        self.candidates = scored_candidates[: self.candidate_limit]

    self.final_answer = self.best_candidate
    self.confidence = self.best_score
|
255
|
+
|
256
|
+
def analyze_topic(self, topic: str) -> Dict:
    """Run the parent analysis and attach early-stopping details.

    Args:
        topic: The topic passed through unchanged to the parent's
            ``analyze_topic``.

    Returns:
        The parent's result with four extra keys: ``early_stopped``,
        ``evaluated_candidates``, ``best_candidate`` and ``best_score``.
    """
    # The parent implementation handles constraint extraction.
    analysis = super().analyze_topic(topic)

    # Report how the early-stopping search ended.
    analysis.update(
        {
            "early_stopped": self.found_answer.is_set(),
            "evaluated_candidates": self.evaluated_candidates,
            "best_candidate": self.best_candidate,
            "best_score": self.best_score,
        }
    )
    return analysis
|
268
|
+
|
269
|
+
def _gather_evidence_for_constraint(
    self, candidate: Candidate, constraint: Constraint
) -> List:
    """Search for and return evidence about one candidate/constraint pair.

    Args:
        candidate: Candidate whose ``name`` is quoted in the search query.
        constraint: Constraint whose ``value`` is appended to the query.

    Returns:
        Whatever ``_extract_evidence_from_results`` produces for the search
        results, or an empty list if the search or extraction raises.
    """
    # Targeted query: quoted candidate name plus the constraint text.
    search_query = f'"{candidate.name}" {constraint.value} verification'

    try:
        return self._extract_evidence_from_results(
            self._execute_search(search_query), candidate, constraint
        )
    except Exception as exc:
        logger.error(f"Error gathering evidence for {candidate.name}: {exc}")
        return []
|
285
|
+
|
286
|
+
def _extract_evidence_from_results(
    self, results: Dict, candidate: Candidate, constraint: Constraint
) -> List:
    """Ask the model to pull constraint evidence out of search results.

    Args:
        results: Search output; only its ``"current_knowledge"`` entry is
            read.
        candidate: Candidate whose ``name`` is inserted into the prompt.
        constraint: Constraint whose ``value`` and ``type.value`` are
            inserted into the prompt.

    Returns:
        A list with at most one dict holding ``text``, ``source`` and
        ``confidence``. The list is empty when there is no content or the
        model call fails.
    """
    knowledge = results.get("current_knowledge", "")
    if not knowledge:
        return []

    # Use LLM to extract evidence; only the first 3000 characters are sent.
    prompt = f"""
Extract evidence regarding whether "{candidate.name}" satisfies this constraint:

Constraint: {constraint.value}
Constraint Type: {constraint.type.value}

Search Results:
{knowledge[:3000]}

Extract specific evidence that either supports or refutes the constraint.
Return a confidence score from 0 to 1.
"""

    collected = []
    try:
        reply = self.model.invoke(prompt).content
        score = self._extract_confidence_from_response(reply)
        collected.append(
            {
                "text": reply,
                "source": "search_results",
                "confidence": score,
            }
        )
    except Exception as exc:
        logger.error(f"Error extracting evidence: {exc}")

    return collected
|
323
|
+
|
324
|
+
def _extract_confidence_from_response(self, response: str) -> float:
|
325
|
+
"""Extract confidence score from LLM response."""
|
326
|
+
# Simple extraction - look for number between 0 and 1
|
327
|
+
import re
|
328
|
+
|
329
|
+
pattern = r"\b0?\.\d+\b|\b1\.0\b|\b1\b"
|
330
|
+
matches = re.findall(pattern, response)
|
331
|
+
|
332
|
+
if matches:
|
333
|
+
try:
|
334
|
+
return float(matches[-1]) # Use last number found
|
335
|
+
except:
|
336
|
+
pass
|
337
|
+
|
338
|
+
# Default confidence based on keywords
|
339
|
+
if any(
|
340
|
+
word in response.lower()
|
341
|
+
for word in ["definitely", "certainly", "absolutely"]
|
342
|
+
):
|
343
|
+
return 0.9
|
344
|
+
elif any(
|
345
|
+
word in response.lower()
|
346
|
+
for word in ["likely", "probably", "appears"]
|
347
|
+
):
|
348
|
+
return 0.7
|
349
|
+
elif any(
|
350
|
+
word in response.lower() for word in ["possibly", "maybe", "might"]
|
351
|
+
):
|
352
|
+
return 0.5
|
353
|
+
elif any(
|
354
|
+
word in response.lower() for word in ["unlikely", "doubtful", "not"]
|
355
|
+
):
|
356
|
+
return 0.3
|
357
|
+
|
358
|
+
return 0.5
|
359
|
+
|
360
|
+
def _evaluate_evidence(
    self, evidence: List, constraint: Constraint
) -> float:
    """Score constraint satisfaction as the mean evidence confidence.

    Args:
        evidence: Evidence dicts; an entry without a ``"confidence"`` key
            counts as 0.5.
        constraint: Not used by this implementation.

    Returns:
        The average confidence, or 0.0 when there is no evidence.
    """
    if not evidence:
        return 0.0

    # Accumulate in list order, starting from 0 as ``sum`` does.
    running_total = 0
    for item in evidence:
        running_total += item.get("confidence", 0.5)
    return running_total / len(evidence)
|