local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,782 @@
"""
Improved evidence-based search strategy for complex query resolution.

Key improvements:
1. Multi-stage candidate discovery with adaptive query generation
2. Dynamic constraint combination for cross-constraint searches
3. Query adaptation based on partial results
4. Enhanced source diversity management
"""

import itertools
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Set

from langchain_core.language_models import BaseChatModel

from ...utilities.search_utilities import remove_think_tags
from ..candidates.base_candidate import Candidate
from ..constraints.base_constraint import Constraint, ConstraintType
from ..constraints.constraint_analyzer import ConstraintAnalyzer
from ..evidence.base_evidence import EvidenceType
from ..evidence.evaluator import EvidenceEvaluator
from ..findings.repository import FindingsRepository
from .base_strategy import BaseSearchStrategy


@dataclass
class SearchAttempt:
    """Track search attempts for query adaptation."""

    query: str
    constraint_ids: List[str]
    results_count: int
    candidates_found: int
    timestamp: str
    strategy_type: str  # 'single', 'combined', 'exploratory'


class ImprovedEvidenceBasedStrategy(BaseSearchStrategy):
    """
    Improved evidence-based strategy with adaptive search capabilities.

    Key improvements:
    1. Multi-stage candidate discovery
    2. Adaptive query generation based on results
    3. Cross-constraint search optimization
    4. Source diversity tracking and enhancement
    """

    def __init__(
        self,
        model: BaseChatModel,
        search: Any,
        all_links_of_system: List[str],
        max_iterations: int = 20,
        confidence_threshold: float = 0.85,
        candidate_limit: int = 15,  # Increased for better diversity
        evidence_threshold: float = 0.6,
        max_search_iterations: int = 3,
        questions_per_iteration: int = 3,
        min_source_diversity: int = 3,  # Minimum different sources
        adaptive_query_count: int = 3,  # Number of adaptive queries per stage
    ):
        """Initialize the improved evidence-based strategy."""
        super().__init__(all_links_of_system)
        self.model = model
        self.search = search
        self.max_iterations = max_iterations
        self.confidence_threshold = confidence_threshold
        self.candidate_limit = candidate_limit
        self.evidence_threshold = evidence_threshold
        self.max_search_iterations = max_search_iterations
        self.questions_per_iteration = questions_per_iteration
        self.min_source_diversity = min_source_diversity
        self.adaptive_query_count = adaptive_query_count

        # Initialize components
        self.constraint_analyzer = ConstraintAnalyzer(model)
        self.evidence_evaluator = EvidenceEvaluator(model)
        self.findings_repository = FindingsRepository(model)

        # State tracking
        self.constraints: List[Constraint] = []
        self.candidates: List[Candidate] = []
        self.search_history: List[Dict] = []
        self.search_attempts: List[SearchAttempt] = []
        self.failed_queries: Set[str] = set()
        self.successful_patterns: List[Dict[str, Any]] = []
        self.source_types: Dict[str, Set[str]] = defaultdict(set)
        self.iteration: int = 0

    def analyze_topic(self, query: str) -> Dict:
        """Analyze a topic using improved evidence-based approach."""
        # Initialize
        self.all_links_of_system.clear()
        self.questions_by_iteration = []
        self.findings = []
        self.iteration = 0
        self.search_attempts.clear()
        self.failed_queries.clear()
        self.successful_patterns.clear()

        # Step 1: Extract initial constraints
        if self.progress_callback:
            self.progress_callback(
                "Analyzing query for constraint extraction...",
                2,
                {"phase": "constraint_analysis", "status": "starting"},
            )

        self.constraints = self.constraint_analyzer.extract_constraints(query)

        # Step 2: Multi-stage candidate discovery
        self._multi_stage_candidate_discovery()

        # Step 3: Main evidence-gathering loop with adaptive search
        while (
            self.iteration < self.max_iterations
            and not self._has_sufficient_answer()
        ):
            self.iteration += 1

            if self.progress_callback:
                progress = 15 + int((self.iteration / self.max_iterations) * 70)
                self.progress_callback(
                    f"Iteration {self.iteration}/{self.max_iterations} - {self._get_iteration_status()}",
                    progress,
                    {
                        "phase": "iteration_start",
                        "iteration": self.iteration,
                        "candidates_count": len(self.candidates),
                        "search_attempts": len(self.search_attempts),
                        "successful_patterns": len(self.successful_patterns),
                    },
                )

            # Adaptive evidence gathering
            self._adaptive_evidence_gathering()

            # Score and prune with diversity consideration
            self._score_with_diversity()

            # Adaptive candidate discovery if needed
            if len(self.candidates) < 3 or self._needs_diversity():
                self._adaptive_candidate_search()

        # Step 4: Cross-validation and final verification
        self._cross_validate_candidates()

        # Step 5: Generate final answer
        return self._synthesize_final_answer(query)

    def _multi_stage_candidate_discovery(self):
        """Multi-stage candidate discovery with different strategies."""
        stages = [
            ("distinctive", self._discover_with_distinctive_constraints),
            ("combined", self._discover_with_combined_constraints),
            ("exploratory", self._discover_with_exploratory_search),
            ("pattern_based", self._discover_with_pattern_matching),
        ]

        for stage_name, discovery_method in stages:
            if self.progress_callback:
                self.progress_callback(
                    f"Stage {stage_name}: Discovering candidates...",
                    5 + stages.index((stage_name, discovery_method)) * 3,
                    {"phase": "candidate_discovery", "stage": stage_name},
                )

            new_candidates = discovery_method()

            # Add unique candidates
            existing_names = {c.name.lower() for c in self.candidates}
            for candidate in new_candidates:
                if candidate.name.lower() not in existing_names:
                    self.candidates.append(candidate)
                    existing_names.add(candidate.name.lower())

            # Stop if we have enough diverse candidates
            if len(self.candidates) >= self.candidate_limit // 2:
                break

    def _discover_with_distinctive_constraints(self) -> List[Candidate]:
        """Discover candidates using most distinctive constraints."""
        distinctive = self._get_adaptive_distinctive_constraints()
        candidates = []

        for constraint_combo in self._generate_constraint_combinations(
            distinctive, max_size=3
        ):
            query = self._create_adaptive_search_query(constraint_combo)
            if query not in self.failed_queries:
                results = self._execute_tracked_search(
                    query, constraint_combo, "distinctive"
                )
                candidates.extend(
                    self._extract_candidates_with_context(results, query)
                )

                if candidates:  # Track successful patterns
                    self.successful_patterns.append(
                        {
                            "constraints": [c.id for c in constraint_combo],
                            "query_pattern": query,
                            "candidates_found": len(candidates),
                        }
                    )

        return candidates

    def _discover_with_combined_constraints(self) -> List[Candidate]:
        """Discover candidates using strategic constraint combinations."""
        # Combine constraints from different types for better results
        type_groups = defaultdict(list)
        for c in self.constraints:
            type_groups[c.type].append(c)

        candidates = []
        # Cross-type combinations
        for type1, type2 in itertools.combinations(type_groups.keys(), 2):
            for c1, c2 in itertools.product(
                type_groups[type1][:2], type_groups[type2][:2]
            ):
                query = self._create_cross_constraint_query([c1, c2])
                results = self._execute_tracked_search(
                    query, [c1, c2], "combined"
                )
                candidates.extend(
                    self._extract_candidates_with_context(results, query)
                )

        return candidates

    def _discover_with_exploratory_search(self) -> List[Candidate]:
        """Use exploratory searches to find unexpected candidates."""
        candidates = []

        # Generate exploratory queries
        exploratory_prompt = f"""
Based on these constraints, generate 3 exploratory search queries that might find relevant candidates:

Constraints:
{self._format_constraints_for_prompt(self.constraints[:5])}

Create queries that:
1. Use alternative phrasings or related concepts
2. Explore edge cases or unusual combinations
3. Look for historical or contextual matches

Return only the queries, one per line.
"""

        response = self.model.invoke(exploratory_prompt)
        queries = remove_think_tags(response.content).strip().split("\n")

        for query in queries[:3]:
            if query.strip() and query not in self.failed_queries:
                results = self._execute_tracked_search(
                    query, self.constraints[:3], "exploratory"
                )
                candidates.extend(
                    self._extract_candidates_with_context(results, query)
                )

        return candidates

    def _discover_with_pattern_matching(self) -> List[Candidate]:
        """Use pattern matching based on successful patterns."""
        if not self.successful_patterns:
            return []

        candidates = []

        # Adapt successful patterns
        for pattern in self.successful_patterns[:3]:
            constraint_ids = pattern["constraints"]
            constraints = [
                c for c in self.constraints if c.id in constraint_ids
            ]

            # Create variations of successful queries
            adapted_query = self._adapt_successful_query(
                pattern["query_pattern"], constraints
            )
            results = self._execute_tracked_search(
                adapted_query, constraints, "pattern_based"
            )
            candidates.extend(
                self._extract_candidates_with_context(results, adapted_query)
            )

        return candidates

    def _adaptive_evidence_gathering(self):
        """Gather evidence with adaptive query generation."""
        for candidate in self.candidates[:5]:
            unverified = candidate.get_unverified_constraints(self.constraints)

            if not unverified:
                continue

            # Sort by weight and group by type
            unverified.sort(key=lambda c: c.weight, reverse=True)
            type_groups = defaultdict(list)
            for c in unverified:
                type_groups[c.type].append(c)

            # Try different evidence gathering strategies
            for constraint_type, constraints in type_groups.items():
                # Try single constraint
                for c in constraints[:2]:
                    query = self._create_evidence_query(candidate, [c])
                    results = self._execute_tracked_search(
                        query, [c], "evidence"
                    )
                    evidence = self.evidence_evaluator.extract_evidence(
                        results.get("current_knowledge", ""), candidate.name, c
                    )
                    candidate.add_evidence(c.id, evidence)

                # Try combined constraints of same type
                if len(constraints) > 1:
                    query = self._create_evidence_query(
                        candidate, constraints[:2]
                    )
                    results = self._execute_tracked_search(
                        query, constraints[:2], "evidence_combined"
                    )

                    for c in constraints[:2]:
                        evidence = self.evidence_evaluator.extract_evidence(
                            results.get("current_knowledge", ""),
                            candidate.name,
                            c,
                        )
                        if (
                            c.id not in candidate.evidence
                            or evidence.confidence
                            > candidate.evidence[c.id].confidence
                        ):
                            candidate.add_evidence(c.id, evidence)

    def _create_adaptive_search_query(
        self, constraints: List[Constraint]
    ) -> str:
        """Create adaptive search queries based on past performance."""
        # Check if similar constraint combinations have been successful
        constraint_ids = {c.id for c in constraints}

        for pattern in self.successful_patterns:
            if (
                len(constraint_ids.intersection(pattern["constraints"]))
                >= len(constraint_ids) // 2
            ):
                # Adapt successful pattern
                return self._adapt_successful_query(
                    pattern["query_pattern"], constraints
                )

        # Check failed queries to avoid repetition
        base_query = self._create_base_search_query(constraints)
        if base_query in self.failed_queries:
            # Modify query to avoid failure
            return self._modify_failed_query(base_query, constraints)

        return base_query

    def _create_cross_constraint_query(
        self, constraints: List[Constraint]
    ) -> str:
        """Create queries that leverage relationships between constraints."""
        prompt = f"""
Create a search query that finds candidates satisfying BOTH/ALL of these constraints:

{self._format_constraints_for_prompt(constraints)}

The query should:
1. Find entities that match both/all constraints
2. Use operators to require both/all conditions
3. Focus on finding specific names or entities

Return only the search query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def _create_evidence_query(
        self, candidate: Candidate, constraints: List[Constraint]
    ) -> str:
        """Create targeted evidence queries."""
        constraint_desc = self._format_constraints_for_prompt(constraints)

        prompt = f"""
Create a search query to verify if "{candidate.name}" satisfies these constraints:

{constraint_desc}

The query should:
1. Include the candidate name
2. Target the specific constraint requirements
3. Find factual evidence, not opinions

Return only the search query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def _score_with_diversity(self):
        """Score candidates considering source diversity."""
        for candidate in self.candidates:
            # Base score from evidence
            candidate.calculate_score(self.constraints)

            # Diversity bonus
            diversity_score = self._calculate_diversity_score(candidate)
            candidate.score = 0.8 * candidate.score + 0.2 * diversity_score

            # Track source types
            for evidence in candidate.evidence.values():
                if hasattr(evidence, "source"):
                    self.source_types[candidate.name].add(evidence.source)

        # Sort by adjusted score
        self.candidates.sort(key=lambda c: c.score, reverse=True)

        # Prune while maintaining some diversity
        self._prune_with_diversity()

    def _cross_validate_candidates(self):
        """Cross-validate top candidates using different approaches."""
        if not self.candidates:
            return

        top_candidates = self.candidates[:3]

        for candidate in top_candidates:
            # Validate using different search engines or approaches
            validation_queries = self._generate_validation_queries(candidate)

            for query in validation_queries:
                results = self._execute_tracked_search(
                    query, self.constraints, "validation"
                )

                # Update evidence if better found
                for constraint in self.constraints:
                    evidence = self.evidence_evaluator.extract_evidence(
                        results.get("current_knowledge", ""),
                        candidate.name,
                        constraint,
                    )

                    if (
                        constraint.id not in candidate.evidence
                        or evidence.confidence
                        > candidate.evidence[constraint.id].confidence
                    ):
                        candidate.add_evidence(constraint.id, evidence)

    def _execute_tracked_search(
        self, query: str, constraints: List[Constraint], strategy_type: str
    ) -> Dict:
        """Execute search with tracking for adaptation."""
        results = self._execute_search(query)

        # Track the attempt
        candidates_found = len(
            self._extract_candidates_with_context(results, query)
        )
        attempt = SearchAttempt(
            query=query,
            constraint_ids=[c.id for c in constraints],
            results_count=len(results.get("all_links_of_system", [])),
            candidates_found=candidates_found,
            timestamp=datetime.utcnow().isoformat(),
            strategy_type=strategy_type,
        )
        self.search_attempts.append(attempt)

        # Mark as failed if no results
        if candidates_found == 0:
            self.failed_queries.add(query)

        return results

    def _needs_diversity(self) -> bool:
        """Check if we need more diverse candidates."""
        if len(self.candidates) < 3:
            return True

        # Check source diversity
        top_sources = self.source_types.get(self.candidates[0].name, set())
        return len(top_sources) < self.min_source_diversity

    def _generate_constraint_combinations(
        self, constraints: List[Constraint], max_size: int = 3
    ) -> List[List[Constraint]]:
        """Generate strategic constraint combinations."""
        combinations = []

        # Single constraints
        combinations.extend([[c] for c in constraints])

        # Pairs
        for size in range(2, min(len(constraints), max_size) + 1):
            for combo in itertools.combinations(constraints, size):
                combinations.append(list(combo))

        return combinations

    def _format_constraints_for_prompt(
        self, constraints: List[Constraint]
    ) -> str:
        """Format constraints for LLM prompts."""
        formatted = []
        for c in constraints:
            formatted.append(
                f"- {c.type.value}: {c.description} (weight: {c.weight:.2f})"
            )
        return "\n".join(formatted)

    def _adapt_successful_query(
        self, pattern_query: str, constraints: List[Constraint]
    ) -> str:
        """Adapt a successful query pattern with new constraints."""
        prompt = f"""
Adapt this successful search query pattern with new constraints:

Original query: {pattern_query}

New constraints:
{self._format_constraints_for_prompt(constraints)}

Create a similar query structure but with the new constraint values.
Return only the adapted query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def _modify_failed_query(
        self, failed_query: str, constraints: List[Constraint]
    ) -> str:
        """Modify a failed query to try a different approach."""
        prompt = f"""
This search query returned no results: {failed_query}

Constraints we're trying to satisfy:
{self._format_constraints_for_prompt(constraints)}

Create an alternative query that:
1. Uses different keywords or phrases
2. Tries a different search approach
3. Still targets the same constraints

Return only the modified query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def _calculate_diversity_score(self, candidate: Candidate) -> float:
        """Calculate diversity score for a candidate."""
        if not candidate.evidence:
            return 0.0

        # Source diversity
        sources = self.source_types.get(candidate.name, set())
        source_score = min(len(sources) / self.min_source_diversity, 1.0)

        # Evidence type diversity
        evidence_types = {e.type for e in candidate.evidence.values()}
        type_score = len(evidence_types) / len(EvidenceType)

        # Confidence distribution (prefer balanced confidence)
        confidences = [e.confidence for e in candidate.evidence.values()]
        if confidences:
            variance = sum((c - 0.7) ** 2 for c in confidences) / len(
                confidences
            )
            confidence_score = 1.0 / (1.0 + variance)
        else:
            confidence_score = 0.0

        return (source_score + type_score + confidence_score) / 3.0

    def _prune_with_diversity(self):
        """Prune candidates while maintaining diversity."""
        if len(self.candidates) <= self.candidate_limit:
            return

        # Keep top candidates
        kept = self.candidates[: self.candidate_limit // 2]
        remaining = self.candidates[self.candidate_limit // 2 :]

        # Add diverse candidates from remaining
        for candidate in remaining:
            if len(kept) >= self.candidate_limit:
                break

            # Check if this candidate adds diversity
            if self._adds_diversity(candidate, kept):
                kept.append(candidate)

        # Fill remaining slots with highest scoring
        for candidate in remaining:
            if len(kept) >= self.candidate_limit:
                break
            if candidate not in kept:
                kept.append(candidate)

        self.candidates = kept

    def _adds_diversity(
        self, candidate: Candidate, existing: List[Candidate]
    ) -> bool:
        """Check if a candidate adds diversity to the existing set."""
        # Check source diversity
        candidate_sources = self.source_types.get(candidate.name, set())
        existing_sources = set()
        for c in existing:
            existing_sources.update(self.source_types.get(c.name, set()))

        new_sources = candidate_sources - existing_sources
        if new_sources:
            return True

        # Check constraint coverage
        candidate_constraints = set(candidate.evidence.keys())
        existing_constraints = set()
        for c in existing:
            existing_constraints.update(c.evidence.keys())

        new_constraints = candidate_constraints - existing_constraints
        return len(new_constraints) > 0

    def _generate_validation_queries(self, candidate: Candidate) -> List[str]:
        """Generate validation queries for cross-checking."""
        queries = []

        # Query combining multiple constraints
        high_weight_constraints = sorted(
            self.constraints, key=lambda c: c.weight, reverse=True
        )[:3]
        combined_query = f'"{candidate.name}" ' + " ".join(
            c.to_search_terms() for c in high_weight_constraints
        )
        queries.append(combined_query)

        # Query with alternative phrasing
        alt_prompt = f"""
Create an alternative search query to validate "{candidate.name}" as the answer.
Use different keywords but same intent.

Return only the query.
"""
        response = self.model.invoke(alt_prompt)
        queries.append(remove_think_tags(response.content).strip())

        # Source-specific query
        if self.source_types.get(candidate.name):
            source_query = f'"{candidate.name}" site:{list(self.source_types[candidate.name])[0]}'
            queries.append(source_query)

        return queries

    def _extract_candidates_with_context(
        self, results: Dict, query: str
    ) -> List[Candidate]:
        """Extract candidates with context awareness."""
        # Use the original extraction method but with context
        candidates = self._extract_candidates_from_results(results, query)

        # Add context metadata
        for candidate in candidates:
            candidate.metadata["discovery_query"] = query
            candidate.metadata["discovery_stage"] = self.iteration

        return candidates

    def _create_base_search_query(self, constraints: List[Constraint]) -> str:
        """Create a base search query from constraints."""
        # Use an improved prompt that considers constraint relationships
        prompt = f"""
Create a search query that finds specific entities satisfying these constraints:

{self._format_constraints_for_prompt(constraints)}

Guidelines:
1. Focus on finding names/entities, not general information
2. Use the most distinctive constraints
3. Combine constraints effectively
4. Keep the query concise but comprehensive

Return only the search query.
"""

        response = self.model.invoke(prompt)
        return remove_think_tags(response.content).strip()

    def _adaptive_candidate_search(self):
        """Adaptively search for more candidates based on current state."""
        # Analyze what types of candidates we're missing
        covered_constraints = set()
        for candidate in self.candidates[:5]:
            covered_constraints.update(candidate.evidence.keys())

        uncovered = [
            c for c in self.constraints if c.id not in covered_constraints
        ]

        if uncovered:
            # Search specifically for uncovered constraints
            queries = []
            for constraint_group in self._generate_constraint_combinations(
                uncovered[:5], max_size=2
            ):
                query = self._create_adaptive_search_query(constraint_group)
                queries.append((query, constraint_group))

            for query, constraints in queries[: self.adaptive_query_count]:
                results = self._execute_tracked_search(
                    query, constraints, "adaptive"
                )
                new_candidates = self._extract_candidates_with_context(
                    results, query
                )

                # Add unique candidates
                existing_names = {c.name.lower() for c in self.candidates}
                for candidate in new_candidates:
                    if candidate.name.lower() not in existing_names:
                        self.candidates.append(candidate)
                        existing_names.add(candidate.name.lower())

    def _get_adaptive_distinctive_constraints(self) -> List[Constraint]:
        """Get distinctive constraints with adaptive prioritization."""
        # Start with basic prioritization
        priority_order = [
            ConstraintType.NAME_PATTERN,
            ConstraintType.LOCATION,
            ConstraintType.EVENT,
            ConstraintType.STATISTIC,
            ConstraintType.COMPARISON,
            ConstraintType.PROPERTY,
            ConstraintType.TEMPORAL,
            ConstraintType.EXISTENCE,
        ]

        # Adjust based on successful patterns
        if self.successful_patterns:
            # Count successful constraint types
            type_success = defaultdict(int)
            for pattern in self.successful_patterns:
                for constraint_id in pattern["constraints"]:
                    constraint = next(
                        (c for c in self.constraints if c.id == constraint_id),
                        None,
                    )
                    if constraint:
                        type_success[constraint.type] += pattern[
                            "candidates_found"
                        ]

            # Sort by success rate
            priority_order = sorted(
                priority_order,
                key=lambda t: type_success.get(t, 0),
                reverse=True,
            )

        # Sort constraints by adjusted priority
        sorted_constraints = sorted(
            self.constraints,
            key=lambda c: (priority_order.index(c.type), -c.weight),
        )

        return sorted_constraints[:5]