local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +5 -3
- local_deep_research/web/database/models.py +51 -2
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +51 -61
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +227 -41
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +310 -103
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
- local_deep_research-0.5.0.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/advanced_search_system/strategies/modular_strategy.py (new file)
@@ -0,0 +1,1142 @@
"""
Modular strategy that demonstrates usage of the new constraint_checking and candidate_exploration modules.
Enhanced with LLM-driven constraint processing, early rejection, and immediate evaluation.
"""

import asyncio
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

from loguru import logger

from ...utilities.search_cache import get_search_cache, normalize_entity_query
from ..candidate_exploration import (
    AdaptiveExplorer,
    ConstraintGuidedExplorer,
    DiversityExplorer,
    ParallelExplorer,
)
from ..constraint_checking import (
    DualConfidenceChecker,
    StrictChecker,
    ThresholdChecker,
)
from ..constraints import ConstraintAnalyzer
from ..questions import StandardQuestionGenerator
from .base_strategy import BaseSearchStrategy


@dataclass
class CandidateConfidence:
    """Track candidate confidence levels for early rejection"""

    candidate: object
    positive_confidence: float
    negative_confidence: float
    rejection_reason: Optional[str] = None
    should_continue: bool = True


class LLMConstraintProcessor:
    """LLM-driven intelligent constraint processing"""

    def __init__(self, model):
        self.model = model

    async def decompose_constraints_intelligently(self, constraints):
        """Let LLM intelligently break down constraints into searchable elements"""
        constraint_text = "\n".join([f"- {c.description}" for c in constraints])

        prompt = f"""
I have these constraints from a search query:
{constraint_text}

Please intelligently decompose these constraints into atomic, searchable elements that can be combined in different ways.

For each constraint, provide:
1. **Atomic elements** - Break it into smallest meaningful parts
2. **Variations** - Different ways to express the same concept
3. **Granular specifics** - Specific values, years, numbers, etc.

Example for a time-based constraint:
- Atomic elements: Break down the main subject into searchable terms
- Time variations: Different ways to express time periods
- Granular specifics: Individual years, dates, or specific values mentioned

Return as valid JSON format:
{{
    "constraint_1": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }},
    "constraint_2": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }}
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_decomposition(response.content)

    async def generate_intelligent_combinations(
        self, decomposed_constraints, existing_queries=None, original_query=None
    ):
        """LLM generates smart combinations of atomic elements"""

        if existing_queries is None:
            existing_queries = []

        existing_queries_str = (
            "\n".join([f"- {q}" for q in existing_queries])
            if existing_queries
            else "None yet"
        )

        # Store all queries we've used to avoid repeats
        if existing_queries is None:
            existing_queries = []

        # Add the original query as first in our tracking
        all_queries_used = (
            [original_query] + existing_queries
            if original_query
            else existing_queries
        )
        existing_queries_str = (
            "\n".join([f"- {q}" for q in all_queries_used])
            if all_queries_used
            else "None yet"
        )

        prompt = f"""
Create search query variations using TWO strategies:

ORIGINAL QUERY: "{original_query if original_query else "Not provided"}"

ALREADY USED QUERIES (DO NOT REPEAT):
{existing_queries_str}

**STRATEGY 1: QUERY REFORMULATION** (5-8 variations)
Keep ALL key information but rephrase the entire query:
- Change word order and sentence structure
- Use synonyms for key terms
- Convert questions to statements or keyword phrases
- Maintain all specific details (names, dates, numbers)

**STRATEGY 2: RANGE SPLITTING** (10-15 variations)
For any time periods, ranges, or multiple options, create separate specific searches:
- Split year ranges into individual years
- Split time periods into specific decades/years
- Split "between X and Y" into individual values
- Create one search per specific value in any range

**EXAMPLES:**
Original: "Who won Nobel Prize between 1960-1965?"
- Reformulations: "Nobel Prize winner 1960-1965", "Nobel laureate from 1960 to 1965"
- Range splits: "Nobel Prize winner 1960", "Nobel Prize winner 1961", "Nobel Prize winner 1962", "Nobel Prize winner 1963", "Nobel Prize winner 1964", "Nobel Prize winner 1965"

Generate 15-25 search queries total (reformulations + range splits).
Focus on maximum specificity through systematic coverage.

Return as a valid JSON list of search queries:
["query1", "query2", "query3"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_combinations(response.content)

    def _parse_decomposition(self, content):
        """Parse LLM decomposition response"""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse decomposition: {e}")

        # If parsing fails, return empty dict - let the system handle gracefully
        logger.warning(
            "Failed to parse constraint decomposition, returning empty dict"
        )
        return {}

    def _parse_combinations(self, content):
        """Parse LLM combinations response"""
        try:
            start = content.find("[")
            end = content.rfind("]") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse combinations: {e}")

        # If parsing fails, return empty list - let the system handle gracefully
        logger.warning("Failed to parse LLM combinations, returning empty list")
        return []


class EarlyRejectionManager:
    """Manages early rejection and confidence tracking"""

    def __init__(self, model, positive_threshold=0.6, negative_threshold=0.3):
        self.model = model
        self.positive_threshold = positive_threshold
        self.negative_threshold = negative_threshold
        self.rejected_candidates = set()

    async def quick_confidence_check(self, candidate, constraints):
        """Quick confidence assessment for early rejection"""

        prompt = f"""
Quickly assess if this candidate matches the search criteria:

Candidate: {candidate.name}
Available info: {getattr(candidate, "metadata", {})}

Constraints to match:
{[c.description for c in constraints]}

Provide:
1. **Positive confidence** (0.0-1.0): How likely this candidate matches
2. **Negative confidence** (0.0-1.0): How likely this candidate does NOT match
3. **Quick reasoning**: Brief explanation

Return as JSON:
{{
    "positive_confidence": 0.X,
    "negative_confidence": 0.X,
    "reasoning": "brief explanation"
}}
"""

        try:
            response = await self.model.ainvoke(prompt)
            return self._parse_confidence(response.content)
        except Exception as e:
            logger.error(f"Quick confidence check failed: {e}")
            return {
                "positive_confidence": 0.5,
                "negative_confidence": 0.3,
                "reasoning": "fallback",
            }

    def should_reject_early(self, confidence_result):
        """Determine if candidate should be rejected early"""
        # positive = confidence_result.get("positive_confidence", 0.5)  # Not currently used
        negative = confidence_result.get("negative_confidence", 0.3)

        # Only reject if we have strong negative evidence (not just lack of positive evidence)
        if negative > 0.85:
            return (
                True,
                f"High negative confidence ({negative:.2f})",
            )

        return False, None

    def should_continue_search(self, all_candidates, high_confidence_count):
        """Determine if we should continue searching"""
        # Stop if we have enough high-confidence candidates
        if high_confidence_count >= 5:
            return False, "Found sufficient high-confidence candidates"

        # Stop if we have many candidates but low quality
        if len(all_candidates) > 50 and high_confidence_count == 0:
            return False, "Too many low-quality candidates"

        return True, None

    def _parse_confidence(self, content):
        """Parse confidence assessment"""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse confidence: {e}")

        return {
            "positive_confidence": 0.5,
            "negative_confidence": 0.3,
            "reasoning": "parse_error",
        }


class ModularStrategy(BaseSearchStrategy):
    """
    A strategy that showcases the new modular architecture using:
    - constraint_checking module for candidate evaluation
    - candidate_exploration module for search execution
    - constraints module for constraint analysis
    - LLM-driven intelligent constraint processing
    - Early rejection and confidence-based filtering
    - Immediate candidate evaluation
    """

    def __init__(
        self,
        model,
        search,
        all_links_of_system=None,
        constraint_checker_type: str = "dual_confidence",  # dual_confidence, strict, threshold
        exploration_strategy: str = "adaptive",  # parallel, adaptive, constraint_guided, diversity
        early_rejection: bool = True,  # Enable early rejection by default
        early_stopping: bool = True,  # Enable early stopping by default
        llm_constraint_processing: bool = True,  # Enable LLM-driven constraint processing by default
        immediate_evaluation: bool = True,  # Enable immediate candidate evaluation by default
        **kwargs,
    ):
        super().__init__(all_links_of_system=all_links_of_system)

        self.model = model
        self.search_engine = search
        self.search_engines = getattr(search, "search_engines", [])

        # Initialize constraint analyzer
        self.constraint_analyzer = ConstraintAnalyzer(self.model)

        # Initialize LLM constraint processor if enabled
        self.llm_processor = (
            LLMConstraintProcessor(self.model)
            if llm_constraint_processing
            else None
        )

        # Initialize early rejection manager if enabled
        self.early_rejection_manager = (
            EarlyRejectionManager(self.model) if early_rejection else None
        )

        # Initialize constraint checker based on type (default to dual confidence)
        self.constraint_checker = self._create_constraint_checker(
            constraint_checker_type
        )

        # Initialize candidate explorer based on strategy
        self.candidate_explorer = self._create_candidate_explorer(
            exploration_strategy
        )

        # Initialize question generator
        self.question_generator = StandardQuestionGenerator(model=self.model)

        # Strategy configuration
        self.constraint_checker_type = constraint_checker_type
        self.exploration_strategy = exploration_strategy
        self.early_rejection = early_rejection
        self.early_stopping = early_stopping
        self.llm_constraint_processing = llm_constraint_processing
        self.immediate_evaluation = immediate_evaluation

        logger.info(
            f"Initialized ModularStrategy with {constraint_checker_type} checker, {exploration_strategy} explorer, "
            f"early_rejection={early_rejection}, early_stopping={early_stopping}, "
            f"llm_processing={llm_constraint_processing}, immediate_eval={immediate_evaluation}"
        )

    def _create_constraint_checker(self, checker_type: str):
        """Create the appropriate constraint checker."""
        if checker_type == "dual_confidence":
            return DualConfidenceChecker(
                model=self.model,
                evidence_gatherer=self._gather_evidence_for_constraint,
                negative_threshold=0.75,
                positive_threshold=0.2,
                uncertainty_penalty=0.1,
                negative_weight=1.5,
            )
        elif checker_type == "strict":
            return StrictChecker(
                model=self.model,
                evidence_gatherer=self._gather_evidence_for_constraint,
            )
        elif checker_type == "threshold":
            return ThresholdChecker(
                model=self.model,
                evidence_gatherer=self._gather_evidence_for_constraint,
                acceptance_threshold=0.7,
            )
        else:
            raise ValueError(f"Unknown constraint checker type: {checker_type}")

    def _create_candidate_explorer(self, strategy_type: str):
        """Create the appropriate candidate explorer."""
        if strategy_type == "parallel":
            return ParallelExplorer(
                search_engine=self.search_engine,
                model=self.model,
                max_workers=4,
            )
        elif strategy_type == "adaptive":
            return AdaptiveExplorer(
                search_engine=self.search_engine,
                model=self.model,
                learning_rate=0.1,
            )
        elif strategy_type == "constraint_guided":
            return ConstraintGuidedExplorer(
                search_engine=self.search_engine, model=self.model
            )
        elif strategy_type == "diversity":
            return DiversityExplorer(
                search_engine=self.search_engine,
                model=self.model,
                diversity_factor=0.3,
            )
        else:
            raise ValueError(f"Unknown exploration strategy: {strategy_type}")

    async def search(
        self,
        query: str,
        search_engines: List[str] = None,
        progress_callback=None,
        **kwargs,
    ) -> Tuple[str, Dict]:
        """
        Execute the modular search strategy.
        """
        try:
            logger.info(f"Starting enhanced modular search for: {query}")

            # Phase 1: Extract base constraints
            if progress_callback:
                progress_callback(
                    {
                        "phase": "constraint_analysis",
                        "progress": 5,
                        "message": "Analyzing query constraints",
                    }
                )

            base_constraints = self.constraint_analyzer.extract_constraints(
                query
            )
            logger.info(f"Extracted {len(base_constraints)} base constraints")

            # Phase 2: LLM constraint processing (if enabled)
            all_search_queries = []
            if self.llm_constraint_processing and self.llm_processor:
                if progress_callback:
                    progress_callback(
                        {
                            "phase": "llm_processing",
                            "progress": 15,
                            "message": "LLM processing constraints intelligently",
                        }
                    )

                logger.info("🤖 LLM CONSTRAINT PROCESSING ACTIVATED")
                # LLM decomposition and combination
                decomposed = await self.llm_processor.decompose_constraints_intelligently(
                    base_constraints
                )

                # Pass existing base constraint queries to avoid duplication
                existing_queries = [c.description for c in base_constraints]
                logger.info("📋 BASE CONSTRAINT QUERIES:")
                for i, base_query in enumerate(existing_queries, 1):
                    logger.info(f" BASE-{i:02d}: {base_query}")

                intelligent_combinations = (
                    await self.llm_processor.generate_intelligent_combinations(
                        decomposed, existing_queries, query
                    )
                )

                logger.info("🧠 LLM-GENERATED INTELLIGENT QUERIES:")
                logger.info("### START_LLM_QUERIES ###")  # Grep-friendly marker
                for i, llm_query in enumerate(intelligent_combinations, 1):
                    logger.info(f" LLM-{i:02d}: {llm_query}")
                logger.info("### END_LLM_QUERIES ###")  # Grep-friendly marker

                # OPTIMIZATION: Start with original query, then use LLM-generated targeted queries
                # This ensures we search for the exact question first, then explore variations
                all_search_queries = (
                    [query] + intelligent_combinations
                )  # Original query first, then LLM combinations
                logger.info(
                    f"🎯 Using original query + {len(intelligent_combinations)} targeted LLM search combinations (skipping broad base constraints)"
                )
                logger.info(
                    f"📊 Optimized search strategies: {len(all_search_queries)} (original + {len(intelligent_combinations)} LLM queries)"
                )
            else:
                logger.warning(
                    "⚠️ LLM constraint processing is DISABLED - falling back to basic searches"
                )
                # Fallback to basic constraint searches
                all_search_queries = [c.description for c in base_constraints]

            # Phase 3: Enhanced candidate exploration with immediate evaluation
            if progress_callback:
                progress_callback(
                    {
                        "phase": "candidate_exploration",
                        "progress": 25,
                        "message": f"🔍 Exploring with {len(all_search_queries)} enhanced search strategies",
                    }
                )

            all_candidates = []
            high_confidence_count = 0
            search_progress = 30

            # DECOUPLED APPROACH: Separate search execution from candidate evaluation
            candidate_evaluation_queue = asyncio.Queue()
            evaluation_results = []
            rejected_candidates = []  # Store rejected candidates for potential recovery

            # Execute searches in parallel batches with decoupled evaluation
            batch_size = 8  # Optimized for parallel execution
            logger.info(
                f"🚀 Starting enhanced exploration with {len(all_search_queries)} search queries (8 concurrent, decoupled evaluation)"
            )

            # Start background candidate evaluation task
            evaluation_task = asyncio.create_task(
                self._background_candidate_evaluation(
                    candidate_evaluation_queue,
                    base_constraints,
                    evaluation_results,
                    query,
                    rejected_candidates,
                )
            )

            for i in range(0, len(all_search_queries), batch_size):
                batch = all_search_queries[i : i + batch_size]

                if progress_callback:
                    progress_callback(
                        {
                            "phase": "search_batch",
                            "progress": search_progress,
                            "message": f"🔍 Executing search batch {i // batch_size + 1}",
                        }
                    )

                logger.info(
                    f"📦 Processing batch {i // batch_size + 1}: {batch}"
                )

                # Execute batch searches in parallel using ThreadPoolExecutor
                batch_results = []
                with ThreadPoolExecutor(max_workers=8) as executor:
                    # Submit all searches in the batch concurrently
                    future_to_query = {
                        executor.submit(
                            self.candidate_explorer._execute_search, query
                        ): query
                        for query in batch
                    }

                    # Collect results as they complete
                    for future in as_completed(future_to_query):
                        query = future_to_query[future]
                        try:
                            result = future.result()
                            batch_results.append(result)
                        except Exception as e:
                            logger.error(
                                f"❌ Parallel search failed for '{query[:30]}...': {e}"
                            )
                            batch_results.append(e)

                # CRITICAL: Yield control to allow background evaluation task to run
                await asyncio.sleep(0)

                # Process batch results - QUEUE CANDIDATES FOR BACKGROUND EVALUATION
                for j, result in enumerate(batch_results):
                    if isinstance(result, Exception):
                        logger.error(f"❌ Search failed: {batch[j]} - {result}")
                        continue

                    candidates = self.candidate_explorer._extract_candidates_from_results(
                        result, original_query=query
                    )

                    logger.info(
                        f"🎯 Found {len(candidates)} candidates from query: '{batch[j][:50]}...'"
                    )

                    # QUEUE CANDIDATES for background evaluation (non-blocking)
                    for candidate in candidates:
                        await candidate_evaluation_queue.put(candidate)

                # Progress tracking without blocking on evaluation
                total_candidates = sum(
                    len(
                        self.candidate_explorer._extract_candidates_from_results(
                            result, original_query=query
                        )
                    )
                    for result in batch_results
                    if not isinstance(result, Exception)
                )

                logger.info(
                    f"📦 Batch {i // batch_size + 1}: queued {total_candidates} candidates for evaluation"
                )

                # CRITICAL: Yield control after each batch to allow background evaluation
                await asyncio.sleep(
                    0.1
                )  # Small delay to let background task process

                search_progress = min(search_progress + 10, 75)

            # Signal completion to background evaluation and wait for final results
            await candidate_evaluation_queue.put(
                None
            )  # Sentinel to signal completion

            # Wait for background evaluation to complete
            try:
                await asyncio.wait_for(
                    evaluation_task, timeout=30.0
                )  # 30s timeout
            except asyncio.TimeoutError:
                logger.warning(
                    "⚠️ Background evaluation timed out, using partial results"
                )
                evaluation_task.cancel()

            # Collect all evaluated candidates
            all_candidates = [
                result for result in evaluation_results if result is not None
            ]

            logger.info(
                f"🏁 Search completed: {len(all_candidates)} total candidates, {high_confidence_count} high-confidence"
            )

            # Phase 4: Final candidate evaluation (if immediate evaluation was disabled)
            evaluated_candidates = all_candidates
            if not self.immediate_evaluation:
                if progress_callback:
                    progress_callback(
                        {
                            "phase": "candidate_evaluation",
                            "progress": 80,
                            "message": f"🔍 Evaluating {len(all_candidates)} candidates",
                        }
                    )

                evaluated_candidates = []
                for candidate in all_candidates[:20]:  # Limit to top 20
                    try:
                        result = self.constraint_checker.check_candidate(
                            candidate, base_constraints
                        )
                        candidate.evaluation_results = result.detailed_results
                        candidate.score = result.total_score
                        candidate.should_reject = result.should_reject

                        if not result.should_reject:
                            evaluated_candidates.append(candidate)

                    except Exception as e:
                        logger.error(
                            f"💥 Error evaluating candidate {candidate.name}: {e}"
                        )
                        continue

            # Phase 5: Select best candidate
            if progress_callback:
                progress_callback(
                    {
                        "phase": "result_selection",
                        "progress": 90,
                        "message": "🏆 Selecting best result",
                    }
                )

            if not evaluated_candidates:
                # Check all candidates including rejected ones
                all_scored_candidates = []

                # Add all candidates with scores
                for c in all_candidates:
                    if hasattr(c, "score") and c.score > 0:
                        all_scored_candidates.append(c)

                # Add rejected candidates with scores
                for c in rejected_candidates:
                    if hasattr(c, "score") and c.score > 0:
                        all_scored_candidates.append(c)

                if all_scored_candidates:
                    # Sort by score
                    all_scored_candidates.sort(
                        key=lambda x: x.score, reverse=True
                    )
                    best_candidate = all_scored_candidates[0]

                    # Accept if score is above minimum threshold (20%)
                    if best_candidate.score >= 0.20:
                        logger.info(
                            f"🎯 Accepting best available candidate (recovered from rejected): {best_candidate.name} with score {best_candidate.score:.2%}"
                        )
                        evaluated_candidates = [best_candidate]
                    else:
                        logger.warning(
                            f"❌ Best candidate {best_candidate.name} has too low score: {best_candidate.score:.2%}"
                        )

            if not evaluated_candidates:
                logger.warning(
                    "❌ No valid candidates found after evaluation"
                )
                return "No valid candidates found after evaluation", {
                    "strategy": "enhanced_modular",
                    "constraint_checker": self.constraint_checker_type,
                    "exploration_strategy": self.exploration_strategy,
                    "early_rejection": self.early_rejection,
                    "llm_processing": self.llm_constraint_processing,
                    "total_searches": len(all_search_queries),
                    "candidates_found": len(all_candidates),
                    "candidates_valid": 0,
                    "high_confidence_count": high_confidence_count,
                }

            # Sort by score and select best
            evaluated_candidates.sort(
                key=lambda x: getattr(x, "score", 0), reverse=True
            )
            best_candidate = evaluated_candidates[0]

            logger.info(
                f"🏆 Best candidate: {best_candidate.name} with score {getattr(best_candidate, 'score', 0):.2%}"
            )

            # Phase 6: Generate final answer
            if progress_callback:
                progress_callback(
                    {
                        "phase": "final_answer",
                        "progress": 95,
                        "message": "📝 Generating final answer",
                    }
                )

            answer = await self._generate_final_answer(
                query, best_candidate, base_constraints
            )

            # Search Query Analysis Summary for easy analysis
            logger.info("=" * 80)
            logger.info("🔍 SEARCH QUERY ANALYSIS SUMMARY")
            logger.info("=" * 80)
            logger.info(
                f"📊 TOTAL QUERIES GENERATED: {len(all_search_queries)}"
            )
            logger.info(
                f"📋 BASE CONSTRAINT QUERIES: {len(existing_queries) if 'existing_queries' in locals() else 0}"
            )
            logger.info(
                f"🧠 LLM INTELLIGENT QUERIES: {len(intelligent_combinations) if 'intelligent_combinations' in locals() else 0}"
            )

            if (
                "intelligent_combinations" in locals()
                and intelligent_combinations
            ):
                logger.info("\n🎯 SAMPLE LLM-GENERATED QUERIES (first 10):")
                for i, query in enumerate(intelligent_combinations[:10], 1):
                    logger.info(f" SAMPLE-{i:02d}: {query}")

            logger.info("=" * 80)

            metadata = {
                "strategy": "enhanced_modular",
                "constraint_checker": self.constraint_checker_type,
                "exploration_strategy": self.exploration_strategy,
                "early_rejection_enabled": self.early_rejection,
                "early_stopping_enabled": self.early_stopping,
                "llm_processing_enabled": self.llm_constraint_processing,
                "immediate_evaluation_enabled": self.immediate_evaluation,
                "total_searches_generated": len(all_search_queries),
                "candidates_found": len(all_candidates),
                "candidates_evaluated": len(evaluated_candidates),
                "high_confidence_count": high_confidence_count,
                "best_candidate": best_candidate.name,
                "best_score": getattr(best_candidate, "score", 0),
            }

            return answer, metadata

        except Exception as e:
            logger.error(f"💥 Error in enhanced modular search: {e}")
            import traceback

            logger.error(f"🔍 Traceback: {traceback.format_exc()}")
            return f"Search failed: {str(e)}", {"error": str(e)}

    async def _generate_final_answer(
        self, query: str, best_candidate, constraints
    ) -> str:
        """Generate the final answer using the best candidate."""

        constraint_info = "\n".join(
            [f"- {c.description} (weight: {c.weight})" for c in constraints]
        )

        evaluation_info = ""
        if hasattr(best_candidate, "evaluation_results"):
            evaluation_info = "\n".join(
                [
                    f"- {result.get('constraint', 'Unknown')}: {result.get('score', 0):.0%}"
                    for result in best_candidate.evaluation_results
                ]
            )

        prompt = f"""Based on the search results, provide a comprehensive answer to: {query}

Best candidate found: {best_candidate.name}
Score: {best_candidate.score:.0%}

Constraints analyzed:
{constraint_info}

Constraint evaluation results:
{evaluation_info}

Evidence summary: {getattr(best_candidate, "summary", "No summary available")}

Provide a clear, factual answer that addresses the original question and explains how the candidate satisfies the constraints."""

        response = await self.model.ainvoke(prompt)
        return response.content

    def _gather_evidence_for_constraint(self, candidate, constraint):
        """Gather evidence for a constraint using actual search with caching."""
        # Check cache first
        cache = get_search_cache()
        cache_key = normalize_entity_query(candidate.name, constraint.value)

        cached_results = cache.get(cache_key, "modular_strategy")
        if cached_results is not None:
            logger.debug(
                f"Using cached evidence for {candidate.name} - {constraint.value[:30]}..."
            )
            return cached_results

        try:
            # Build search query intelligently based on constraint type
            query_parts = []

            # Add candidate name
            query_parts.append(f'"{candidate.name}"')

            # Parse constraint value for key terms
            constraint_value = constraint.value

            # Remove common prefixes
            prefixes_to_remove = [
                "The individual is associated with",
                "The answer must be",
                "The character must be",
                "The entity must be",
                "Must be",
                "Should be",
                "Is",
            ]

            for prefix in prefixes_to_remove:
                if constraint_value.startswith(prefix):
                    constraint_value = constraint_value[len(prefix) :].strip()
                    break

            # Handle different constraint types
            if constraint.type.value == "TEMPORAL":
                # For temporal constraints, extract years/dates and search specifically
                import re

                years = re.findall(r"\b(19\d{2}|20\d{2})\b", constraint_value)
                decades = re.findall(
                    r"\b(19\d{2}s|20\d{2}s)\b", constraint_value
                )

                if years:
                    for year in years:
                        query_parts.append(year)
                elif decades:
                    for decade in decades:
                        query_parts.append(decade)
                else:
                    query_parts.append(constraint_value)

            elif constraint.type.value == "PROPERTY":
                # For properties, focus on the specific characteristic
                query_parts.append(constraint_value)

            elif constraint.type.value == "STATISTIC":
                # For statistics, include numbers and comparisons
                query_parts.append(constraint_value)

            else:
                # Default: use the constraint value as-is
                query_parts.append(constraint_value)

            # Build final query
            query = " ".join(query_parts)
            logger.debug(f"Evidence search query: {query}")

            # Execute search using the appropriate method
            results = None

            # Try different search methods based on what's available
            if hasattr(self.search_engine, "run"):
                results = self.search_engine.run(query)
            elif hasattr(self.search_engine, "search"):
                results = self.search_engine.search(query)
            elif callable(self.search_engine):
                results = self.search_engine(query)
            else:
                logger.error(
                    f"Search engine has no callable method: {type(self.search_engine)}"
                )
                return []

            # Process results
            evidence = []

            # Handle different result formats
            if results is None:
                logger.warning("Search returned None")
                return []

            if isinstance(results, list):
                result_list = results
            elif isinstance(results, dict):
                # Try common keys for results
                result_list = (
                    results.get("results")
                    or results.get("items")
                    or results.get("data")
                    or []
                )
            else:
                logger.warning(f"Unknown search result type: {type(results)}")
                result_list = []

            # Extract top evidence (limit to 5 for better quality)
            for i, result in enumerate(result_list[:5]):
                if isinstance(result, dict):
                    # Extract text content
                    text = (
                        result.get("snippet")
                        or result.get("content")
                        or result.get("description")
                        or result.get("text")
                        or ""
                    )

                    # Extract source information
                    source = (
                        result.get("url")
                        or result.get("link")
                        or result.get("source")
                        or f"search_result_{i + 1}"
                    )

                    # Extract title
                    title = result.get("title", "")

                    # Calculate confidence based on result position and content
                    base_confidence = 0.8 - (i * 0.1)  # Decay by position

                    # Boost confidence if key terms are present
                    if candidate.name.lower() in text.lower():
                        base_confidence += 0.1
                    if any(
                        term.lower() in text.lower()
                        for term in constraint_value.split()
                    ):
                        base_confidence += 0.1

                    confidence = min(base_confidence, 0.95)

                    evidence.append(
                        {
                            "text": text[:500],  # Limit text length
                            "source": source,
                            "confidence": confidence,
                            "title": title,
                            "full_text": text,  # Keep full text for detailed analysis
                        }
                    )
                else:
                    # Handle string results
                    evidence.append(
                        {
                            "text": str(result)[:500],
                            "source": f"search_result_{i + 1}",
                            "confidence": 0.6,
                            "title": "",
                        }
                    )

            logger.debug(
                f"Gathered {len(evidence)} evidence items for {candidate.name} - {constraint.value[:50]}..."
            )

            # Cache the results for future use
            cache.put(
                cache_key, evidence, "modular_strategy", ttl=1800
            )  # 30 minutes TTL

            return evidence

        except Exception as e:
            logger.error(f"Error gathering evidence: {e}", exc_info=True)
            # Return empty list instead of mock evidence
            return []

    async def _background_candidate_evaluation(
        self,
        queue,
        constraints,
        results,
        original_query=None,
        rejected_candidates=None,
    ):
        """Background task to evaluate candidates without blocking search progress."""
        logger.info("🔄 Started background candidate evaluation")

        # Use provided rejected_candidates list or create new one
        if rejected_candidates is None:
            rejected_candidates = []

        while True:
            try:
                # Get candidate from queue
                candidate = await queue.get()

                # Check for completion sentinel
                if candidate is None:
                    logger.info("🏁 Background evaluation completed")
                    break

                # Evaluate candidate with LLM pre-screening
                try:
                    # Always do full constraint evaluation to get scores
                    result = self.constraint_checker.check_candidate(
                        candidate, constraints, original_query=original_query
                    )
                    candidate.evaluation_results = result.detailed_results
                    candidate.score = result.total_score
                    candidate.should_reject = result.should_reject

                    # Now check early rejection AFTER we have a score
                    if self.early_rejection_manager:
                        confidence = await self.early_rejection_manager.quick_confidence_check(
                            candidate, constraints
                        )

                        should_reject, reason = (
                            self.early_rejection_manager.should_reject_early(
                                confidence
                            )
                        )
                        if (
                            should_reject and candidate.score < 0.5
                        ):  # Only early reject if score is also low
                            logger.debug(
                                f"⚡ Early rejected {candidate.name}: {reason} (score: {candidate.score:.2%})"
                            )
                            # Store the candidate anyway for potential best candidate recovery
                            rejected_candidates.append(candidate)
                            continue

                    if not result.should_reject:
                        results.append(candidate)
                        logger.info(
                            f"✅ Accepted: {candidate.name} (score: {result.total_score:.2%})"
                        )

                        # Check for excellent candidates
                        if result.total_score > 0.9:
                            logger.info(
                                f"🏆 EXCELLENT: {candidate.name} with {result.total_score:.1%} score"
                            )
                    else:
                        # Store rejected candidates with scores for potential recovery
                        rejected_candidates.append(candidate)
                        logger.debug(
                            f"❌ Rejected: {candidate.name} (score: {candidate.score:.2%})"
                        )

                except Exception as e:
                    logger.error(f"💥 Error evaluating {candidate.name}: {e}")

            except Exception as e:
                logger.error(f"💥 Background evaluation error: {e}")

    def analyze_topic(self, query: str) -> Dict:
        """
        Analyze a topic using the modular strategy.

        This is the main entry point that implements the BaseSearchStrategy interface.
        """
        try:
            # Run the search asynchronously
            import asyncio

            # Create a new event loop if none exists or if the current loop is running
            try:
                loop = asyncio.get_event_loop()
                if loop.is_running():
                    # If we're already in an async context, run in a new thread
                    import concurrent.futures

                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(
                            lambda: asyncio.run(self.search(query))
                        )
                        answer, metadata = future.result()
                else:
                    # If not in async context, run directly
                    answer, metadata = loop.run_until_complete(
                        self.search(query)
                    )
            except RuntimeError:
                # No event loop, create one
                answer, metadata = asyncio.run(self.search(query))

            return {
                "findings": [{"content": answer}],
                "iterations": 1,
                "final_answer": answer,
                "current_knowledge": answer,
                "metadata": metadata,
                "links": getattr(self, "all_links_of_system", []),
                "questions_by_iteration": getattr(
                    self, "questions_by_iteration", []
                ),
            }

        except Exception as e:
            logger.error(f"Error in analyze_topic: {e}")
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")
            return {
                "findings": [],
                "iterations": 0,
                "final_answer": f"Analysis failed: {str(e)}",
                "metadata": {"error": str(e)},
                "links": [],
                "questions_by_iteration": [],
            }