local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +5 -3
- local_deep_research/web/database/models.py +51 -2
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +51 -61
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +227 -41
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +310 -103
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
- local_deep_research-0.5.0.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py (new file)
@@ -0,0 +1,865 @@
"""
LLM-Driven Modular Strategy with intelligent constraint processing and early rejection.
"""

import asyncio
import json
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

from loguru import logger

from ..candidate_exploration import AdaptiveExplorer
from ..constraint_checking import DualConfidenceChecker
from ..constraints import ConstraintAnalyzer
from ..questions import StandardQuestionGenerator
from .base_strategy import BaseSearchStrategy


@dataclass
class CandidateConfidence:
    """Track candidate confidence levels for early rejection"""

    candidate: object
    positive_confidence: float
    negative_confidence: float
    rejection_reason: Optional[str] = None
    should_continue: bool = True


class LLMConstraintProcessor:
    """LLM-driven intelligent constraint processing"""

    def __init__(self, model):
        self.model = model

    async def decompose_constraints_intelligently(self, constraints):
        """Let LLM intelligently break down constraints into searchable elements"""
        constraint_text = "\n".join([f"- {c.description}" for c in constraints])

        prompt = f"""
I have these constraints from a search query:
{constraint_text}

Please intelligently decompose these constraints into atomic, searchable elements that can be combined in different ways.

For each constraint, provide:
1. **Atomic elements** - Break it into smallest meaningful parts
2. **Variations** - Different ways to express the same concept
3. **Granular specifics** - Specific values, years, numbers, etc.

Example for "TV show aired between 1960s and 1980s":
- Atomic elements: ["TV show", "television", "series", "program"]
- Time variations: ["1960s", "1970s", "1980s", "60s", "70s", "80s"]
- Granular years: ["1960", "1961", "1962", "1963", "1964", "1965", "1966", "1967", "1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", "1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988", "1989"]

Example for "fewer than 50 episodes":
- Atomic elements: ["episodes", "installments", "parts"]
- Quantity variations: ["under 50", "less than 50", "limited run", "short series"]
- Granular numbers: ["13 episodes", "26 episodes", "39 episodes", "single season"]

Return as valid JSON format:
{{
    "constraint_1": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }},
    "constraint_2": {{
        "atomic_elements": [...],
        "variations": [...],
        "granular_specifics": [...]
    }}
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_decomposition(response.content)

    async def generate_intelligent_combinations(self, decomposed_constraints):
        """LLM generates smart combinations of atomic elements"""

        # Flatten all elements for the LLM to see
        all_elements = {}
        for constraint_id, elements in decomposed_constraints.items():
            all_elements[constraint_id] = elements

        prompt = f"""
I have decomposed constraints into these atomic elements:
{json.dumps(all_elements, indent=2)}

Now intelligently combine these elements to create targeted search queries. Be creative and systematic:

1. **Year-by-year combinations**: Take specific years and combine with other specifics
   Example: "1960 TV show 13 episodes", "1961 television 26 episodes", etc.

2. **Cross-constraint combinations**: Mix elements from different constraints
   Example: "humor ascetic 1970s", "fourth wall short series vintage"

3. **Granular progression**: Create systematic progressions
   Example: "1960 comedy", "1961 comedy", "1962 comedy"...

4. **Semantic variations**: Same meaning, different words
   Example: "brief TV run 1970s" vs "short television series seventies"

5. **Contextual combinations**: Add implied context
   Example: "monk-trained character 1978 television"

Generate 60-80 diverse search combinations that would maximize finding the target.
Focus on being comprehensive yet targeted.

Return as a valid JSON list of search queries:
["query1", "query2", "query3"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_combinations(response.content)

    async def generate_creative_search_angles(
        self, original_query, decomposed_constraints
    ):
        """LLM generates completely creative search approaches"""

        prompt = f"""
Original query: "{original_query}"

Now think like a detective - what are ALL the different ways someone might search for this character?
Be extremely creative and think outside the box:

1. **Character name guessing**: What names might this character have?
2. **Show title guessing**: What might the TV show be called?
3. **Cultural context**: What was happening in those decades?
4. **Genre searches**: What genre/category would this fit?
5. **Indirect searches**: What related topics might lead to this?
6. **Reverse searches**: Start from known similar characters
7. **Archetype searches**: What type of character is this?
8. **Creator/studio searches**: Who might have made this?

Generate 30-40 creative search angles that approach this from completely different directions.

Examples of creative thinking:
- "1970s cartoon characters who talk to camera"
- "superhero trained by monks television"
- "vintage comedy shows cancelled after one season"
- "fourth wall breaking animation 70s"
- "spiritual mentor origin story TV characters"
- "Plastic Man TV show episodes"
- "elastic superhero television series"

Return as valid JSON list of creative searches:
["creative_query1", "creative_query2"]
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_creative_searches(response.content)

    async def optimize_search_combinations(self, all_combinations):
        """LLM optimizes the search list for maximum effectiveness"""

        prompt = f"""
I have generated {len(all_combinations)} search combinations. Here are the first 20:
{json.dumps(all_combinations[:20], indent=2)}

Please optimize this search strategy by organizing searches by priority and effectiveness:

1. **Remove redundant searches** that are too similar
2. **Prioritize high-value searches** likely to find results
3. **Balance specificity vs breadth**
4. **Add missing search angles** you notice
5. **Organize by search strategy type**

Return optimized searches organized by category as valid JSON:
{{
    "high_priority": ["most likely to succeed - top 15 searches"],
    "systematic_granular": ["year-by-year, episode-by-episode combinations - 20 searches"],
    "creative_angles": ["outside-the-box approaches - 15 searches"],
    "contextual_searches": ["time period + cultural context - 15 searches"],
    "fallback_broad": ["broader searches if specifics fail - 10 searches"]
}}
"""

        response = await self.model.ainvoke(prompt)
        return self._parse_optimized_searches(response.content)

    def _parse_decomposition(self, content):
        """Parse LLM decomposition response"""
        try:
            # Extract JSON from the response
            start = content.find("{")
            end = content.rfind("}") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse decomposition: {e}")

        # Fallback to simple structure
        return {
            "time_constraint": {
                "atomic_elements": ["TV show", "television", "series"],
                "variations": ["1960s", "1970s", "1980s"],
                "granular_specifics": [str(year) for year in range(1960, 1990)],
            }
        }

    def _parse_combinations(self, content):
        """Parse LLM combinations response"""
        try:
            start = content.find("[")
            end = content.rfind("]") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse combinations: {e}")

        # Fallback
        return [
            "fictional character humor",
            "TV show 1970s",
            "fourth wall breaking",
        ]

    def _parse_creative_searches(self, content):
        """Parse LLM creative searches response"""
        try:
            start = content.find("[")
            end = content.rfind("]") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse creative searches: {e}")

        # Fallback
        return [
            "vintage cartoon character",
            "superhero TV show 1970s",
            "comedy series short run",
        ]

    def _parse_optimized_searches(self, content):
        """Parse LLM optimized searches response"""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse optimized searches: {e}")

        # Fallback
        return {
            "high_priority": [
                "fictional character fourth wall humor",
                "1970s TV show limited episodes",
            ],
            "systematic_granular": [
                "1970 TV show",
                "1971 TV show",
                "1972 TV show",
            ],
            "creative_angles": [
                "superhero comedy television",
                "cartoon character talks to audience",
            ],
            "contextual_searches": [
                "vintage TV comedy",
                "classic television humor",
            ],
            "fallback_broad": ["fictional character", "TV show character"],
        }


class EarlyRejectionManager:
    """Manages early rejection and confidence tracking"""

    def __init__(self, model, positive_threshold=0.6, negative_threshold=0.3):
        self.model = model
        self.positive_threshold = positive_threshold
        self.negative_threshold = negative_threshold
        self.rejected_candidates = set()

    async def quick_confidence_check(self, candidate, constraints):
        """Quick confidence assessment for early rejection"""

        prompt = f"""
Quickly assess if this candidate matches the search criteria:

Candidate: {candidate.name}
Available info: {getattr(candidate, "metadata", {})}

Constraints to match:
{[c.description for c in constraints]}

Provide:
1. **Positive confidence** (0.0-1.0): How likely this candidate matches
2. **Negative confidence** (0.0-1.0): How likely this candidate does NOT match
3. **Quick reasoning**: Brief explanation

Return as JSON:
{{
    "positive_confidence": 0.X,
    "negative_confidence": 0.X,
    "reasoning": "brief explanation"
}}
"""

        try:
            response = await self.model.ainvoke(prompt)
            return self._parse_confidence(response.content)
        except Exception as e:
            logger.error(f"Quick confidence check failed: {e}")
            return {
                "positive_confidence": 0.5,
                "negative_confidence": 0.3,
                "reasoning": "fallback",
            }

    def should_reject_early(self, confidence_result):
        """Determine if candidate should be rejected early"""
        positive = confidence_result.get("positive_confidence", 0.5)
        negative = confidence_result.get("negative_confidence", 0.3)

        # Reject if high negative confidence or very low positive confidence
        if negative > 0.7 or positive < 0.1:
            return (
                True,
                f"High negative confidence ({negative:.2f}) or low positive ({positive:.2f})",
            )

        return False, None

    def should_continue_search(self, all_candidates, high_confidence_count):
        """Determine if we should continue searching"""
        # Stop if we have enough high-confidence candidates
        if high_confidence_count >= 5:
            return False, "Found sufficient high-confidence candidates"

        # Stop if we have many candidates but low quality
        if len(all_candidates) > 50 and high_confidence_count == 0:
            return False, "Too many low-quality candidates"

        return True, None

    def _parse_confidence(self, content):
        """Parse confidence assessment"""
        try:
            start = content.find("{")
            end = content.rfind("}") + 1
            if start != -1 and end != -1:
                json_str = content[start:end]
                return json.loads(json_str)
        except Exception as e:
            logger.error(f"Failed to parse confidence: {e}")

        return {
            "positive_confidence": 0.5,
            "negative_confidence": 0.3,
            "reasoning": "parse_error",
        }


class LLMDrivenModularStrategy(BaseSearchStrategy):
    """
    LLM-driven modular strategy with intelligent constraint processing and early rejection.
    """

    def __init__(
        self,
        model,
        search,
        all_links_of_system=None,
        constraint_checker_type: str = "dual_confidence",
        exploration_strategy: str = "adaptive",
        early_rejection: bool = True,
        **kwargs,
    ):
        super().__init__(all_links_of_system=all_links_of_system)

        self.model = model
        self.search_engine = search
        self.search_engines = getattr(search, "search_engines", [])

        # Initialize components
        self.constraint_analyzer = ConstraintAnalyzer(self.model)
        self.llm_processor = LLMConstraintProcessor(self.model)
        self.early_rejection_manager = (
            EarlyRejectionManager(self.model) if early_rejection else None
        )

        # Initialize constraint checker
        self.constraint_checker = DualConfidenceChecker(
            model=self.model,
            evidence_gatherer=self._gather_evidence_for_constraint,
            negative_threshold=0.25,
            positive_threshold=0.4,
            uncertainty_penalty=0.2,
            negative_weight=2.0,
        )

        # Initialize candidate explorer
        self.candidate_explorer = AdaptiveExplorer(
            search_engine=self.search_engine,
            model=self.model,
            learning_rate=0.1,
            max_search_time=45.0,  # Reduced since we have more searches
            max_candidates=30,  # Increased since we filter early
        )

        # Initialize question generator
        self.question_generator = StandardQuestionGenerator(model=self.model)

        # Strategy configuration
        self.constraint_checker_type = constraint_checker_type
        self.exploration_strategy = exploration_strategy
        self.early_rejection = early_rejection

        logger.info(
            f"Initialized LLMDrivenModularStrategy with {constraint_checker_type} checker, "
            f"{exploration_strategy} explorer, early_rejection={early_rejection}"
        )

    def analyze_topic(self, query: str) -> Dict:
        """Main entry point - sync wrapper for async search"""
        try:
            import asyncio

            # Create a new event loop if none exists or if the current loop is running
            try:
                loop = asyncio.get_event_loop()
                if loop.is_running():
                    # If we're already in an async context, run in a new thread
                    import concurrent.futures

                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(
                            lambda: asyncio.run(self.search(query))
                        )
                        answer, metadata = future.result()
                else:
                    # If not in async context, run directly
                    answer, metadata = loop.run_until_complete(
                        self.search(query)
                    )
            except RuntimeError:
                # No event loop, create one
                answer, metadata = asyncio.run(self.search(query))

            return {
                "findings": [{"content": answer}],
                "iterations": 1,
                "final_answer": answer,
                "metadata": metadata,
                "links": getattr(self, "all_links_of_system", []),
                "questions_by_iteration": getattr(
                    self, "questions_by_iteration", []
                ),
            }

        except Exception as e:
            logger.error(f"Error in analyze_topic: {e}")
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")
            return {
                "findings": [],
                "iterations": 0,
                "final_answer": f"Analysis failed: {str(e)}",
                "metadata": {"error": str(e)},
                "links": [],
                "questions_by_iteration": [],
            }

    async def search(
        self,
        query: str,
        search_engines: List[str] = None,
        progress_callback=None,
        **kwargs,
    ) -> Tuple[str, Dict]:
        """Execute the LLM-driven modular search strategy"""
        try:
            logger.info(f"Starting LLM-driven modular search for: {query}")

            # Phase 1: Extract base constraints
            if progress_callback:
                progress_callback(
                    {
                        "phase": "constraint_analysis",
                        "progress": 5,
                        "message": "Analyzing query constraints",
                    }
                )

            base_constraints = self.constraint_analyzer.extract_constraints(
                query
            )
            logger.info(f"Extracted {len(base_constraints)} base constraints")

            # Phase 2: LLM intelligent decomposition
            if progress_callback:
                progress_callback(
                    {
                        "phase": "llm_decomposition",
                        "progress": 15,
                        "message": "LLM decomposing constraints intelligently",
                    }
                )

            decomposed = (
                await self.llm_processor.decompose_constraints_intelligently(
                    base_constraints
                )
            )
            logger.info(
                f"LLM decomposed constraints into {len(decomposed)} groups"
            )

            # Phase 3: LLM intelligent combinations
            if progress_callback:
                progress_callback(
                    {
                        "phase": "llm_combinations",
                        "progress": 25,
                        "message": "LLM generating intelligent search combinations",
                    }
                )

            intelligent_combinations = (
                await self.llm_processor.generate_intelligent_combinations(
                    decomposed
                )
            )
            logger.info(
                f"LLM generated {len(intelligent_combinations)} intelligent combinations"
            )

            # Phase 4: LLM creative search angles
            if progress_callback:
                progress_callback(
                    {
                        "phase": "llm_creative",
                        "progress": 35,
                        "message": "LLM generating creative search angles",
                    }
                )

            creative_searches = (
                await self.llm_processor.generate_creative_search_angles(
                    query, decomposed
                )
            )
            logger.info(
                f"LLM generated {len(creative_searches)} creative searches"
            )

            # Phase 5: LLM optimization
            if progress_callback:
                progress_callback(
                    {
                        "phase": "llm_optimization",
                        "progress": 45,
                        "message": "LLM optimizing search strategy",
                    }
                )

            all_searches = intelligent_combinations + creative_searches
            optimized_searches = (
                await self.llm_processor.optimize_search_combinations(
                    all_searches
                )
            )
            total_searches = sum(
                len(searches) for searches in optimized_searches.values()
            )
            logger.info(
                f"LLM optimized to {total_searches} total searches across categories"
            )

            # Phase 6: Execute searches by priority with early rejection
            all_candidates = []
            high_confidence_count = 0
            search_progress = 50

            for category, searches in optimized_searches.items():
                if not searches:
                    continue

                logger.info(
                    f"Executing {category} searches: {len(searches)} queries"
                )

                if progress_callback:
                    progress_callback(
                        {
                            "phase": f"search_{category}",
                            "progress": search_progress,
                            "message": f"Searching with {category} strategy",
                        }
                    )

                # Execute in parallel batches
                batch_size = 3 if category == "high_priority" else 5
                category_candidates = []

                for i in range(0, len(searches), batch_size):
                    batch = searches[i : i + batch_size]

                    # Execute batch searches in parallel
                    batch_tasks = []
                    for search_query in batch:
                        task = self.candidate_explorer._execute_search(
                            search_query
                        )
                        batch_tasks.append(task)

                    # Wait for batch completion
                    batch_results = await asyncio.gather(
                        *batch_tasks, return_exceptions=True
                    )

                    # Process batch results
                    for j, result in enumerate(batch_results):
                        if isinstance(result, Exception):
                            logger.error(
                                f"Search failed: {batch[j]} - {result}"
                            )
                            continue

                        candidates = self.candidate_explorer._extract_candidates_from_results(
                            result, entity_type="fictional character"
                        )

                        # Early rejection if enabled
                        if self.early_rejection_manager:
                            for candidate in candidates:
                                confidence = await self.early_rejection_manager.quick_confidence_check(
                                    candidate, base_constraints
                                )

                                should_reject, reason = (
                                    self.early_rejection_manager.should_reject_early(
                                        confidence
                                    )
                                )
                                if should_reject:
                                    logger.debug(
                                        f"Early rejected {candidate.name}: {reason}"
                                    )
                                    continue

                                if (
                                    confidence.get("positive_confidence", 0)
                                    > 0.6
                                ):
                                    high_confidence_count += 1

                                category_candidates.append(candidate)
                        else:
                            category_candidates.extend(candidates)

                    logger.info(
                        f"{category} batch {i // batch_size + 1}: found {len(category_candidates)} candidates"
                    )

                    # Early stopping check
                    if self.early_rejection_manager:
                        should_continue, stop_reason = (
                            self.early_rejection_manager.should_continue_search(
                                all_candidates + category_candidates,
                                high_confidence_count,
                            )
                        )
                        if not should_continue:
                            logger.info(f"Early stopping: {stop_reason}")
                            break

                all_candidates.extend(category_candidates)
                search_progress += 8  # Distribute remaining progress

                # Stop if we have enough high-confidence candidates
                if high_confidence_count >= 5:
                    logger.info(
                        "Found sufficient high-confidence candidates, stopping search"
                    )
                    break

            logger.info(
                f"Search completed: {len(all_candidates)} total candidates, {high_confidence_count} high-confidence"
            )

            # Phase 7: Constraint checking on remaining candidates
            if progress_callback:
                progress_callback(
                    {
                        "phase": "constraint_evaluation",
                        "progress": 85,
                        "message": f"Evaluating {len(all_candidates)} candidates",
                    }
                )

            if not all_candidates:
                return "No valid candidates found", {
                    "strategy": "llm_driven_modular",
                    "total_searches": total_searches,
                    "candidates_found": 0,
                    "high_confidence_count": 0,
                }

            # Evaluate top candidates (limit to avoid long processing)
            candidates_to_evaluate = all_candidates[:20]  # Top 20 candidates
            evaluated_candidates = []

            for i, candidate in enumerate(candidates_to_evaluate):
                try:
                    result = self.constraint_checker.check_candidate(
                        candidate, base_constraints
                    )

                    candidate.evaluation_results = result.detailed_results
                    candidate.score = result.total_score
                    candidate.should_reject = result.should_reject

                    if not result.should_reject:
                        evaluated_candidates.append(candidate)

                except Exception as e:
                    logger.error(
                        f"Error evaluating candidate {candidate.name}: {e}"
                    )
                    continue

            if not evaluated_candidates:
                return "No valid candidates passed constraint evaluation", {
                    "strategy": "llm_driven_modular",
                    "total_searches": total_searches,
                    "candidates_found": len(all_candidates),
                    "candidates_evaluated": len(candidates_to_evaluate),
                    "high_confidence_count": high_confidence_count,
                }

            # Select best candidate
            evaluated_candidates.sort(key=lambda x: x.score, reverse=True)
            best_candidate = evaluated_candidates[0]

            logger.info(
                f"Best candidate: {best_candidate.name} with score {best_candidate.score:.2%}"
            )

            # Generate final answer
            if progress_callback:
                progress_callback(
                    {
                        "phase": "final_answer",
                        "progress": 95,
                        "message": "Generating final answer",
                    }
                )

            answer = await self._generate_final_answer(
                query, best_candidate, base_constraints
            )

            metadata = {
                "strategy": "llm_driven_modular",
                "constraint_checker": self.constraint_checker_type,
                "exploration_strategy": self.exploration_strategy,
                "early_rejection_enabled": self.early_rejection,
                "total_searches_generated": total_searches,
                "candidates_found": len(all_candidates),
                "candidates_evaluated": len(candidates_to_evaluate),
                "candidates_valid": len(evaluated_candidates),
                "high_confidence_count": high_confidence_count,
                "best_candidate": best_candidate.name,
                "best_score": best_candidate.score,
            }

            return answer, metadata

        except Exception as e:
            logger.error(f"Error in LLM-driven search: {e}")
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")
            return f"Search failed: {str(e)}", {"error": str(e)}

    async def _generate_final_answer(self, query, best_candidate, constraints):
        """Generate comprehensive final answer"""
        constraint_info = "\n".join([f"- {c.description}" for c in constraints])

        evaluation_info = ""
        if hasattr(best_candidate, "evaluation_results"):
            evaluation_info = "\n".join(
                [
                    f"- {result.get('constraint', 'Unknown')}: {result.get('score', 0):.0%}"
                    for result in best_candidate.evaluation_results
                ]
            )

        prompt = f"""Based on the search results, provide a comprehensive answer to: {query}

Best candidate found: {best_candidate.name}
Score: {best_candidate.score:.0%}

Constraints analyzed:
{constraint_info}

Constraint evaluation results:
{evaluation_info}

Evidence summary: {getattr(best_candidate, "summary", "No summary available")}

Provide a clear, factual answer that addresses the original question and explains how the candidate satisfies the constraints."""

        response = await self.model.ainvoke(prompt)
        return response.content

    def _gather_evidence_for_constraint(self, candidate, constraint):
        """Gather evidence for a constraint using actual search"""
        try:
            # Create a focused search query
            query = f"{candidate.name} {constraint.description}"

            # Use the search engine properly
            if hasattr(self.search_engine, "run"):
                results = self.search_engine.run(query)
            else:
                logger.warning("Search engine doesn't have run method")
                return []

            # Handle different result formats
            if isinstance(results, list):
                result_list = results[:3]  # Top 3 results
            elif isinstance(results, dict):
                result_list = results.get("results", [])[:3]  # Top 3 results
            else:
                logger.warning(f"Unknown search result format: {type(results)}")
                return []

            # Extract evidence from search results
            evidence = []
            for result in result_list:
                evidence.append(
                    {
                        "text": result.get("snippet", "")
                        or result.get("content", ""),
                        "source": result.get("url", "search_result"),
                        "confidence": 0.7,
                        "title": result.get("title", ""),
                    }
                )

            return evidence

        except Exception as e:
            logger.error(f"Error gathering evidence: {e}")
            # Fallback to mock evidence
            return [
                {
                    "text": f"Evidence about {candidate.name} regarding {constraint.description}",
                    "source": "mock_result",
                    "confidence": 0.5,
                }
            ]