local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +5 -3
- local_deep_research/web/database/models.py +51 -2
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +51 -61
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +227 -41
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +310 -103
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
- local_deep_research-0.5.0.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,506 @@
|
|
1
|
+
"""
|
2
|
+
Parallel constrained search strategy with progressive constraint relaxation.
|
3
|
+
|
4
|
+
Key improvements:
|
5
|
+
1. Combines multiple constraints in initial searches
|
6
|
+
2. Runs searches in parallel for efficiency
|
7
|
+
3. Progressively loosens constraints if needed
|
8
|
+
4. Compact design to minimize context usage
|
9
|
+
"""
|
10
|
+
|
11
|
+
import concurrent.futures
|
12
|
+
from dataclasses import dataclass
|
13
|
+
from typing import List
|
14
|
+
|
15
|
+
from loguru import logger
|
16
|
+
|
17
|
+
from ..candidates.base_candidate import Candidate
|
18
|
+
from ..constraints.base_constraint import Constraint, ConstraintType
|
19
|
+
from .constrained_search_strategy import ConstrainedSearchStrategy
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass
class SearchCombination:
    """One search to run: a group of constraints plus the query built for them.

    Equality is the field-by-field comparison generated by ``@dataclass``;
    hashing is overridden below so that it depends on ``query`` only.
    """

    # Constraints the query text was derived from.
    constraints: List[Constraint]
    # Query string handed to the search engine.
    query: str
    # Integer tag set by the creating method (strict combinations use 1-3,
    # relaxed ones 10-20, individual fallbacks 30 and up).
    priority: int

    def __hash__(self):
        """Return the hash of the query string."""
        query_text = self.query
        return hash(query_text)
|
32
|
+
|
33
|
+
|
34
|
+
class ParallelConstrainedStrategy(ConstrainedSearchStrategy):
|
35
|
+
"""
|
36
|
+
Enhanced constrained strategy with parallel search and smart constraint combination.
|
37
|
+
"""
|
38
|
+
|
39
|
+
def __init__(
    self,
    *args,
    parallel_workers: int = 100,
    min_results_threshold: int = 10,
    **kwargs,
):
    """Set up the strategy on top of the parent constrained strategy.

    Args:
        *args: Forwarded unchanged to the parent ``__init__``.
        parallel_workers: ``max_workers`` used for the thread pool in
            ``_parallel_search``.
        min_results_threshold: Number of unique candidates at which
            ``_progressive_constraint_search`` stops relaxing.
        **kwargs: Forwarded unchanged to the parent ``__init__``.
    """
    super().__init__(*args, **kwargs)

    self.min_results_threshold = min_results_threshold
    self.parallel_workers = parallel_workers

    # Substrings that mark a constraint as "hard" (must be satisfied) when
    # they occur in the lower-cased constraint value.
    timing_keywords = [
        "aired between",
        "aired during",
        "air date",
        "broadcast",
        "episodes",
        "season",
        "year",
        "decade",
    ]
    gender_keywords = ["male", "female", "gender"]
    medium_keywords = ["tv show", "series", "program"]
    self.hard_constraint_keywords = (
        timing_keywords + gender_keywords + medium_keywords
    )
|
67
|
+
|
68
|
+
def _classify_constraints(self):
    """Split ``self.constraint_ranking`` into hard and soft constraints.

    A constraint is hard when its type is TEMPORAL or STATISTIC, or when its
    lower-cased value contains any entry of ``self.hard_constraint_keywords``.
    Results are stored on ``self.hard_constraints`` and
    ``self.soft_constraints`` (both are reset on every call).
    """
    self.hard_constraints = []
    self.soft_constraints = []
    hard_types = (ConstraintType.TEMPORAL, ConstraintType.STATISTIC)

    for constraint in self.constraint_ranking:
        hard_by_type = constraint.type in hard_types
        lowered_value = constraint.value.lower()
        hard_by_keyword = any(
            keyword in lowered_value
            for keyword in self.hard_constraint_keywords
        )

        if hard_by_type or hard_by_keyword:
            target = self.hard_constraints
        else:
            target = self.soft_constraints
        target.append(constraint)

    logger.info(
        f"Classified {len(self.hard_constraints)} hard constraints and {len(self.soft_constraints)} soft constraints"
    )
|
99
|
+
|
100
|
+
def _progressive_constraint_search(self):
    """Search in up to three passes, loosening the constraints each time.

    Pass 1 uses strict combinations, pass 2 relaxed ones, pass 3 individual
    constraints. Every pass runs its combinations through
    ``_parallel_search`` and accumulates the candidates. The loop stops as
    soon as the deduplicated candidate count reaches
    ``self.min_results_threshold``.

    The outcome is stored on ``self.candidates`` (cut to
    ``self.candidate_limit``) and ``self.stage_candidates``. When no
    constraint ranking is available the parent implementation is called
    and its result returned instead.
    """
    # Nothing to combine without a ranking: defer to the parent class.
    if not getattr(self, "constraint_ranking", None):
        logger.error(
            "No constraint ranking available - calling parent method"
        )
        return super()._progressive_constraint_search()

    # The detected entity type is later read by _build_query.
    self.entity_type = self._detect_entity_type()
    logger.info(f"Detected entity type: {self.entity_type}")

    logger.info(
        f"Starting parallel constraint search with {len(self.constraint_ranking)} constraints"
    )
    logger.info(
        f"Constraint ranking: {[c.value for c in self.constraint_ranking[:5]]}"
    )

    # One entry per pass, ordered from strictest to loosest.
    phases = (
        ("strict", self._create_strict_combinations),
        ("relaxed", self._create_relaxed_combinations),
        ("individual", self._create_individual_combinations),
    )
    gathered = []

    for search_iterations, (strictness, make_combinations) in enumerate(
        phases, start=1
    ):
        combinations = make_combinations()

        logger.info(
            f"Iteration {search_iterations}: {strictness} mode with {len(combinations)} combinations"
        )
        for position, combo in enumerate(combinations, start=1):
            logger.info(
                f"  Combination {position}: query='{combo.query[:60]}...', constraints={len(combo.constraints)}"
            )

        if self.progress_callback:
            self.progress_callback(
                f"Search iteration {search_iterations}: {strictness} mode ({len(combinations)} combinations)",
                15 + (search_iterations * 25),
                {
                    "phase": "parallel_search",
                    "iteration": search_iterations,
                    "combinations": len(combinations),
                    "mode": strictness,
                },
            )

        # Run this pass and merge its results with earlier passes.
        gathered.extend(self._parallel_search(combinations))
        unique_candidates = self._deduplicate_candidates(gathered)

        enough = len(unique_candidates) >= self.min_results_threshold
        if enough:
            if self.progress_callback:
                self.progress_callback(
                    f"Found {len(unique_candidates)} candidates - stopping search",
                    90,
                    {
                        "phase": "search_complete",
                        "candidates": len(unique_candidates),
                    },
                )
            break

        if self.progress_callback:
            self.progress_callback(
                f"Found {len(unique_candidates)} candidates - continuing search",
                None,
                {
                    "phase": "search_continue",
                    "candidates": len(unique_candidates),
                },
            )

    self.candidates = unique_candidates[: self.candidate_limit]

    # Stage tracking kept for parent class compatibility: the final
    # results are recorded under stage 0.
    self.stage_candidates = {0: self.candidates}
|
200
|
+
|
201
|
+
def _create_strict_combinations(self) -> List[SearchCombination]:
    """Create the initial, strict constraint combinations.

    Three strategies are tried in order, each contributing at most one
    combination:

    1. the two top-ranked constraints (priority 1);
    2. the first EVENT/TEMPORAL constraint plus the first PROPERTY
       constraint (priority 2);
    3. the first STATISTIC constraint plus up to two PROPERTY constraints
       (priority 3).

    A strategy whose query string is identical to one already produced is
    skipped, so the same search is not executed twice.

    Returns:
        At most five ``SearchCombination`` objects, in strategy order.
    """
    ranking = self.constraint_ranking
    combinations = []
    seen_queries = set()

    def add(constraints, priority):
        # Build the query once and drop exact duplicates: a repeated query
        # would only repeat the search and the LLM extraction.
        query = self._build_query(constraints)
        if query in seen_queries:
            return
        seen_queries.add(query)
        combinations.append(SearchCombination(constraints, query, priority))

    # Strategy 1: combine the most restrictive constraints.
    if len(ranking) >= 2:
        add(ranking[:2], 1)

    # Strategy 2: combine temporal + property constraints.
    temporal = [
        c
        for c in ranking
        if c.type in (ConstraintType.EVENT, ConstraintType.TEMPORAL)
    ]
    properties = [c for c in ranking if c.type == ConstraintType.PROPERTY]
    if temporal and properties:
        add(temporal[:1] + properties[:1], 2)

    # Strategy 3: combine statistic + property constraints.
    stats = [c for c in ranking if c.type == ConstraintType.STATISTIC]
    if stats and properties:
        add(stats[:1] + properties[:2], 3)

    return combinations[:5]  # Limit to 5 combinations
|
247
|
+
|
248
|
+
def _create_relaxed_combinations(self) -> List[SearchCombination]:
    """Build the second-pass combinations.

    Produces one single-constraint combination for each of the three
    top-ranked constraints (priorities 10, 11, 12) and, when more than
    three constraints exist, one combination joining ranks 4-6
    (priority 20).
    """
    ranking = self.constraint_ranking

    # Each of the strongest constraints searched on its own.
    relaxed = [
        SearchCombination([single], self._build_query([single]), position + 10)
        for position, single in enumerate(ranking[:3])
    ]

    # The weaker constraints are searched together in one query.
    if len(ranking) > 3:
        weaker = ranking[3:6]
        relaxed.append(SearchCombination(weaker, self._build_query(weaker), 20))

    return relaxed
|
264
|
+
|
265
|
+
def _create_individual_combinations(self) -> List[SearchCombination]:
    """Build the fallback combinations: one constraint per search.

    For each of the five top-ranked constraints, the first two query
    variations from ``_generate_query_variations`` become separate
    combinations. Priority is ``rank * 10 + variation + 30`` with both
    indices starting at zero.
    """
    fallback = []

    for rank, constraint in enumerate(self.constraint_ranking[:5]):
        variations = self._generate_query_variations(constraint)
        # Only the first two variations of each constraint are searched.
        fallback.extend(
            SearchCombination([constraint], query, rank * 10 + variant + 30)
            for variant, query in enumerate(variations[:2])
        )

    return fallback
|
280
|
+
|
281
|
+
def _build_query(self, constraints: List[Constraint]) -> str:
|
282
|
+
"""Build an optimized query from constraints."""
|
283
|
+
terms = []
|
284
|
+
|
285
|
+
# Use entity type to add context
|
286
|
+
entity_type = getattr(self, "entity_type", None)
|
287
|
+
if entity_type and entity_type != "unknown entity":
|
288
|
+
# Add entity type as a search term
|
289
|
+
terms.append(f'"{entity_type}"')
|
290
|
+
|
291
|
+
for c in constraints:
|
292
|
+
# Add quotes for multi-word values
|
293
|
+
value = c.value
|
294
|
+
if " " in value and not value.startswith('"'):
|
295
|
+
value = f'"{value}"'
|
296
|
+
terms.append(value)
|
297
|
+
|
298
|
+
# Join with AND for strict matching
|
299
|
+
return " AND ".join(terms)
|
300
|
+
|
301
|
+
def _generate_query_variations(self, constraint: Constraint) -> List[str]:
    """Return up to three query strings for one constraint.

    The first entry is always the raw constraint value. STATISTIC
    constraints add "list/complete/all <value>" phrasings and PROPERTY
    constraints add "with/featuring/known for <value>". Other types get no
    extra phrasings. The result is cut to three entries.
    """
    base = constraint.value
    kind = constraint.type

    # Pick the phrasing prefixes that suit the constraint type.
    if kind == ConstraintType.STATISTIC:
        prefixes = ("list", "complete", "all")
    elif kind == ConstraintType.PROPERTY:
        prefixes = ("with", "featuring", "known for")
    else:
        prefixes = ()

    variations = [base] + [f"{prefix} {base}" for prefix in prefixes]
    return variations[:3]  # Limit variations
|
317
|
+
|
318
|
+
def _parallel_search(
|
319
|
+
self, combinations: List[SearchCombination]
|
320
|
+
) -> List[Candidate]:
|
321
|
+
"""Execute searches in parallel."""
|
322
|
+
all_candidates = []
|
323
|
+
|
324
|
+
with concurrent.futures.ThreadPoolExecutor(
|
325
|
+
max_workers=self.parallel_workers
|
326
|
+
) as executor:
|
327
|
+
# Submit all searches
|
328
|
+
future_to_combo = {
|
329
|
+
executor.submit(self._execute_combination_search, combo): combo
|
330
|
+
for combo in combinations
|
331
|
+
}
|
332
|
+
|
333
|
+
# Process results as they complete
|
334
|
+
for i, future in enumerate(
|
335
|
+
concurrent.futures.as_completed(future_to_combo)
|
336
|
+
):
|
337
|
+
combo = future_to_combo[future]
|
338
|
+
try:
|
339
|
+
candidates = future.result()
|
340
|
+
all_candidates.extend(candidates)
|
341
|
+
|
342
|
+
if self.progress_callback:
|
343
|
+
self.progress_callback(
|
344
|
+
f"Completed search {i + 1}/{len(combinations)}: {len(candidates)} results",
|
345
|
+
None,
|
346
|
+
{
|
347
|
+
"phase": "parallel_result",
|
348
|
+
"query": combo.query[:50],
|
349
|
+
"candidates": len(candidates),
|
350
|
+
"total_so_far": len(all_candidates),
|
351
|
+
},
|
352
|
+
)
|
353
|
+
except Exception as e:
|
354
|
+
logger.error(f"Search failed for {combo.query}: {e}")
|
355
|
+
|
356
|
+
return all_candidates
|
357
|
+
|
358
|
+
def _execute_combination_search(
    self, combination: SearchCombination
) -> List[Candidate]:
    """Run one combination's query and extract candidates from the result.

    The query is executed through ``self._execute_search``. When the
    returned ``"current_knowledge"`` text is longer than 50 characters, it
    is passed to ``self._extract_relevant_candidates`` together with the
    combination's first constraint (or ``None`` when it has none).

    Args:
        combination: The search to execute.

    Returns:
        The extracted candidates, or an empty list when the search returned
        too little content or any step raised. Failures are logged, not
        propagated, so one bad search cannot abort the batch.
    """
    try:
        results = self._execute_search(combination.query)
        content = results.get("current_knowledge", "")

        logger.info(
            f"Search '{combination.query[:50]}...' returned {len(content)} chars of content"
        )

        candidates = []
        if content and len(content) > 50:
            # Always use LLM extraction for accuracy.
            primary = (
                combination.constraints[0]
                if combination.constraints
                else None
            )
            candidates.extend(
                self._extract_relevant_candidates(
                    {"current_knowledge": content}, primary
                )
            )

        logger.info(
            f"Search '{combination.query[:30]}' found {len(candidates)} candidates"
        )
        return candidates

    except Exception as e:
        # loguru ignores the stdlib ``exc_info=True`` flag: no traceback is
        # attached, and the stray keyword makes loguru str.format() the
        # message, which can itself raise when the error text contains
        # braces. ``logger.exception`` records the traceback without
        # re-formatting the message.
        logger.exception(f"Error in combination search: {e}")
        return []
|
391
|
+
|
392
|
+
def _quick_extract_candidates(
|
393
|
+
self, content: str, constraints: List[Constraint]
|
394
|
+
) -> List[Candidate]:
|
395
|
+
"""Extract candidates using LLM with entity type awareness."""
|
396
|
+
# Use the detected entity type if available
|
397
|
+
entity_type = getattr(self, "entity_type", "entity")
|
398
|
+
|
399
|
+
extraction_prompt = f"""
|
400
|
+
From the following search result, extract {entity_type} names that might match the given constraints.
|
401
|
+
|
402
|
+
Search result:
|
403
|
+
{content}
|
404
|
+
|
405
|
+
Constraints to consider:
|
406
|
+
{chr(10).join(f"- {c.value}" for c in constraints)}
|
407
|
+
|
408
|
+
Important:
|
409
|
+
- Extract ONLY {entity_type} names
|
410
|
+
- Do NOT include other types of entities
|
411
|
+
- Focus on entities that could potentially match the constraints
|
412
|
+
|
413
|
+
Return the {entity_type} names, one per line.
|
414
|
+
"""
|
415
|
+
|
416
|
+
try:
|
417
|
+
response = self.model.invoke(extraction_prompt).content
|
418
|
+
candidates = []
|
419
|
+
for line in response.split("\n"):
|
420
|
+
name = line.strip()
|
421
|
+
if name and len(name) > 2:
|
422
|
+
candidates.append(Candidate(name=name))
|
423
|
+
return candidates[:15]
|
424
|
+
except Exception as e:
|
425
|
+
logger.error(f"Entity extraction failed: {e}")
|
426
|
+
return []
|
427
|
+
|
428
|
+
def _validate_hard_constraints(
    self, candidates: List[Candidate]
) -> List[Candidate]:
    """Filter out candidates that the LLM says fail the hard constraints.

    Args:
        candidates: Candidates to check. Only the first 20 are sent to
            the model; any beyond that are never evaluated and are
            therefore not part of the validated result.

    Returns:
        ``candidates`` unchanged when there are no hard constraints or
        no candidates. Otherwise the evaluated candidates whose name the
        model echoed back. If the model call or response parsing raises,
        the first 10 candidates are returned unvalidated.
    """
    # Function-scope import so this block does not depend on the
    # module's import section.
    import re

    if not self.hard_constraints or not candidates:
        return candidates

    entity_type = getattr(self, "entity_type", "entity")

    # Only this slice is shown to the model, so only this slice can pass.
    evaluated = candidates[:20]
    constraint_lines = "\n".join(
        f"- {c.value}" for c in self.hard_constraints
    )
    candidate_lines = "\n".join(f"- {c.name}" for c in evaluated)

    validation_prompt = f"""
Validate {entity_type} candidates against hard constraints.

Hard constraints that MUST be satisfied:
{constraint_lines}

{entity_type} candidates to evaluate:
{candidate_lines}

Return ONLY the {entity_type} names that satisfy ALL hard constraints, one per line.
Reject any candidates that:
1. Are not actually a {entity_type}
2. Do not satisfy ALL the hard constraints listed above

Be strict - if there's doubt about a constraint being satisfied, reject the candidate."""

    # The prompt itself presents names as "- name", and models often
    # echo that bullet (or number the lines). Compare on a normalised
    # key so a formatting difference does not reject a valid candidate.
    list_marker = re.compile(r"^(?:[-*\u2022]+|\d+[.)])\s+")

    def match_key(text: str) -> str:
        return list_marker.sub("", text.strip()).strip().casefold()

    try:
        response = self.model.invoke(validation_prompt).content
        valid_keys = {
            match_key(line) for line in response.split("\n") if line.strip()
        }
        valid_keys.discard("")

        # Keep only candidates that passed validation
        filtered = [c for c in evaluated if match_key(c.name) in valid_keys]

        logger.info(
            f"Hard constraint validation: {len(candidates)} -> {len(filtered)} candidates"
        )
        return filtered

    except Exception as e:
        logger.error(f"Hard constraint validation failed: {e}", exc_info=True)
        return candidates[:10]  # Return top candidates if validation fails
|
470
|
+
|
471
|
+
def _detect_entity_type(self) -> str:
    """Use the LLM to name the type of entity the constraints describe.

    The prompt lists the ``value`` of every item in
    ``self.constraint_ranking`` and asks for a single entity type.

    Returns:
        The model's answer with surrounding whitespace removed, or the
        string "unknown entity" when the model call raises or the answer
        is empty.
    """
    # Build context from constraints
    constraint_text = "\n".join(
        f"- {c.value}" for c in self.constraint_ranking
    )

    prompt = f"""
Analyze these search constraints and determine what type of entity is being searched for:

Constraints:
{constraint_text}

What is the primary entity type being searched for? Be specific.

Examples of entity types (but you can choose any appropriate type):
- fictional character
- TV show
- movie
- actor/actress
- historical figure
- company
- product
- location
- event

Respond with just the entity type.
"""

    try:
        entity_type = self.model.invoke(prompt).content.strip()
        if not entity_type:
            # An empty type would leave a blank where other prompts in
            # this file interpolate {entity_type}; use the same fallback
            # as the failure path.
            logger.warning("LLM returned an empty entity type")
            return "unknown entity"
        logger.info(f"LLM determined entity type: {entity_type}")
        return entity_type
    except Exception as e:
        logger.error(f"Failed to detect entity type: {e}", exc_info=True)
        return "unknown entity"
|
@@ -123,7 +123,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
123
123
|
try:
|
124
124
|
# Run each iteration
|
125
125
|
for iteration in range(1, iterations_to_run + 1):
|
126
|
-
iteration_progress_base = 5 + (iteration - 1) * (
|
126
|
+
iteration_progress_base = 5 + (iteration - 1) * (
|
127
|
+
70 / iterations_to_run
|
128
|
+
)
|
127
129
|
|
128
130
|
self._update_progress(
|
129
131
|
f"Starting iteration {iteration}/{iterations_to_run}",
|
@@ -175,7 +177,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
175
177
|
|
176
178
|
# Generate follow-up questions based on accumulated knowledge if iterations > 2
|
177
179
|
use_knowledge = iterations_to_run > 2
|
178
|
-
knowledge_for_questions =
|
180
|
+
knowledge_for_questions = (
|
181
|
+
current_knowledge if use_knowledge else ""
|
182
|
+
)
|
179
183
|
context = f"""Current Knowledge: {knowledge_for_questions}
|
180
184
|
Iteration: {iteration} of {iterations_to_run}"""
|
181
185
|
|
@@ -219,7 +223,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
219
223
|
max_workers=len(all_questions)
|
220
224
|
) as executor:
|
221
225
|
futures = [
|
222
|
-
executor.submit(search_question, q)
|
226
|
+
executor.submit(search_question, q)
|
227
|
+
for q in all_questions
|
223
228
|
]
|
224
229
|
iteration_search_dict = {}
|
225
230
|
iteration_search_results = []
|
@@ -234,7 +239,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
234
239
|
iteration_search_dict[question] = search_results
|
235
240
|
|
236
241
|
self._update_progress(
|
237
|
-
f"Completed search {i + 1} of {len(all_questions)}: {question[:
|
242
|
+
f"Completed search {i + 1} of {len(all_questions)}: {question[:500]}",
|
238
243
|
iteration_progress_base
|
239
244
|
+ 10
|
240
245
|
+ ((i + 1) / len(all_questions) * 30),
|
@@ -261,7 +266,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
261
266
|
self._update_progress(
|
262
267
|
f"Filtering search results for iteration {iteration}",
|
263
268
|
iteration_progress_base + 45,
|
264
|
-
{
|
269
|
+
{
|
270
|
+
"phase": "cross_engine_filtering",
|
271
|
+
"iteration": iteration,
|
272
|
+
},
|
265
273
|
)
|
266
274
|
|
267
275
|
# Get the current link count (for indexing)
|
@@ -276,7 +284,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
276
284
|
start_index=existing_link_count, # Start indexing after existing links
|
277
285
|
)
|
278
286
|
|
279
|
-
links = extract_links_from_search_results(
|
287
|
+
links = extract_links_from_search_results(
|
288
|
+
filtered_search_results
|
289
|
+
)
|
280
290
|
self.all_links_of_system.extend(links)
|
281
291
|
|
282
292
|
self._update_progress(
|
@@ -293,7 +303,9 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
293
303
|
iteration_search_results = filtered_search_results
|
294
304
|
else:
|
295
305
|
# Just extract links without filtering
|
296
|
-
links = extract_links_from_search_results(
|
306
|
+
links = extract_links_from_search_results(
|
307
|
+
iteration_search_results
|
308
|
+
)
|
297
309
|
self.all_links_of_system.extend(links)
|
298
310
|
|
299
311
|
# Add to all search results
|
@@ -303,11 +315,13 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
303
315
|
if self.include_text_content and iteration_search_results:
|
304
316
|
# For iteration > 1 with knowledge accumulation, use follow-up analysis
|
305
317
|
if iteration > 1 and iterations_to_run > 2:
|
306
|
-
citation_result =
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
318
|
+
citation_result = (
|
319
|
+
self.citation_handler.analyze_followup(
|
320
|
+
query,
|
321
|
+
iteration_search_results,
|
322
|
+
current_knowledge,
|
323
|
+
len(self.all_links_of_system) - len(links),
|
324
|
+
)
|
311
325
|
)
|
312
326
|
else:
|
313
327
|
# For first iteration or without knowledge accumulation, use initial analysis
|
@@ -358,8 +372,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
358
372
|
if self.include_text_content:
|
359
373
|
# Generate a final synthesis from all search results
|
360
374
|
if iterations_to_run > 1:
|
361
|
-
final_citation_result =
|
362
|
-
|
375
|
+
final_citation_result = (
|
376
|
+
self.citation_handler.analyze_initial(
|
377
|
+
query, all_search_results
|
378
|
+
)
|
363
379
|
)
|
364
380
|
# Add null check for final_citation_result
|
365
381
|
if final_citation_result:
|
@@ -402,8 +418,10 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
402
418
|
)
|
403
419
|
|
404
420
|
# Format findings
|
405
|
-
formatted_findings =
|
406
|
-
|
421
|
+
formatted_findings = (
|
422
|
+
self.findings_repository.format_findings_to_text(
|
423
|
+
findings, synthesized_content
|
424
|
+
)
|
407
425
|
)
|
408
426
|
|
409
427
|
except Exception as e:
|